cesear64 committed on
Commit
fb3ee41
·
verified ·
1 Parent(s): a6339d4

Training in progress, step 25395, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64a8c5c9c45c847ee9749e308f164cca0b62ea7a0eadbd62743209b62d4936c
3
  size 1230207488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:496ff499aedb0ce97ed3a66cc050ade4c7b18f28ddfc368d0252dcde6c6caaff
3
  size 1230207488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9625bd0c592b0f12935bb339b14cb4f020840a853ccee3f8bb7b7e44eb6fe2fb
3
  size 2460722266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a226c815c64a6223d2e29d0b7924537ea47ca35d20305489fac23083648d163
3
  size 2460722266
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:977dcbce44dc112e81ce232ba292a2987f23c3109ccace9ad089d6fe42b17db9
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a11e2e02a23ac5bb22f239d204a60875ed400da34d9862f2092a8bbfa558025
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c9afd38371c0ea8a30afeea2a83b5feda14966e61c68d2e39fa59a47c408813
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8584a03231a936447aeb179c66bfa2afda8aa94439b68eb56489d3fd90d88397
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 23000,
3
  "best_metric": 1.7628060579299927,
4
  "best_model_checkpoint": "/home/ubuntu/sangoai-training/output/nllb-sango-finetuned/checkpoint-23000",
5
- "epoch": 2.9534191843792623,
6
  "eval_steps": 500,
7
- "global_step": 25000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3908,6 +3908,63 @@
3908
  "eval_samples_per_second": 484.683,
3909
  "eval_steps_per_second": 40.396,
3910
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3911
  }
3912
  ],
3913
  "logging_steps": 50,
@@ -3922,12 +3979,12 @@
3922
  "should_evaluate": false,
3923
  "should_log": false,
3924
  "should_save": true,
3925
- "should_training_stop": false
3926
  },
3927
  "attributes": {}
3928
  }
3929
  },
3930
- "total_flos": 2.6144185253894554e+17,
3931
  "train_batch_size": 12,
3932
  "trial_name": null,
3933
  "trial_params": null
 
2
  "best_global_step": 23000,
3
  "best_metric": 1.7628060579299927,
4
  "best_model_checkpoint": "/home/ubuntu/sangoai-training/output/nllb-sango-finetuned/checkpoint-23000",
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 25395,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3908
  "eval_samples_per_second": 484.683,
3909
  "eval_steps_per_second": 40.396,
3910
  "step": 25000
3911
+ },
3912
+ {
3913
+ "epoch": 2.9593263664386136,
3914
+ "grad_norm": 8.0,
3915
+ "learning_rate": 6.949186583651336e-07,
3916
+ "loss": 20.5435107421875,
3917
+ "step": 25050
3918
+ },
3919
+ {
3920
+ "epoch": 2.965233548497965,
3921
+ "grad_norm": 8.5,
3922
+ "learning_rate": 5.944968869250854e-07,
3923
+ "loss": 20.75857666015625,
3924
+ "step": 25100
3925
+ },
3926
+ {
3927
+ "epoch": 2.9711407305573156,
3928
+ "grad_norm": 7.96875,
3929
+ "learning_rate": 4.940751154850371e-07,
3930
+ "loss": 20.8127978515625,
3931
+ "step": 25150
3932
+ },
3933
+ {
3934
+ "epoch": 2.977047912616667,
3935
+ "grad_norm": 8.8125,
3936
+ "learning_rate": 3.936533440449889e-07,
3937
+ "loss": 20.79349853515625,
3938
+ "step": 25200
3939
+ },
3940
+ {
3941
+ "epoch": 2.9829550946760177,
3942
+ "grad_norm": 8.3125,
3943
+ "learning_rate": 2.932315726049408e-07,
3944
+ "loss": 20.7541015625,
3945
+ "step": 25250
3946
+ },
3947
+ {
3948
+ "epoch": 2.988862276735369,
3949
+ "grad_norm": 8.9375,
3950
+ "learning_rate": 1.9280980116489258e-07,
3951
+ "loss": 20.79868408203125,
3952
+ "step": 25300
3953
+ },
3954
+ {
3955
+ "epoch": 2.9947694587947202,
3956
+ "grad_norm": 8.1875,
3957
+ "learning_rate": 9.238802972484435e-08,
3958
+ "loss": 20.6675341796875,
3959
+ "step": 25350
3960
+ },
3961
+ {
3962
+ "epoch": 3.0,
3963
+ "eval_loss": 1.7628397941589355,
3964
+ "eval_runtime": 121.1922,
3965
+ "eval_samples_per_second": 485.205,
3966
+ "eval_steps_per_second": 40.44,
3967
+ "step": 25395
3968
  }
3969
  ],
3970
  "logging_steps": 50,
 
3979
  "should_evaluate": false,
3980
  "should_log": false,
3981
  "should_save": true,
3982
+ "should_training_stop": true
3983
  },
3984
  "attributes": {}
3985
  }
3986
  },
3987
+ "total_flos": 2.655110146920284e+17,
3988
  "train_batch_size": 12,
3989
  "trial_name": null,
3990
  "trial_params": null