Training in progress, epoch 3, checkpoint

Files changed (6) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7606954d9b149ad6da4ba7287d3eca4aab7f96e7e0c05af1ef72c6718c0665d1
 size 437961700

 version https://git-lfs.github.com/spec/v1
+oid sha256:4de66461b285186046f05cf8c9930e5ada0e3020ca4762399d59313044475ff5
 size 437961700

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8efe5909e5060fea686e30328d0c0f21c5eb98f603f6b70922a4788ae72129f7
 size 118180619

 version https://git-lfs.github.com/spec/v1
+oid sha256:84ecf8f3a21cd9c5465c5646b40ff679305ca73e42a836475588610795c8282d
 size 118180619

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96231dbbd61b433bb6d6121f51c259cf0e1ad0a076de7ae1c77f190e1ce69384
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:30cd7f2e948438b68553097ba0a0eb4a894fda376f31a677f8238066dd201d3d
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f4fed85ea0917cc2d05bf8af0c7463301adc898027ff20864aac2aa94a9e56b
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa8f9c79c40c52add04fc8f64e1f918436a34eb087c6b1500ed8db075d6f0e89
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8d5acdd64798810c4bd3cf19ecd254b731043662f14025cd0a76112a9a3e21b
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:47d461e0a24e49e017f7b0d3a73c58b356814d52598c84ba4f408c3edc8c3527
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 392,
   "best_metric": 1.9849951267242432,
   "best_model_checkpoint": "./bert-resume/checkpoint-392",
-  "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 392,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24,6 +24,21 @@
       "eval_samples_per_second": 53.842,
       "eval_steps_per_second": 1.684,
       "step": 392
     }
   ],
   "logging_steps": 500,
@@ -43,7 +58,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3284181680080896.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 392,
   "best_metric": 1.9849951267242432,
   "best_model_checkpoint": "./bert-resume/checkpoint-392",
+  "epoch": 3.0,
   "eval_steps": 500,
+  "global_step": 588,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 53.842,
       "eval_steps_per_second": 1.684,
       "step": 392
+    },
+    {
+      "epoch": 2.5510204081632653,
+      "grad_norm": 7.228590965270996,
+      "learning_rate": 1.7454081632653063e-05,
+      "loss": 1.8102947998046874,
+      "step": 500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 2.102543830871582,
+      "eval_runtime": 33.1614,
+      "eval_samples_per_second": 53.044,
+      "eval_steps_per_second": 1.659,
+      "step": 588
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 4926272520121344.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null