jmmr-8282 committed on
Commit
eca183d
·
verified ·
1 Parent(s): 63dbd14

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef5bbbcd0d72dd9a98500b082a9cadfb5bd536f06c837fbfd076c7a31ae25121
3
  size 437958624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb7b9ee5d5a289ecee12276ea139b3446e5d2b0749738e76ee12ab4e6815011a
3
  size 437958624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cee3f88d13eca097ad988e3946d4e5e2b78e5031ed9c27713f2ab8e6a7a5182f
3
  size 4741923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52c9c737c0616d379ccd0e217fd4da602c37c20f09befad90a3b45ec2414448
3
  size 4741923
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d6229e433aac8ba00399b2d3e1739a0ffc561a03e67c695fffe4e9c152c867d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb370ab700093b2f074f90e23d7f00c5453a68d643bf15d029b1e8596742be3
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c344f6013d13222fd8d75e91004cbc1f0e87864b748261b25eb8acb82e4035
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9343bdb5d2121fead6995ccf81e3591c0ddf27b60c835e5527a4bd7a66af257
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28fa8dc868b06424b5dda3c79e35f00a8c94368a97dd3ed628f117e949101659
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6e8e008c9fcdd6b519f5ca1b89689630a9e27532a04ab80fd57e825818c9a4a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 1401,
3
- "best_metric": 0.6182085871696472,
4
- "best_model_checkpoint": "./bert-email/checkpoint-1401",
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 1401,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -46,6 +46,21 @@
46
  "eval_samples_per_second": 55.737,
47
  "eval_steps_per_second": 1.748,
48
  "step": 1401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  }
50
  ],
51
  "logging_steps": 500,
@@ -65,7 +80,7 @@
65
  "attributes": {}
66
  }
67
  },
68
- "total_flos": 1.17768508379136e+16,
69
  "train_batch_size": 32,
70
  "trial_name": null,
71
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1868,
3
+ "best_metric": 0.5720958113670349,
4
+ "best_model_checkpoint": "./bert-email/checkpoint-1868",
5
+ "epoch": 4.0,
6
  "eval_steps": 500,
7
+ "global_step": 1868,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
46
  "eval_samples_per_second": 55.737,
47
  "eval_steps_per_second": 1.748,
48
  "step": 1401
49
+ },
50
+ {
51
+ "epoch": 3.2119914346895073,
52
+ "grad_norm": 2.9384329319000244,
53
+ "learning_rate": 3.580299785867238e-06,
54
+ "loss": 0.6724396362304688,
55
+ "step": 1500
56
+ },
57
+ {
58
+ "epoch": 4.0,
59
+ "eval_loss": 0.5720958113670349,
60
+ "eval_runtime": 66.8887,
61
+ "eval_samples_per_second": 55.764,
62
+ "eval_steps_per_second": 1.749,
63
+ "step": 1868
64
  }
65
  ],
66
  "logging_steps": 500,
 
80
  "attributes": {}
81
  }
82
  },
83
+ "total_flos": 1.57024677838848e+16,
84
  "train_batch_size": 32,
85
  "trial_name": null,
86
  "trial_params": null