baby-dev committed on
Commit
fd8ed3d
·
verified ·
1 Parent(s): f1686d0

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "v_proj",
24
- "up_proj",
25
- "k_proj",
26
- "gate_proj",
27
  "q_proj",
 
 
 
28
  "o_proj",
29
- "down_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
23
  "q_proj",
24
+ "k_proj",
25
+ "v_proj",
26
+ "down_proj",
27
  "o_proj",
28
+ "gate_proj",
29
+ "up_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8a26400f02bacfd74ee8b49a7c90fadb1a72080eb2518955295d9693a694224
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22706b121f81bb296584c212eb888c0d72ecebc2f8542002ac3cd8e0a117af64
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7a5f82506037604163acd2fee5f625ca7163cb766404a233c449f458eb1b8db
3
  size 35778900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74596b69fb137171bbd17a48936c71902b0da20b069cc7ab29696a7dbb0158c
3
  size 35778900
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a42bd36c0f9a8fadfb871d460063ae3fa111f4dddc808f3268062fb12d6edec2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08c0df86b91ccc3d76337c061c328c32352c965a142af309382269ddc79a40b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d040e6e1204973e0da14b16b2c6719d0f9bf30c7c451a2db740a44e5309c31c3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcf348532606e290f3cddebc7f00005cce6f05bb1cced2bad1d4a15482755657
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,92 +1,85 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0899182561307903,
5
  "eval_steps": 134,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0027247956403269754,
13
- "eval_loss": 3.2025532722473145,
14
- "eval_runtime": 8.7865,
15
- "eval_samples_per_second": 35.168,
16
- "eval_steps_per_second": 8.877,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.1362397820163488,
21
- "grad_norm": 0.8217579126358032,
22
  "learning_rate": 0.0004,
23
- "loss": 1.3909,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2724795640326976,
28
- "grad_norm": 0.5776450634002686,
29
  "learning_rate": 0.0004,
30
- "loss": 0.8498,
31
  "step": 100
32
  },
33
  {
34
  "epoch": 0.3651226158038147,
35
- "eval_loss": 0.6123294234275818,
36
- "eval_runtime": 8.6305,
37
- "eval_samples_per_second": 35.803,
38
- "eval_steps_per_second": 9.038,
39
  "step": 134
40
  },
41
  {
42
  "epoch": 0.4087193460490463,
43
- "grad_norm": 0.7184346318244934,
44
  "learning_rate": 0.0004,
45
- "loss": 0.6462,
46
  "step": 150
47
  },
48
  {
49
  "epoch": 0.5449591280653951,
50
- "grad_norm": 0.9781283736228943,
51
  "learning_rate": 0.0004,
52
- "loss": 0.5408,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 0.6811989100817438,
57
- "grad_norm": 0.5606921315193176,
58
  "learning_rate": 0.0004,
59
- "loss": 0.4235,
60
  "step": 250
61
  },
62
  {
63
  "epoch": 0.7302452316076294,
64
- "eval_loss": 0.3998368978500366,
65
- "eval_runtime": 4.2414,
66
- "eval_samples_per_second": 72.854,
67
- "eval_steps_per_second": 18.39,
68
  "step": 268
69
  },
70
  {
71
  "epoch": 0.8174386920980926,
72
- "grad_norm": 0.31943920254707336,
73
  "learning_rate": 0.0004,
74
- "loss": 0.366,
75
  "step": 300
76
  },
77
  {
78
  "epoch": 0.9536784741144414,
79
- "grad_norm": 0.44990459084510803,
80
  "learning_rate": 0.0004,
81
- "loss": 0.295,
82
  "step": 350
83
- },
84
- {
85
- "epoch": 1.0899182561307903,
86
- "grad_norm": 0.21941453218460083,
87
- "learning_rate": 0.0004,
88
- "loss": 0.2775,
89
- "step": 400
90
  }
91
  ],
92
  "logging_steps": 50,
@@ -101,12 +94,12 @@
101
  "should_evaluate": false,
102
  "should_log": false,
103
  "should_save": true,
104
- "should_training_stop": true
105
  },
106
  "attributes": {}
107
  }
108
  },
109
- "total_flos": 1.297020667723776e+16,
110
  "train_batch_size": 4,
111
  "trial_name": null,
112
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 134,
6
+ "global_step": 367,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0027247956403269754,
13
+ "eval_loss": 3.203904390335083,
14
+ "eval_runtime": 4.2695,
15
+ "eval_samples_per_second": 72.375,
16
+ "eval_steps_per_second": 18.269,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.1362397820163488,
21
+ "grad_norm": 0.7537987232208252,
22
  "learning_rate": 0.0004,
23
+ "loss": 1.3941,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.2724795640326976,
28
+ "grad_norm": 0.5926509499549866,
29
  "learning_rate": 0.0004,
30
+ "loss": 0.8495,
31
  "step": 100
32
  },
33
  {
34
  "epoch": 0.3651226158038147,
35
+ "eval_loss": 0.6168258190155029,
36
+ "eval_runtime": 4.2281,
37
+ "eval_samples_per_second": 73.082,
38
+ "eval_steps_per_second": 18.448,
39
  "step": 134
40
  },
41
  {
42
  "epoch": 0.4087193460490463,
43
+ "grad_norm": 0.7156445384025574,
44
  "learning_rate": 0.0004,
45
+ "loss": 0.6471,
46
  "step": 150
47
  },
48
  {
49
  "epoch": 0.5449591280653951,
50
+ "grad_norm": 0.9673421382904053,
51
  "learning_rate": 0.0004,
52
+ "loss": 0.5422,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 0.6811989100817438,
57
+ "grad_norm": 0.6035718321800232,
58
  "learning_rate": 0.0004,
59
+ "loss": 0.4256,
60
  "step": 250
61
  },
62
  {
63
  "epoch": 0.7302452316076294,
64
+ "eval_loss": 0.4029657542705536,
65
+ "eval_runtime": 4.2417,
66
+ "eval_samples_per_second": 72.848,
67
+ "eval_steps_per_second": 18.389,
68
  "step": 268
69
  },
70
  {
71
  "epoch": 0.8174386920980926,
72
+ "grad_norm": 0.5255013704299927,
73
  "learning_rate": 0.0004,
74
+ "loss": 0.3641,
75
  "step": 300
76
  },
77
  {
78
  "epoch": 0.9536784741144414,
79
+ "grad_norm": 0.41406139731407166,
80
  "learning_rate": 0.0004,
81
+ "loss": 0.2957,
82
  "step": 350
 
 
 
 
 
 
 
83
  }
84
  ],
85
  "logging_steps": 50,
 
94
  "should_evaluate": false,
95
  "should_log": false,
96
  "should_save": true,
97
+ "should_training_stop": false
98
  },
99
  "attributes": {}
100
  }
101
  },
102
+ "total_flos": 1.189318078660608e+16,
103
  "train_batch_size": 4,
104
  "trial_name": null,
105
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e53c057403e5611359609e457af2c9b642379f9797df9487b9ae467b559c71a
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af5f04a654a3e0383a63c452ad764d1c521231acb0b65503a2aa1b6747f36b89
3
  size 6776