sravanthib committed on
Commit
921cfb2
·
verified ·
1 Parent(s): 5d19e0b

Training completed

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. all_results.json +9 -9
  3. train_results.json +9 -9
  4. trainer_state.json +11 -11
README.md CHANGED
@@ -38,10 +38,10 @@ The following hyperparameters were used during training:
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - distributed_type: multi-GPU
41
- - num_devices: 2
42
  - gradient_accumulation_steps: 10
43
- - total_train_batch_size: 40
44
- - total_eval_batch_size: 16
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_ratio: 0.05
 
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - distributed_type: multi-GPU
41
+ - num_devices: 4
42
  - gradient_accumulation_steps: 10
43
+ - total_train_batch_size: 80
44
+ - total_eval_batch_size: 32
45
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
  - lr_scheduler_type: cosine
47
  - lr_scheduler_warmup_ratio: 0.05
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "avg_step_time": 6.689460206031799,
3
- "epoch": 0.01,
4
- "total_flos": 4810910442979328.0,
5
- "total_training_time": 79.5918402671814,
6
- "total_training_time_mins": 1.32653067111969,
7
- "train_loss": 3.181192398071289,
8
- "train_runtime": 72.5058,
9
- "train_samples_per_second": 5.517,
10
- "train_steps_per_second": 0.138
11
  }
 
1
  {
2
+ "avg_step_time": 6.874203062057495,
3
+ "epoch": 0.02,
4
+ "total_flos": 9621820885958656.0,
5
+ "total_training_time": 90.24143290519714,
6
+ "total_training_time_mins": 1.5040238817532858,
7
+ "train_loss": 3.0087697982788084,
8
+ "train_runtime": 75.0646,
9
+ "train_samples_per_second": 10.657,
10
+ "train_steps_per_second": 0.133
11
  }
train_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "avg_step_time": 6.689460206031799,
3
- "epoch": 0.01,
4
- "total_flos": 4810910442979328.0,
5
- "total_training_time": 79.5918402671814,
6
- "total_training_time_mins": 1.32653067111969,
7
- "train_loss": 3.181192398071289,
8
- "train_runtime": 72.5058,
9
- "train_samples_per_second": 5.517,
10
- "train_steps_per_second": 0.138
11
  }
 
1
  {
2
+ "avg_step_time": 6.874203062057495,
3
+ "epoch": 0.02,
4
+ "total_flos": 9621820885958656.0,
5
+ "total_training_time": 90.24143290519714,
6
+ "total_training_time_mins": 1.5040238817532858,
7
+ "train_loss": 3.0087697982788084,
8
+ "train_runtime": 75.0646,
9
+ "train_samples_per_second": 10.657,
10
+ "train_steps_per_second": 0.133
11
  }
trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01,
6
  "eval_steps": 0,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
@@ -10,20 +10,20 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.01,
14
- "grad_norm": 0.5649898052215576,
15
  "learning_rate": 0.0001,
16
- "loss": 3.1812,
17
  "step": 10
18
  },
19
  {
20
- "epoch": 0.01,
21
  "step": 10,
22
- "total_flos": 4810910442979328.0,
23
- "train_loss": 3.181192398071289,
24
- "train_runtime": 72.5058,
25
- "train_samples_per_second": 5.517,
26
- "train_steps_per_second": 0.138
27
  }
28
  ],
29
  "logging_steps": 10,
@@ -43,7 +43,7 @@
43
  "attributes": {}
44
  }
45
  },
46
- "total_flos": 4810910442979328.0,
47
  "train_batch_size": 2,
48
  "trial_name": null,
49
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.02,
6
  "eval_steps": 0,
7
  "global_step": 10,
8
  "is_hyper_param_search": false,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.02,
14
+ "grad_norm": 0.561686098575592,
15
  "learning_rate": 0.0001,
16
+ "loss": 3.0088,
17
  "step": 10
18
  },
19
  {
20
+ "epoch": 0.02,
21
  "step": 10,
22
+ "total_flos": 9621820885958656.0,
23
+ "train_loss": 3.0087697982788084,
24
+ "train_runtime": 75.0646,
25
+ "train_samples_per_second": 10.657,
26
+ "train_steps_per_second": 0.133
27
  }
28
  ],
29
  "logging_steps": 10,
 
43
  "attributes": {}
44
  }
45
  },
46
+ "total_flos": 9621820885958656.0,
47
  "train_batch_size": 2,
48
  "trial_name": null,
49
  "trial_params": null