penfever committed on
Commit
e4dcc5e
·
verified ·
1 Parent(s): b723daa

Model save

Browse files
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - llama-factory
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: swesmith-bugsseq
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # swesmith-bugsseq
15
+
16
+ This model was trained from scratch on an unspecified dataset (no dataset name was recorded by the Trainer).
17
+
18
+ ## Model description
19
+
20
+ More information needed
21
+
22
+ ## Intended uses & limitations
23
+
24
+ More information needed
25
+
26
+ ## Training and evaluation data
27
+
28
+ More information needed
29
+
30
+ ## Training procedure
31
+
32
+ ### Training hyperparameters
33
+
34
+ The following hyperparameters were used during training:
35
+ - learning_rate: 4e-05
36
+ - train_batch_size: 1
37
+ - eval_batch_size: 8
38
+ - seed: 42
39
+ - distributed_type: multi-GPU
40
+ - num_devices: 8
41
+ - gradient_accumulation_steps: 2
42
+ - total_train_batch_size: 16
43
+ - total_eval_batch_size: 64
44
+ - optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9, 0.98), epsilon=1e-08, and no additional optimizer arguments
45
+ - lr_scheduler_type: cosine
46
+ - lr_scheduler_warmup_ratio: 0.1
47
+ - num_epochs: 7.0
48
+
49
+ ### Training results
50
+
51
+
52
+
53
+ ### Framework versions
54
+
55
+ - Transformers 4.56.1
56
+ - Pytorch 2.9.1+cu128
57
+ - Datasets 4.4.1
58
+ - Tokenizers 0.22.1
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151643
6
+ ],
7
+ "pad_token_id": 151643,
8
+ "temperature": 0.6,
9
+ "top_k": 20,
10
+ "top_p": 0.95,
11
+ "transformers_version": "4.56.1"
12
+ }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18e83f2b5221e41fdd3624a025c3acb8d8958aa75268e7e6ff89611de75c2dfd
3
  size 4902257696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b151c06476d45f1bb596f56858a245a489e0dee6b215dc4fc37c9d193225ec9
3
  size 4902257696
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09426ad3fa66ca6a96bd07fcb7885f70a6f48d655b7ab893ad7af9f813ffc331
3
  size 4915960368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58349febdc71b26c97f8898f31be24e050a738ce22e64b142af357b7d54e4e1
3
  size 4915960368
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3437c8aa083f3e58f7cad87b11a21dcdfc0d6fbd451f3047205455e285c7e81
3
  size 4983068496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd2521505018b775ee9902a4cad1ceaad43f59a866e4c898da18bd161036d2fd
3
  size 4983068496
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:043edd566027c17325e768ce5db984d44bbec9330239212d940b31aa3d5ee328
3
  size 1580230264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a00362b6cc09bcebcf3491a4dcaf1509fd5dcc4bfbff100a3a1effe0d0ffce
3
  size 1580230264
trainer_log.jsonl CHANGED
@@ -1200,3 +1200,41 @@
1200
  {"current_steps": 5990, "total_steps": 6188, "loss": 0.0734, "lr": 1.2589118465566875e-07, "epoch": 6.777589134125637, "percentage": 96.8, "elapsed_time": "22:14:38", "remaining_time": "0:44:07"}
1201
  {"current_steps": 5995, "total_steps": 6188, "loss": 0.0728, "lr": 1.1965070541585912e-07, "epoch": 6.78324844368987, "percentage": 96.88, "elapsed_time": "22:16:12", "remaining_time": "0:43:01"}
1202
  {"current_steps": 6000, "total_steps": 6188, "loss": 0.0845, "lr": 1.1356839068632053e-07, "epoch": 6.788907753254103, "percentage": 96.96, "elapsed_time": "22:18:02", "remaining_time": "0:41:55"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1200
  {"current_steps": 5990, "total_steps": 6188, "loss": 0.0734, "lr": 1.2589118465566875e-07, "epoch": 6.777589134125637, "percentage": 96.8, "elapsed_time": "22:14:38", "remaining_time": "0:44:07"}
1201
  {"current_steps": 5995, "total_steps": 6188, "loss": 0.0728, "lr": 1.1965070541585912e-07, "epoch": 6.78324844368987, "percentage": 96.88, "elapsed_time": "22:16:12", "remaining_time": "0:43:01"}
1202
  {"current_steps": 6000, "total_steps": 6188, "loss": 0.0845, "lr": 1.1356839068632053e-07, "epoch": 6.788907753254103, "percentage": 96.96, "elapsed_time": "22:18:02", "remaining_time": "0:41:55"}
1203
+ {"current_steps": 6005, "total_steps": 6188, "loss": 0.0719, "lr": 1.0764428885686073e-07, "epoch": 6.794567062818336, "percentage": 97.04, "elapsed_time": "22:20:54", "remaining_time": "0:40:51"}
1204
+ {"current_steps": 6010, "total_steps": 6188, "loss": 0.0796, "lr": 1.0187844705857875e-07, "epoch": 6.8002263723825696, "percentage": 97.12, "elapsed_time": "22:22:23", "remaining_time": "0:39:45"}
1205
+ {"current_steps": 6015, "total_steps": 6188, "loss": 0.0777, "lr": 9.627091116348076e-08, "epoch": 6.805885681946802, "percentage": 97.2, "elapsed_time": "22:24:20", "remaining_time": "0:38:39"}
1206
+ {"current_steps": 6020, "total_steps": 6188, "loss": 0.0778, "lr": 9.082172578412263e-08, "epoch": 6.8115449915110355, "percentage": 97.29, "elapsed_time": "22:25:59", "remaining_time": "0:37:33"}
1207
+ {"current_steps": 6025, "total_steps": 6188, "loss": 0.0665, "lr": 8.553093427325243e-08, "epoch": 6.817204301075269, "percentage": 97.37, "elapsed_time": "22:27:34", "remaining_time": "0:36:27"}
1208
+ {"current_steps": 6030, "total_steps": 6188, "loss": 0.066, "lr": 8.039857872345736e-08, "epoch": 6.8228636106395015, "percentage": 97.45, "elapsed_time": "22:29:01", "remaining_time": "0:35:20"}
1209
+ {"current_steps": 6035, "total_steps": 6188, "loss": 0.0915, "lr": 7.542469996684843e-08, "epoch": 6.828522920203735, "percentage": 97.53, "elapsed_time": "22:31:05", "remaining_time": "0:34:15"}
1210
+ {"current_steps": 6040, "total_steps": 6188, "loss": 0.0608, "lr": 7.06093375747141e-08, "epoch": 6.834182229767968, "percentage": 97.61, "elapsed_time": "22:32:37", "remaining_time": "0:33:08"}
1211
+ {"current_steps": 6045, "total_steps": 6188, "loss": 0.0706, "lr": 6.595252985721834e-08, "epoch": 6.839841539332202, "percentage": 97.69, "elapsed_time": "22:34:36", "remaining_time": "0:32:02"}
1212
+ {"current_steps": 6050, "total_steps": 6188, "loss": 0.0685, "lr": 6.145431386309186e-08, "epoch": 6.845500848896434, "percentage": 97.77, "elapsed_time": "22:36:18", "remaining_time": "0:30:56"}
1213
+ {"current_steps": 6055, "total_steps": 6188, "loss": 0.0574, "lr": 5.711472537933693e-08, "epoch": 6.851160158460668, "percentage": 97.85, "elapsed_time": "22:37:56", "remaining_time": "0:29:49"}
1214
+ {"current_steps": 6060, "total_steps": 6188, "loss": 0.0704, "lr": 5.293379893094752e-08, "epoch": 6.856819468024901, "percentage": 97.93, "elapsed_time": "22:39:32", "remaining_time": "0:28:42"}
1215
+ {"current_steps": 6065, "total_steps": 6188, "loss": 0.071, "lr": 4.891156778062734e-08, "epoch": 6.862478777589134, "percentage": 98.01, "elapsed_time": "22:41:09", "remaining_time": "0:27:36"}
1216
+ {"current_steps": 6070, "total_steps": 6188, "loss": 0.0919, "lr": 4.5048063928527785e-08, "epoch": 6.868138087153367, "percentage": 98.09, "elapsed_time": "22:43:00", "remaining_time": "0:26:29"}
1217
+ {"current_steps": 6075, "total_steps": 6188, "loss": 0.0935, "lr": 4.134331811199932e-08, "epoch": 6.873797396717601, "percentage": 98.17, "elapsed_time": "22:44:41", "remaining_time": "0:25:23"}
1218
+ {"current_steps": 6080, "total_steps": 6188, "loss": 0.0535, "lr": 3.7797359805333836e-08, "epoch": 6.879456706281833, "percentage": 98.25, "elapsed_time": "22:46:11", "remaining_time": "0:24:16"}
1219
+ {"current_steps": 6085, "total_steps": 6188, "loss": 0.0871, "lr": 3.441021721954485e-08, "epoch": 6.885116015846067, "percentage": 98.34, "elapsed_time": "22:47:46", "remaining_time": "0:23:09"}
1220
+ {"current_steps": 6090, "total_steps": 6188, "loss": 0.0674, "lr": 3.11819173021366e-08, "epoch": 6.8907753254103, "percentage": 98.42, "elapsed_time": "22:49:21", "remaining_time": "0:22:02"}
1221
+ {"current_steps": 6095, "total_steps": 6188, "loss": 0.0697, "lr": 2.8112485736881967e-08, "epoch": 6.896434634974533, "percentage": 98.5, "elapsed_time": "22:50:51", "remaining_time": "0:20:55"}
1222
+ {"current_steps": 6100, "total_steps": 6188, "loss": 0.0821, "lr": 2.520194694363376e-08, "epoch": 6.902093944538766, "percentage": 98.58, "elapsed_time": "22:52:38", "remaining_time": "0:19:48"}
1223
+ {"current_steps": 6105, "total_steps": 6188, "loss": 0.1072, "lr": 2.2450324078120423e-08, "epoch": 6.907753254103, "percentage": 98.66, "elapsed_time": "22:54:33", "remaining_time": "0:18:41"}
1224
+ {"current_steps": 6110, "total_steps": 6188, "loss": 0.0786, "lr": 1.9857639031759522e-08, "epoch": 6.913412563667233, "percentage": 98.74, "elapsed_time": "22:56:28", "remaining_time": "0:17:34"}
1225
+ {"current_steps": 6115, "total_steps": 6188, "loss": 0.0716, "lr": 1.7423912431489e-08, "epoch": 6.9190718732314656, "percentage": 98.82, "elapsed_time": "22:58:08", "remaining_time": "0:16:27"}
1226
+ {"current_steps": 6120, "total_steps": 6188, "loss": 0.0675, "lr": 1.51491636396095e-08, "epoch": 6.924731182795699, "percentage": 98.9, "elapsed_time": "22:59:40", "remaining_time": "0:15:19"}
1227
+ {"current_steps": 6125, "total_steps": 6188, "loss": 0.0678, "lr": 1.3033410753608977e-08, "epoch": 6.930390492359932, "percentage": 98.98, "elapsed_time": "23:01:11", "remaining_time": "0:14:12"}
1228
+ {"current_steps": 6130, "total_steps": 6188, "loss": 0.0582, "lr": 1.1076670606045004e-08, "epoch": 6.936049801924165, "percentage": 99.06, "elapsed_time": "23:02:40", "remaining_time": "0:13:04"}
1229
+ {"current_steps": 6135, "total_steps": 6188, "loss": 0.0824, "lr": 9.278958764391554e-09, "epoch": 6.941709111488398, "percentage": 99.14, "elapsed_time": "23:04:11", "remaining_time": "0:11:57"}
1230
+ {"current_steps": 6140, "total_steps": 6188, "loss": 0.0622, "lr": 7.64028953092133e-09, "epoch": 6.947368421052632, "percentage": 99.22, "elapsed_time": "23:05:46", "remaining_time": "0:10:50"}
1231
+ {"current_steps": 6145, "total_steps": 6188, "loss": 0.0647, "lr": 6.16067594259695e-09, "epoch": 6.953027730616864, "percentage": 99.31, "elapsed_time": "23:07:21", "remaining_time": "0:09:42"}
1232
+ {"current_steps": 6150, "total_steps": 6188, "loss": 0.0742, "lr": 4.840129770957713e-09, "epoch": 6.958687040181098, "percentage": 99.39, "elapsed_time": "23:08:54", "remaining_time": "0:08:34"}
1233
+ {"current_steps": 6155, "total_steps": 6188, "loss": 0.0639, "lr": 3.6786615220352208e-09, "epoch": 6.964346349745331, "percentage": 99.47, "elapsed_time": "23:10:49", "remaining_time": "0:07:27"}
1234
+ {"current_steps": 6160, "total_steps": 6188, "loss": 0.0625, "lr": 2.6762804362623353e-09, "epoch": 6.970005659309564, "percentage": 99.55, "elapsed_time": "23:12:33", "remaining_time": "0:06:19"}
1235
+ {"current_steps": 6165, "total_steps": 6188, "loss": 0.0663, "lr": 1.8329944884021288e-09, "epoch": 6.975664968873797, "percentage": 99.63, "elapsed_time": "23:14:29", "remaining_time": "0:05:12"}
1236
+ {"current_steps": 6170, "total_steps": 6188, "loss": 0.1437, "lr": 1.1488103874923717e-09, "epoch": 6.981324278438031, "percentage": 99.71, "elapsed_time": "23:17:11", "remaining_time": "0:04:04"}
1237
+ {"current_steps": 6175, "total_steps": 6188, "loss": 0.0781, "lr": 6.237335767744767e-10, "epoch": 6.986983588002264, "percentage": 99.79, "elapsed_time": "23:19:01", "remaining_time": "0:02:56"}
1238
+ {"current_steps": 6180, "total_steps": 6188, "loss": 0.0741, "lr": 2.577682336690757e-10, "epoch": 6.992642897566497, "percentage": 99.87, "elapsed_time": "23:20:51", "remaining_time": "0:01:48"}
1239
+ {"current_steps": 6185, "total_steps": 6188, "loss": 0.0728, "lr": 5.091726972938915e-11, "epoch": 6.99830220713073, "percentage": 99.95, "elapsed_time": "23:22:31", "remaining_time": "0:00:40"}
1240
+ {"current_steps": 6187, "total_steps": 6188, "epoch": 7.0, "percentage": 99.98, "elapsed_time": "23:23:14", "remaining_time": "0:00:13"}