Model save
Browse files
- README.md +58 -0
- generation_config.json +12 -0
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +38 -0
README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
tags:
|
| 4 |
+
- llama-factory
|
| 5 |
+
- generated_from_trainer
|
| 6 |
+
model-index:
|
| 7 |
+
- name: swesmith-bugsseq
|
| 8 |
+
results: []
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 12 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 13 |
+
|
| 14 |
+
# swesmith-bugsseq
|
| 15 |
+
|
| 16 |
+
This model was trained from scratch on an unspecified dataset.
|
| 17 |
+
|
| 18 |
+
## Model description
|
| 19 |
+
|
| 20 |
+
More information needed
|
| 21 |
+
|
| 22 |
+
## Intended uses & limitations
|
| 23 |
+
|
| 24 |
+
More information needed
|
| 25 |
+
|
| 26 |
+
## Training and evaluation data
|
| 27 |
+
|
| 28 |
+
More information needed
|
| 29 |
+
|
| 30 |
+
## Training procedure
|
| 31 |
+
|
| 32 |
+
### Training hyperparameters
|
| 33 |
+
|
| 34 |
+
The following hyperparameters were used during training:
|
| 35 |
+
- learning_rate: 4e-05
|
| 36 |
+
- train_batch_size: 1
|
| 37 |
+
- eval_batch_size: 8
|
| 38 |
+
- seed: 42
|
| 39 |
+
- distributed_type: multi-GPU
|
| 40 |
+
- num_devices: 8
|
| 41 |
+
- gradient_accumulation_steps: 2
|
| 42 |
+
- total_train_batch_size: 16
|
| 43 |
+
- total_eval_batch_size: 64
|
| 44 |
+
- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.98) and epsilon=1e-08; no additional optimizer arguments
|
| 45 |
+
- lr_scheduler_type: cosine
|
| 46 |
+
- lr_scheduler_warmup_ratio: 0.1
|
| 47 |
+
- num_epochs: 7.0
|
| 48 |
+
|
| 49 |
+
### Training results
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
### Framework versions
|
| 54 |
+
|
| 55 |
+
- Transformers 4.56.1
|
| 56 |
+
- Pytorch 2.9.1+cu128
|
| 57 |
+
- Datasets 4.4.1
|
| 58 |
+
- Tokenizers 0.22.1
|
generation_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151645,
|
| 5 |
+
151643
|
| 6 |
+
],
|
| 7 |
+
"pad_token_id": 151643,
|
| 8 |
+
"temperature": 0.6,
|
| 9 |
+
"top_k": 20,
|
| 10 |
+
"top_p": 0.95,
|
| 11 |
+
"transformers_version": "4.56.1"
|
| 12 |
+
}
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4902257696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b151c06476d45f1bb596f56858a245a489e0dee6b215dc4fc37c9d193225ec9
|
| 3 |
size 4902257696
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915960368
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a58349febdc71b26c97f8898f31be24e050a738ce22e64b142af357b7d54e4e1
|
| 3 |
size 4915960368
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4983068496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd2521505018b775ee9902a4cad1ceaad43f59a866e4c898da18bd161036d2fd
|
| 3 |
size 4983068496
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1580230264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04a00362b6cc09bcebcf3491a4dcaf1509fd5dcc4bfbff100a3a1effe0d0ffce
|
| 3 |
size 1580230264
|
trainer_log.jsonl
CHANGED
|
@@ -1200,3 +1200,41 @@
|
|
| 1200 |
{"current_steps": 5990, "total_steps": 6188, "loss": 0.0734, "lr": 1.2589118465566875e-07, "epoch": 6.777589134125637, "percentage": 96.8, "elapsed_time": "22:14:38", "remaining_time": "0:44:07"}
|
| 1201 |
{"current_steps": 5995, "total_steps": 6188, "loss": 0.0728, "lr": 1.1965070541585912e-07, "epoch": 6.78324844368987, "percentage": 96.88, "elapsed_time": "22:16:12", "remaining_time": "0:43:01"}
|
| 1202 |
{"current_steps": 6000, "total_steps": 6188, "loss": 0.0845, "lr": 1.1356839068632053e-07, "epoch": 6.788907753254103, "percentage": 96.96, "elapsed_time": "22:18:02", "remaining_time": "0:41:55"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1200 |
{"current_steps": 5990, "total_steps": 6188, "loss": 0.0734, "lr": 1.2589118465566875e-07, "epoch": 6.777589134125637, "percentage": 96.8, "elapsed_time": "22:14:38", "remaining_time": "0:44:07"}
|
| 1201 |
{"current_steps": 5995, "total_steps": 6188, "loss": 0.0728, "lr": 1.1965070541585912e-07, "epoch": 6.78324844368987, "percentage": 96.88, "elapsed_time": "22:16:12", "remaining_time": "0:43:01"}
|
| 1202 |
{"current_steps": 6000, "total_steps": 6188, "loss": 0.0845, "lr": 1.1356839068632053e-07, "epoch": 6.788907753254103, "percentage": 96.96, "elapsed_time": "22:18:02", "remaining_time": "0:41:55"}
|
| 1203 |
+
{"current_steps": 6005, "total_steps": 6188, "loss": 0.0719, "lr": 1.0764428885686073e-07, "epoch": 6.794567062818336, "percentage": 97.04, "elapsed_time": "22:20:54", "remaining_time": "0:40:51"}
|
| 1204 |
+
{"current_steps": 6010, "total_steps": 6188, "loss": 0.0796, "lr": 1.0187844705857875e-07, "epoch": 6.8002263723825696, "percentage": 97.12, "elapsed_time": "22:22:23", "remaining_time": "0:39:45"}
|
| 1205 |
+
{"current_steps": 6015, "total_steps": 6188, "loss": 0.0777, "lr": 9.627091116348076e-08, "epoch": 6.805885681946802, "percentage": 97.2, "elapsed_time": "22:24:20", "remaining_time": "0:38:39"}
|
| 1206 |
+
{"current_steps": 6020, "total_steps": 6188, "loss": 0.0778, "lr": 9.082172578412263e-08, "epoch": 6.8115449915110355, "percentage": 97.29, "elapsed_time": "22:25:59", "remaining_time": "0:37:33"}
|
| 1207 |
+
{"current_steps": 6025, "total_steps": 6188, "loss": 0.0665, "lr": 8.553093427325243e-08, "epoch": 6.817204301075269, "percentage": 97.37, "elapsed_time": "22:27:34", "remaining_time": "0:36:27"}
|
| 1208 |
+
{"current_steps": 6030, "total_steps": 6188, "loss": 0.066, "lr": 8.039857872345736e-08, "epoch": 6.8228636106395015, "percentage": 97.45, "elapsed_time": "22:29:01", "remaining_time": "0:35:20"}
|
| 1209 |
+
{"current_steps": 6035, "total_steps": 6188, "loss": 0.0915, "lr": 7.542469996684843e-08, "epoch": 6.828522920203735, "percentage": 97.53, "elapsed_time": "22:31:05", "remaining_time": "0:34:15"}
|
| 1210 |
+
{"current_steps": 6040, "total_steps": 6188, "loss": 0.0608, "lr": 7.06093375747141e-08, "epoch": 6.834182229767968, "percentage": 97.61, "elapsed_time": "22:32:37", "remaining_time": "0:33:08"}
|
| 1211 |
+
{"current_steps": 6045, "total_steps": 6188, "loss": 0.0706, "lr": 6.595252985721834e-08, "epoch": 6.839841539332202, "percentage": 97.69, "elapsed_time": "22:34:36", "remaining_time": "0:32:02"}
|
| 1212 |
+
{"current_steps": 6050, "total_steps": 6188, "loss": 0.0685, "lr": 6.145431386309186e-08, "epoch": 6.845500848896434, "percentage": 97.77, "elapsed_time": "22:36:18", "remaining_time": "0:30:56"}
|
| 1213 |
+
{"current_steps": 6055, "total_steps": 6188, "loss": 0.0574, "lr": 5.711472537933693e-08, "epoch": 6.851160158460668, "percentage": 97.85, "elapsed_time": "22:37:56", "remaining_time": "0:29:49"}
|
| 1214 |
+
{"current_steps": 6060, "total_steps": 6188, "loss": 0.0704, "lr": 5.293379893094752e-08, "epoch": 6.856819468024901, "percentage": 97.93, "elapsed_time": "22:39:32", "remaining_time": "0:28:42"}
|
| 1215 |
+
{"current_steps": 6065, "total_steps": 6188, "loss": 0.071, "lr": 4.891156778062734e-08, "epoch": 6.862478777589134, "percentage": 98.01, "elapsed_time": "22:41:09", "remaining_time": "0:27:36"}
|
| 1216 |
+
{"current_steps": 6070, "total_steps": 6188, "loss": 0.0919, "lr": 4.5048063928527785e-08, "epoch": 6.868138087153367, "percentage": 98.09, "elapsed_time": "22:43:00", "remaining_time": "0:26:29"}
|
| 1217 |
+
{"current_steps": 6075, "total_steps": 6188, "loss": 0.0935, "lr": 4.134331811199932e-08, "epoch": 6.873797396717601, "percentage": 98.17, "elapsed_time": "22:44:41", "remaining_time": "0:25:23"}
|
| 1218 |
+
{"current_steps": 6080, "total_steps": 6188, "loss": 0.0535, "lr": 3.7797359805333836e-08, "epoch": 6.879456706281833, "percentage": 98.25, "elapsed_time": "22:46:11", "remaining_time": "0:24:16"}
|
| 1219 |
+
{"current_steps": 6085, "total_steps": 6188, "loss": 0.0871, "lr": 3.441021721954485e-08, "epoch": 6.885116015846067, "percentage": 98.34, "elapsed_time": "22:47:46", "remaining_time": "0:23:09"}
|
| 1220 |
+
{"current_steps": 6090, "total_steps": 6188, "loss": 0.0674, "lr": 3.11819173021366e-08, "epoch": 6.8907753254103, "percentage": 98.42, "elapsed_time": "22:49:21", "remaining_time": "0:22:02"}
|
| 1221 |
+
{"current_steps": 6095, "total_steps": 6188, "loss": 0.0697, "lr": 2.8112485736881967e-08, "epoch": 6.896434634974533, "percentage": 98.5, "elapsed_time": "22:50:51", "remaining_time": "0:20:55"}
|
| 1222 |
+
{"current_steps": 6100, "total_steps": 6188, "loss": 0.0821, "lr": 2.520194694363376e-08, "epoch": 6.902093944538766, "percentage": 98.58, "elapsed_time": "22:52:38", "remaining_time": "0:19:48"}
|
| 1223 |
+
{"current_steps": 6105, "total_steps": 6188, "loss": 0.1072, "lr": 2.2450324078120423e-08, "epoch": 6.907753254103, "percentage": 98.66, "elapsed_time": "22:54:33", "remaining_time": "0:18:41"}
|
| 1224 |
+
{"current_steps": 6110, "total_steps": 6188, "loss": 0.0786, "lr": 1.9857639031759522e-08, "epoch": 6.913412563667233, "percentage": 98.74, "elapsed_time": "22:56:28", "remaining_time": "0:17:34"}
|
| 1225 |
+
{"current_steps": 6115, "total_steps": 6188, "loss": 0.0716, "lr": 1.7423912431489e-08, "epoch": 6.9190718732314656, "percentage": 98.82, "elapsed_time": "22:58:08", "remaining_time": "0:16:27"}
|
| 1226 |
+
{"current_steps": 6120, "total_steps": 6188, "loss": 0.0675, "lr": 1.51491636396095e-08, "epoch": 6.924731182795699, "percentage": 98.9, "elapsed_time": "22:59:40", "remaining_time": "0:15:19"}
|
| 1227 |
+
{"current_steps": 6125, "total_steps": 6188, "loss": 0.0678, "lr": 1.3033410753608977e-08, "epoch": 6.930390492359932, "percentage": 98.98, "elapsed_time": "23:01:11", "remaining_time": "0:14:12"}
|
| 1228 |
+
{"current_steps": 6130, "total_steps": 6188, "loss": 0.0582, "lr": 1.1076670606045004e-08, "epoch": 6.936049801924165, "percentage": 99.06, "elapsed_time": "23:02:40", "remaining_time": "0:13:04"}
|
| 1229 |
+
{"current_steps": 6135, "total_steps": 6188, "loss": 0.0824, "lr": 9.278958764391554e-09, "epoch": 6.941709111488398, "percentage": 99.14, "elapsed_time": "23:04:11", "remaining_time": "0:11:57"}
|
| 1230 |
+
{"current_steps": 6140, "total_steps": 6188, "loss": 0.0622, "lr": 7.64028953092133e-09, "epoch": 6.947368421052632, "percentage": 99.22, "elapsed_time": "23:05:46", "remaining_time": "0:10:50"}
|
| 1231 |
+
{"current_steps": 6145, "total_steps": 6188, "loss": 0.0647, "lr": 6.16067594259695e-09, "epoch": 6.953027730616864, "percentage": 99.31, "elapsed_time": "23:07:21", "remaining_time": "0:09:42"}
|
| 1232 |
+
{"current_steps": 6150, "total_steps": 6188, "loss": 0.0742, "lr": 4.840129770957713e-09, "epoch": 6.958687040181098, "percentage": 99.39, "elapsed_time": "23:08:54", "remaining_time": "0:08:34"}
|
| 1233 |
+
{"current_steps": 6155, "total_steps": 6188, "loss": 0.0639, "lr": 3.6786615220352208e-09, "epoch": 6.964346349745331, "percentage": 99.47, "elapsed_time": "23:10:49", "remaining_time": "0:07:27"}
|
| 1234 |
+
{"current_steps": 6160, "total_steps": 6188, "loss": 0.0625, "lr": 2.6762804362623353e-09, "epoch": 6.970005659309564, "percentage": 99.55, "elapsed_time": "23:12:33", "remaining_time": "0:06:19"}
|
| 1235 |
+
{"current_steps": 6165, "total_steps": 6188, "loss": 0.0663, "lr": 1.8329944884021288e-09, "epoch": 6.975664968873797, "percentage": 99.63, "elapsed_time": "23:14:29", "remaining_time": "0:05:12"}
|
| 1236 |
+
{"current_steps": 6170, "total_steps": 6188, "loss": 0.1437, "lr": 1.1488103874923717e-09, "epoch": 6.981324278438031, "percentage": 99.71, "elapsed_time": "23:17:11", "remaining_time": "0:04:04"}
|
| 1237 |
+
{"current_steps": 6175, "total_steps": 6188, "loss": 0.0781, "lr": 6.237335767744767e-10, "epoch": 6.986983588002264, "percentage": 99.79, "elapsed_time": "23:19:01", "remaining_time": "0:02:56"}
|
| 1238 |
+
{"current_steps": 6180, "total_steps": 6188, "loss": 0.0741, "lr": 2.577682336690757e-10, "epoch": 6.992642897566497, "percentage": 99.87, "elapsed_time": "23:20:51", "remaining_time": "0:01:48"}
|
| 1239 |
+
{"current_steps": 6185, "total_steps": 6188, "loss": 0.0728, "lr": 5.091726972938915e-11, "epoch": 6.99830220713073, "percentage": 99.95, "elapsed_time": "23:22:31", "remaining_time": "0:00:40"}
|
| 1240 |
+
{"current_steps": 6187, "total_steps": 6188, "epoch": 7.0, "percentage": 99.98, "elapsed_time": "23:23:14", "remaining_time": "0:00:13"}
|