edbeeching HF Staff committed on
Commit
e429615
·
verified ·
1 Parent(s): 5dc4d11

End of training

Browse files
README.md CHANGED
@@ -4,10 +4,10 @@ library_name: transformers
4
  model_name: Qwen3-4B-Instruct-2507-SFT-tr5
5
  tags:
6
  - generated_from_trainer
7
- - trl
8
- - trackio
9
- - trackio:https://huggingface.co/spaces/hf-imo-colab/trackio-distillation-sft
10
  - sft
 
 
 
11
  - trl-internal
12
  licence: license
13
  ---
@@ -30,7 +30,7 @@ print(output["generated_text"])
30
 
31
  ## Training procedure
32
 
33
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/imo-distillation/runs/4uhf3egj)
34
 
35
 
36
  This model was trained with SFT.
 
4
  model_name: Qwen3-4B-Instruct-2507-SFT-tr5
5
  tags:
6
  - generated_from_trainer
 
 
 
7
  - sft
8
+ - trackio:https://huggingface.co/spaces/hf-imo-colab/trackio-distillation-sft
9
+ - trackio
10
+ - trl
11
  - trl-internal
12
  licence: license
13
  ---
 
30
 
31
  ## Training procedure
32
 
33
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/imo-distillation/runs/menw08rt)
34
 
35
 
36
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 4.6268656716417915,
3
  "total_flos": 829937030004736.0,
4
- "train_loss": 0.3161669834246559,
5
- "train_runtime": 18599.2522,
6
  "train_samples": 4281,
7
- "train_samples_per_second": 1.067,
8
  "train_steps_per_second": 0.033
9
  }
 
1
  {
2
  "epoch": 4.6268656716417915,
3
  "total_flos": 829937030004736.0,
4
+ "train_loss": 0.4202386662844689,
5
+ "train_runtime": 18585.0074,
6
  "train_samples": 4281,
7
+ "train_samples_per_second": 1.068,
8
  "train_steps_per_second": 0.033
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:586bde3de3e0548d8b1e1449a19956bfa34fd86f55775ca4a8e0b94dae8cbf92
3
  size 8044982080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f48cc1c0bb7c443abca2a6c7afef30c5b7e16cb0af9657bb269183c49ca76a76
3
  size 8044982080
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 4.6268656716417915,
3
  "total_flos": 829937030004736.0,
4
- "train_loss": 0.3161669834246559,
5
- "train_runtime": 18599.2522,
6
  "train_samples": 4281,
7
- "train_samples_per_second": 1.067,
8
  "train_steps_per_second": 0.033
9
  }
 
1
  {
2
  "epoch": 4.6268656716417915,
3
  "total_flos": 829937030004736.0,
4
+ "train_loss": 0.4202386662844689,
5
+ "train_runtime": 18585.0074,
6
  "train_samples": 4281,
7
+ "train_samples_per_second": 1.068,
8
  "train_steps_per_second": 0.033
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e45aaeff312780d39bc02970f00193a16717653200a4bd41605fe0c011a5c30f
3
  size 7633
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eec048e628a653c916a3717a9f2d295434c00bf772eb742f14f7f0d21a36376
3
  size 7633