Starred commited on
Commit
c711895
·
verified ·
1 Parent(s): 3993c48

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d3ca1d8f31e80248773ed8ea5c99dec1649dfd015e7b4e09fb8801a2f87acf4
3
  size 84972248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afc9e576ce294178c6b2fd6e738300f820db1a7fac04d47b3eed7d1fe7b964da
3
  size 84972248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6034f11a3ed80ef2f342d581363b8c8785b383cb84cc32912b9ec65c5130a5d8
3
  size 43434405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad1ba58e3875c031c561de720dee24bbcaf2368184170ad0d5e17db4ea54f8e
3
  size 43434405
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b3d335d4434dc38a54623d6efa9ce5966526f63356dfa46e44de858a461289a
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da9f55f44143145d0788031a26e62bf2132ba4d1937c34de59b503aa7038a88
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:049acc437e6ffa36f74a2a4841d3cfeb93a5bc183f816e8aa5a3f989039dbda6
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f438b55116651a9ee16e24296470229073fe7ec34f3976810d422db5e68d5204
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7372927d3408d4e65fe807b9c8c1f0b91b9b1800b28f92f162508ffcabddc6e6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c231d2fef38a21f0b802526519cca758a148fd1c0a091bc4593caa9fef72be0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 2000,
3
- "best_metric": 0.1900114119052887,
4
- "best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-2000",
5
- "epoch": 0.8868687988470706,
6
  "eval_steps": 125,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -708,6 +708,92 @@
708
  "tokens_per_second": 340.07002840114217,
709
  "tokens_per_step": 1149.8275,
710
  "total_tokens_seen": 2299655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  }
712
  ],
713
  "logging_steps": 50,
@@ -736,7 +822,7 @@
736
  "attributes": {}
737
  }
738
  },
739
- "total_flos": 1.4776071086722253e+17,
740
  "train_batch_size": 1,
741
  "trial_name": null,
742
  "trial_params": null
 
1
  {
2
+ "best_global_step": 2250,
3
+ "best_metric": 0.18876151740550995,
4
+ "best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-2250",
5
+ "epoch": 0.9977273987029543,
6
  "eval_steps": 125,
7
+ "global_step": 2250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
708
  "tokens_per_second": 340.07002840114217,
709
  "tokens_per_step": 1149.8275,
710
  "total_tokens_seen": 2299655
711
+ },
712
+ {
713
+ "epoch": 0.9090405188182473,
714
+ "grad_norm": 0.44616127014160156,
715
+ "last_batch_tokens": 151,
716
+ "learning_rate": 2.072081132410253e-06,
717
+ "loss": 0.1782122802734375,
718
+ "lr": 2.0522471462437796e-06,
719
+ "step": 2050,
720
+ "tokens_per_second": 81.0157351221381,
721
+ "tokens_per_step": 1160.878536585366,
722
+ "total_tokens_seen": 2379801
723
+ },
724
+ {
725
+ "epoch": 0.931212238789424,
726
+ "grad_norm": 0.4230777621269226,
727
+ "last_batch_tokens": 188,
728
+ "learning_rate": 1.195500515894149e-06,
729
+ "loss": 0.17306018829345704,
730
+ "lr": 1.1803797270814765e-06,
731
+ "step": 2100,
732
+ "tokens_per_second": 80.14939686559167,
733
+ "tokens_per_step": 1170.3680952380953,
734
+ "total_tokens_seen": 2457773
735
+ },
736
+ {
737
+ "epoch": 0.9422980987750125,
738
+ "eval_loss": 0.18897105753421783,
739
+ "eval_runtime": 95.0115,
740
+ "eval_samples_per_second": 3.821,
741
+ "eval_steps_per_second": 1.916,
742
+ "last_batch_tokens": 172,
743
+ "lr": 8.333381642750881e-07,
744
+ "step": 2125,
745
+ "tokens_per_second": 120.10909547338339,
746
+ "tokens_per_step": 1188.5943529411766,
747
+ "total_tokens_seen": 2525763
748
+ },
749
+ {
750
+ "epoch": 0.9533839587606009,
751
+ "grad_norm": 0.2957008183002472,
752
+ "last_batch_tokens": 305,
753
+ "learning_rate": 5.564793899281884e-07,
754
+ "loss": 0.1782497787475586,
755
+ "lr": 5.461454000209198e-07,
756
+ "step": 2150,
757
+ "tokens_per_second": 83.24645935651418,
758
+ "tokens_per_step": 1193.8697674418604,
759
+ "total_tokens_seen": 2566820
760
+ },
761
+ {
762
+ "epoch": 0.9755556787317776,
763
+ "grad_norm": 0.49967435002326965,
764
+ "last_batch_tokens": 156,
765
+ "learning_rate": 1.5812823683962197e-07,
766
+ "loss": 0.19703115463256837,
767
+ "lr": 1.5263134729363583e-07,
768
+ "step": 2200,
769
+ "tokens_per_second": 75.00656410059429,
770
+ "tokens_per_step": 1199.9336363636364,
771
+ "total_tokens_seen": 2639854
772
+ },
773
+ {
774
+ "epoch": 0.9977273987029543,
775
+ "grad_norm": 0.26038259267807007,
776
+ "last_batch_tokens": 322,
777
+ "learning_rate": 2.386060162717918e-09,
778
+ "loss": 0.17010717391967772,
779
+ "lr": 1.7530274921462308e-09,
780
+ "step": 2250,
781
+ "tokens_per_second": 78.96076733362268,
782
+ "tokens_per_step": 1208.1137777777778,
783
+ "total_tokens_seen": 2718256
784
+ },
785
+ {
786
+ "epoch": 0.9977273987029543,
787
+ "eval_loss": 0.18876151740550995,
788
+ "eval_runtime": 95.314,
789
+ "eval_samples_per_second": 3.808,
790
+ "eval_steps_per_second": 1.909,
791
+ "last_batch_tokens": 172,
792
+ "lr": 1.7530274921462308e-09,
793
+ "step": 2250,
794
+ "tokens_per_second": 337.1431434660513,
795
+ "tokens_per_step": 1222.3973333333333,
796
+ "total_tokens_seen": 2750394
797
  }
798
  ],
799
  "logging_steps": 50,
 
822
  "attributes": {}
823
  }
824
  },
825
+ "total_flos": 1.6621215424289178e+17,
826
  "train_batch_size": 1,
827
  "trial_name": null,
828
  "trial_params": null