Spaces:

ycwhencpp
/

final-iteration

Paused

vaibhavkhandare committed 12 days ago

Commit

e955a2d

verified ·

1 Parent(s): f7b5241

Upload folder using huggingface_hub

Files changed (3) hide show

run-output/plots/training_log.csv CHANGED Viewed

@@ -1,2 +1,5 @@
 round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
-1,1.593,1.593,1.593,0.0268,0.0268,4,2.3314

 round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
+1,3.154,4.348,2.316,0.3017,0.647,38,2.6893
+2,2.855,3.71,2.249,0.2058,0.5125,37,2.554
+3,2.969,4.14,2.25,0.2041,0.4656,45,2.0757
+4,3.294,4.527,2.32,0.2793,0.5243,45,1.9805

run-output/plots/training_summary.json CHANGED Viewed

@@ -1,52 +1,76 @@
 {
-  "model": "Qwen/Qwen2.5-1.5B-Instruct",
   "training": "LoRA SFT (real weight updates)",
-  "rounds": 1,
-  "episodes_per_round": 1,
   "before": {
-    "monthly_engage": 0.3048,
-    "monthly_strategic": 0.3456,
-    "monthly_competitive": 0.4808
   },
   "after": {
-    "monthly_engage": 0.0162,
-    "monthly_strategic": 0.1749,
-    "monthly_competitive": 0.3621
   },
   "smart_heuristic": {
-    "monthly_engage": 0.6342,
-    "monthly_strategic": 0.7218,
-    "monthly_competitive": 0.8315
   },
   "improvement": {
-    "monthly_engage": -0.2886,
-    "monthly_strategic": -0.17070000000000002,
-    "monthly_competitive": -0.11870000000000003
   },
   "training_log": {
     "round": [
-      1
     ],
     "avg_episode_reward": [
-      1.593
     ],
     "max_episode_reward": [
-      1.593
     ],
     "min_episode_reward": [
-      1.593
     ],
     "avg_grader": [
-      0.0268
     ],
     "max_grader": [
-      0.0268
     ],
     "n_training_samples": [
-      4
     ],
     "train_loss": [
-      2.3314
     ]
   }
 }

 {
+  "model": "Qwen/Qwen2.5-3B-Instruct",
   "training": "LoRA SFT (real weight updates)",
+  "rounds": 4,
+  "episodes_per_round": 6,
   "before": {
+    "monthly_engage": 0.5642,
+    "monthly_strategic": 0.5903,
+    "monthly_competitive": 0.8313
   },
   "after": {
+    "monthly_engage": 0.1071,
+    "monthly_strategic": 0.3174,
+    "monthly_competitive": 0.5233
   },
   "smart_heuristic": {
+    "monthly_engage": 0.7352,
+    "monthly_strategic": 0.9043,
+    "monthly_competitive": 0.9066
   },
   "improvement": {
+    "monthly_engage": -0.45710000000000006,
+    "monthly_strategic": -0.27290000000000003,
+    "monthly_competitive": -0.30800000000000005
   },
   "training_log": {
     "round": [
+      1,
+      2,
+      3,
+      4
     ],
     "avg_episode_reward": [
+      3.154,
+      2.855,
+      2.969,
+      3.294
     ],
     "max_episode_reward": [
+      4.348,
+      3.71,
+      4.14,
+      4.527
     ],
     "min_episode_reward": [
+      2.316,
+      2.249,
+      2.25,
+      2.32
     ],
     "avg_grader": [
+      0.3017,
+      0.2058,
+      0.2041,
+      0.2793
     ],
     "max_grader": [
+      0.647,
+      0.5125,
+      0.4656,
+      0.5243
     ],
     "n_training_samples": [
+      38,
+      37,
+      45,
+      45
     ],
     "train_loss": [
+      2.6893,
+      2.554,
+      2.0757,
+      1.9805
     ]
   }
 }

run-output/training/train_grpo.executed.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff