Spaces:

ycwhencpp
/

final-iteration

Paused

vaibhavkhandare committed 12 days ago

Commit

3419724

verified ·

1 Parent(s): e299415

Upload folder using huggingface_hub

Files changed (4) hide show

run-output/plots/io_log.jsonl CHANGED Viewed

The diff for this file is too large to render. See raw diff

run-output/plots/training_log.csv CHANGED Viewed

	@@ -1 +1,3 @@
1	round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss

 round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
+1,3.904,4.514,3.287,0.6202,0.8268,101,2.6723
+2,4.215,4.658,3.566,0.7325,0.8703,102,2.5934

run-output/plots/training_summary.json CHANGED Viewed

@@ -1,17 +1,17 @@
 {
   "model": "Qwen/Qwen2.5-3B-Instruct",
   "training": "LoRA SFT (real weight updates)",
-  "rounds": 1,
   "episodes_per_round": 6,
   "before": {
-    "monthly_engage": 0.1026,
-    "monthly_strategic": 0.2396,
-    "monthly_competitive": 0.2967
   },
   "after": {
-    "monthly_engage": 0.144,
-    "monthly_strategic": 0.2124,
-    "monthly_competitive": 0.3122
   },
   "smart_heuristic": {
     "monthly_engage": 0.7352,
@@ -19,18 +19,42 @@
     "monthly_competitive": 0.9066
   },
   "improvement": {
-    "monthly_engage": 0.04139999999999999,
-    "monthly_strategic": -0.027200000000000002,
-    "monthly_competitive": 0.015499999999999958
   },
   "training_log": {
-    "round": [],
-    "avg_episode_reward": [],
-    "max_episode_reward": [],
-    "min_episode_reward": [],
-    "avg_grader": [],
-    "max_grader": [],
-    "n_training_samples": [],
-    "train_loss": []
   }
 }

 {
   "model": "Qwen/Qwen2.5-3B-Instruct",
   "training": "LoRA SFT (real weight updates)",
+  "rounds": 2,
   "episodes_per_round": 6,
   "before": {
+    "monthly_engage": 1.0,
+    "monthly_strategic": 0.8426,
+    "monthly_competitive": 0.9521
   },
   "after": {
+    "monthly_engage": 1.0,
+    "monthly_strategic": 0.8416,
+    "monthly_competitive": 0.964
   },
   "smart_heuristic": {
     "monthly_engage": 0.7352,
     "monthly_competitive": 0.9066
   },
   "improvement": {
+    "monthly_engage": 0.0,
+    "monthly_strategic": -0.0010000000000000009,
+    "monthly_competitive": 0.011900000000000022
   },
   "training_log": {
+    "round": [
+      1,
+      2
+    ],
+    "avg_episode_reward": [
+      3.904,
+      4.215
+    ],
+    "max_episode_reward": [
+      4.514,
+      4.658
+    ],
+    "min_episode_reward": [
+      3.287,
+      3.566
+    ],
+    "avg_grader": [
+      0.6202,
+      0.7325
+    ],
+    "max_grader": [
+      0.8268,
+      0.8703
+    ],
+    "n_training_samples": [
+      101,
+      102
+    ],
+    "train_loss": [
+      2.6723,
+      2.5934
+    ]
   }
 }

run-output/training/train_grpo.executed.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff