Spaces:

ycwhencpp
/

final-iteration

Paused

App Files Community

vaibhavkhandare committed 12 days ago

Commit

e52d302

verified ·

1 Parent(s): a402a82

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

run-output/plots/io_log.jsonl +0 -0
run-output/plots/training_log.csv +7 -3
run-output/plots/training_summary.json +88 -28
run-output/training/train_grpo.executed.ipynb +0 -0

run-output/plots/io_log.jsonl CHANGED Viewed

The diff for this file is too large to render. See raw diff

run-output/plots/training_log.csv CHANGED Viewed

@@ -1,3 +1,7 @@
-round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
-1,3.904,4.514,3.287,0.6202,0.8268,101,2.6723
-2,4.215,4.658,3.566,0.7325,0.8703,102,2.5934

+phase,round,global_step,use_hint,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
+phase1_timing,1,1,True,5.127,5.315,4.96,0.9498,1.0,81,2.833
+phase1_timing,2,2,False,3.04,3.303,2.6,0.259,0.3614,96,3.1413
+phase1_timing,3,3,False,2.867,3.016,2.555,0.2083,0.3042,102,3.1255
+phase2_content,1,4,True,3.538,3.837,3.338,0.8697,1.0,77,2.8381
+phase2_content,2,5,False,2.15,2.807,1.587,0.3763,0.5979,90,2.9281
+phase2_content,3,6,False,1.924,2.609,1.375,0.2855,0.5027,76,2.9184

run-output/plots/training_summary.json CHANGED Viewed

@@ -1,60 +1,120 @@
 {
   "model": "Qwen/Qwen2.5-3B-Instruct",
-  "training": "LoRA SFT (real weight updates)",
-  "rounds": 2,
   "episodes_per_round": 6,
   "before": {
-    "monthly_engage": 1.0,
-    "monthly_strategic": 0.8426,
-    "monthly_competitive": 0.9521
   },
   "after": {
-    "monthly_engage": 1.0,
-    "monthly_strategic": 0.8416,
-    "monthly_competitive": 0.964
   },
   "smart_heuristic": {
-    "monthly_engage": 0.7352,
-    "monthly_strategic": 0.9043,
-    "monthly_competitive": 0.9066
   },
   "improvement": {
     "monthly_engage": 0.0,
-    "monthly_strategic": -0.0010000000000000009,
-    "monthly_competitive": 0.011900000000000022
   },
   "training_log": {
     "round": [
       1,
-      2
     ],
     "avg_episode_reward": [
-      3.904,
-      4.215
     ],
     "max_episode_reward": [
-      4.514,
-      4.658
     ],
     "min_episode_reward": [
-      3.287,
-      3.566
     ],
     "avg_grader": [
-      0.6202,
-      0.7325
     ],
     "max_grader": [
-      0.8268,
-      0.8703
     ],
     "n_training_samples": [
-      101,
-      102
     ],
     "train_loss": [
-      2.6723,
-      2.5934
     ]
   }
 }

 {
   "model": "Qwen/Qwen2.5-3B-Instruct",
+  "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
+  "phases": [
+    "phase1_timing",
+    "phase2_content"
+  ],
+  "rounds_per_phase": 3,
   "episodes_per_round": 6,
   "before": {
+    "monthly_engage": 0.0,
+    "monthly_strategic": 0.175,
+    "monthly_competitive": 0.035
   },
   "after": {
+    "monthly_engage": 0.0,
+    "monthly_strategic": 0.175,
+    "monthly_competitive": 0.035
   },
   "smart_heuristic": {
+    "monthly_engage": 0.7519,
+    "monthly_strategic": 0.9101,
+    "monthly_competitive": 0.9141
   },
   "improvement": {
     "monthly_engage": 0.0,
+    "monthly_strategic": 0.0,
+    "monthly_competitive": 0.0
   },
   "training_log": {
+    "phase": [
+      "phase1_timing",
+      "phase1_timing",
+      "phase1_timing",
+      "phase2_content",
+      "phase2_content",
+      "phase2_content"
+    ],
     "round": [
       1,
+      2,
+      3,
+      1,
+      2,
+      3
+    ],
+    "global_step": [
+      1,
+      2,
+      3,
+      4,
+      5,
+      6
+    ],
+    "use_hint": [
+      true,
+      false,
+      false,
+      true,
+      false,
+      false
     ],
     "avg_episode_reward": [
+      5.127,
+      3.04,
+      2.867,
+      3.538,
+      2.15,
+      1.924
     ],
     "max_episode_reward": [
+      5.315,
+      3.303,
+      3.016,
+      3.837,
+      2.807,
+      2.609
     ],
     "min_episode_reward": [
+      4.96,
+      2.6,
+      2.555,
+      3.338,
+      1.587,
+      1.375
     ],
     "avg_grader": [
+      0.9498,
+      0.259,
+      0.2083,
+      0.8697,
+      0.3763,
+      0.2855
     ],
     "max_grader": [
+      1.0,
+      0.3614,
+      0.3042,
+      1.0,
+      0.5979,
+      0.5027
     ],
     "n_training_samples": [
+      81,
+      96,
+      102,
+      77,
+      90,
+      76
     ],
     "train_loss": [
+      2.833,
+      3.1413,
+      3.1255,
+      2.8381,
+      2.9281,
+      2.9184
     ]
   }
 }

run-output/training/train_grpo.executed.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff