Spaces:

ycwhencpp
/

final-iteration

Paused

vaibhavkhandare committed 12 days ago

Commit

302be2b

verified ·

1 Parent(s): 4bfe286

Upload folder using huggingface_hub

Files changed (4) hide show

run-output/plots/.gitkeep ADDED Viewed

File without changes

run-output/plots/training_log.csv ADDED Viewed


1	+ round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2	+ 1,1.593,1.593,1.593,0.0268,0.0268,4,2.3314

run-output/plots/training_summary.json ADDED Viewed

+{
+  "model": "Qwen/Qwen2.5-1.5B-Instruct",
+  "training": "LoRA SFT (real weight updates)",
+  "rounds": 1,
+  "episodes_per_round": 1,
+  "before": {
+    "monthly_engage": 0.3048,
+    "monthly_strategic": 0.3456,
+    "monthly_competitive": 0.4808
+  },
+  "after": {
+    "monthly_engage": 0.0162,
+    "monthly_strategic": 0.1749,
+    "monthly_competitive": 0.3621
+  },
+  "smart_heuristic": {
+    "monthly_engage": 0.6342,
+    "monthly_strategic": 0.7218,
+    "monthly_competitive": 0.8315
+  },
+  "improvement": {
+    "monthly_engage": -0.2886,
+    "monthly_strategic": -0.17070000000000002,
+    "monthly_competitive": -0.11870000000000003
+  },
+  "training_log": {
+    "round": [
+      1
+    ],
+    "avg_episode_reward": [
+      1.593
+    ],
+    "max_episode_reward": [
+      1.593
+    ],
+    "min_episode_reward": [
+      1.593
+    ],
+    "avg_grader": [
+      0.0268
+    ],
+    "max_grader": [
+      0.0268
+    ],
+    "n_training_samples": [
+      4
+    ],
+    "train_loss": [
+      2.3314
+    ]
+  }
+}

run-output/training/train_grpo.executed.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff