vaibhavkhandare committed on
Commit
3419724
·
verified ·
1 Parent(s): e299415

Upload folder using huggingface_hub

Browse files
run-output/plots/io_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
run-output/plots/training_log.csv CHANGED
@@ -1 +1,3 @@
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
 
 
 
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
+ 1,3.904,4.514,3.287,0.6202,0.8268,101,2.6723
3
+ 2,4.215,4.658,3.566,0.7325,0.8703,102,2.5934
run-output/plots/training_summary.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "model": "Qwen/Qwen2.5-3B-Instruct",
3
  "training": "LoRA SFT (real weight updates)",
4
- "rounds": 1,
5
  "episodes_per_round": 6,
6
  "before": {
7
- "monthly_engage": 0.1026,
8
- "monthly_strategic": 0.2396,
9
- "monthly_competitive": 0.2967
10
  },
11
  "after": {
12
- "monthly_engage": 0.144,
13
- "monthly_strategic": 0.2124,
14
- "monthly_competitive": 0.3122
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
@@ -19,18 +19,42 @@
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
- "monthly_engage": 0.04139999999999999,
23
- "monthly_strategic": -0.027200000000000002,
24
- "monthly_competitive": 0.015499999999999958
25
  },
26
  "training_log": {
27
- "round": [],
28
- "avg_episode_reward": [],
29
- "max_episode_reward": [],
30
- "min_episode_reward": [],
31
- "avg_grader": [],
32
- "max_grader": [],
33
- "n_training_samples": [],
34
- "train_loss": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
  }
 
1
  {
2
  "model": "Qwen/Qwen2.5-3B-Instruct",
3
  "training": "LoRA SFT (real weight updates)",
4
+ "rounds": 2,
5
  "episodes_per_round": 6,
6
  "before": {
7
+ "monthly_engage": 1.0,
8
+ "monthly_strategic": 0.8426,
9
+ "monthly_competitive": 0.9521
10
  },
11
  "after": {
12
+ "monthly_engage": 1.0,
13
+ "monthly_strategic": 0.8416,
14
+ "monthly_competitive": 0.964
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
 
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
+ "monthly_engage": 0.0,
23
+ "monthly_strategic": -0.0010000000000000009,
24
+ "monthly_competitive": 0.011900000000000022
25
  },
26
  "training_log": {
27
+ "round": [
28
+ 1,
29
+ 2
30
+ ],
31
+ "avg_episode_reward": [
32
+ 3.904,
33
+ 4.215
34
+ ],
35
+ "max_episode_reward": [
36
+ 4.514,
37
+ 4.658
38
+ ],
39
+ "min_episode_reward": [
40
+ 3.287,
41
+ 3.566
42
+ ],
43
+ "avg_grader": [
44
+ 0.6202,
45
+ 0.7325
46
+ ],
47
+ "max_grader": [
48
+ 0.8268,
49
+ 0.8703
50
+ ],
51
+ "n_training_samples": [
52
+ 101,
53
+ 102
54
+ ],
55
+ "train_loss": [
56
+ 2.6723,
57
+ 2.5934
58
+ ]
59
  }
60
  }
run-output/training/train_grpo.executed.ipynb CHANGED
The diff for this file is too large to render. See raw diff