vaibhavkhandare committed on
Commit
e955a2d
·
verified ·
1 Parent(s): f7b5241

Upload folder using huggingface_hub

Browse files
run-output/plots/training_log.csv CHANGED
@@ -1,2 +1,5 @@
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
- 1,1.593,1.593,1.593,0.0268,0.0268,4,2.3314
 
 
 
 
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
+ 1,3.154,4.348,2.316,0.3017,0.647,38,2.6893
3
+ 2,2.855,3.71,2.249,0.2058,0.5125,37,2.554
4
+ 3,2.969,4.14,2.25,0.2041,0.4656,45,2.0757
5
+ 4,3.294,4.527,2.32,0.2793,0.5243,45,1.9805
run-output/plots/training_summary.json CHANGED
@@ -1,52 +1,76 @@
1
  {
2
- "model": "Qwen/Qwen2.5-1.5B-Instruct",
3
  "training": "LoRA SFT (real weight updates)",
4
- "rounds": 1,
5
- "episodes_per_round": 1,
6
  "before": {
7
- "monthly_engage": 0.3048,
8
- "monthly_strategic": 0.3456,
9
- "monthly_competitive": 0.4808
10
  },
11
  "after": {
12
- "monthly_engage": 0.0162,
13
- "monthly_strategic": 0.1749,
14
- "monthly_competitive": 0.3621
15
  },
16
  "smart_heuristic": {
17
- "monthly_engage": 0.6342,
18
- "monthly_strategic": 0.7218,
19
- "monthly_competitive": 0.8315
20
  },
21
  "improvement": {
22
- "monthly_engage": -0.2886,
23
- "monthly_strategic": -0.17070000000000002,
24
- "monthly_competitive": -0.11870000000000003
25
  },
26
  "training_log": {
27
  "round": [
28
- 1
 
 
 
29
  ],
30
  "avg_episode_reward": [
31
- 1.593
 
 
 
32
  ],
33
  "max_episode_reward": [
34
- 1.593
 
 
 
35
  ],
36
  "min_episode_reward": [
37
- 1.593
 
 
 
38
  ],
39
  "avg_grader": [
40
- 0.0268
 
 
 
41
  ],
42
  "max_grader": [
43
- 0.0268
 
 
 
44
  ],
45
  "n_training_samples": [
46
- 4
 
 
 
47
  ],
48
  "train_loss": [
49
- 2.3314
 
 
 
50
  ]
51
  }
52
  }
 
1
  {
2
+ "model": "Qwen/Qwen2.5-3B-Instruct",
3
  "training": "LoRA SFT (real weight updates)",
4
+ "rounds": 4,
5
+ "episodes_per_round": 6,
6
  "before": {
7
+ "monthly_engage": 0.5642,
8
+ "monthly_strategic": 0.5903,
9
+ "monthly_competitive": 0.8313
10
  },
11
  "after": {
12
+ "monthly_engage": 0.1071,
13
+ "monthly_strategic": 0.3174,
14
+ "monthly_competitive": 0.5233
15
  },
16
  "smart_heuristic": {
17
+ "monthly_engage": 0.7352,
18
+ "monthly_strategic": 0.9043,
19
+ "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
+ "monthly_engage": -0.45710000000000006,
23
+ "monthly_strategic": -0.27290000000000003,
24
+ "monthly_competitive": -0.30800000000000005
25
  },
26
  "training_log": {
27
  "round": [
28
+ 1,
29
+ 2,
30
+ 3,
31
+ 4
32
  ],
33
  "avg_episode_reward": [
34
+ 3.154,
35
+ 2.855,
36
+ 2.969,
37
+ 3.294
38
  ],
39
  "max_episode_reward": [
40
+ 4.348,
41
+ 3.71,
42
+ 4.14,
43
+ 4.527
44
  ],
45
  "min_episode_reward": [
46
+ 2.316,
47
+ 2.249,
48
+ 2.25,
49
+ 2.32
50
  ],
51
  "avg_grader": [
52
+ 0.3017,
53
+ 0.2058,
54
+ 0.2041,
55
+ 0.2793
56
  ],
57
  "max_grader": [
58
+ 0.647,
59
+ 0.5125,
60
+ 0.4656,
61
+ 0.5243
62
  ],
63
  "n_training_samples": [
64
+ 38,
65
+ 37,
66
+ 45,
67
+ 45
68
  ],
69
  "train_loss": [
70
+ 2.6893,
71
+ 2.554,
72
+ 2.0757,
73
+ 1.9805
74
  ]
75
  }
76
  }
run-output/training/train_grpo.executed.ipynb CHANGED
The diff for this file is too large to render. See raw diff