vaibhavkhandare committed on
Commit
9fac734
·
verified ·
1 Parent(s): 271bf42

Upload folder using huggingface_hub

Browse files
run-output/plots/io_log.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
run-output/plots/training_log.csv CHANGED
@@ -1,5 +1,2 @@
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
- 1,3.463,4.232,2.793,0.3947,0.6341,44,2.4064
3
- 2,3.072,3.802,2.25,0.2737,0.5068,48,2.434
4
- 3,3.469,3.956,2.979,0.3738,0.574,41,2.4042
5
- 4,3.316,3.517,3.073,0.3453,0.4575,47,2.4202
 
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
+ 1,3.138,3.62,2.504,0.3196,0.4857,46,2.4383
 
 
 
run-output/plots/training_summary.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "model": "Qwen/Qwen2.5-3B-Instruct",
3
  "training": "LoRA SFT (real weight updates)",
4
- "rounds": 4,
5
  "episodes_per_round": 6,
6
  "before": {
7
- "monthly_engage": 0.0,
8
- "monthly_strategic": 0.174,
9
- "monthly_competitive": 0.028
10
  },
11
  "after": {
12
- "monthly_engage": 0.0,
13
- "monthly_strategic": 0.1744,
14
- "monthly_competitive": 0.028
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
@@ -19,58 +19,34 @@
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
- "monthly_engage": 0.0,
23
- "monthly_strategic": 0.00040000000000001146,
24
- "monthly_competitive": 0.0
25
  },
26
  "training_log": {
27
  "round": [
28
- 1,
29
- 2,
30
- 3,
31
- 4
32
  ],
33
  "avg_episode_reward": [
34
- 3.463,
35
- 3.072,
36
- 3.469,
37
- 3.316
38
  ],
39
  "max_episode_reward": [
40
- 4.232,
41
- 3.802,
42
- 3.956,
43
- 3.517
44
  ],
45
  "min_episode_reward": [
46
- 2.793,
47
- 2.25,
48
- 2.979,
49
- 3.073
50
  ],
51
  "avg_grader": [
52
- 0.3947,
53
- 0.2737,
54
- 0.3738,
55
- 0.3453
56
  ],
57
  "max_grader": [
58
- 0.6341,
59
- 0.5068,
60
- 0.574,
61
- 0.4575
62
  ],
63
  "n_training_samples": [
64
- 44,
65
- 48,
66
- 41,
67
- 47
68
  ],
69
  "train_loss": [
70
- 2.4064,
71
- 2.434,
72
- 2.4042,
73
- 2.4202
74
  ]
75
  }
76
  }
 
1
  {
2
  "model": "Qwen/Qwen2.5-3B-Instruct",
3
  "training": "LoRA SFT (real weight updates)",
4
+ "rounds": 1,
5
  "episodes_per_round": 6,
6
  "before": {
7
+ "monthly_engage": 0.0709,
8
+ "monthly_strategic": 0.334,
9
+ "monthly_competitive": 0.5095
10
  },
11
  "after": {
12
+ "monthly_engage": 0.1193,
13
+ "monthly_strategic": 0.1942,
14
+ "monthly_competitive": 0.3471
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
 
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
+ "monthly_engage": 0.0484,
23
+ "monthly_strategic": -0.1398,
24
+ "monthly_competitive": -0.16239999999999993
25
  },
26
  "training_log": {
27
  "round": [
28
+ 1
 
 
 
29
  ],
30
  "avg_episode_reward": [
31
+ 3.138
 
 
 
32
  ],
33
  "max_episode_reward": [
34
+ 3.62
 
 
 
35
  ],
36
  "min_episode_reward": [
37
+ 2.504
 
 
 
38
  ],
39
  "avg_grader": [
40
+ 0.3196
 
 
 
41
  ],
42
  "max_grader": [
43
+ 0.4857
 
 
 
44
  ],
45
  "n_training_samples": [
46
+ 46
 
 
 
47
  ],
48
  "train_loss": [
49
+ 2.4383
 
 
 
50
  ]
51
  }
52
  }
run-output/training/train_grpo.executed.ipynb CHANGED
The diff for this file is too large to render. See raw diff