vaibhav12332112312 commited on
Commit
21edd7d
·
2 Parent(s): a6b8df0302be2b

Merge branch 'main' of https://huggingface.co/spaces/vaibhavkhandare/train-bhai-train

Browse files
run-output/plots/.gitkeep ADDED
File without changes
run-output/plots/training_log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
+ 1,1.593,1.593,1.593,0.0268,0.0268,4,2.3314
run-output/plots/training_summary.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "Qwen/Qwen2.5-1.5B-Instruct",
3
+ "training": "LoRA SFT (real weight updates)",
4
+ "rounds": 1,
5
+ "episodes_per_round": 1,
6
+ "before": {
7
+ "monthly_engage": 0.3048,
8
+ "monthly_strategic": 0.3456,
9
+ "monthly_competitive": 0.4808
10
+ },
11
+ "after": {
12
+ "monthly_engage": 0.0162,
13
+ "monthly_strategic": 0.1749,
14
+ "monthly_competitive": 0.3621
15
+ },
16
+ "smart_heuristic": {
17
+ "monthly_engage": 0.6342,
18
+ "monthly_strategic": 0.7218,
19
+ "monthly_competitive": 0.8315
20
+ },
21
+ "improvement": {
22
+ "monthly_engage": -0.2886,
23
+ "monthly_strategic": -0.17070000000000002,
24
+ "monthly_competitive": -0.11870000000000003
25
+ },
26
+ "training_log": {
27
+ "round": [
28
+ 1
29
+ ],
30
+ "avg_episode_reward": [
31
+ 1.593
32
+ ],
33
+ "max_episode_reward": [
34
+ 1.593
35
+ ],
36
+ "min_episode_reward": [
37
+ 1.593
38
+ ],
39
+ "avg_grader": [
40
+ 0.0268
41
+ ],
42
+ "max_grader": [
43
+ 0.0268
44
+ ],
45
+ "n_training_samples": [
46
+ 4
47
+ ],
48
+ "train_loss": [
49
+ 2.3314
50
+ ]
51
+ }
52
+ }
run-output/training/train_grpo.executed.ipynb ADDED
The diff for this file is too large to render. See raw diff