vaibhavkhandare committed on
Commit
1d8435e
·
verified ·
1 Parent(s): e82b235

Upload folder using huggingface_hub

Browse files
run-output/plots/io_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
run-output/plots/training_log.csv CHANGED
@@ -1,3 +1,3 @@
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
- 1,3.904,4.514,3.287,0.6202,0.8268,101,2.6723
3
- 2,4.215,4.658,3.566,0.7325,0.8703,102,2.5934
 
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
+ 1,2.511,2.866,2.25,0.1072,0.2462,98,3.0041
3
+ 2,2.885,3.315,2.383,0.2398,0.4023,100,2.9678
run-output/plots/training_summary.json CHANGED
@@ -4,14 +4,14 @@
4
  "rounds": 2,
5
  "episodes_per_round": 6,
6
  "before": {
7
- "monthly_engage": 1.0,
8
- "monthly_strategic": 0.8426,
9
- "monthly_competitive": 0.9521
10
  },
11
  "after": {
12
- "monthly_engage": 1.0,
13
- "monthly_strategic": 0.8416,
14
- "monthly_competitive": 0.964
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
@@ -20,8 +20,8 @@
20
  },
21
  "improvement": {
22
  "monthly_engage": 0.0,
23
- "monthly_strategic": -0.0010000000000000009,
24
- "monthly_competitive": 0.011900000000000022
25
  },
26
  "training_log": {
27
  "round": [
@@ -29,32 +29,32 @@
29
  2
30
  ],
31
  "avg_episode_reward": [
32
- 3.904,
33
- 4.215
34
  ],
35
  "max_episode_reward": [
36
- 4.514,
37
- 4.658
38
  ],
39
  "min_episode_reward": [
40
- 3.287,
41
- 3.566
42
  ],
43
  "avg_grader": [
44
- 0.6202,
45
- 0.7325
46
  ],
47
  "max_grader": [
48
- 0.8268,
49
- 0.8703
50
  ],
51
  "n_training_samples": [
52
- 101,
53
- 102
54
  ],
55
  "train_loss": [
56
- 2.6723,
57
- 2.5934
58
  ]
59
  }
60
  }
 
4
  "rounds": 2,
5
  "episodes_per_round": 6,
6
  "before": {
7
+ "monthly_engage": 0.0,
8
+ "monthly_strategic": 0.175,
9
+ "monthly_competitive": 0.035
10
  },
11
  "after": {
12
+ "monthly_engage": 0.0,
13
+ "monthly_strategic": 0.175,
14
+ "monthly_competitive": 0.035
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
 
20
  },
21
  "improvement": {
22
  "monthly_engage": 0.0,
23
+ "monthly_strategic": 0.0,
24
+ "monthly_competitive": 0.0
25
  },
26
  "training_log": {
27
  "round": [
 
29
  2
30
  ],
31
  "avg_episode_reward": [
32
+ 2.511,
33
+ 2.885
34
  ],
35
  "max_episode_reward": [
36
+ 2.866,
37
+ 3.315
38
  ],
39
  "min_episode_reward": [
40
+ 2.25,
41
+ 2.383
42
  ],
43
  "avg_grader": [
44
+ 0.1072,
45
+ 0.2398
46
  ],
47
  "max_grader": [
48
+ 0.2462,
49
+ 0.4023
50
  ],
51
  "n_training_samples": [
52
+ 98,
53
+ 100
54
  ],
55
  "train_loss": [
56
+ 3.0041,
57
+ 2.9678
58
  ]
59
  }
60
  }
run-output/training/train_grpo.executed.ipynb CHANGED
The diff for this file is too large to render. See raw diff