vaibhav12332112312 committed on
Commit
e299415
·
2 Parent(s): 30614d3 1dc66ef

Merge HF run-output upload

Browse files
run-output/plots/io_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
run-output/plots/training_log.csv CHANGED
@@ -1,2 +1 @@
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
- 1,3.138,3.62,2.504,0.3196,0.4857,46,2.4383
 
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
 
run-output/plots/training_summary.json CHANGED
@@ -4,14 +4,14 @@
4
  "rounds": 1,
5
  "episodes_per_round": 6,
6
  "before": {
7
- "monthly_engage": 0.0709,
8
- "monthly_strategic": 0.334,
9
- "monthly_competitive": 0.5095
10
  },
11
  "after": {
12
- "monthly_engage": 0.1193,
13
- "monthly_strategic": 0.1942,
14
- "monthly_competitive": 0.3471
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
@@ -19,34 +19,18 @@
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
- "monthly_engage": 0.0484,
23
- "monthly_strategic": -0.1398,
24
- "monthly_competitive": -0.16239999999999993
25
  },
26
  "training_log": {
27
- "round": [
28
- 1
29
- ],
30
- "avg_episode_reward": [
31
- 3.138
32
- ],
33
- "max_episode_reward": [
34
- 3.62
35
- ],
36
- "min_episode_reward": [
37
- 2.504
38
- ],
39
- "avg_grader": [
40
- 0.3196
41
- ],
42
- "max_grader": [
43
- 0.4857
44
- ],
45
- "n_training_samples": [
46
- 46
47
- ],
48
- "train_loss": [
49
- 2.4383
50
- ]
51
  }
52
  }
 
4
  "rounds": 1,
5
  "episodes_per_round": 6,
6
  "before": {
7
+ "monthly_engage": 0.1026,
8
+ "monthly_strategic": 0.2396,
9
+ "monthly_competitive": 0.2967
10
  },
11
  "after": {
12
+ "monthly_engage": 0.144,
13
+ "monthly_strategic": 0.2124,
14
+ "monthly_competitive": 0.3122
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
 
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
+ "monthly_engage": 0.04139999999999999,
23
+ "monthly_strategic": -0.027200000000000002,
24
+ "monthly_competitive": 0.015499999999999958
25
  },
26
  "training_log": {
27
+ "round": [],
28
+ "avg_episode_reward": [],
29
+ "max_episode_reward": [],
30
+ "min_episode_reward": [],
31
+ "avg_grader": [],
32
+ "max_grader": [],
33
+ "n_training_samples": [],
34
+ "train_loss": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  }
36
  }
run-output/training/train_grpo.executed.ipynb CHANGED
The diff for this file is too large to render. See raw diff