Spaces:
Paused
Paused
Upload folder using huggingface_hub
Browse files
run-output/plots/io_log.jsonl
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
run-output/plots/training_log.csv
CHANGED
|
@@ -1,3 +1,7 @@
|
|
| 1 |
-
round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
|
| 2 |
-
1,
|
| 3 |
-
2,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
phase,round,global_step,use_hint,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
|
| 2 |
+
phase1_timing,1,1,True,5.127,5.315,4.96,0.9498,1.0,81,2.833
|
| 3 |
+
phase1_timing,2,2,False,3.04,3.303,2.6,0.259,0.3614,96,3.1413
|
| 4 |
+
phase1_timing,3,3,False,2.867,3.016,2.555,0.2083,0.3042,102,3.1255
|
| 5 |
+
phase2_content,1,4,True,3.538,3.837,3.338,0.8697,1.0,77,2.8381
|
| 6 |
+
phase2_content,2,5,False,2.15,2.807,1.587,0.3763,0.5979,90,2.9281
|
| 7 |
+
phase2_content,3,6,False,1.924,2.609,1.375,0.2855,0.5027,76,2.9184
|
run-output/plots/training_summary.json
CHANGED
|
@@ -1,60 +1,120 @@
|
|
| 1 |
{
|
| 2 |
"model": "Qwen/Qwen2.5-3B-Instruct",
|
| 3 |
-
"training": "LoRA SFT (
|
| 4 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"episodes_per_round": 6,
|
| 6 |
"before": {
|
| 7 |
-
"monthly_engage":
|
| 8 |
-
"monthly_strategic": 0.
|
| 9 |
-
"monthly_competitive": 0.
|
| 10 |
},
|
| 11 |
"after": {
|
| 12 |
-
"monthly_engage":
|
| 13 |
-
"monthly_strategic": 0.
|
| 14 |
-
"monthly_competitive": 0.
|
| 15 |
},
|
| 16 |
"smart_heuristic": {
|
| 17 |
-
"monthly_engage": 0.
|
| 18 |
-
"monthly_strategic": 0.
|
| 19 |
-
"monthly_competitive": 0.
|
| 20 |
},
|
| 21 |
"improvement": {
|
| 22 |
"monthly_engage": 0.0,
|
| 23 |
-
"monthly_strategic":
|
| 24 |
-
"monthly_competitive": 0.
|
| 25 |
},
|
| 26 |
"training_log": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
"round": [
|
| 28 |
1,
|
| 29 |
-
2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
],
|
| 31 |
"avg_episode_reward": [
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
],
|
| 35 |
"max_episode_reward": [
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
],
|
| 39 |
"min_episode_reward": [
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
],
|
| 43 |
"avg_grader": [
|
| 44 |
-
0.
|
| 45 |
-
0.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
],
|
| 47 |
"max_grader": [
|
| 48 |
-
|
| 49 |
-
0.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
],
|
| 51 |
"n_training_samples": [
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
],
|
| 55 |
"train_loss": [
|
| 56 |
-
2.
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
]
|
| 59 |
}
|
| 60 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"model": "Qwen/Qwen2.5-3B-Instruct",
|
| 3 |
+
"training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
|
| 4 |
+
"phases": [
|
| 5 |
+
"phase1_timing",
|
| 6 |
+
"phase2_content"
|
| 7 |
+
],
|
| 8 |
+
"rounds_per_phase": 3,
|
| 9 |
"episodes_per_round": 6,
|
| 10 |
"before": {
|
| 11 |
+
"monthly_engage": 0.0,
|
| 12 |
+
"monthly_strategic": 0.175,
|
| 13 |
+
"monthly_competitive": 0.035
|
| 14 |
},
|
| 15 |
"after": {
|
| 16 |
+
"monthly_engage": 0.0,
|
| 17 |
+
"monthly_strategic": 0.175,
|
| 18 |
+
"monthly_competitive": 0.035
|
| 19 |
},
|
| 20 |
"smart_heuristic": {
|
| 21 |
+
"monthly_engage": 0.7519,
|
| 22 |
+
"monthly_strategic": 0.9101,
|
| 23 |
+
"monthly_competitive": 0.9141
|
| 24 |
},
|
| 25 |
"improvement": {
|
| 26 |
"monthly_engage": 0.0,
|
| 27 |
+
"monthly_strategic": 0.0,
|
| 28 |
+
"monthly_competitive": 0.0
|
| 29 |
},
|
| 30 |
"training_log": {
|
| 31 |
+
"phase": [
|
| 32 |
+
"phase1_timing",
|
| 33 |
+
"phase1_timing",
|
| 34 |
+
"phase1_timing",
|
| 35 |
+
"phase2_content",
|
| 36 |
+
"phase2_content",
|
| 37 |
+
"phase2_content"
|
| 38 |
+
],
|
| 39 |
"round": [
|
| 40 |
1,
|
| 41 |
+
2,
|
| 42 |
+
3,
|
| 43 |
+
1,
|
| 44 |
+
2,
|
| 45 |
+
3
|
| 46 |
+
],
|
| 47 |
+
"global_step": [
|
| 48 |
+
1,
|
| 49 |
+
2,
|
| 50 |
+
3,
|
| 51 |
+
4,
|
| 52 |
+
5,
|
| 53 |
+
6
|
| 54 |
+
],
|
| 55 |
+
"use_hint": [
|
| 56 |
+
true,
|
| 57 |
+
false,
|
| 58 |
+
false,
|
| 59 |
+
true,
|
| 60 |
+
false,
|
| 61 |
+
false
|
| 62 |
],
|
| 63 |
"avg_episode_reward": [
|
| 64 |
+
5.127,
|
| 65 |
+
3.04,
|
| 66 |
+
2.867,
|
| 67 |
+
3.538,
|
| 68 |
+
2.15,
|
| 69 |
+
1.924
|
| 70 |
],
|
| 71 |
"max_episode_reward": [
|
| 72 |
+
5.315,
|
| 73 |
+
3.303,
|
| 74 |
+
3.016,
|
| 75 |
+
3.837,
|
| 76 |
+
2.807,
|
| 77 |
+
2.609
|
| 78 |
],
|
| 79 |
"min_episode_reward": [
|
| 80 |
+
4.96,
|
| 81 |
+
2.6,
|
| 82 |
+
2.555,
|
| 83 |
+
3.338,
|
| 84 |
+
1.587,
|
| 85 |
+
1.375
|
| 86 |
],
|
| 87 |
"avg_grader": [
|
| 88 |
+
0.9498,
|
| 89 |
+
0.259,
|
| 90 |
+
0.2083,
|
| 91 |
+
0.8697,
|
| 92 |
+
0.3763,
|
| 93 |
+
0.2855
|
| 94 |
],
|
| 95 |
"max_grader": [
|
| 96 |
+
1.0,
|
| 97 |
+
0.3614,
|
| 98 |
+
0.3042,
|
| 99 |
+
1.0,
|
| 100 |
+
0.5979,
|
| 101 |
+
0.5027
|
| 102 |
],
|
| 103 |
"n_training_samples": [
|
| 104 |
+
81,
|
| 105 |
+
96,
|
| 106 |
+
102,
|
| 107 |
+
77,
|
| 108 |
+
90,
|
| 109 |
+
76
|
| 110 |
],
|
| 111 |
"train_loss": [
|
| 112 |
+
2.833,
|
| 113 |
+
3.1413,
|
| 114 |
+
3.1255,
|
| 115 |
+
2.8381,
|
| 116 |
+
2.9281,
|
| 117 |
+
2.9184
|
| 118 |
]
|
| 119 |
}
|
| 120 |
}
|
run-output/training/train_grpo.executed.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|