File size: 1,035 Bytes
302be2b
e955a2d
e52d302
17149c8
e52d302
e955a2d
302be2b
17149c8
 
 
302be2b
 
17149c8
 
 
302be2b
 
e52d302
 
 
302be2b
 
17149c8
 
 
302be2b
 
17149c8
 
 
 
 
 
 
 
 
 
 
302be2b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
{
  "model": "Qwen/Qwen2.5-3B-Instruct",
  "training": "Two-phase LoRA SFT (timing -> content) with hardcoded peak-hours hint on round 1 of each phase",
  "phases": [],
  "rounds_per_phase": 3,
  "episodes_per_round": 6,
  "before": {
    "monthly_engage": 1.0,
    "monthly_strategic": 0.8357,
    "monthly_competitive": 0.9414
  },
  "after": {
    "monthly_engage": 0.999,
    "monthly_strategic": 0.9321439559505211,
    "monthly_competitive": 0.999
  },
  "smart_heuristic": {
    "monthly_engage": 0.7519,
    "monthly_strategic": 0.9101,
    "monthly_competitive": 0.9141
  },
  "improvement": {
    "monthly_engage": -0.0010000000000000009,
    "monthly_strategic": 0.09644395595052113,
    "monthly_competitive": 0.057599999999999985
  },
  "training_log": {
    "phase": [],
    "round": [],
    "global_step": [],
    "use_hint": [],
    "avg_episode_reward": [],
    "max_episode_reward": [],
    "min_episode_reward": [],
    "avg_grader": [],
    "max_grader": [],
    "n_training_samples": [],
    "train_loss": []
  }
}