File size: 997 Bytes
0813516
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
{
  "model": "Qwen/Qwen2.5-1.5B-Instruct",
  "training": "LoRA SFT (real weight updates)",
  "rounds": 1,
  "episodes_per_round": 1,
  "before": {
    "monthly_engage": 0.3048,
    "monthly_strategic": 0.3456,
    "monthly_competitive": 0.4808
  },
  "after": {
    "monthly_engage": 0.0162,
    "monthly_strategic": 0.1749,
    "monthly_competitive": 0.3621
  },
  "smart_heuristic": {
    "monthly_engage": 0.6342,
    "monthly_strategic": 0.7218,
    "monthly_competitive": 0.8315
  },
  "improvement": {
    "monthly_engage": -0.2886,
    "monthly_strategic": -0.17070000000000002,
    "monthly_competitive": -0.11870000000000003
  },
  "training_log": {
    "round": [
      1
    ],
    "avg_episode_reward": [
      1.593
    ],
    "max_episode_reward": [
      1.593
    ],
    "min_episode_reward": [
      1.593
    ],
    "avg_grader": [
      0.0268
    ],
    "max_grader": [
      0.0268
    ],
    "n_training_samples": [
      4
    ],
    "train_loss": [
      2.3314
    ]
  }
}