Timusgeorge committed on
Commit
4ce33b7
·
verified ·
1 Parent(s): d18ed86

Fix graph visibility: bigger text, higher contrast, boxed annotations

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -43,36 +43,37 @@ def make_reward_plot():
43
  w = 10
44
  avg = [float(np.mean(REWARDS_200[max(0,i-w+1):i+1])) for i in range(200)]
45
 
46
- fig, ax = plt.subplots(figsize=(14, 5), facecolor='#0a0e17')
47
- ax.set_facecolor('#0f1520')
48
- ax.tick_params(colors='#8b949e', labelsize=9)
49
- for s in ax.spines.values(): s.set_color('#1e2a3a')
50
- ax.grid(True, alpha=0.1, color='#58a6ff')
51
 
52
- ax.fill_between(STEPS, REWARDS_200, alpha=0.12, color='#58a6ff')
53
- ax.plot(STEPS, REWARDS_200, '-', color='#58a6ff', linewidth=0.8, alpha=0.5, label='Step Reward')
54
- ax.plot(STEPS, avg, '-', color='#f0883e', linewidth=2.5, label=f'Running Avg (w={w})')
55
 
56
  # Phase bands
57
- ax.axvspan(1, 120, alpha=0.03, color='#3fb950')
58
- ax.axvspan(120, 170, alpha=0.03, color='#f0883e')
59
- ax.axvspan(170, 200, alpha=0.03, color='#f85149')
60
- ax.text(60, 0.02, 'WARM-UP', color='#3fb950', fontsize=9, ha='center', alpha=0.6, fontweight='bold')
61
- ax.text(145, 0.02, 'SCALING', color='#f0883e', fontsize=9, ha='center', alpha=0.6, fontweight='bold')
62
- ax.text(185, 0.02, 'HARD', color='#f85149', fontsize=9, ha='center', alpha=0.6, fontweight='bold')
63
 
64
  # Peak annotation
65
  peak_i = int(np.argmax(REWARDS_200))
66
  ax.annotate(f'Peak: {REWARDS_200[peak_i]:.3f}', xy=(STEPS[peak_i], REWARDS_200[peak_i]),
67
- xytext=(STEPS[peak_i]-30, REWARDS_200[peak_i]+0.05),
68
- arrowprops=dict(arrowstyle='->', color='#f85149', lw=1.5),
69
- fontsize=11, fontweight='bold', color='#f85149')
 
70
 
71
- ax.set_xlabel('Training Step', color='#8b949e', fontsize=11)
72
- ax.set_ylabel('Mean Reward', color='#8b949e', fontsize=11)
73
  ax.set_title('GRPO 200-Step Reward Curve — Qwen2.5-3B-Instruct | 4-bit QLoRA | Tesla T4',
74
- color='#f0f6fc', fontsize=12, fontweight='bold', pad=10)
75
- ax.legend(fontsize=9, facecolor='#161b22', edgecolor='#30363d', labelcolor='#c9d1d9')
76
  ax.set_xlim(0.5, 200.5)
77
  plt.tight_layout()
78
  return fig
@@ -82,11 +83,11 @@ def make_comparison_plot():
82
  import matplotlib; matplotlib.use('Agg')
83
  import matplotlib.pyplot as plt
84
 
85
- fig, ax = plt.subplots(figsize=(10, 5), facecolor='#0a0e17')
86
- ax.set_facecolor('#0f1520')
87
- ax.tick_params(colors='#8b949e', labelsize=10)
88
- for s in ax.spines.values(): s.set_color('#1e2a3a')
89
- ax.grid(True, alpha=0.1, color='#58a6ff', axis='y')
90
 
91
  diffs = ['Easy', 'Medium', 'Hard', 'Overall']
92
  base = [0.087, 0.018, 0.015, 0.040]
@@ -94,27 +95,28 @@ def make_comparison_plot():
94
  x = np.arange(4)
95
  w = 0.35
96
 
97
- b1 = ax.bar(x - w/2, base, w, label='Base Model', color='#f85149', alpha=0.8)
98
- b2 = ax.bar(x + w/2, trained, w, label='GRPO-Trained', color='#3fb950', alpha=0.8)
99
 
100
  for bar in b1:
101
- ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.005, f'{bar.get_height():.3f}',
102
- ha='center', fontsize=9, color='#f85149')
103
  for bar in b2:
104
- ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.005, f'{bar.get_height():.3f}',
105
- ha='center', fontsize=9, color='#3fb950')
106
 
107
  imps = ['+230%', '+617%', '+193%', '+283%']
108
  for i, imp in enumerate(imps):
109
- ax.text(x[i]+w/2, trained[i]+0.02, imp, ha='center', fontsize=8, color='#f0883e', fontweight='bold')
 
110
 
111
  ax.set_xticks(x)
112
- ax.set_xticklabels(diffs, color='#c9d1d9')
113
- ax.set_ylabel('Episode Score', color='#8b949e', fontsize=11)
114
  ax.set_title('Base vs GRPO-Trained — Post-Training Evaluation (5 seeds × 3 difficulties)',
115
- color='#f0f6fc', fontsize=12, fontweight='bold', pad=10)
116
- ax.legend(fontsize=10, facecolor='#161b22', edgecolor='#30363d', labelcolor='#c9d1d9')
117
- ax.set_ylim(0, 0.35)
118
  plt.tight_layout()
119
  return fig
120
 
 
43
  w = 10
44
  avg = [float(np.mean(REWARDS_200[max(0,i-w+1):i+1])) for i in range(200)]
45
 
46
+ fig, ax = plt.subplots(figsize=(14, 6), facecolor='#0d1117')
47
+ ax.set_facecolor('#161b22')
48
+ ax.tick_params(colors='#c9d1d9', labelsize=11)
49
+ for s in ax.spines.values(): s.set_color('#30363d')
50
+ ax.grid(True, alpha=0.15, color='#58a6ff')
51
 
52
+ ax.fill_between(STEPS, REWARDS_200, alpha=0.18, color='#58a6ff')
53
+ ax.plot(STEPS, REWARDS_200, '-', color='#58a6ff', linewidth=1.0, alpha=0.6, label='Step Reward')
54
+ ax.plot(STEPS, avg, '-', color='#f0883e', linewidth=3, label=f'Running Avg (w={w})')
55
 
56
  # Phase bands
57
+ ax.axvspan(1, 120, alpha=0.06, color='#3fb950')
58
+ ax.axvspan(120, 170, alpha=0.06, color='#f0883e')
59
+ ax.axvspan(170, 200, alpha=0.06, color='#f85149')
60
+ ax.text(60, 0.02, 'WARM-UP', color='#3fb950', fontsize=12, ha='center', alpha=0.9, fontweight='bold')
61
+ ax.text(145, 0.02, 'SCALING', color='#f0883e', fontsize=12, ha='center', alpha=0.9, fontweight='bold')
62
+ ax.text(185, 0.02, 'HARD', color='#f85149', fontsize=12, ha='center', alpha=0.9, fontweight='bold')
63
 
64
  # Peak annotation
65
  peak_i = int(np.argmax(REWARDS_200))
66
  ax.annotate(f'Peak: {REWARDS_200[peak_i]:.3f}', xy=(STEPS[peak_i], REWARDS_200[peak_i]),
67
+ xytext=(STEPS[peak_i]-40, REWARDS_200[peak_i]+0.08),
68
+ arrowprops=dict(arrowstyle='->', color='#ff7b72', lw=2),
69
+ fontsize=13, fontweight='bold', color='#ff7b72',
70
+ bbox=dict(boxstyle='round,pad=0.3', facecolor='#21262d', edgecolor='#ff7b72', alpha=0.9))
71
 
72
+ ax.set_xlabel('Training Step', color='#c9d1d9', fontsize=13)
73
+ ax.set_ylabel('Mean Reward', color='#c9d1d9', fontsize=13)
74
  ax.set_title('GRPO 200-Step Reward Curve — Qwen2.5-3B-Instruct | 4-bit QLoRA | Tesla T4',
75
+ color='#f0f6fc', fontsize=14, fontweight='bold', pad=12)
76
+ ax.legend(fontsize=11, facecolor='#21262d', edgecolor='#30363d', labelcolor='#f0f6fc')
77
  ax.set_xlim(0.5, 200.5)
78
  plt.tight_layout()
79
  return fig
 
83
  import matplotlib; matplotlib.use('Agg')
84
  import matplotlib.pyplot as plt
85
 
86
+ fig, ax = plt.subplots(figsize=(10, 6), facecolor='#0d1117')
87
+ ax.set_facecolor('#161b22')
88
+ ax.tick_params(colors='#c9d1d9', labelsize=11)
89
+ for s in ax.spines.values(): s.set_color('#30363d')
90
+ ax.grid(True, alpha=0.15, color='#58a6ff', axis='y')
91
 
92
  diffs = ['Easy', 'Medium', 'Hard', 'Overall']
93
  base = [0.087, 0.018, 0.015, 0.040]
 
95
  x = np.arange(4)
96
  w = 0.35
97
 
98
+ b1 = ax.bar(x - w/2, base, w, label='Base Model', color='#f85149', alpha=0.9, edgecolor='#ff7b72', linewidth=0.5)
99
+ b2 = ax.bar(x + w/2, trained, w, label='GRPO-Trained', color='#3fb950', alpha=0.9, edgecolor='#56d364', linewidth=0.5)
100
 
101
  for bar in b1:
102
+ ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.006, f'{bar.get_height():.3f}',
103
+ ha='center', fontsize=11, color='#ff7b72', fontweight='bold')
104
  for bar in b2:
105
+ ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.006, f'{bar.get_height():.3f}',
106
+ ha='center', fontsize=11, color='#56d364', fontweight='bold')
107
 
108
  imps = ['+230%', '+617%', '+193%', '+283%']
109
  for i, imp in enumerate(imps):
110
+ ax.text(x[i]+w/2, trained[i]+0.025, imp, ha='center', fontsize=10, color='#f0883e', fontweight='bold',
111
+ bbox=dict(boxstyle='round,pad=0.2', facecolor='#21262d', edgecolor='#f0883e', alpha=0.8))
112
 
113
  ax.set_xticks(x)
114
+ ax.set_xticklabels(diffs, color='#f0f6fc', fontsize=12, fontweight='bold')
115
+ ax.set_ylabel('Episode Score', color='#c9d1d9', fontsize=13)
116
  ax.set_title('Base vs GRPO-Trained — Post-Training Evaluation (5 seeds × 3 difficulties)',
117
+ color='#f0f6fc', fontsize=14, fontweight='bold', pad=12)
118
+ ax.legend(fontsize=11, facecolor='#21262d', edgecolor='#30363d', labelcolor='#f0f6fc')
119
+ ax.set_ylim(0, 0.38)
120
  plt.tight_layout()
121
  return fig
122