Spaces:
Sleeping
Sleeping
Fix graph visibility: bigger text, higher contrast, boxed annotations
Browse files
app.py
CHANGED
|
@@ -43,36 +43,37 @@ def make_reward_plot():
|
|
| 43 |
w = 10
|
| 44 |
avg = [float(np.mean(REWARDS_200[max(0,i-w+1):i+1])) for i in range(200)]
|
| 45 |
|
| 46 |
-
fig, ax = plt.subplots(figsize=(14,
|
| 47 |
-
ax.set_facecolor('#
|
| 48 |
-
ax.tick_params(colors='#
|
| 49 |
-
for s in ax.spines.values(): s.set_color('#
|
| 50 |
-
ax.grid(True, alpha=0.
|
| 51 |
|
| 52 |
-
ax.fill_between(STEPS, REWARDS_200, alpha=0.
|
| 53 |
-
ax.plot(STEPS, REWARDS_200, '-', color='#58a6ff', linewidth=
|
| 54 |
-
ax.plot(STEPS, avg, '-', color='#f0883e', linewidth=
|
| 55 |
|
| 56 |
# Phase bands
|
| 57 |
-
ax.axvspan(1, 120, alpha=0.
|
| 58 |
-
ax.axvspan(120, 170, alpha=0.
|
| 59 |
-
ax.axvspan(170, 200, alpha=0.
|
| 60 |
-
ax.text(60, 0.02, 'WARM-UP', color='#3fb950', fontsize=
|
| 61 |
-
ax.text(145, 0.02, 'SCALING', color='#f0883e', fontsize=
|
| 62 |
-
ax.text(185, 0.02, 'HARD', color='#f85149', fontsize=
|
| 63 |
|
| 64 |
# Peak annotation
|
| 65 |
peak_i = int(np.argmax(REWARDS_200))
|
| 66 |
ax.annotate(f'Peak: {REWARDS_200[peak_i]:.3f}', xy=(STEPS[peak_i], REWARDS_200[peak_i]),
|
| 67 |
-
xytext=(STEPS[peak_i]-
|
| 68 |
-
arrowprops=dict(arrowstyle='->', color='#
|
| 69 |
-
fontsize=
|
|
|
|
| 70 |
|
| 71 |
-
ax.set_xlabel('Training Step', color='#
|
| 72 |
-
ax.set_ylabel('Mean Reward', color='#
|
| 73 |
ax.set_title('GRPO 200-Step Reward Curve — Qwen2.5-3B-Instruct | 4-bit QLoRA | Tesla T4',
|
| 74 |
-
color='#f0f6fc', fontsize=
|
| 75 |
-
ax.legend(fontsize=
|
| 76 |
ax.set_xlim(0.5, 200.5)
|
| 77 |
plt.tight_layout()
|
| 78 |
return fig
|
|
@@ -82,11 +83,11 @@ def make_comparison_plot():
|
|
| 82 |
import matplotlib; matplotlib.use('Agg')
|
| 83 |
import matplotlib.pyplot as plt
|
| 84 |
|
| 85 |
-
fig, ax = plt.subplots(figsize=(10,
|
| 86 |
-
ax.set_facecolor('#
|
| 87 |
-
ax.tick_params(colors='#
|
| 88 |
-
for s in ax.spines.values(): s.set_color('#
|
| 89 |
-
ax.grid(True, alpha=0.
|
| 90 |
|
| 91 |
diffs = ['Easy', 'Medium', 'Hard', 'Overall']
|
| 92 |
base = [0.087, 0.018, 0.015, 0.040]
|
|
@@ -94,27 +95,28 @@ def make_comparison_plot():
|
|
| 94 |
x = np.arange(4)
|
| 95 |
w = 0.35
|
| 96 |
|
| 97 |
-
b1 = ax.bar(x - w/2, base, w, label='Base Model', color='#f85149', alpha=0.
|
| 98 |
-
b2 = ax.bar(x + w/2, trained, w, label='GRPO-Trained', color='#3fb950', alpha=0.
|
| 99 |
|
| 100 |
for bar in b1:
|
| 101 |
-
ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.
|
| 102 |
-
ha='center', fontsize=
|
| 103 |
for bar in b2:
|
| 104 |
-
ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.
|
| 105 |
-
ha='center', fontsize=
|
| 106 |
|
| 107 |
imps = ['+230%', '+617%', '+193%', '+283%']
|
| 108 |
for i, imp in enumerate(imps):
|
| 109 |
-
ax.text(x[i]+w/2, trained[i]+0.
|
|
|
|
| 110 |
|
| 111 |
ax.set_xticks(x)
|
| 112 |
-
ax.set_xticklabels(diffs, color='#
|
| 113 |
-
ax.set_ylabel('Episode Score', color='#
|
| 114 |
ax.set_title('Base vs GRPO-Trained — Post-Training Evaluation (5 seeds × 3 difficulties)',
|
| 115 |
-
color='#f0f6fc', fontsize=
|
| 116 |
-
ax.legend(fontsize=
|
| 117 |
-
ax.set_ylim(0, 0.
|
| 118 |
plt.tight_layout()
|
| 119 |
return fig
|
| 120 |
|
|
|
|
| 43 |
w = 10
|
| 44 |
avg = [float(np.mean(REWARDS_200[max(0,i-w+1):i+1])) for i in range(200)]
|
| 45 |
|
| 46 |
+
fig, ax = plt.subplots(figsize=(14, 6), facecolor='#0d1117')
|
| 47 |
+
ax.set_facecolor('#161b22')
|
| 48 |
+
ax.tick_params(colors='#c9d1d9', labelsize=11)
|
| 49 |
+
for s in ax.spines.values(): s.set_color('#30363d')
|
| 50 |
+
ax.grid(True, alpha=0.15, color='#58a6ff')
|
| 51 |
|
| 52 |
+
ax.fill_between(STEPS, REWARDS_200, alpha=0.18, color='#58a6ff')
|
| 53 |
+
ax.plot(STEPS, REWARDS_200, '-', color='#58a6ff', linewidth=1.0, alpha=0.6, label='Step Reward')
|
| 54 |
+
ax.plot(STEPS, avg, '-', color='#f0883e', linewidth=3, label=f'Running Avg (w={w})')
|
| 55 |
|
| 56 |
# Phase bands
|
| 57 |
+
ax.axvspan(1, 120, alpha=0.06, color='#3fb950')
|
| 58 |
+
ax.axvspan(120, 170, alpha=0.06, color='#f0883e')
|
| 59 |
+
ax.axvspan(170, 200, alpha=0.06, color='#f85149')
|
| 60 |
+
ax.text(60, 0.02, 'WARM-UP', color='#3fb950', fontsize=12, ha='center', alpha=0.9, fontweight='bold')
|
| 61 |
+
ax.text(145, 0.02, 'SCALING', color='#f0883e', fontsize=12, ha='center', alpha=0.9, fontweight='bold')
|
| 62 |
+
ax.text(185, 0.02, 'HARD', color='#f85149', fontsize=12, ha='center', alpha=0.9, fontweight='bold')
|
| 63 |
|
| 64 |
# Peak annotation
|
| 65 |
peak_i = int(np.argmax(REWARDS_200))
|
| 66 |
ax.annotate(f'Peak: {REWARDS_200[peak_i]:.3f}', xy=(STEPS[peak_i], REWARDS_200[peak_i]),
|
| 67 |
+
xytext=(STEPS[peak_i]-40, REWARDS_200[peak_i]+0.08),
|
| 68 |
+
arrowprops=dict(arrowstyle='->', color='#ff7b72', lw=2),
|
| 69 |
+
fontsize=13, fontweight='bold', color='#ff7b72',
|
| 70 |
+
bbox=dict(boxstyle='round,pad=0.3', facecolor='#21262d', edgecolor='#ff7b72', alpha=0.9))
|
| 71 |
|
| 72 |
+
ax.set_xlabel('Training Step', color='#c9d1d9', fontsize=13)
|
| 73 |
+
ax.set_ylabel('Mean Reward', color='#c9d1d9', fontsize=13)
|
| 74 |
ax.set_title('GRPO 200-Step Reward Curve — Qwen2.5-3B-Instruct | 4-bit QLoRA | Tesla T4',
|
| 75 |
+
color='#f0f6fc', fontsize=14, fontweight='bold', pad=12)
|
| 76 |
+
ax.legend(fontsize=11, facecolor='#21262d', edgecolor='#30363d', labelcolor='#f0f6fc')
|
| 77 |
ax.set_xlim(0.5, 200.5)
|
| 78 |
plt.tight_layout()
|
| 79 |
return fig
|
|
|
|
| 83 |
import matplotlib; matplotlib.use('Agg')
|
| 84 |
import matplotlib.pyplot as plt
|
| 85 |
|
| 86 |
+
fig, ax = plt.subplots(figsize=(10, 6), facecolor='#0d1117')
|
| 87 |
+
ax.set_facecolor('#161b22')
|
| 88 |
+
ax.tick_params(colors='#c9d1d9', labelsize=11)
|
| 89 |
+
for s in ax.spines.values(): s.set_color('#30363d')
|
| 90 |
+
ax.grid(True, alpha=0.15, color='#58a6ff', axis='y')
|
| 91 |
|
| 92 |
diffs = ['Easy', 'Medium', 'Hard', 'Overall']
|
| 93 |
base = [0.087, 0.018, 0.015, 0.040]
|
|
|
|
| 95 |
x = np.arange(4)
|
| 96 |
w = 0.35
|
| 97 |
|
| 98 |
+
b1 = ax.bar(x - w/2, base, w, label='Base Model', color='#f85149', alpha=0.9, edgecolor='#ff7b72', linewidth=0.5)
|
| 99 |
+
b2 = ax.bar(x + w/2, trained, w, label='GRPO-Trained', color='#3fb950', alpha=0.9, edgecolor='#56d364', linewidth=0.5)
|
| 100 |
|
| 101 |
for bar in b1:
|
| 102 |
+
ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.006, f'{bar.get_height():.3f}',
|
| 103 |
+
ha='center', fontsize=11, color='#ff7b72', fontweight='bold')
|
| 104 |
for bar in b2:
|
| 105 |
+
ax.text(bar.get_x()+bar.get_width()/2, bar.get_height()+0.006, f'{bar.get_height():.3f}',
|
| 106 |
+
ha='center', fontsize=11, color='#56d364', fontweight='bold')
|
| 107 |
|
| 108 |
imps = ['+230%', '+617%', '+193%', '+283%']
|
| 109 |
for i, imp in enumerate(imps):
|
| 110 |
+
ax.text(x[i]+w/2, trained[i]+0.025, imp, ha='center', fontsize=10, color='#f0883e', fontweight='bold',
|
| 111 |
+
bbox=dict(boxstyle='round,pad=0.2', facecolor='#21262d', edgecolor='#f0883e', alpha=0.8))
|
| 112 |
|
| 113 |
ax.set_xticks(x)
|
| 114 |
+
ax.set_xticklabels(diffs, color='#f0f6fc', fontsize=12, fontweight='bold')
|
| 115 |
+
ax.set_ylabel('Episode Score', color='#c9d1d9', fontsize=13)
|
| 116 |
ax.set_title('Base vs GRPO-Trained — Post-Training Evaluation (5 seeds × 3 difficulties)',
|
| 117 |
+
color='#f0f6fc', fontsize=14, fontweight='bold', pad=12)
|
| 118 |
+
ax.legend(fontsize=11, facecolor='#21262d', edgecolor='#30363d', labelcolor='#f0f6fc')
|
| 119 |
+
ax.set_ylim(0, 0.38)
|
| 120 |
plt.tight_layout()
|
| 121 |
return fig
|
| 122 |
|