Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- demo/ui.py +129 -12
demo/ui.py
CHANGED
|
@@ -850,6 +850,90 @@ footer, .gradio-container footer { display: none !important; }
|
|
| 850 |
margin-top: -0.5rem !important;
|
| 851 |
}
|
| 852 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 853 |
/* Main pane plots+outputs */
|
| 854 |
.gradio-container .gr-plot, .gradio-container .plot-wrap {
|
| 855 |
background: var(--lf-surface-alt) !important;
|
|
@@ -1539,17 +1623,50 @@ def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name,
|
|
| 1539 |
|
| 1540 |
if obs.done:
|
| 1541 |
bk = obs.r_optcoder_breakdown or {}
|
| 1542 |
-
|
| 1543 |
-
|
| 1544 |
-
|
| 1545 |
-
|
| 1546 |
-
|
| 1547 |
-
|
| 1548 |
-
|
| 1549 |
-
|
| 1550 |
-
|
| 1551 |
-
|
| 1552 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1553 |
reward_plot = _reward_breakdown_plot({
|
| 1554 |
"r_regret": bk.get("r_regret", 0),
|
| 1555 |
"r_convergence": bk.get("r_convergence", 0),
|
|
@@ -1557,7 +1674,7 @@ def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name,
|
|
| 1557 |
"r_novelty": bk.get("r_novelty", 0),
|
| 1558 |
"-r_budget": -bk.get("r_budget", 0),
|
| 1559 |
"-r_eval_fail": -bk.get("r_eval_failures", 0),
|
| 1560 |
-
},
|
| 1561 |
yield ("\n".join(log_lines),
|
| 1562 |
obs.model_dump(exclude_none=True),
|
| 1563 |
reward_plot)
|
|
|
|
| 850 |
margin-top: -0.5rem !important;
|
| 851 |
}
|
| 852 |
|
| 853 |
+
/* Episode-done dashboard: KPI row with big metric cards */
|
| 854 |
+
.gradio-container .lf-done {
|
| 855 |
+
background: linear-gradient(180deg,
|
| 856 |
+
rgba(226,135,99,0.06) 0%,
|
| 857 |
+
rgba(42,40,36,0) 60%);
|
| 858 |
+
border: 1px solid var(--lf-border);
|
| 859 |
+
border-radius: 12px;
|
| 860 |
+
padding: 1.2rem 1.25rem;
|
| 861 |
+
margin: 1.1rem 0 0.6rem;
|
| 862 |
+
}
|
| 863 |
+
.gradio-container .lf-done-head {
|
| 864 |
+
display: flex; align-items: baseline; gap: 0.85rem;
|
| 865 |
+
margin-bottom: 0.9rem;
|
| 866 |
+
}
|
| 867 |
+
.gradio-container .lf-done-flag {
|
| 868 |
+
color: var(--lf-accent);
|
| 869 |
+
font-family: "Inter", sans-serif;
|
| 870 |
+
font-weight: 600; font-size: 0.75rem;
|
| 871 |
+
letter-spacing: 0.11em; text-transform: uppercase;
|
| 872 |
+
padding: 0.15rem 0.55rem;
|
| 873 |
+
border: 1px solid var(--lf-accent);
|
| 874 |
+
border-radius: 5px;
|
| 875 |
+
}
|
| 876 |
+
.gradio-container .lf-done-reason {
|
| 877 |
+
color: var(--lf-text-subtle);
|
| 878 |
+
font-size: 0.84rem;
|
| 879 |
+
}
|
| 880 |
+
.gradio-container .lf-done-reason code {
|
| 881 |
+
font-family: "JetBrains Mono", monospace;
|
| 882 |
+
background: transparent !important;
|
| 883 |
+
border: none !important;
|
| 884 |
+
color: var(--lf-text-muted) !important;
|
| 885 |
+
padding: 0 !important;
|
| 886 |
+
}
|
| 887 |
+
|
| 888 |
+
.gradio-container .lf-kpi-row {
|
| 889 |
+
display: grid;
|
| 890 |
+
grid-template-columns: repeat(3, 1fr);
|
| 891 |
+
gap: 0.8rem;
|
| 892 |
+
}
|
| 893 |
+
.gradio-container .lf-kpi {
|
| 894 |
+
background: var(--lf-surface-alt);
|
| 895 |
+
border: 1px solid var(--lf-border-soft);
|
| 896 |
+
border-radius: 10px;
|
| 897 |
+
padding: 0.9rem 1rem;
|
| 898 |
+
min-width: 0;
|
| 899 |
+
}
|
| 900 |
+
.gradio-container .lf-kpi-label {
|
| 901 |
+
color: var(--lf-text-subtle);
|
| 902 |
+
font-family: "Inter", sans-serif;
|
| 903 |
+
font-size: 0.7rem;
|
| 904 |
+
font-weight: 600;
|
| 905 |
+
letter-spacing: 0.1em;
|
| 906 |
+
text-transform: uppercase;
|
| 907 |
+
margin-bottom: 0.35rem;
|
| 908 |
+
}
|
| 909 |
+
.gradio-container .lf-kpi-value {
|
| 910 |
+
font-family: "Source Serif 4", Georgia, serif;
|
| 911 |
+
font-weight: 500;
|
| 912 |
+
font-size: 1.9rem;
|
| 913 |
+
color: var(--lf-text);
|
| 914 |
+
letter-spacing: -0.025em;
|
| 915 |
+
line-height: 1.1;
|
| 916 |
+
}
|
| 917 |
+
.gradio-container .lf-kpi-sub {
|
| 918 |
+
color: var(--lf-text-subtle);
|
| 919 |
+
font-size: 0.72rem;
|
| 920 |
+
margin-top: 0.3rem;
|
| 921 |
+
font-family: "JetBrains Mono", monospace;
|
| 922 |
+
}
|
| 923 |
+
.gradio-container .lf-kpi-good .lf-kpi-value { color: #7ab68c; }
|
| 924 |
+
.gradio-container .lf-kpi-warn .lf-kpi-value { color: #e4b264; }
|
| 925 |
+
.gradio-container .lf-kpi-bad .lf-kpi-value { color: #d47d6a; }
|
| 926 |
+
.gradio-container .lf-kpi-good { border-color: rgba(122,182,140,0.35); }
|
| 927 |
+
.gradio-container .lf-kpi-warn { border-color: rgba(228,178,100,0.35); }
|
| 928 |
+
.gradio-container .lf-kpi-bad { border-color: rgba(212,125,106,0.35); }
|
| 929 |
+
|
| 930 |
+
/* Responsive: stack KPIs on narrow */
|
| 931 |
+
@media (max-width: 720px) {
|
| 932 |
+
.gradio-container .lf-kpi-row {
|
| 933 |
+
grid-template-columns: 1fr;
|
| 934 |
+
}
|
| 935 |
+
}
|
| 936 |
+
|
| 937 |
/* Main pane plots+outputs */
|
| 938 |
.gradio-container .gr-plot, .gradio-container .plot-wrap {
|
| 939 |
background: var(--lf-surface-alt) !important;
|
|
|
|
| 1623 |
|
| 1624 |
if obs.done:
|
| 1625 |
bk = obs.r_optcoder_breakdown or {}
|
| 1626 |
+
reward_val = obs.r_optcoder or 0.0
|
| 1627 |
+
my_prog = bk.get("my_progress", 0.0)
|
| 1628 |
+
adam_prog = bk.get("adam_progress", 0.0)
|
| 1629 |
+
speedup = bk.get("speedup_vs_adam", 0.0)
|
| 1630 |
+
reason = (obs.last_action_result or {}).get("reason", "?")
|
| 1631 |
+
|
| 1632 |
+
# Tone of the reward KPI — green if >= 0.5, amber if in [0, 0.5), red if negative
|
| 1633 |
+
reward_tone = ("lf-kpi-good" if reward_val >= 0.5 else
|
| 1634 |
+
("lf-kpi-warn" if reward_val >= 0 else "lf-kpi-bad"))
|
| 1635 |
+
speedup_display = (f"{speedup:.2f}×" if speedup < 100
|
| 1636 |
+
else f"{speedup:.0f}×")
|
| 1637 |
+
speedup_tone = ("lf-kpi-good" if speedup >= 1.0
|
| 1638 |
+
else "lf-kpi-warn")
|
| 1639 |
+
|
| 1640 |
+
episode_done_html = (
|
| 1641 |
+
"<div class='lf-done'>"
|
| 1642 |
+
" <div class='lf-done-head'>"
|
| 1643 |
+
" <span class='lf-done-flag'>Episode complete</span>"
|
| 1644 |
+
f" <span class='lf-done-reason'>ended by "
|
| 1645 |
+
f"<code>{reason}</code></span>"
|
| 1646 |
+
" </div>"
|
| 1647 |
+
" <div class='lf-kpi-row'>"
|
| 1648 |
+
" <div class='lf-kpi " f"{reward_tone}" "'>"
|
| 1649 |
+
" <div class='lf-kpi-label'>Terminal reward</div>"
|
| 1650 |
+
f" <div class='lf-kpi-value'>{reward_val:+.3f}</div>"
|
| 1651 |
+
" <div class='lf-kpi-sub'>GRPO training scalar</div>"
|
| 1652 |
+
" </div>"
|
| 1653 |
+
" <div class='lf-kpi " f"{speedup_tone}" "'>"
|
| 1654 |
+
" <div class='lf-kpi-label'>Speedup vs tuned Adam</div>"
|
| 1655 |
+
f" <div class='lf-kpi-value'>{speedup_display}</div>"
|
| 1656 |
+
f" <div class='lf-kpi-sub'>my {my_prog:.3g} · "
|
| 1657 |
+
f"adam {adam_prog:.3g}</div>"
|
| 1658 |
+
" </div>"
|
| 1659 |
+
" <div class='lf-kpi'>"
|
| 1660 |
+
" <div class='lf-kpi-label'>Adam shortfall</div>"
|
| 1661 |
+
f" <div class='lf-kpi-value'>{obs.final_regret:.3f}</div>"
|
| 1662 |
+
" <div class='lf-kpi-sub'>0 = matched/beat Adam</div>"
|
| 1663 |
+
" </div>"
|
| 1664 |
+
" </div>"
|
| 1665 |
+
"</div>"
|
| 1666 |
+
)
|
| 1667 |
+
|
| 1668 |
+
log_lines.extend([f"", episode_done_html, f""])
|
| 1669 |
+
|
| 1670 |
reward_plot = _reward_breakdown_plot({
|
| 1671 |
"r_regret": bk.get("r_regret", 0),
|
| 1672 |
"r_convergence": bk.get("r_convergence", 0),
|
|
|
|
| 1674 |
"r_novelty": bk.get("r_novelty", 0),
|
| 1675 |
"-r_budget": -bk.get("r_budget", 0),
|
| 1676 |
"-r_eval_fail": -bk.get("r_eval_failures", 0),
|
| 1677 |
+
}, reward_val)
|
| 1678 |
yield ("\n".join(log_lines),
|
| 1679 |
obs.model_dump(exclude_none=True),
|
| 1680 |
reward_plot)
|