mnawfal29 committed on
Commit
e290bbe
·
verified ·
1 Parent(s): 70c8404

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. demo/ui.py +129 -12
demo/ui.py CHANGED
@@ -850,6 +850,90 @@ footer, .gradio-container footer { display: none !important; }
850
  margin-top: -0.5rem !important;
851
  }
852
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
853
  /* Main pane plots+outputs */
854
  .gradio-container .gr-plot, .gradio-container .plot-wrap {
855
  background: var(--lf-surface-alt) !important;
@@ -1539,17 +1623,50 @@ def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name,
1539
 
1540
  if obs.done:
1541
  bk = obs.r_optcoder_breakdown or {}
1542
- log_lines.extend([
1543
- f"---",
1544
- f"### 🏁 Episode done",
1545
- f"- Reason: `{(obs.last_action_result or {}).get('reason')}`",
1546
- f"- **Terminal reward: `{obs.r_optcoder:+.3f}`**",
1547
- f"- Final Adam-shortfall: `{obs.final_regret:.3f}`",
1548
- "",
1549
- f"| component | value |",
1550
- f"|---|---|",
1551
- *[f"| `{k}` | `{v:+.4f}` " for k, v in bk.items()],
1552
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1553
  reward_plot = _reward_breakdown_plot({
1554
  "r_regret": bk.get("r_regret", 0),
1555
  "r_convergence": bk.get("r_convergence", 0),
@@ -1557,7 +1674,7 @@ def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name,
1557
  "r_novelty": bk.get("r_novelty", 0),
1558
  "-r_budget": -bk.get("r_budget", 0),
1559
  "-r_eval_fail": -bk.get("r_eval_failures", 0),
1560
- }, obs.r_optcoder or 0.0)
1561
  yield ("\n".join(log_lines),
1562
  obs.model_dump(exclude_none=True),
1563
  reward_plot)
 
850
  margin-top: -0.5rem !important;
851
  }
852
 
853
+ /* Episode-done dashboard: KPI row with big metric cards */
854
+ .gradio-container .lf-done {
855
+ background: linear-gradient(180deg,
856
+ rgba(226,135,99,0.06) 0%,
857
+ rgba(42,40,36,0) 60%);
858
+ border: 1px solid var(--lf-border);
859
+ border-radius: 12px;
860
+ padding: 1.2rem 1.25rem;
861
+ margin: 1.1rem 0 0.6rem;
862
+ }
863
+ .gradio-container .lf-done-head {
864
+ display: flex; align-items: baseline; gap: 0.85rem;
865
+ margin-bottom: 0.9rem;
866
+ }
867
+ .gradio-container .lf-done-flag {
868
+ color: var(--lf-accent);
869
+ font-family: "Inter", sans-serif;
870
+ font-weight: 600; font-size: 0.75rem;
871
+ letter-spacing: 0.11em; text-transform: uppercase;
872
+ padding: 0.15rem 0.55rem;
873
+ border: 1px solid var(--lf-accent);
874
+ border-radius: 5px;
875
+ }
876
+ .gradio-container .lf-done-reason {
877
+ color: var(--lf-text-subtle);
878
+ font-size: 0.84rem;
879
+ }
880
+ .gradio-container .lf-done-reason code {
881
+ font-family: "JetBrains Mono", monospace;
882
+ background: transparent !important;
883
+ border: none !important;
884
+ color: var(--lf-text-muted) !important;
885
+ padding: 0 !important;
886
+ }
887
+
888
+ .gradio-container .lf-kpi-row {
889
+ display: grid;
890
+ grid-template-columns: repeat(3, 1fr);
891
+ gap: 0.8rem;
892
+ }
893
+ .gradio-container .lf-kpi {
894
+ background: var(--lf-surface-alt);
895
+ border: 1px solid var(--lf-border-soft);
896
+ border-radius: 10px;
897
+ padding: 0.9rem 1rem;
898
+ min-width: 0;
899
+ }
900
+ .gradio-container .lf-kpi-label {
901
+ color: var(--lf-text-subtle);
902
+ font-family: "Inter", sans-serif;
903
+ font-size: 0.7rem;
904
+ font-weight: 600;
905
+ letter-spacing: 0.1em;
906
+ text-transform: uppercase;
907
+ margin-bottom: 0.35rem;
908
+ }
909
+ .gradio-container .lf-kpi-value {
910
+ font-family: "Source Serif 4", Georgia, serif;
911
+ font-weight: 500;
912
+ font-size: 1.9rem;
913
+ color: var(--lf-text);
914
+ letter-spacing: -0.025em;
915
+ line-height: 1.1;
916
+ }
917
+ .gradio-container .lf-kpi-sub {
918
+ color: var(--lf-text-subtle);
919
+ font-size: 0.72rem;
920
+ margin-top: 0.3rem;
921
+ font-family: "JetBrains Mono", monospace;
922
+ }
923
+ .gradio-container .lf-kpi-good .lf-kpi-value { color: #7ab68c; }
924
+ .gradio-container .lf-kpi-warn .lf-kpi-value { color: #e4b264; }
925
+ .gradio-container .lf-kpi-bad .lf-kpi-value { color: #d47d6a; }
926
+ .gradio-container .lf-kpi-good { border-color: rgba(122,182,140,0.35); }
927
+ .gradio-container .lf-kpi-warn { border-color: rgba(228,178,100,0.35); }
928
+ .gradio-container .lf-kpi-bad { border-color: rgba(212,125,106,0.35); }
929
+
930
+ /* Responsive: stack KPIs in a single column on narrow viewports */
931
+ @media (max-width: 720px) {
932
+ .gradio-container .lf-kpi-row {
933
+ grid-template-columns: 1fr;
934
+ }
935
+ }
936
+
937
  /* Main pane plots+outputs */
938
  .gradio-container .gr-plot, .gradio-container .plot-wrap {
939
  background: var(--lf-surface-alt) !important;
 
1623
 
1624
  if obs.done:
1625
  bk = obs.r_optcoder_breakdown or {}
1626
+ reward_val = obs.r_optcoder or 0.0
1627
+ my_prog = bk.get("my_progress", 0.0)
1628
+ adam_prog = bk.get("adam_progress", 0.0)
1629
+ speedup = bk.get("speedup_vs_adam", 0.0)
1630
+ reason = (obs.last_action_result or {}).get("reason", "?")
1631
+
1632
+ # Tone of the reward KPI — green if >= 0.5, amber if in [0, 0.5), red if negative
1633
+ reward_tone = ("lf-kpi-good" if reward_val >= 0.5 else
1634
+ ("lf-kpi-warn" if reward_val >= 0 else "lf-kpi-bad"))
1635
+ speedup_display = (f"{speedup:.2f}×" if speedup < 100
1636
+ else f"{speedup:.0f}×")
1637
+ speedup_tone = ("lf-kpi-good" if speedup >= 1.0
1638
+ else "lf-kpi-warn")
1639
+
1640
+ episode_done_html = (
1641
+ "<div class='lf-done'>"
1642
+ " <div class='lf-done-head'>"
1643
+ " <span class='lf-done-flag'>Episode complete</span>"
1644
+ f" <span class='lf-done-reason'>ended by "
1645
+ f"<code>{reason}</code></span>"
1646
+ " </div>"
1647
+ " <div class='lf-kpi-row'>"
1648
+ " <div class='lf-kpi " f"{reward_tone}" "'>"
1649
+ " <div class='lf-kpi-label'>Terminal reward</div>"
1650
+ f" <div class='lf-kpi-value'>{reward_val:+.3f}</div>"
1651
+ " <div class='lf-kpi-sub'>GRPO training scalar</div>"
1652
+ " </div>"
1653
+ " <div class='lf-kpi " f"{speedup_tone}" "'>"
1654
+ " <div class='lf-kpi-label'>Speedup vs tuned Adam</div>"
1655
+ f" <div class='lf-kpi-value'>{speedup_display}</div>"
1656
+ f" <div class='lf-kpi-sub'>my {my_prog:.3g} · "
1657
+ f"adam {adam_prog:.3g}</div>"
1658
+ " </div>"
1659
+ " <div class='lf-kpi'>"
1660
+ " <div class='lf-kpi-label'>Adam shortfall</div>"
1661
+ f" <div class='lf-kpi-value'>{obs.final_regret:.3f}</div>"
1662
+ " <div class='lf-kpi-sub'>0 = matched/beat Adam</div>"
1663
+ " </div>"
1664
+ " </div>"
1665
+ "</div>"
1666
+ )
1667
+
1668
+ log_lines.extend([f"", episode_done_html, f""])
1669
+
1670
  reward_plot = _reward_breakdown_plot({
1671
  "r_regret": bk.get("r_regret", 0),
1672
  "r_convergence": bk.get("r_convergence", 0),
 
1674
  "r_novelty": bk.get("r_novelty", 0),
1675
  "-r_budget": -bk.get("r_budget", 0),
1676
  "-r_eval_fail": -bk.get("r_eval_failures", 0),
1677
+ }, reward_val)
1678
  yield ("\n".join(log_lines),
1679
  obs.model_dump(exclude_none=True),
1680
  reward_plot)