mnawfal29's picture
Upload folder using huggingface_hub
b89c27d verified
"""Gradio demo for LandscapeForge β€” Claude-inspired visual design.
Four tabs:
1. Landscape — pick a template, see 2D contour + structural hints
2. Baseline Race — SGD / Momentum / tuned-Adam / L-BFGS racing, same init
3. Optimizer Arena — paste a custom Optimizer class, full-arena eval vs
tuned-Adam, reward breakdown
4. OpenEnv API — live reset/step against the same container's FastAPI
Design: dark mode by default — warm near-black background, warm off-white
text, sienna primary accent, generous spacing, minimal chrome, no heavy shadows.
"""
from __future__ import annotations
import json
from typing import Any
import gradio as gr
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def _fmt_obs(obs_dict: dict) -> str:
"""Pretty-print an observation as indented JSON for gr.Code display.
Shrinks very long arrays (baseline trajectories etc.) so the rendered view
stays readable. `json.dumps(indent=2)` gives one value per line which
looks much cleaner than gr.JSON's component-per-field tree.
"""
def _shrink(v):
if isinstance(v, list):
if len(v) > 8:
return (
[_shrink(x) for x in v[:3]]
+ [f"... ({len(v)-6} more) ..."]
+ [_shrink(x) for x in v[-3:]]
)
return [_shrink(x) for x in v]
if isinstance(v, dict):
return {k: _shrink(x) for k, x in v.items()}
if isinstance(v, float):
return round(v, 6)
return v
return json.dumps(_shrink(obs_dict), indent=2, default=str)
try:
from ..arena import auto_test_draft, run_arena
from ..landscapes import BUILDERS, build_landscape, structural_hints
from ..reference_optimizers import (
run_baseline, run_baseline_tuned, tune_adam_lr,
)
from ..rewards import ast_novelty_score, compute_optcoder_reward
from ..sandbox import SandboxError, compile_optimizer
from ..models import LandscapeforgeAction
except ImportError: # flat layout (HF Space container)
from arena import auto_test_draft, run_arena # type: ignore
from landscapes import BUILDERS, build_landscape, structural_hints # type: ignore
from reference_optimizers import ( # type: ignore
run_baseline, run_baseline_tuned, tune_adam_lr,
)
from rewards import ast_novelty_score, compute_optcoder_reward # type: ignore
from sandbox import SandboxError, compile_optimizer # type: ignore
from models import LandscapeforgeAction # type: ignore
# ----------------- Claude-inspired palette + CSS -----------------
# Dark variant of the Claude-inspired surface colors: warm near-black
# background, warm off-white text, sienna as the primary accent.
CLAUDE_CSS = """
/* Variables β€” dark mode default, warm ink + sienna accent */
:root {
--lf-bg: #1f1d1a; /* warm near-black page */
--lf-surface: #2a2824; /* card surface */
--lf-surface-alt: #332f2a; /* elevated surface (code, plots) */
--lf-border: #403b34; /* card edge */
--lf-border-soft: #332f2a; /* soft inner divider */
--lf-text: #f3f0e8; /* warm off-white */
--lf-text-muted: #b5ada0; /* muted body */
--lf-text-subtle: #857d72; /* labels, captions */
--lf-accent: #e28763; /* brighter sienna for dark bg */
--lf-accent-dk: #c96442; /* hover / pressed */
--lf-accent-soft: #4a2f22; /* accent-tinted dark for selected bg */
--lf-good: #7ab68c;
--lf-bad: #d47d6a;
}
/* Page */
html, body, .gradio-container {
background: var(--lf-bg) !important;
}
.gradio-container {
font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI",
Helvetica, Arial, sans-serif !important;
color: var(--lf-text) !important;
max-width: none !important;
width: 100% !important;
margin: 0 auto !important;
padding: 1.5rem 2rem 3rem !important;
/* Override Gradio's internal theme variables so every component
inherits the warm palette instead of Gradio's blue-on-white defaults */
--body-text-color: var(--lf-text) !important;
--body-text-color-subdued: var(--lf-text-muted) !important;
--body-background-fill: var(--lf-bg) !important;
--background-fill-primary: var(--lf-surface) !important;
--background-fill-secondary: var(--lf-bg) !important;
--border-color-primary: var(--lf-border) !important;
--border-color-accent: var(--lf-accent) !important;
--input-background-fill: var(--lf-surface) !important;
--input-border-color: var(--lf-border) !important;
--input-text-color: var(--lf-text) !important;
--input-placeholder-color: var(--lf-text-subtle) !important;
--block-background-fill: var(--lf-surface) !important;
--block-border-color: var(--lf-border-soft) !important;
--block-label-background-fill: transparent !important;
--block-label-text-color: var(--lf-text) !important;
--block-title-text-color: var(--lf-text) !important;
--block-info-text-color: var(--lf-text-muted) !important;
--neutral-50: var(--lf-surface) !important;
--neutral-100: var(--lf-bg) !important;
--neutral-200: var(--lf-border-soft) !important;
--neutral-300: var(--lf-border) !important;
--neutral-400: var(--lf-text-subtle) !important;
--neutral-500: var(--lf-text-muted) !important;
--neutral-600: var(--lf-text-muted) !important;
--neutral-700: var(--lf-text) !important;
--neutral-800: var(--lf-text) !important;
--neutral-900: var(--lf-text) !important;
--color-accent: var(--lf-accent) !important;
--color-accent-soft: var(--lf-accent-soft) !important;
--link-text-color: var(--lf-accent) !important;
--link-text-color-hover: var(--lf-accent-dk) !important;
--button-primary-background-fill: var(--lf-accent) !important;
--button-primary-background-fill-hover: var(--lf-accent-dk) !important;
--button-primary-text-color: #ffffff !important;
--button-primary-border-color: var(--lf-accent) !important;
/* Kill the `<span data-testid="block-info">` pill that Gradio 5 uses
for every component label β€” it was defaulting to the primary accent.
We want labels to be plain muted text above the input. */
--block-title-background-fill: transparent !important;
--block-title-border-color: transparent !important;
--block-title-border-width: 0 !important;
--block-title-radius: 0 !important;
--block-title-padding: 0 0 0.3rem 0 !important;
--block-title-text-color: var(--lf-text-muted) !important;
--block-title-text-weight: 500 !important;
--block-title-text-size: 0.8rem !important;
/* Input outlines β€” dropdowns/text/number all need obvious borders */
--input-shadow: none !important;
--input-shadow-focus: 0 0 0 3px rgba(226,135,99,0.18) !important;
--input-border-color-focus: var(--lf-accent) !important;
--input-background-fill-focus:var(--lf-surface) !important;
/* Checkbox / radio variables */
--checkbox-background-color: var(--lf-surface) !important;
--checkbox-background-color-hover: var(--lf-surface-alt) !important;
--checkbox-background-color-focus: var(--lf-surface-alt) !important;
--checkbox-background-color-selected: var(--lf-accent) !important;
--checkbox-border-color: var(--lf-border) !important;
--checkbox-border-color-hover: var(--lf-accent) !important;
--checkbox-border-color-focus: var(--lf-accent) !important;
--checkbox-border-color-selected: var(--lf-accent) !important;
--checkbox-label-background-fill: transparent !important;
--checkbox-label-background-fill-hover: var(--lf-surface-alt) !important;
--checkbox-label-background-fill-selected:var(--lf-accent-soft) !important;
--checkbox-label-text-color: var(--lf-text) !important;
--checkbox-label-text-color-selected: var(--lf-accent) !important;
--checkbox-label-border-color: var(--lf-border) !important;
--checkbox-label-border-color-hover: var(--lf-accent) !important;
--checkbox-label-border-color-selected:var(--lf-accent) !important;
--checkbox-check: var(--lf-accent) !important;
}
/* Typography β€” serif for headings to match Claude's Tiempos-style hero */
.gradio-container h1,
.gradio-container h2,
.gradio-container h3,
.gradio-container h4 {
color: var(--lf-text) !important;
font-family: "Source Serif 4", "Source Serif Pro", Georgia, "Times New Roman",
serif !important;
font-weight: 500 !important;
letter-spacing: -0.015em !important;
line-height: 1.2 !important;
}
.gradio-container h1 { font-size: 2.5rem !important; margin: 0.25rem 0 0.5rem !important; }
.gradio-container h2 { font-size: 1.5rem !important; margin: 1.4rem 0 0.5rem !important; }
.gradio-container h3 { font-size: 1.15rem !important; margin: 1.1rem 0 0.5rem !important; font-weight: 600 !important; }
.gradio-container p, .gradio-container li {
color: var(--lf-text-muted) !important;
line-height: 1.65 !important;
font-size: 0.97rem !important;
}
.gradio-container strong { color: var(--lf-text) !important; }
/* Top bar β€” Linear/Vercel-style fixed header */
.lf-topbar {
display: flex; align-items: center; justify-content: space-between;
padding: 0.6rem 0.2rem 1.1rem;
border-bottom: 1px solid var(--lf-border);
margin-bottom: 1.25rem;
}
.lf-brand { display: flex; align-items: center; gap: 0.75rem; }
.lf-brand-mark {
width: 28px; height: 28px; border-radius: 7px;
background: linear-gradient(135deg, var(--lf-accent) 0%, var(--lf-accent-dk) 100%);
box-shadow: inset 0 0 0 1px rgba(255,255,255,0.08),
0 1px 3px rgba(0,0,0,0.3);
position: relative;
}
.lf-brand-mark::after {
/* little contour-ring motif inside the mark */
content: ""; position: absolute; inset: 5px;
border: 1.5px solid rgba(255,255,255,0.55);
border-radius: 4px;
clip-path: polygon(0 0, 100% 0, 100% 70%, 30% 100%, 0 100%);
}
.lf-brand-name {
font-family: "Inter", sans-serif;
font-weight: 600; font-size: 0.95rem; color: var(--lf-text);
letter-spacing: -0.01em; line-height: 1.1;
}
.lf-brand-sub {
font-family: "Inter", sans-serif;
font-size: 0.72rem; color: var(--lf-text-subtle);
letter-spacing: 0.04em; text-transform: uppercase; margin-top: 1px;
}
.lf-topbar-actions { display: flex; gap: 0.25rem; align-items: center; }
.lf-link {
color: var(--lf-text-muted) !important;
font-family: "Inter", sans-serif;
font-size: 0.82rem; text-decoration: none !important;
padding: 0.4rem 0.75rem; border-radius: 6px;
border: 1px solid transparent;
transition: background 0.12s, color 0.12s, border-color 0.12s;
}
.lf-link:hover {
color: var(--lf-text) !important;
background: var(--lf-surface);
border-color: var(--lf-border);
}
/* Hero β€” modern dashboard banner, serif headline */
.lf-hero {
margin-bottom: 1.5rem;
padding: 0.25rem 0 1rem;
}
.lf-hero h1 {
margin: 0 0 0.55rem 0 !important;
font-family: "Source Serif 4", "Source Serif Pro", Georgia, serif !important;
font-size: 2.1rem !important;
font-weight: 500 !important;
color: var(--lf-text) !important;
max-width: 820px;
line-height: 1.2 !important;
letter-spacing: -0.018em !important;
}
.lf-hero p {
margin: 0 !important;
max-width: 720px;
font-size: 0.98rem !important;
line-height: 1.6 !important;
color: var(--lf-text-muted) !important;
}
/* Tabs β€” Gradio 5 uses `.tab-container` with scoped `button` */
.gradio-container .tab-container {
border-bottom: 1px solid var(--lf-border) !important;
margin-bottom: 1.1rem !important;
}
.gradio-container .tab-container button,
.gradio-container .tab-container button[role="tab"] {
background: transparent !important;
color: var(--lf-text-muted) !important;
border: none !important;
border-bottom: 2px solid transparent !important;
font-family: "Inter", sans-serif !important;
font-weight: 500 !important;
font-size: 0.96rem !important;
padding: 0.7rem 1.15rem !important;
letter-spacing: -0.005em !important;
transition: color 0.15s, border-color 0.15s !important;
border-radius: 0 !important;
}
.gradio-container .tab-container button:hover:not(:disabled):not(.selected) {
color: var(--lf-text) !important;
background-color: transparent !important;
}
.gradio-container .tab-container button.selected {
color: var(--lf-accent) !important;
border-bottom: 2px solid var(--lf-accent) !important;
font-weight: 600 !important;
background: transparent !important;
}
/* Primary buttons β€” burnt sienna solid */
.gradio-container button.primary,
.gradio-container .primary button,
.gradio-container button.gradio-button.primary {
background: var(--lf-accent) !important;
color: #ffffff !important;
border: none !important;
font-family: "Inter", sans-serif !important;
font-weight: 600 !important;
font-size: 0.9rem !important;
letter-spacing: -0.005em !important;
border-radius: 8px !important;
padding: 0.6rem 1.1rem !important;
box-shadow: 0 1px 2px rgba(201,100,66,0.15) !important;
transition: background 0.15s, box-shadow 0.15s !important;
}
.gradio-container button.primary:hover,
.gradio-container .primary button:hover {
background: var(--lf-accent-dk) !important;
box-shadow: 0 2px 6px rgba(201,100,66,0.25) !important;
}
/* Secondary buttons */
.gradio-container button.secondary {
background: var(--lf-surface) !important;
color: var(--lf-text) !important;
border: 1px solid var(--lf-border) !important;
font-weight: 500 !important;
border-radius: 8px !important;
}
/* Inputs + selects + textareas + dropdowns β€” clearly bordered */
.gradio-container input[type="text"],
.gradio-container input[type="number"],
.gradio-container input[type="password"],
.gradio-container select,
.gradio-container textarea,
.gradio-container .wrap-inner,
.gradio-container [role="combobox"],
.gradio-container .dropdown > div,
.gradio-container [data-testid="dropdown"] > div {
border: 1px solid var(--lf-border) !important;
background: var(--lf-surface-alt) !important;
color: var(--lf-text) !important;
border-radius: 8px !important;
font-family: "Inter", sans-serif !important;
font-size: 0.92rem !important;
min-height: 38px !important;
box-sizing: border-box !important;
transition: border-color 0.15s, box-shadow 0.15s !important;
}
.gradio-container input[type="text"],
.gradio-container input[type="number"],
.gradio-container input[type="password"],
.gradio-container textarea {
padding: 0.55rem 0.75rem !important;
}
.gradio-container input:focus,
.gradio-container textarea:focus,
.gradio-container select:focus,
.gradio-container [role="combobox"]:focus-within {
border-color: var(--lf-accent) !important;
outline: none !important;
box-shadow: 0 0 0 3px rgba(226,135,99,0.18) !important;
}
/* Number input wrapper (Gradio renders a wrapper around input+reset) β€”
give it enough room so "0.7" doesn't clip */
.gradio-container .number-input-container,
.gradio-container input[type="number"] {
min-width: 72px !important;
text-align: left !important;
}
.gradio-container input[type="number"] {
padding-right: 0.4rem !important;
}
/* Labels β€” kill the accent-coloured "chip" treatment Gradio 5 applies,
make them plain inline text above the input */
.gradio-container label,
.gradio-container .label,
.gradio-container .block > .label-wrap,
.gradio-container .block > .label-wrap > span,
.gradio-container [data-testid="block-label"],
.gradio-container [data-testid="block-label"] > *,
.gradio-container .label > span {
background: transparent !important;
color: var(--lf-text-muted) !important;
font-weight: 500 !important;
font-size: 0.82rem !important;
letter-spacing: 0.01em !important;
text-transform: none !important;
padding: 0 !important;
margin-bottom: 0.3rem !important;
border: none !important;
border-radius: 0 !important;
box-shadow: none !important;
}
/* The block-label "pill" that wraps the label + icon: flatten it */
.gradio-container .block .wrap > .label-wrap,
.gradio-container .block > .wrap-inner > .label-wrap,
.gradio-container .block > div > .label-wrap,
.gradio-container .block > span[data-testid],
.gradio-container .block > span.svelte-1gfkn6j,
.gradio-container .block-label,
.gradio-container div[aria-label][class*="label"] {
background: transparent !important;
padding: 0 0 0.25rem 0 !important;
color: var(--lf-text-muted) !important;
font-weight: 500 !important;
border: none !important;
border-radius: 0 !important;
}
/* For elements whose rendered icon-prefixed label (e.g. JSON "{...}" icon,
Plot chart icon) is inside the label-wrap, keep them subtle */
.gradio-container .block-label svg,
.gradio-container .label-wrap svg,
.gradio-container [data-testid="block-label"] svg {
color: var(--lf-text-subtle) !important;
opacity: 0.7;
}
/* Reset / refresh icon buttons (the circular arrow next to number inputs) */
.gradio-container button[aria-label*="Reset"],
.gradio-container button[title*="Reset"],
.gradio-container .icon-button {
background: transparent !important;
color: var(--lf-text-subtle) !important;
border: none !important;
box-shadow: none !important;
}
.gradio-container button[aria-label*="Reset"]:hover,
.gradio-container .icon-button:hover {
color: var(--lf-accent) !important;
background: var(--lf-accent-soft) !important;
}
/* Gradio block container (the outer "card" of each component) */
.gradio-container .block,
.gradio-container .gr-box,
.gradio-container .gr-panel,
.gradio-container .form {
background: var(--lf-surface) !important;
border: 1px solid var(--lf-border-soft) !important;
border-radius: 10px !important;
padding: 1.1rem !important;
}
/* Slider colors */
.gradio-container input[type="range"]::-webkit-slider-thumb {
background: var(--lf-accent) !important;
}
.gradio-container .svelte-range-slider .handle,
.gradio-container .svelte-range-slider .rangeBar {
background: var(--lf-accent) !important;
}
/* Code blocks */
.gradio-container pre,
.gradio-container code,
.gradio-container .cm-editor,
.gradio-container .cm-content {
font-family: "JetBrains Mono", ui-monospace, Menlo, Consolas, monospace !important;
font-size: 0.84rem !important;
}
.gradio-container pre {
background: var(--lf-surface-alt) !important;
border: 1px solid var(--lf-border-soft) !important;
border-radius: 8px !important;
padding: 0.9rem 1.1rem !important;
}
/* Dataframes */
.gradio-container table {
border-collapse: collapse !important;
width: 100% !important;
font-family: "Inter", sans-serif !important;
}
.gradio-container table th {
background: var(--lf-bg) !important;
color: var(--lf-text) !important;
font-weight: 600 !important;
font-size: 0.82rem !important;
letter-spacing: 0.01em !important;
text-transform: uppercase !important;
border-bottom: 1px solid var(--lf-border) !important;
padding: 0.6rem 0.85rem !important;
}
.gradio-container table td {
border-bottom: 1px solid var(--lf-border-soft) !important;
padding: 0.55rem 0.85rem !important;
color: var(--lf-text) !important;
font-size: 0.9rem !important;
}
/* JSON renderer β€” force warm ink for every node + muted for keys */
.gradio-container .json-holder,
.gradio-container .json-container,
.gradio-container .json-node {
background: var(--lf-surface) !important;
border: 1px solid var(--lf-border-soft) !important;
border-radius: 8px !important;
padding: 0.9rem !important;
color: var(--lf-text) !important;
font-family: "JetBrains Mono", ui-monospace, Menlo, monospace !important;
font-size: 0.82rem !important;
}
.gradio-container .json-holder *,
.gradio-container .json-container * {
color: var(--lf-text) !important;
}
.gradio-container .json-holder .key,
.gradio-container .json-container .key {
color: var(--lf-accent-dk) !important;
font-weight: 600 !important;
}
.gradio-container .json-holder .string-value {
color: #3d6b4c !important;
}
.gradio-container .json-holder .number-value {
color: #874123 !important;
}
/* Dropdown option list (open state) β€” Gradio defaults to white-on-white */
.gradio-container .options,
.gradio-container .options .item,
.gradio-container [role="listbox"],
.gradio-container [role="option"] {
background: var(--lf-surface) !important;
color: var(--lf-text) !important;
border-color: var(--lf-border) !important;
}
.gradio-container [role="option"]:hover,
.gradio-container .options .item:hover {
background: var(--lf-accent-soft) !important;
color: var(--lf-text) !important;
}
.gradio-container [role="option"][aria-selected="true"] {
background: var(--lf-accent) !important;
color: #ffffff !important;
}
/* Markdown rendered inside blocks */
.gradio-container .prose,
.gradio-container .markdown,
.gradio-container [data-testid="markdown"] {
color: var(--lf-text) !important;
}
.gradio-container .prose p,
.gradio-container .markdown p,
.gradio-container [data-testid="markdown"] p {
color: var(--lf-text-muted) !important;
}
.gradio-container .prose strong,
.gradio-container .markdown strong {
color: var(--lf-text) !important;
}
.gradio-container .prose a,
.gradio-container .markdown a {
color: var(--lf-accent) !important;
text-decoration: underline;
text-underline-offset: 2px;
}
.gradio-container .prose code,
.gradio-container .markdown code {
background: var(--lf-bg) !important;
color: var(--lf-accent-dk) !important;
padding: 0.12em 0.4em !important;
border-radius: 4px !important;
font-size: 0.84em !important;
}
/* Inline label / info text under inputs */
.gradio-container .block-info,
.gradio-container .info {
color: var(--lf-text-muted) !important;
font-size: 0.82rem !important;
}
/* Slider track+value labels */
.gradio-container .svelte-range-slider,
.gradio-container .min-val,
.gradio-container .max-val,
.gradio-container .value {
color: var(--lf-text) !important;
}
.gradio-container .value-text {
color: var(--lf-accent-dk) !important;
font-weight: 600 !important;
}
/* Radio buttons β€” labels should be visible */
.gradio-container .wrap label,
.gradio-container [role="radio"] + label {
color: var(--lf-text) !important;
}
/* Status badges inside obs.done etc */
.gradio-container .status-text {
color: var(--lf-text) !important;
}
/* Accordion headers */
.gradio-container .label-wrap,
.gradio-container .accordion-header {
font-weight: 500 !important;
color: var(--lf-text) !important;
}
/* Footer β€” hide "Built with Gradio" */
footer, .gradio-container footer { display: none !important; }
/* Scrollbars */
.gradio-container ::-webkit-scrollbar { width: 10px; height: 10px; }
.gradio-container ::-webkit-scrollbar-track { background: var(--lf-bg); }
.gradio-container ::-webkit-scrollbar-thumb {
background: var(--lf-border);
border-radius: 5px;
}
.gradio-container ::-webkit-scrollbar-thumb:hover { background: var(--lf-text-subtle); }
/* Sidebar column β€” one card; inside is flat */
.gradio-container .lf-sidebar {
background: var(--lf-surface) !important;
border: 1px solid var(--lf-border) !important;
border-radius: 12px !important;
padding: 1.5rem 1.35rem 1.35rem !important;
box-shadow: 0 1px 0 rgba(20,20,19,0.02);
}
.gradio-container .lf-sidebar h3 {
margin-top: 0.15rem !important;
margin-bottom: 0.3rem !important;
}
.gradio-container .lf-sidebar p {
font-size: 0.88rem !important;
margin-bottom: 0.85rem !important;
color: var(--lf-text-muted) !important;
}
/* Flatten ALL nested blocks inside the sidebar β€” no card-in-card */
.gradio-container .lf-sidebar .block,
.gradio-container .lf-sidebar .form,
.gradio-container .lf-sidebar .gr-box,
.gradio-container .lf-sidebar .gr-panel,
.gradio-container .lf-sidebar .wrap,
.gradio-container .lf-sidebar fieldset {
background: transparent !important;
border: none !important;
padding: 0 !important;
margin: 0 !important;
border-radius: 0 !important;
box-shadow: none !important;
}
/* Space between consecutive controls in the sidebar */
.gradio-container .lf-sidebar > div > *,
.gradio-container .lf-sidebar > .form > * {
margin-bottom: 0.85rem !important;
}
.gradio-container .lf-sidebar button {
width: 100% !important;
}
.gradio-container .lf-sidebar hr,
.gradio-container .lf-sidebar .prose hr {
border: none !important;
border-top: 1px solid var(--lf-border) !important;
margin: 1.1rem 0 !important;
}
/* Hide ugly number-input spinner arrows (β–²β–Ό) */
.gradio-container input[type="number"]::-webkit-outer-spin-button,
.gradio-container input[type="number"]::-webkit-inner-spin-button {
-webkit-appearance: none !important;
appearance: none !important;
margin: 0 !important;
}
.gradio-container input[type="number"] {
-moz-appearance: textfield !important;
appearance: textfield !important;
}
/* Slider value-input on the right β€” align + size so "0.95" doesn't clip */
.gradio-container .slider-container,
.gradio-container [data-testid="slider"] {
display: flex !important;
flex-direction: column !important;
gap: 0.4rem !important;
}
.gradio-container [data-testid="slider"] .head,
.gradio-container .tab-like-container {
display: flex !important;
align-items: center !important;
justify-content: space-between !important;
gap: 0.5rem !important;
}
.gradio-container [data-testid="slider"] input[type="number"] {
width: 68px !important;
min-width: 68px !important;
max-width: 80px !important;
text-align: right !important;
padding: 0.3rem 0.5rem !important;
min-height: 30px !important;
font-size: 0.85rem !important;
}
/* Reset-button next to number inputs β€” make it transparent & subtle */
.gradio-container [data-testid="slider"] button,
.gradio-container .reset-button {
background: transparent !important;
border: none !important;
color: var(--lf-text-subtle) !important;
padding: 0.15rem !important;
min-width: 26px !important;
width: 26px !important;
height: 26px !important;
}
.gradio-container [data-testid="slider"] button:hover {
color: var(--lf-accent) !important;
background: var(--lf-accent-soft) !important;
}
/* Inline code tag β€” softer across the whole app, not only the sidebar */
.gradio-container .prose code,
.gradio-container .markdown code,
.gradio-container code {
background: var(--lf-surface-alt) !important;
border: 1px solid var(--lf-border) !important;
color: var(--lf-text) !important;
padding: 0.05em 0.42em !important;
border-radius: 4px !important;
font-size: 0.85em !important;
font-weight: 400 !important;
}
/* Fenced code blocks β€” proper code-box with mono font + subtle bg */
.gradio-container .prose pre,
.gradio-container .markdown pre {
background: #14120f !important;
border: 1px solid var(--lf-border) !important;
border-radius: 8px !important;
padding: 0.9rem 1rem !important;
margin: 0.4rem 0 0.8rem 0 !important;
overflow-x: auto !important;
}
.gradio-container .prose pre code,
.gradio-container .markdown pre code {
background: transparent !important;
border: none !important;
color: #e8e3d6 !important;
font-size: 0.82rem !important;
line-height: 1.55 !important;
padding: 0 !important;
}
/* Chips β€” lightweight tags for action kind, model, endpoint */
.gradio-container .lf-chip {
display: inline-block;
padding: 0.08rem 0.5rem;
border-radius: 5px;
background: var(--lf-surface-alt);
color: var(--lf-text);
border: 1px solid var(--lf-border);
font-family: "JetBrains Mono", ui-monospace, monospace;
font-size: 0.78rem;
font-weight: 500;
letter-spacing: -0.01em;
}
.gradio-container .lf-chip-draft { color: var(--lf-accent); border-color: var(--lf-accent); }
.gradio-container .lf-chip-run_baseline { color: #7ecfc5; border-color: #5a9c94; }
.gradio-container .lf-chip-inspect { color: #b5a5e0; border-color: #7e6ea8; }
.gradio-container .lf-chip-commit { color: #7ab68c; border-color: #4e7c5c; }
/* Soft divider inside transcript */
.gradio-container .lf-hr-soft,
.gradio-container hr.lf-hr-soft {
border: none !important;
border-top: 1px solid var(--lf-border-soft) !important;
margin: 0.9rem 0 0.6rem !important;
opacity: 0.6;
}
/* Turn card β€” one per REPL step. Clearly demarcates Action vs Output */
.gradio-container .lf-turn {
background: var(--lf-surface);
border: 1px solid var(--lf-border);
border-radius: 10px;
padding: 0.9rem 1rem;
margin: 0.85rem 0;
box-shadow: 0 1px 0 rgba(0,0,0,0.2);
}
.gradio-container .lf-turn-head {
display: flex; align-items: center; gap: 0.55rem;
margin-bottom: 0.7rem;
padding-bottom: 0.55rem;
border-bottom: 1px dashed var(--lf-border-soft);
}
.gradio-container .lf-turn-num {
font-family: "Source Serif 4", Georgia, serif;
font-weight: 600;
font-size: 0.98rem;
color: var(--lf-text);
letter-spacing: -0.01em;
}
.gradio-container .lf-turn-meta {
margin-left: auto;
font-family: "JetBrains Mono", monospace;
font-size: 0.76rem;
color: var(--lf-text-subtle);
}
.gradio-container .lf-turn-meta b {
color: var(--lf-text);
font-weight: 600;
}
.gradio-container .lf-turn-row {
display: grid;
grid-template-columns: 70px 1fr;
align-items: baseline;
gap: 0.75rem;
padding: 0.25rem 0;
}
.gradio-container .lf-section-label {
font-family: "Inter", sans-serif;
font-size: 0.68rem;
font-weight: 600;
letter-spacing: 0.1em;
text-transform: uppercase;
color: var(--lf-text-subtle);
padding-top: 0.15rem;
}
.gradio-container .lf-section-content {
color: var(--lf-text);
font-size: 0.9rem;
line-height: 1.55;
font-family: "Inter", sans-serif;
}
.gradio-container .lf-section-content code {
font-size: 0.82em !important;
}
.gradio-container .lf-section-content b {
color: var(--lf-text);
font-weight: 600;
}
/* Status chips inside the Output row */
.gradio-container .lf-status {
display: inline-block;
padding: 0.05rem 0.45rem;
border-radius: 4px;
font-size: 0.78rem;
font-weight: 500;
border: 1px solid;
background: transparent;
margin-right: 0.15rem;
}
.gradio-container .lf-status-good {
color: #7ab68c;
border-color: rgba(122,182,140,0.4);
background: rgba(122,182,140,0.08);
}
.gradio-container .lf-status-warn {
color: #e4b264;
border-color: rgba(228,178,100,0.4);
background: rgba(228,178,100,0.08);
}
.gradio-container .lf-status-bad {
color: #d47d6a;
border-color: rgba(212,125,106,0.4);
background: rgba(212,125,106,0.08);
}
/* Code fence that follows a turn card β€” tighten top margin */
.gradio-container .lf-turn + pre,
.gradio-container .prose pre:has(+ .lf-turn) {
margin-top: -0.5rem !important;
}
/* Episode-done dashboard: KPI row with big metric cards */
.gradio-container .lf-done {
background: linear-gradient(180deg,
rgba(226,135,99,0.06) 0%,
rgba(42,40,36,0) 60%);
border: 1px solid var(--lf-border);
border-radius: 12px;
padding: 1.2rem 1.25rem;
margin: 1.1rem 0 0.6rem;
}
.gradio-container .lf-done-head {
display: flex; align-items: baseline; gap: 0.85rem;
margin-bottom: 0.9rem;
}
.gradio-container .lf-done-flag {
color: var(--lf-accent);
font-family: "Inter", sans-serif;
font-weight: 600; font-size: 0.75rem;
letter-spacing: 0.11em; text-transform: uppercase;
padding: 0.15rem 0.55rem;
border: 1px solid var(--lf-accent);
border-radius: 5px;
}
.gradio-container .lf-done-reason {
color: var(--lf-text-subtle);
font-size: 0.84rem;
}
.gradio-container .lf-done-reason code {
font-family: "JetBrains Mono", monospace;
background: transparent !important;
border: none !important;
color: var(--lf-text-muted) !important;
padding: 0 !important;
}
.gradio-container .lf-kpi-row {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 0.8rem;
}
.gradio-container .lf-kpi {
background: var(--lf-surface-alt);
border: 1px solid var(--lf-border-soft);
border-radius: 10px;
padding: 0.9rem 1rem;
min-width: 0;
}
.gradio-container .lf-kpi-label {
color: var(--lf-text-subtle);
font-family: "Inter", sans-serif;
font-size: 0.7rem;
font-weight: 600;
letter-spacing: 0.1em;
text-transform: uppercase;
margin-bottom: 0.35rem;
}
.gradio-container .lf-kpi-value {
font-family: "Source Serif 4", Georgia, serif;
font-weight: 500;
font-size: 1.9rem;
color: var(--lf-text);
letter-spacing: -0.025em;
line-height: 1.1;
}
.gradio-container .lf-kpi-sub {
color: var(--lf-text-subtle);
font-size: 0.72rem;
margin-top: 0.3rem;
font-family: "JetBrains Mono", monospace;
}
.gradio-container .lf-kpi-good .lf-kpi-value { color: #7ab68c; }
.gradio-container .lf-kpi-warn .lf-kpi-value { color: #e4b264; }
.gradio-container .lf-kpi-bad .lf-kpi-value { color: #d47d6a; }
.gradio-container .lf-kpi-good { border-color: rgba(122,182,140,0.35); }
.gradio-container .lf-kpi-warn { border-color: rgba(228,178,100,0.35); }
.gradio-container .lf-kpi-bad { border-color: rgba(212,125,106,0.35); }
/* Responsive: stack KPIs on narrow */
@media (max-width: 720px) {
.gradio-container .lf-kpi-row {
grid-template-columns: 1fr;
}
}
/* Main pane plots+outputs */
.gradio-container .gr-plot, .gradio-container .plot-wrap {
background: var(--lf-surface-alt) !important;
border-radius: 10px !important;
}
"""
# Shared Plotly layout for every figure — dark palette.
# The margin lives in its own constant so individual plots can override it
# without clashing with the keys spread from _PLOTLY_LAYOUT.
_PLOTLY_LAYOUT = {
    "font": {
        "family": "Inter, -apple-system, system-ui, sans-serif",
        "color": "#f3f0e8",
        "size": 12,
    },
    "paper_bgcolor": "#2a2824",  # card surface
    "plot_bgcolor": "#1f1d1a",   # page background, slightly darker
    "hoverlabel": {
        "bgcolor": "#f3f0e8",
        "font_color": "#1f1d1a",
        "font_family": "Inter",
        "bordercolor": "#e28763",
    },
    "legend": {
        "bgcolor": "rgba(31,29,26,0.85)",
        "bordercolor": "#403b34",
        "borderwidth": 1,
        "font": {"color": "#f3f0e8"},
    },
}

_DEFAULT_MARGIN = {"l": 60, "r": 30, "t": 60, "b": 55}

# Keyword arguments spread into every visible x/y axis.
_AXIS_STYLE = {
    "gridcolor": "#403b34",
    "zerolinecolor": "#554e45",
    "showline": True,
    "linecolor": "#554e45",
    "tickfont": {"color": "#b5ada0"},
}

# Keyword arguments spread into every figure title.
_TITLE_STYLE = {
    "x": 0.02,
    "xanchor": "left",
    "font": {"size": 14, "color": "#f3f0e8", "weight": 500},
}

# Line/bar colour per optimizer name (looked up by _color).
OPT_COLORS = {
    "sgd": "#c05450",
    "momentum": "#d9865b",
    "adam": "#5b7a6b",
    "lbfgs": "#556b99",
    "custom": "#d97757",
}

# Bar colours for non-negative / negative reward contributions.
BAR_GOOD = "#4a7c59"
BAR_BAD = "#a85c4c"


# ----------------- plotting helpers (Plotly) -----------------

TEMPLATES_2D_SAFE = [
    "quadratic",
    "rosenbrock",
    "styblinski_tang",
    "huber",
    "gaussian_mix",
    "himmelblau",
    "plateau",
    "cliff",
]
def _color(name: str) -> str:
    """Return the palette colour registered for ``name``.

    Everything from the first ``(`` onwards (for example a tuned-LR suffix)
    is discarded and the remainder is stripped before the ``OPT_COLORS``
    lookup. Names that are not registered map to ``"#2a2319"``.
    """
    base_name, _, _ = name.partition("(")
    key = base_name.strip()
    if key in OPT_COLORS:
        return OPT_COLORS[key]
    return "#2a2319"
def _trajectory_diverged(arr: np.ndarray, clip: float = 8.0) -> bool:
    """Tell whether a trajectory leaves the viewing window.

    Returns ``True`` when any entry of ``arr`` has magnitude above ``clip``
    or is not finite (NaN / infinity); otherwise ``False``.
    """
    outside_window = np.abs(arr) > clip
    not_finite = ~np.isfinite(arr)
    return bool(np.any(outside_window | not_finite))
def _contour_plot(ls, trajectories=None, title=None, subtitle=None):
    """Draw a filled contour of a 2-D landscape with optional optimizer paths.

    Args:
        ls: Landscape object. This function reads ``ls.dim`` and ``ls.name``
            and calls ``ls.f(point)`` with a length-2 array per grid node.
        trajectories: Optional mapping ``name -> sequence of (x1, x2)``.
        title: Figure title; defaults to ``"<ls.name> (dim=2)"``.
        subtitle: Optional line shown under the title in the warning style.
            When omitted, the names of diverged trajectories are listed.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    assert ls.dim == 2, "contour plot requires dim=2"
    CLIP = 8.0  # largest |coordinate| that is ever shown

    def _in_view(points):
        """Row mask: finite points whose coordinates all lie within +/-CLIP."""
        return ((np.abs(points) <= CLIP).all(axis=1)
                & np.isfinite(points).all(axis=1))

    # View extents come from finite, in-window trajectory points only.
    # Divergent ones (e.g. SGD exploding to 1e6) are clipped/marked separately.
    xs_all, ys_all = [0.0], [0.0]
    for traj in (trajectories or {}).values():
        arr = np.array(traj)
        if arr.size == 0:
            continue
        good = arr[_in_view(arr)]
        xs_all.extend(good[:, 0].tolist())
        ys_all.extend(good[:, 1].tolist())
    # Pad by 1.5, show at least [-3.5, 3.5], never more than [-CLIP, CLIP].
    x_min = max(min(min(xs_all) - 1.5, -3.5), -CLIP)
    x_max = min(max(max(xs_all) + 1.5, 3.5), CLIP)
    y_min = max(min(min(ys_all) - 1.5, -3.5), -CLIP)
    y_max = min(max(max(ys_all) + 1.5, 3.5), CLIP)

    g = 70  # grid resolution per axis
    xs = np.linspace(x_min, x_max, g)
    ys = np.linspace(y_min, y_max, g)
    X, Y = np.meshgrid(xs, ys)
    Z = np.empty_like(X)
    for i in range(g):
        for j in range(g):
            Z[i, j] = ls.f(np.array([X[i, j], Y[i, j]]))

    # Colour range is clamped to the 2nd..95th percentile of the finite grid
    # values. If the grid holds no finite value at all, fall back to a fixed
    # range instead of asking numpy for percentiles of an empty array.
    finite = Z[np.isfinite(Z)]
    if finite.size:
        lo, hi = np.percentile(finite, [2, 95])
    else:
        lo, hi = 0.0, 1.0

    fig = go.Figure()
    fig.add_trace(go.Contour(
        x=xs, y=ys, z=Z,
        zmin=float(lo), zmax=float(hi),
        # Dark-mode colorscale: deep warm valleys → glowing sienna peaks
        colorscale=[
            [0.0, "#1f1d1a"], [0.15, "#2f2a22"], [0.3, "#4a2f22"],
            [0.5, "#7a4229"], [0.7, "#c25a3a"], [0.85, "#e28763"],
            [1.0, "#f4d6c5"],
        ],
        contours=dict(coloring="heatmap", showlabels=False),
        line=dict(width=0.5, color="rgba(243,240,232,0.12)"),
        colorbar=dict(title=dict(text="f(x)",
                                 font=dict(size=11, color="#f3f0e8")),
                      thickness=12, len=0.85,
                      tickfont=dict(size=10, color="#b5ada0"),
                      outlinewidth=0),
        hovertemplate="x₁=%{x:.3f}<br>x₂=%{y:.3f}<br>f=%{z:.3f}<extra></extra>",
    ))

    divergent_names: list[str] = []
    if trajectories:
        for name, traj in trajectories.items():
            if not traj:
                continue
            color = _color(name)
            arr = np.array(traj)
            # Divergence is judged on the full path, before any clipping.
            diverged = _trajectory_diverged(arr, clip=CLIP)
            if diverged:
                divergent_names.append(name)
            # Keep only finite, in-window points for plotting.
            arr = arr[_in_view(arr)]
            if arr.shape[0] == 0:
                continue
            display_name = f"{name} · diverged" if diverged else name
            line_style = "dash" if diverged else "solid"
            hover = [f"step {i}<br>x₁={a[0]:.3f}<br>x₂={a[1]:.3f}"
                     for i, a in enumerate(arr)]
            fig.add_trace(go.Scatter(
                x=arr[:, 0], y=arr[:, 1],
                mode="lines+markers",
                name=display_name,
                line=dict(color=color, width=2.5, dash=line_style),
                marker=dict(size=4, color=color,
                            line=dict(color="#ffffff", width=0.8)),
                hovertemplate="%{text}<extra>" + display_name + "</extra>",
                text=hover,
            ))
            # Open circle on the first plotted point.
            fig.add_trace(go.Scatter(
                x=[arr[0, 0]], y=[arr[0, 1]],
                mode="markers", showlegend=False,
                marker=dict(size=12, color=color, symbol="circle-open",
                            line=dict(color=color, width=2.5)),
                hovertemplate=f"start<extra>{display_name}</extra>",
            ))
            # Star on the last plotted point, or an "x" if the path diverged.
            end_symbol = "x" if diverged else "star"
            end_size = 14 if diverged else 16
            fig.add_trace(go.Scatter(
                x=[arr[-1, 0]], y=[arr[-1, 1]],
                mode="markers", showlegend=False,
                marker=dict(size=end_size, color=color, symbol=end_symbol,
                            line=dict(color="#ffffff", width=1.2)),
                hovertemplate=(f"{'diverged-exit' if diverged else 'end'}"
                               f"<extra>{display_name}</extra>"),
            ))

    full_title = title or f"{ls.name} (dim=2)"
    sub_text = subtitle or (
        f"diverged: {', '.join(divergent_names)}" if divergent_names else None
    )
    if sub_text:
        full_title = (
            f"{full_title}<br>"
            f"<span style='font-size:11px;color:#a85c4c'>⚠ {sub_text}</span>"
        )
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(text=full_title, **_TITLE_STYLE),
        height=480,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(title="x₁", range=[x_min, x_max], **_AXIS_STYLE),
        yaxis=dict(title="x₂", range=[y_min, y_max],
                   scaleanchor="x", scaleratio=1, **_AXIS_STYLE),
    )
    return fig
def _loss_curves(traj_map, title):
    """Plot loss-versus-step curves for several optimizers.

    Args:
        traj_map: Mapping ``name -> sequence of loss values``. Empty
            sequences are skipped; ``None`` and non-finite values become gaps.
        title: Figure title.

    Returns:
        A ``plotly.graph_objects.Figure``. The y axis is logarithmic only
        when every plotted value is strictly positive. Plotly has no symlog
        axis, and a log axis silently hides zero or negative losses, so such
        data is shown on a linear axis instead.
    """
    fig = go.Figure()
    log_safe = True
    for name, fs in traj_map.items():
        if not fs:
            continue
        color = _color(name)
        # Non-finite values (diverged optimizer) are replaced by gaps.
        fs_clean = [v if (v is not None and np.isfinite(v)) else None
                    for v in fs]
        if any(v is not None and v <= 0 for v in fs_clean):
            log_safe = False
        fig.add_trace(go.Scatter(
            x=list(range(len(fs_clean))), y=fs_clean,
            mode="lines+markers", name=name,
            line=dict(color=color, width=2.2, shape="spline"),
            marker=dict(size=4, color=color),
            hovertemplate="step=%{x}<br>f=%{y:.4g}<extra>" + name + "</extra>",
            connectgaps=False,
        ))
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(text=title, **_TITLE_STYLE),
        height=360,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(title="optimizer step", **_AXIS_STYLE),
        yaxis=dict(title="f(x) (log scale)" if log_safe else "f(x)",
                   type="log" if log_safe else "linear",
                   **_AXIS_STYLE),
    )
    return fig
def _bar_plot(values, title, ylabel):
    """Build a vertical bar chart with one bar per entry of ``values``.

    Args:
        values: Mapping ``name -> number``; each bar is coloured via
            ``_color(name)`` and labelled with the value in ``.3g`` format.
        title: Figure title.
        ylabel: y-axis title, also used in the hover text.

    Returns:
        A ``plotly.graph_objects.Figure`` without a legend.
    """
    labels = [*values]
    heights = [values[label] for label in labels]
    hover = "".join(["%{x}<br>", ylabel, "=%{y:.4g}<extra></extra>"])
    bars = go.Bar(
        x=labels,
        y=heights,
        marker={
            "color": [_color(label) for label in labels],
            "line": {"color": "#ffffff", "width": 1},
        },
        text=[format(height, ".3g") for height in heights],
        textposition="outside",
        textfont={"size": 11},
        hovertemplate=hover,
    )
    fig = go.Figure(bars)
    layout = {
        **_PLOTLY_LAYOUT,
        "title": {"text": title, **_TITLE_STYLE},
        "height": 280,
        "margin": _DEFAULT_MARGIN,
        "xaxis": dict(_AXIS_STYLE),
        "yaxis": {"title": ylabel, **_AXIS_STYLE},
        "showlegend": False,
    }
    fig.update_layout(**layout)
    return fig
def _reward_breakdown_plot(components, total):
    """Render weighted reward terms as a horizontal bar chart.

    Args:
        components: Mapping ``label -> signed contribution``. Non-negative
            values are drawn in ``BAR_GOOD``, negative ones in ``BAR_BAD``.
        total: Overall reward, shown in the figure title.

    Returns:
        A ``plotly.graph_objects.Figure`` without a legend.
    """
    # Horizontal bars — more readable in a narrow column, aligns values nicely.
    names = list(components)
    vs = [components[n] for n in names]
    colors = [BAR_GOOD if v >= 0 else BAR_BAD for v in vs]
    fig = go.Figure(go.Bar(
        y=names, x=vs, orientation="h",
        marker=dict(color=colors, line=dict(color="#1f1d1a", width=1)),
        text=[f"{v:+.3f}" for v in vs],
        textposition="outside", textfont=dict(size=11, color="#f3f0e8"),
        cliponaxis=False,
        hovertemplate="%{y}<br>contribution=%{x:+.3f}<extra></extra>",
    ))
    fig.add_vline(x=0, line_width=1, line_color="#554e45")
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(
            text=f"Reward breakdown · total = {total:+.3f}", **_TITLE_STYLE),
        height=240,
        margin=dict(l=110, r=50, t=50, b=30),
        # The range always includes 0 and leaves 0.15 of padding on each side
        # for the value labels drawn outside the bars.
        xaxis=dict(title="weighted contribution",
                   range=[min(vs + [0]) - 0.15, max(vs + [0]) + 0.15],
                   **_AXIS_STYLE),
        yaxis=dict(autorange="reversed", **_AXIS_STYLE),
        showlegend=False, bargap=0.25,
    )
    return fig
def _empty_plot(msg):
    """Return an axis-less placeholder figure that shows ``msg`` centred.

    Plotly text does not honour newline characters (they render as a
    space); ``<br>`` is the line-break marker. Callers in this file pass
    ``"\\n"``-separated messages, so newlines are converted here.
    """
    text = msg
    if isinstance(text, str):
        text = text.replace("\r\n", "<br>").replace("\n", "<br>")
    fig = go.Figure()
    fig.add_annotation(
        text=text, x=0.5, y=0.5, xref="paper", yref="paper",
        showarrow=False, font=dict(size=14, color="#6b6258"),
    )
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        height=480, showlegend=False,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(visible=False), yaxis=dict(visible=False),
    )
    return fig
# ----------------- tab 1: Landscape Explorer -----------------
def _explore_landscape(template, dim, seed):
    """Build one landscape and describe it for the Landscape tab.

    Args:
        template: Template name passed to ``build_landscape``.
        dim: Requested dimensionality (forced to 2 for ``himmelblau``).
        seed: Seed for the random generator shared by ``build_landscape``
            and ``structural_hints``.

    Returns:
        ``(figure, rows)``: a contour figure when the landscape is 2-D
        (a placeholder otherwise) and a list of ``[key, value]`` rows.
    """
    rng = np.random.default_rng(int(seed))
    params: dict[str, Any] = {}
    if template == "quadratic":
        params = {"cond": 10.0}
    if template == "gaussian_mix":
        params = {"k": 3, "sigma": 0.5, "spread": 2.0}
    if template == "himmelblau":
        dim = 2
    ls = build_landscape(template=template, dim=int(dim), params=params, rng=rng)
    hints = structural_hints(ls, rng=rng)
    if ls.dim == 2:
        fig = _contour_plot(ls, title=f"{template} · dim=2")
    else:
        # "<br>" rather than "\n": Plotly text ignores newline characters.
        fig = _empty_plot(
            f"{template} · dim={ls.dim}<br>Contour view is 2-D only")
    rows = [
        [k, f"{v:.4g}" if isinstance(v, float) else str(v)]
        for k, v in hints.items()
    ]
    rows.append(["dim", ls.dim])
    rows.append(["f_min (known)", f"{ls.f_min:.4g}"])
    rows.append(["description", ls.description])
    return fig, rows
# ----------------- tab 2: Baseline Race -----------------
def _baseline_race(template, seed):
    """Race the four reference optimizers on a 2-D landscape from one start.

    Args:
        template: Template name passed to ``build_landscape`` (always dim=2).
        seed: Seed for the landscape; ``seed + 999`` seeds the start point.

    Returns:
        ``(contour_fig, curves_fig, finals_fig, summary_markdown)``.
    """
    rng = np.random.default_rng(int(seed))
    params: dict[str, Any] = {}
    if template == "quadratic":
        params = {"cond": 10.0}
    if template == "gaussian_mix":
        params = {"k": 3, "sigma": 0.5, "spread": 2.0}
    ls = build_landscape(template=template, dim=2, params=params, rng=rng)
    x0 = np.random.default_rng(int(seed) + 999).normal(0.0, 0.5, size=2)

    # Tune EACH baseline's LR to the landscape (not just Adam). Without this,
    # SGD at lr=0.01 diverges on stiff landscapes (Rosenbrock, high-cond
    # quadratics) and produces a monotone-up curve. Per-baseline tuning makes
    # the race compare *algorithms*, not "default hparams".
    traj_2d: dict[str, list[tuple[float, float]]] = {}
    curves: dict[str, list[float]] = {}
    finals: dict[str, float] = {}
    tuned_lrs: dict[str, float] = {}
    for name in ["sgd", "momentum", "adam", "lbfgs"]:
        r = run_baseline_tuned(name, ls.f, ls.grad, x0, steps=50)
        tuned_lrs[name] = r["lr"]
        traj = [s for s in r["trajectory"] if s.get("x") is not None]
        traj_2d[name] = [(s["x"][0], s["x"][1]) for s in traj]
        curves[name] = [s["f"] for s in traj if s.get("f") is not None]
        finals[name] = curves[name][-1] if curves[name] else float("inf")

    contour = _contour_plot(ls, trajectories=traj_2d,
                            title=f"{template} — baselines racing (LR-tuned)")
    curves_fig = _loss_curves(curves, "f(x) vs step")
    finals_fig = _bar_plot(finals, "Final f after 50 steps",
                           ylabel="f(x) at step 50")
    lr_table = " · ".join(f"`{name}`: `{lr:g}`"
                          for name, lr in tuned_lrs.items())

    def _rank(name):
        """Sort key: the final loss, with NaN/inf ranked last."""
        value = finals[name]
        return value if np.isfinite(value) else float("inf")

    # A plain min() over the values is order-dependent when one of them is
    # NaN, which could crown a diverged optimizer as the winner.
    best = min(finals, key=_rank)
    summary = (
        f"**{ls.description}**\n\n"
        f"Tuned LR per baseline (7-point sweep, 30 steps): {lr_table}\n\n"
        f"Best baseline: `{best}` at f = "
        f"`{finals[best]:.4f}`"
    )
    return contour, curves_fig, finals_fig, summary
# ----------------- tab 3: Optimizer Arena -----------------
# Default code shown in the editors. The embedded source must be valid,
# indented Python because it is compiled as-is.
SAMPLE_OPTIMIZER = """
class Optimizer:
    def __init__(self, dim):
        self.lr = 0.05
        self.beta = 0.9
        self.v = np.zeros(dim)

    def step(self, x, f_val, grad):
        # SGD with heavy-ball momentum
        self.v = self.beta * self.v - self.lr * grad
        return x + self.v
""".strip()

# Adam reference for the arena. ``{lr}`` is the only placeholder and is
# filled in with ``str.format(lr=...)``.
ADAM_ARENA_TEMPLATE = """
class Optimizer:
    def __init__(self, dim):
        self.lr = {lr}
        self.b1, self.b2, self.eps = 0.9, 0.999, 1e-8
        self.m = np.zeros(dim); self.v = np.zeros(dim); self.t = 0

    def step(self, x, f_val, grad):
        self.t += 1
        self.m = self.b1*self.m + (1-self.b1)*grad
        self.v = self.b2*self.v + (1-self.b2)*grad*grad
        mh = self.m/(1-self.b1**self.t); vh = self.v/(1-self.b2**self.t)
        return x - self.lr * mh / (np.sqrt(vh) + self.eps)
""".strip()

# Seeds passed to every run_arena call in the arena comparison.
ARENA_SEEDS = [101, 202, 303, 404, 505, 606, 707, 808, 909, 1010]
def _arena_compare(template, dim, seed, code):
    """Evaluate a user-supplied optimizer against LR-tuned Adam.

    Args:
        template: Landscape template name passed to ``build_landscape``.
        dim: Requested dimensionality (forced to 2 for ``himmelblau``).
        seed: Seed for landscape construction, the draft auto-test and the
            start point of the 2-D Adam reference run.
        code: Source text handed to ``compile_optimizer``.

    Returns:
        ``(contour_fig, progress_fig, reward_fig, summary_markdown,
        breakdown_dict)``. If ``compile_optimizer`` raises ``SandboxError``
        the three figures are ``None`` and the breakdown is ``{}``.
    """
    rng = np.random.default_rng(int(seed))
    dim = int(dim)
    params: dict[str, Any] = {}
    if template == "quadratic":
        params = {"cond": 10.0}
    if template == "gaussian_mix":
        params = {"k": 3, "sigma": 0.5, "spread": 2.0}
    if template == "himmelblau":
        dim = 2
    ls = build_landscape(template=template, dim=dim, params=params, rng=rng)
    tune_x0 = np.random.default_rng(0).normal(0.0, 0.5, size=dim)
    best_lr = tune_adam_lr(ls.f, ls.grad, tune_x0, sweep_steps=30)
    adam_src = ADAM_ARENA_TEMPLATE.format(lr=best_lr)

    # Compile user code
    try:
        opt = compile_optimizer(code, dim=dim)
    except SandboxError as e:
        return (None, None, None,
                f"### ❌ Compile error\n\n```\n{e}\n```", {})

    test = auto_test_draft(opt, ls, seed=int(seed), steps=20)
    user_arena = run_arena(opt, ls, seeds=ARENA_SEEDS, steps=200)
    adam_opt = compile_optimizer(adam_src, dim=dim)
    adam_arena = run_arena(adam_opt, ls, seeds=ARENA_SEEDS, steps=200)
    reward = compute_optcoder_reward(
        arena=user_arena, adam_arena=adam_arena,
        actions_used_cost=0, budget_total=12,
        novelty_score=ast_novelty_score(code, [adam_src]),
        convergence_step=None, arena_steps=200,
    )

    if dim == 2:
        # Skip steps that carry no position, exactly as is done for the Adam
        # trajectory below, so a failed step cannot crash the plot.
        user_traj = [(s["x"][0], s["x"][1]) for s in test["detail"]
                     if s.get("x") is not None]
        # NOTE(review): this reference path comes from run_baseline with its
        # own settings and 50 steps, while the title says "tuned Adam" and
        # the draft auto-test runs 20 steps — confirm this is intended.
        adam_run = run_baseline("adam", ls.f, ls.grad,
                                np.random.default_rng(int(seed)).normal(0.0, 0.5, 2),
                                steps=50)
        adam_traj = [(s["x"][0], s["x"][1]) for s in adam_run["trajectory"]
                     if s.get("x") is not None]
        contour = _contour_plot(
            ls,
            trajectories={"custom": user_traj, "adam": adam_traj},
            title=f"{template} — your optimizer vs tuned Adam")
    else:
        # "<br>" rather than "\n": Plotly text ignores newline characters.
        contour = _empty_plot(
            f"{template} · dim={dim}<br>Contour view is 2-D only")

    progress_fig = _bar_plot(
        {"custom": user_arena.mean_progress,
         "adam (tuned)": adam_arena.mean_progress},
        "Arena mean progress",
        ylabel=f"mean(f₀ − f_N) over {len(ARENA_SEEDS)} seeds",
    )
    bk = reward.breakdown
    # Penalty terms are negated so every bar reads as a signed contribution.
    components = {
        "r_regret": bk["r_regret"],
        "r_convergence": bk["r_convergence"],
        "r_robustness": bk["r_robustness"],
        "r_novelty": bk["r_novelty"],
        "-r_budget": -bk["r_budget"],
        "-r_eval_fail": -bk["r_eval_failures"],
    }
    reward_fig = _reward_breakdown_plot(components, reward.r_total)
    summary = (
        f"### Results\n\n"
        f"- Your mean progress: `{user_arena.mean_progress:.4g}`\n"
        f"- Tuned Adam progress: `{adam_arena.mean_progress:.4g}` "
        f"(lr = `{best_lr:g}`)\n"
        f"- Speedup vs Adam: `{bk.get('speedup_vs_adam', 0):.3g}×`\n"
        f"- Your crash fraction: `{user_arena.crash_fraction:.0%}`\n"
        f"- **Total reward: `{reward.r_total:+.3f}`**"
    )
    return contour, progress_fig, reward_fig, summary, dict(bk)
# ----------------- OpenEnv API + LLM auto-run -----------------
#
# Drives the env in-process (no HTTP round trip) so this tab works inside the
# Space container without localhost access.
#
# One module-level env lives in _API_ENV_STATE (shared by every caller in this
# process); reset/step mutate it.
# Additionally, `_llm_auto_run` connects to any OpenAI-compatible endpoint
# and drives a full episode end-to-end, streaming actions as they happen.
import os as _os
import time as _time
import requests as _requests
# Holder for the environment shared by the reset/step callbacks and the LLM
# auto-run. "env" is None until a reset or an auto-run stores an instance.
# NOTE(review): this is a module-level dict, so presumably every browser
# session served by this process shares the same env — confirm that is intended.
_API_ENV_STATE: dict[str, Any] = {"env": None}
def _make_env(tier: str, seed: int):
    """Create a new ``LandscapeforgeEnvironment`` for ``tier`` and ``seed``.

    The class is imported lazily: the package-relative module is tried
    first and the flat ``server.`` layout is the fallback. ``seed`` is
    converted with ``int()`` before it is handed to the constructor.
    """
    try:
        from ..server.landscapeforge_environment import (
            LandscapeforgeEnvironment as env_cls,
        )
    except ImportError:
        from server.landscapeforge_environment import (  # type: ignore
            LandscapeforgeEnvironment as env_cls,
        )
    seed_value = int(seed)
    return env_cls(tier=tier, seed=seed_value)
def _api_reset(tier, seed):
    """Create a fresh environment, reset it and publish it for ``_api_step``.

    Args:
        tier: Passed to ``_make_env``.
        seed: Passed to ``_make_env``.

    Returns:
        ``(observation_json, status_markdown)``: the initial observation as
        pretty-printed JSON text and a one-line status banner.
    """
    env = _make_env(tier, seed)
    obs = env.reset()
    _API_ENV_STATE["env"] = env
    return (
        _fmt_obs(obs.model_dump(exclude_none=True)),
        f"✓ Reset complete · landscape: **{obs.landscape_description}** · "
        f"dim = {obs.dim} · budget = {obs.budget_remaining}",
    )
def _api_step(kind, baseline_name, code, draft_idx, step_start, step_end):
    """Apply one manually configured action to the shared environment.

    Args:
        kind: Action kind; selects which of the other arguments are used.
        baseline_name: Used for ``run_baseline`` (defaults to ``"adam"``).
        code: Used for ``draft`` (defaults to ``""``).
        draft_idx: Used for ``inspect`` (defaults to 0 when ``None``).
        step_start: Used for ``inspect`` as ``step_range_start``.
        step_end: Used for ``inspect`` as ``step_range_end``.

    Returns:
        ``(observation_json, status_markdown)``. The first element is always
        JSON *text*, on error paths too, so it has the same type as the
        success path produced by ``_fmt_obs``.
    """
    env = _API_ENV_STATE.get("env")
    if env is None:
        return (_fmt_obs({"error": "call /reset first"}),
                "⚠ No active env — hit **reset** first.")
    try:
        # Argument conversion is inside the try so that a cleared number
        # field (None) is reported as an invalid action instead of raising.
        kwargs: dict[str, Any] = {"kind": kind}
        if kind == "run_baseline":
            kwargs["baseline_name"] = baseline_name or "adam"
        elif kind == "draft":
            kwargs["code"] = code or ""
        elif kind == "inspect":
            kwargs["draft_idx"] = int(draft_idx) if draft_idx is not None else 0
            kwargs["step_range_start"] = int(step_start)
            kwargs["step_range_end"] = int(step_end)
        action = LandscapeforgeAction(**kwargs)
    except Exception as e:
        return _fmt_obs({"error": str(e)}), f"❌ Invalid action: {e}"
    obs = env.step(action)
    dump = _fmt_obs(obs.model_dump(exclude_none=True))
    banner = (
        f"✓ {kind} executed · budget remaining = {obs.budget_remaining}"
        + (" · **episode done**" if obs.done else "")
    )
    return dump, banner
# ---- LLM auto-run (OpenAI-compat endpoint) ----
# Endpoint presets: label -> (base URL, default API key). The default keys
# are read from the process environment once, when this module is imported.
PRESET_ENDPOINTS = {
    label: (base_url, default_key)
    for label, base_url, default_key in (
        ("Ollama (localhost:11434)", "http://localhost:11434/v1", ""),
        ("Hugging Face Router", "https://router.huggingface.co/v1",
         _os.environ.get("HF_TOKEN", "")),
        ("OpenAI", "https://api.openai.com/v1",
         _os.environ.get("OPENAI_API_KEY", "")),
        ("Custom", "", ""),
    )
}

# Model names offered in the dropdown.
PRESET_MODELS = [
    "qwen2.5:3b",
    "qwen2.5:7b",
    "qwen2.5:1.5b",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "gpt-4o-mini",
]
def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name,
                  tier, seed, temperature, max_turns):
    """Drive a full episode end-to-end with an LLM, streaming the transcript.

    This is a generator. Every item it yields is a triple
    ``(transcript, observation_json, reward_figure)``: ``transcript`` is
    Markdown/HTML text, ``observation_json`` is a string (``""`` before an
    environment exists) and ``reward_figure`` is ``None`` until the episode
    is done.

    Args:
        endpoint_choice: Key into ``PRESET_ENDPOINTS``.
        custom_url: Optional base URL that overrides the preset's URL.
        api_key: Optional bearer token typed by the user.
        model_name: Model identifier sent in the request body.
        tier: Passed to ``_make_env``.
        seed: Passed to ``_make_env`` after ``int()``.
        temperature: Sampling temperature sent to the endpoint.
        max_turns: Upper bound on the number of LLM calls.
    """
    import html as _html

    try:
        from ..prompts import build_prompt, parse_action
    except ImportError:
        from prompts import build_prompt, parse_action  # type: ignore

    base, preset_key = PRESET_ENDPOINTS.get(endpoint_choice, ("", ""))
    typed_url = (custom_url or "").strip()
    base_url = (typed_url or base).rstrip("/")
    typed_key = (api_key or "").strip()
    # Secrets taken from the server environment are only attached when the
    # request really goes to the preset's own endpoint. Otherwise a visitor
    # could pick a preset, type their own URL and receive the server's token.
    targets_preset = bool(base) and base_url == base.rstrip("/")
    if typed_key:
        key = typed_key
    elif targets_preset:
        key = preset_key or _os.getenv("API_KEY", "")
    else:
        key = ""

    # The second output is a gr.Code component, so it must receive text.
    if not base_url:
        yield ("Pick a preset endpoint or type a custom URL.", "", None)
        return
    if not model_name:
        yield ("Pick or type a model name.", "", None)
        return

    url = base_url + "/chat/completions"
    headers = {"Content-Type": "application/json"}
    if key:
        headers["Authorization"] = f"Bearer {key}"

    env = _make_env(tier, int(seed))
    obs = env.reset()
    _API_ENV_STATE["env"] = env

    log_lines: list[str] = [
        "### Episode running",
        f"Model <span class='lf-chip'>{_html.escape(str(model_name))}</span> "
        f"via <span class='lf-chip'>{_html.escape(base_url)}</span>",
        "",
        # Two trailing spaces = Markdown hard line break before the next line.
        f"**Landscape:** {obs.landscape_description}  ",
        f"**Dim:** {obs.dim} · **Initial budget:** {obs.budget_remaining}",
        "",
    ]

    def _snapshot(figure=None):
        """Current transcript + latest observation, ready to be yielded."""
        return ("\n".join(log_lines),
                _fmt_obs(obs.model_dump(exclude_none=True)),
                figure)

    yield _snapshot()

    for turn in range(1, int(max_turns) + 1):
        messages = build_prompt(obs)
        t0 = _time.time()
        try:
            r = _requests.post(url, headers=headers, json={
                "model": model_name,
                "messages": messages,
                "temperature": float(temperature),
                "max_tokens": 1200,
                "stream": False,
            }, timeout=180)
            if r.status_code >= 400:
                log_lines.append(f"**[LLM error {r.status_code}]** {r.text[:300]}")
                yield _snapshot()
                return
            # "content" may be null in a reply; normalise to text so the
            # parse-error branch below can always slice it.
            raw = r.json()["choices"][0]["message"]["content"] or ""
        except Exception as e:
            log_lines.append(f"**[request failed]** `{type(e).__name__}: {e}`")
            yield _snapshot()
            return
        dt = _time.time() - t0

        try:
            action = parse_action(raw)
        except Exception as e:
            log_lines.append(
                f"**[turn {turn}] parse error:** `{e}`"
                f"\n```\n{raw[:500]}\n```\n"
            )
            yield _snapshot()
            return

        obs = env.step(action)
        _API_ENV_STATE["env"] = env

        # Pretty action line
        if action.kind == "draft":
            action_str = f"draft *({len(action.code or '')} chars)*"
        elif action.kind == "run_baseline":
            action_str = f"run_baseline(`{action.baseline_name}`)"
        elif action.kind == "inspect":
            action_str = (f"inspect(draft={action.draft_idx}, "
                          f"[{action.step_range_start},{action.step_range_end}])")
        else:
            action_str = "commit"

        # Build a self-contained "turn card" with explicit Action / Output
        # demarcation. Rendered as HTML so we control the structure.
        kind_chip = (f"<span class='lf-chip lf-chip-{action.kind}'>"
                     f"{action.kind}</span>")

        # Output status badges — colored chips + key/value pairs.
        output_badges: list[str] = []
        lar = obs.last_action_result or {}
        if lar.get("compile_error"):
            output_badges.append(
                "<span class='lf-status lf-status-bad'>compile error</span>")
        if lar.get("summary"):
            s = lar["summary"]
            if s.get("converged"):
                output_badges.append(
                    "<span class='lf-status lf-status-good'>auto-test converged</span>")
            elif s.get("diverged"):
                output_badges.append(
                    "<span class='lf-status lf-status-warn'>auto-test diverged</span>")
            if s.get("final_f") is not None:
                output_badges.append(
                    f"<code>final_f</code> = <b>{s['final_f']:.3g}</b>")
        if action.kind == "run_baseline" and lar.get("final_f") is not None:
            output_badges.append(
                f"<code>final_f</code> = <b>{lar['final_f']:.3g}</b>")
        fb = lar.get("feedback") or {}
        for k, v in fb.items():
            cls = "lf-status-good" if v >= 0 else "lf-status-warn"
            output_badges.append(
                f"<span class='lf-status {cls}'><code>{k}</code> "
                f"<b>{v:+.3f}</b></span>")
        output_html = " · ".join(output_badges) if output_badges else "ok"

        turn_html = (
            f"<div class='lf-turn'>"
            f" <div class='lf-turn-head'>"
            f" <span class='lf-turn-num'>Turn {turn}</span>"
            f" {kind_chip}"
            f" <span class='lf-turn-meta'>{dt:.1f}s · budget "
            f"<b>{obs.budget_remaining}</b></span>"
            f" </div>"
            f" <div class='lf-turn-row'>"
            f" <div class='lf-section-label'>Action</div>"
            f" <div class='lf-section-content'>{action_str}</div>"
            f" </div>"
            f" <div class='lf-turn-row'>"
            f" <div class='lf-section-label'>Output</div>"
            f" <div class='lf-section-content'>{output_html}</div>"
            f" </div>"
            f"</div>"
        )
        log_lines.extend(["", turn_html, ""])
        if action.kind == "draft" and action.code:
            log_lines.append(f"```python\n{action.code.strip()}\n```")
            log_lines.append("")
        yield _snapshot()

        if obs.done:
            bk = obs.r_optcoder_breakdown or {}
            reward_val = obs.r_optcoder or 0.0
            my_prog = bk.get("my_progress", 0.0)
            adam_prog = bk.get("adam_progress", 0.0)
            speedup = bk.get("speedup_vs_adam", 0.0)
            reason = _html.escape(
                str((obs.last_action_result or {}).get("reason", "?")))
            # Reward KPI tone: good at >= 0.5, warn in [0, 0.5), bad below 0.
            reward_tone = ("lf-kpi-good" if reward_val >= 0.5 else
                           ("lf-kpi-warn" if reward_val >= 0 else "lf-kpi-bad"))
            speedup_display = (f"{speedup:.2f}×" if speedup < 100
                               else f"{speedup:.0f}×")
            speedup_tone = ("lf-kpi-good" if speedup >= 1.0
                            else "lf-kpi-warn")
            # final_regret gets the same None guard as the other terminal
            # fields, so a missing value does not break the summary card.
            regret_display = (f"{obs.final_regret:.3f}"
                              if obs.final_regret is not None else "n/a")
            episode_done_html = (
                "<div class='lf-done'>"
                " <div class='lf-done-head'>"
                " <span class='lf-done-flag'>Episode complete</span>"
                f" <span class='lf-done-reason'>ended by "
                f"<code>{reason}</code></span>"
                " </div>"
                " <div class='lf-kpi-row'>"
                f" <div class='lf-kpi {reward_tone}'>"
                " <div class='lf-kpi-label'>Terminal reward</div>"
                f" <div class='lf-kpi-value'>{reward_val:+.3f}</div>"
                " <div class='lf-kpi-sub'>GRPO training scalar</div>"
                " </div>"
                f" <div class='lf-kpi {speedup_tone}'>"
                " <div class='lf-kpi-label'>Speedup vs tuned Adam</div>"
                f" <div class='lf-kpi-value'>{speedup_display}</div>"
                f" <div class='lf-kpi-sub'>my {my_prog:.3g} · "
                f"adam {adam_prog:.3g}</div>"
                " </div>"
                " <div class='lf-kpi'>"
                " <div class='lf-kpi-label'>Adam shortfall</div>"
                f" <div class='lf-kpi-value'>{regret_display}</div>"
                " <div class='lf-kpi-sub'>0 = matched/beat Adam</div>"
                " </div>"
                " </div>"
                "</div>"
            )
            log_lines.extend(["", episode_done_html, ""])
            reward_plot = _reward_breakdown_plot({
                "r_regret": bk.get("r_regret", 0),
                "r_convergence": bk.get("r_convergence", 0),
                "r_robustness": bk.get("r_robustness", 0),
                "r_novelty": bk.get("r_novelty", 0),
                "-r_budget": -bk.get("r_budget", 0),
                "-r_eval_fail": -bk.get("r_eval_failures", 0),
            }, reward_val)
            yield _snapshot(reward_plot)
            return

    log_lines.append(
        "\n**[!] Reached max turns without commit** — episode unfinished.")
    yield _snapshot()
# ----------------- top-level UI -----------------
# Static header markup: font preloads, the top bar with external links and
# the hero blurb.
HERO_HTML = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Source+Serif+4:opsz,wght@8..60,400;8..60,500;8..60,600&family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<header class="lf-topbar">
  <div class="lf-brand">
    <div class="lf-brand-mark"></div>
    <div class="lf-brand-text">
      <div class="lf-brand-name">LandscapeForge</div>
      <div class="lf-brand-sub">OpenEnv · Hackathon Apr '26</div>
    </div>
  </div>
  <div class="lf-topbar-actions">
    <a class="lf-link" href="https://huggingface.co/spaces/mnawfal29/landscapeforge" target="_blank" rel="noopener">Space</a>
    <a class="lf-link" href="/schema" target="_blank" rel="noopener">API schema</a>
    <a class="lf-link" href="/openapi.json" target="_blank" rel="noopener">OpenAPI</a>
  </div>
</header>
<section class="lf-hero">
  <h1>An LLM designs optimizers, through a probe–draft–commit REPL.</h1>
  <p>Two agents co-evolve: one writes optimizer code, the other picks adversarial landscapes.
  Connect any OpenAI-compatible endpoint and watch a model play,
  or explore the landscape library interactively.</p>
</section>
"""
# Markdown for the "about" text. The sub-bullets and the Python example are
# indented so the list nests and the example is valid Python.
ABOUT_MD = """
### How the environment works

**OptCoder** (the LLM policy) designs an `Optimizer` class that minimizes a
hidden loss landscape. Each episode:

1. **LandscapeForge** (v1: internal template picker) chooses a landscape at a
   tier-appropriate difficulty — convex quadratic, Rosenbrock, Gaussian mix,
   Himmelblau, stiff quadratic, cliff.
2. **OptCoder runs a 4-action REPL** with a 12-unit budget:
   - `run_baseline(name)` — run SGD / Momentum / Adam / L-BFGS, see trajectory
     (cost: 2)
   - `draft(code)` — submit `Optimizer` class, env auto-tests 20 steps (cost: 2)
   - `inspect(draft_idx, step_range)` — per-step detail for a prior draft
     (cost: 1)
   - `commit` — run the full 10-seed × 200-step arena (cost: 0)
3. **Reward** is Adam-relative progress — `my_progress / tuned_adam_progress − 1`,
   clipped to `[−1, +1]`. No `f_min` dependency, so this extends to NN training
   as a drop-in.
4. **GRPO** trains the policy against this reward; arena cost is ~50 ms so
   ~36 k episodes/hour on one H100.

### Research anchors

- **Thread 1** · LLMs as optimizer designers: [Lion](https://arxiv.org/abs/2302.06675),
  [FunSearch](https://www.nature.com/articles/s41586-023-06924-6)
- **Thread 2** · Co-evolutionary LLM-env: Coevolve,
  [GenEnv](https://arxiv.org/html/2512.19682v1)
- **Thread 3** · Iterative code refinement:
  [Self-Refine](https://arxiv.org/abs/2303.17651)
- **Thread 4** · GRPO with measurable rewards:
  [HPC GFLOPS reward paper](https://arxiv.org/abs/2602.12049v1)
- **Thread 5** · Analytical landscape benchmarks:
  [BBOB/COCO](https://inria.hal.science/hal-00362649/document),
  [POET](https://arxiv.org/abs/1901.01753)

### Use from code

```python
from landscapeforge import LandscapeforgeEnv, LandscapeforgeAction

with LandscapeforgeEnv.from_docker_image("landscapeforge-env:latest") as env:
    env.reset()
    env.step(LandscapeforgeAction(kind="run_baseline", baseline_name="adam"))
    env.step(LandscapeforgeAction(kind="draft", code=MY_OPT_CLASS))
    print(env.step(LandscapeforgeAction(kind="commit")).observation.r_optcoder_breakdown)
```

API endpoints on this Space: `/reset`, `/step`, `/schema`, `/openapi.json`,
`/health`, WebSocket `/ws`. See **OpenEnv API** tab for a live playground.
"""
def build_ui(*args, **kwargs) -> gr.Blocks:
"""Entry point for the Gradio app. Ignores OpenEnv's builder args."""
with gr.Blocks(
title="LandscapeForge",
theme=gr.themes.Soft(
primary_hue=gr.themes.Color(
c50="#fbf0ea", c100="#f4d6c5", c200="#ebb69b",
c300="#e09778", c400="#d37a58", c500="#c96442",
c600="#a8522f", c700="#874123", c800="#623018",
c900="#3f1e10", c950="#21100a",
),
neutral_hue="stone",
font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif",
"system-ui", "sans-serif"],
font_mono=[gr.themes.GoogleFont("JetBrains Mono"),
"ui-monospace", "monospace"],
),
css=CLAUDE_CSS,
) as app:
gr.HTML(HERO_HTML)
with gr.Tabs():
# --- Tab 0: Run with LLM (primary β€” auto-run) ---
with gr.Tab("Run with LLM"):
with gr.Row(equal_height=False):
# -------- MAIN PANE (left, wider) --------
with gr.Column(scale=4, min_width=640):
gr.Markdown("### Transcript")
transcript = gr.Markdown(
"*Configure the LLM on the right and hit "
"**β–Ά Run episode** β€” each turn streams here "
"as the model plays.*",
)
with gr.Row():
with gr.Column(scale=1):
llm_reward_plot = gr.Plot(
label="Reward breakdown (on episode end)")
with gr.Column(scale=1):
latest_obs = gr.Code(
language="json", interactive=False,
label="Latest observation", lines=14)
# -------- SIDEBAR (right, narrower) --------
with gr.Column(scale=1, min_width=300, elem_classes="lf-sidebar"):
gr.Markdown("### Connect an LLM")
gr.Markdown(
"Point at any OpenAI-compatible "
"`/v1/chat/completions` endpoint."
)
ep_choice = gr.Dropdown(
list(PRESET_ENDPOINTS.keys()),
value="Ollama (localhost:11434)",
label="Endpoint",
)
model_name_in = gr.Dropdown(
PRESET_MODELS, value="qwen2.5:3b",
label="Model", allow_custom_value=True,
)
custom_url_in = gr.Textbox(
value="", label="Custom base URL",
placeholder="http://localhost:8080/v1",
)
key_in = gr.Textbox(
value="", label="API key",
placeholder="Bearer <key>",
type="password",
)
gr.Markdown("---")
gr.Markdown("### Episode config")
tier_llm = gr.Dropdown(["T0", "T1", "T2"], value="T0",
label="Tier")
seed_llm = gr.Slider(0, 100, value=42, step=1, label="Seed")
temp_llm = gr.Slider(0, 1.5, value=0.7, step=0.05,
label="Temperature")
max_turns_llm = gr.Slider(3, 15, value=10, step=1,
label="Max turns")
run_btn = gr.Button("β–Ά Run episode", variant="primary",
size="lg")
with gr.Accordion("System prompt (sent to LLM)",
open=False):
try:
from ..prompts import SYSTEM as _SYS, ACTION_SPEC as _ACT
except ImportError:
from prompts import SYSTEM as _SYS, ACTION_SPEC as _ACT # type: ignore
gr.Code(
value=f"# SYSTEM\n\n{_SYS}\n\n# ACTION_SPEC\n\n{_ACT}",
language="markdown", interactive=False,
lines=14,
)
run_btn.click(
_llm_auto_run,
[ep_choice, custom_url_in, key_in, model_name_in,
tier_llm, seed_llm, temp_llm, max_turns_llm],
[transcript, latest_obs, llm_reward_plot],
)
# --- Tab: Manual stepping (raw /reset + /step) ---
with gr.Tab("API playground"):
with gr.Row(equal_height=False):
with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"):
gr.Markdown("### Manual stepping")
gr.Markdown(
"Drive the env one action at a time β€” exactly "
"the same contract as the HTTP `/reset` + `/step` "
"endpoints. Useful for sanity-checking an action "
"or debugging."
)
tier4 = gr.Dropdown(["T0", "T1", "T2"], value="T0",
label="Tier")
seed4 = gr.Slider(0, 100, value=42, step=1, label="Seed")
reset_btn = gr.Button("Reset env", variant="primary")
gr.Markdown("---")
kind4 = gr.Radio(
["run_baseline", "draft", "inspect", "commit"],
value="run_baseline", label="Action kind")
with gr.Accordion("run_baseline args", open=True):
bname4 = gr.Dropdown(
["sgd", "momentum", "adam", "lbfgs"],
value="adam", label="Reference optimizer")
with gr.Accordion("draft args", open=False):
code4 = gr.Code(value=SAMPLE_OPTIMIZER,
language="python",
label="Optimizer class", lines=10)
with gr.Accordion("inspect args", open=False):
didx4 = gr.Number(value=0, precision=0,
label="draft_idx")
s4s = gr.Number(value=0, precision=0,
label="step_range_start")
s4e = gr.Number(value=20, precision=0,
label="step_range_end")
step_btn = gr.Button("Step", variant="primary")
with gr.Column(scale=2, min_width=580):
status4 = gr.Markdown(
"*No active env β€” hit **Reset env** to begin.*")
obs4_reset = gr.Code(
language="json", interactive=False,
label="Initial observation", lines=12)
status4b = gr.Markdown()
obs4 = gr.Code(
language="json", interactive=False,
label="Step observation", lines=14)
reset_btn.click(_api_reset, [tier4, seed4],
[obs4_reset, status4])
step_btn.click(
_api_step,
[kind4, bname4, code4, didx4, s4s, s4e],
[obs4, status4b],
)
# --- Tab 1: Landscape ---
with gr.Tab("Landscape"):
with gr.Row(equal_height=False):
with gr.Column(scale=1, min_width=320, elem_classes="lf-sidebar"):
gr.Markdown("### Landscape Explorer")
gr.Markdown(
"Pick a template and see what the agent sees "
"at reset β€” the 2-D contour plus env-computed "
"structural hints used to calibrate the optimizer."
)
tmpl1 = gr.Dropdown(TEMPLATES_2D_SAFE,
value="rosenbrock", label="Template")
dim1 = gr.Slider(2, 10, value=2, step=1, label="Dim")
seed1 = gr.Slider(0, 100, value=0, step=1, label="Seed")
go1 = gr.Button("Build landscape", variant="primary",
size="lg")
with gr.Column(scale=2, min_width=580):
plot1 = gr.Plot(label="Contour")
hints1 = gr.Dataframe(
headers=["property", "value"],
datatype=["str", "str"],
label="Structural hints (shown to the agent at reset)",
wrap=True, row_count=(8, "dynamic"),
)
go1.click(_explore_landscape, [tmpl1, dim1, seed1], [plot1, hints1])
app.load(_explore_landscape,
[gr.State("rosenbrock"), gr.State(2), gr.State(0)],
[plot1, hints1])
# --- Tab 2: Baseline Race ---
with gr.Tab("Baseline Race"):
with gr.Row(equal_height=False):
with gr.Column(scale=1, min_width=320, elem_classes="lf-sidebar"):
gr.Markdown("### Baseline Race")
gr.Markdown(
"Race SGD, Momentum, L-BFGS, and **Adam with "
"per-landscape LR tuning** from the same init. "
"The tuned Adam is the bar the trained OptCoder "
"has to beat."
)
tmpl2 = gr.Dropdown(TEMPLATES_2D_SAFE,
value="rosenbrock", label="Template")
seed2 = gr.Slider(0, 100, value=1, step=1, label="Seed")
go2 = gr.Button("Race", variant="primary", size="lg")
with gr.Column(scale=2, min_width=580):
plot2a = gr.Plot(label="Contour + trajectories")
with gr.Row():
plot2b = gr.Plot(label="f(x) vs step")
plot2c = gr.Plot(label="Final f after 50 steps")
summary2 = gr.Markdown()
go2.click(_baseline_race, [tmpl2, seed2],
[plot2a, plot2b, plot2c, summary2])
# --- Tab 3: Optimizer Arena ---
with gr.Tab("Optimizer Arena"):
with gr.Row(equal_height=False):
with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"):
gr.Markdown("### Optimizer Arena")
gr.Markdown(
"Paste or edit an `Optimizer` class. We run it "
"through the full Phase-D arena (10 seeds Γ— 200 "
"steps) against tuned Adam and show the reward "
"breakdown.<br><small>`np` is pre-injected β€” "
"do not write import lines.</small>"
)
tmpl3 = gr.Dropdown(list(BUILDERS.keys()),
value="quadratic", label="Template")
dim3 = gr.Slider(2, 10, value=5, step=1, label="Dim")
seed3 = gr.Slider(0, 100, value=42, step=1, label="Seed")
go3 = gr.Button("Run arena", variant="primary",
size="lg")
with gr.Column(scale=2, min_width=580):
code3 = gr.Code(value=SAMPLE_OPTIMIZER,
language="python",
label="Your Optimizer class",
lines=14)
with gr.Row():
plot3a = gr.Plot(label="2-D trajectory (if dim = 2)")
plot3b = gr.Plot(label="Mean arena progress")
plot3c = gr.Plot(label="Reward breakdown")
summary3 = gr.Markdown()
breakdown3 = gr.JSON(label="Full reward dict",
height=220)
go3.click(_arena_compare, [tmpl3, dim3, seed3, code3],
[plot3a, plot3b, plot3c, summary3, breakdown3])
# --- About ---
with gr.Tab("About"):
gr.Markdown(ABOUT_MD)
return app