Spaces:
Sleeping
Sleeping
"""Gradio demo for LandscapeForge — Claude-inspired visual design.

Four tabs:
1. Landscape — pick a template, see 2D contour + structural hints
2. Baseline Race — SGD / Momentum / tuned-Adam / L-BFGS racing, same init
3. Optimizer Arena — paste a custom Optimizer class, full-arena eval vs
   tuned-Adam, reward breakdown
4. OpenEnv API — live reset/step against the same container's FastAPI

Design: warm off-white background, coral primary, generous spacing,
minimal chrome, no heavy shadows.
"""
| from __future__ import annotations | |
| import json | |
| from typing import Any | |
| import gradio as gr | |
| import numpy as np | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
def _fmt_obs(obs_dict: dict) -> str:
    """Pretty-print an observation as indented JSON for gr.Code display.

    Sequences longer than eight items (baseline trajectories etc.) are
    collapsed to their first three and last three entries around an elision
    marker so the rendered view stays readable, and floats are rounded to six
    decimal places. ``json.dumps(indent=2)`` gives one value per line, which
    looks much cleaner than gr.JSON's component-per-field tree.

    Args:
        obs_dict: Observation mapping to display. Values may be arbitrarily
            nested dicts, lists and tuples.

    Returns:
        JSON text indented by two spaces. Values the ``json`` module cannot
        encode natively are rendered through ``str()`` instead of raising.
    """
    def _shrink(v: Any) -> Any:
        # json emits tuples as arrays as well, so they must be shortened and
        # rounded exactly like lists; otherwise a long tuple would be dumped
        # in full and defeat the purpose of this helper.
        if isinstance(v, (list, tuple)):
            if len(v) > 8:
                # Keep 3 head + 3 tail items; the marker reports how many
                # entries were dropped in between.
                return (
                    [_shrink(x) for x in v[:3]]
                    + [f"... ({len(v) - 6} more) ..."]
                    + [_shrink(x) for x in v[-3:]]
                )
            return [_shrink(x) for x in v]
        if isinstance(v, dict):
            return {k: _shrink(x) for k, x in v.items()}
        if isinstance(v, float):
            return round(v, 6)
        return v

    # default=str: this is a display helper, so an unencodable value is shown
    # as its string form rather than aborting the whole render.
    return json.dumps(_shrink(obs_dict), indent=2, default=str)
| try: | |
| from ..arena import auto_test_draft, run_arena | |
| from ..landscapes import BUILDERS, build_landscape, structural_hints | |
| from ..reference_optimizers import ( | |
| run_baseline, run_baseline_tuned, tune_adam_lr, | |
| ) | |
| from ..rewards import ast_novelty_score, compute_optcoder_reward | |
| from ..sandbox import SandboxError, compile_optimizer | |
| from ..models import LandscapeforgeAction | |
| except ImportError: # flat layout (HF Space container) | |
| from arena import auto_test_draft, run_arena # type: ignore | |
| from landscapes import BUILDERS, build_landscape, structural_hints # type: ignore | |
| from reference_optimizers import ( # type: ignore | |
| run_baseline, run_baseline_tuned, tune_adam_lr, | |
| ) | |
| from rewards import ast_novelty_score, compute_optcoder_reward # type: ignore | |
| from sandbox import SandboxError, compile_optimizer # type: ignore | |
| from models import LandscapeforgeAction # type: ignore | |
| # ----------------- Claude-inspired palette + CSS ----------------- | |
| # Mimics Anthropic's actual surface colors: warmer parchment background, | |
| # deep warm ink for text, Anthropic burnt-sienna as primary accent. | |
| CLAUDE_CSS = """ | |
| /* Variables β dark mode default, warm ink + sienna accent */ | |
| :root { | |
| --lf-bg: #1f1d1a; /* warm near-black page */ | |
| --lf-surface: #2a2824; /* card surface */ | |
| --lf-surface-alt: #332f2a; /* elevated surface (code, plots) */ | |
| --lf-border: #403b34; /* card edge */ | |
| --lf-border-soft: #332f2a; /* soft inner divider */ | |
| --lf-text: #f3f0e8; /* warm off-white */ | |
| --lf-text-muted: #b5ada0; /* muted body */ | |
| --lf-text-subtle: #857d72; /* labels, captions */ | |
| --lf-accent: #e28763; /* brighter sienna for dark bg */ | |
| --lf-accent-dk: #c96442; /* hover / pressed */ | |
| --lf-accent-soft: #4a2f22; /* accent-tinted dark for selected bg */ | |
| --lf-good: #7ab68c; | |
| --lf-bad: #d47d6a; | |
| } | |
| /* Page */ | |
| html, body, .gradio-container { | |
| background: var(--lf-bg) !important; | |
| } | |
| .gradio-container { | |
| font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", | |
| Helvetica, Arial, sans-serif !important; | |
| color: var(--lf-text) !important; | |
| max-width: none !important; | |
| width: 100% !important; | |
| margin: 0 auto !important; | |
| padding: 1.5rem 2rem 3rem !important; | |
| /* Override Gradio's internal theme variables so every component | |
| inherits the warm palette instead of Gradio's blue-on-white defaults */ | |
| --body-text-color: var(--lf-text) !important; | |
| --body-text-color-subdued: var(--lf-text-muted) !important; | |
| --body-background-fill: var(--lf-bg) !important; | |
| --background-fill-primary: var(--lf-surface) !important; | |
| --background-fill-secondary: var(--lf-bg) !important; | |
| --border-color-primary: var(--lf-border) !important; | |
| --border-color-accent: var(--lf-accent) !important; | |
| --input-background-fill: var(--lf-surface) !important; | |
| --input-border-color: var(--lf-border) !important; | |
| --input-text-color: var(--lf-text) !important; | |
| --input-placeholder-color: var(--lf-text-subtle) !important; | |
| --block-background-fill: var(--lf-surface) !important; | |
| --block-border-color: var(--lf-border-soft) !important; | |
| --block-label-background-fill: transparent !important; | |
| --block-label-text-color: var(--lf-text) !important; | |
| --block-title-text-color: var(--lf-text) !important; | |
| --block-info-text-color: var(--lf-text-muted) !important; | |
| --neutral-50: var(--lf-surface) !important; | |
| --neutral-100: var(--lf-bg) !important; | |
| --neutral-200: var(--lf-border-soft) !important; | |
| --neutral-300: var(--lf-border) !important; | |
| --neutral-400: var(--lf-text-subtle) !important; | |
| --neutral-500: var(--lf-text-muted) !important; | |
| --neutral-600: var(--lf-text-muted) !important; | |
| --neutral-700: var(--lf-text) !important; | |
| --neutral-800: var(--lf-text) !important; | |
| --neutral-900: var(--lf-text) !important; | |
| --color-accent: var(--lf-accent) !important; | |
| --color-accent-soft: var(--lf-accent-soft) !important; | |
| --link-text-color: var(--lf-accent) !important; | |
| --link-text-color-hover: var(--lf-accent-dk) !important; | |
| --button-primary-background-fill: var(--lf-accent) !important; | |
| --button-primary-background-fill-hover: var(--lf-accent-dk) !important; | |
| --button-primary-text-color: #ffffff !important; | |
| --button-primary-border-color: var(--lf-accent) !important; | |
| /* Kill the `<span data-testid="block-info">` pill that Gradio 5 uses | |
| for every component label β it was defaulting to the primary accent. | |
| We want labels to be plain muted text above the input. */ | |
| --block-title-background-fill: transparent !important; | |
| --block-title-border-color: transparent !important; | |
| --block-title-border-width: 0 !important; | |
| --block-title-radius: 0 !important; | |
| --block-title-padding: 0 0 0.3rem 0 !important; | |
| --block-title-text-color: var(--lf-text-muted) !important; | |
| --block-title-text-weight: 500 !important; | |
| --block-title-text-size: 0.8rem !important; | |
| /* Input outlines β dropdowns/text/number all need obvious borders */ | |
| --input-shadow: none !important; | |
| --input-shadow-focus: 0 0 0 3px rgba(226,135,99,0.18) !important; | |
| --input-border-color-focus: var(--lf-accent) !important; | |
| --input-background-fill-focus:var(--lf-surface) !important; | |
| /* Checkbox / radio variables */ | |
| --checkbox-background-color: var(--lf-surface) !important; | |
| --checkbox-background-color-hover: var(--lf-surface-alt) !important; | |
| --checkbox-background-color-focus: var(--lf-surface-alt) !important; | |
| --checkbox-background-color-selected: var(--lf-accent) !important; | |
| --checkbox-border-color: var(--lf-border) !important; | |
| --checkbox-border-color-hover: var(--lf-accent) !important; | |
| --checkbox-border-color-focus: var(--lf-accent) !important; | |
| --checkbox-border-color-selected: var(--lf-accent) !important; | |
| --checkbox-label-background-fill: transparent !important; | |
| --checkbox-label-background-fill-hover: var(--lf-surface-alt) !important; | |
| --checkbox-label-background-fill-selected:var(--lf-accent-soft) !important; | |
| --checkbox-label-text-color: var(--lf-text) !important; | |
| --checkbox-label-text-color-selected: var(--lf-accent) !important; | |
| --checkbox-label-border-color: var(--lf-border) !important; | |
| --checkbox-label-border-color-hover: var(--lf-accent) !important; | |
| --checkbox-label-border-color-selected:var(--lf-accent) !important; | |
| --checkbox-check: var(--lf-accent) !important; | |
| } | |
| /* Typography β serif for headings to match Claude's Tiempos-style hero */ | |
| .gradio-container h1, | |
| .gradio-container h2, | |
| .gradio-container h3, | |
| .gradio-container h4 { | |
| color: var(--lf-text) !important; | |
| font-family: "Source Serif 4", "Source Serif Pro", Georgia, "Times New Roman", | |
| serif !important; | |
| font-weight: 500 !important; | |
| letter-spacing: -0.015em !important; | |
| line-height: 1.2 !important; | |
| } | |
| .gradio-container h1 { font-size: 2.5rem !important; margin: 0.25rem 0 0.5rem !important; } | |
| .gradio-container h2 { font-size: 1.5rem !important; margin: 1.4rem 0 0.5rem !important; } | |
| .gradio-container h3 { font-size: 1.15rem !important; margin: 1.1rem 0 0.5rem !important; font-weight: 600 !important; } | |
| .gradio-container p, .gradio-container li { | |
| color: var(--lf-text-muted) !important; | |
| line-height: 1.65 !important; | |
| font-size: 0.97rem !important; | |
| } | |
| .gradio-container strong { color: var(--lf-text) !important; } | |
| /* Top bar β Linear/Vercel-style fixed header */ | |
| .lf-topbar { | |
| display: flex; align-items: center; justify-content: space-between; | |
| padding: 0.6rem 0.2rem 1.1rem; | |
| border-bottom: 1px solid var(--lf-border); | |
| margin-bottom: 1.25rem; | |
| } | |
| .lf-brand { display: flex; align-items: center; gap: 0.75rem; } | |
| .lf-brand-mark { | |
| width: 28px; height: 28px; border-radius: 7px; | |
| background: linear-gradient(135deg, var(--lf-accent) 0%, var(--lf-accent-dk) 100%); | |
| box-shadow: inset 0 0 0 1px rgba(255,255,255,0.08), | |
| 0 1px 3px rgba(0,0,0,0.3); | |
| position: relative; | |
| } | |
| .lf-brand-mark::after { | |
| /* little contour-ring motif inside the mark */ | |
| content: ""; position: absolute; inset: 5px; | |
| border: 1.5px solid rgba(255,255,255,0.55); | |
| border-radius: 4px; | |
| clip-path: polygon(0 0, 100% 0, 100% 70%, 30% 100%, 0 100%); | |
| } | |
| .lf-brand-name { | |
| font-family: "Inter", sans-serif; | |
| font-weight: 600; font-size: 0.95rem; color: var(--lf-text); | |
| letter-spacing: -0.01em; line-height: 1.1; | |
| } | |
| .lf-brand-sub { | |
| font-family: "Inter", sans-serif; | |
| font-size: 0.72rem; color: var(--lf-text-subtle); | |
| letter-spacing: 0.04em; text-transform: uppercase; margin-top: 1px; | |
| } | |
| .lf-topbar-actions { display: flex; gap: 0.25rem; align-items: center; } | |
| .lf-link { | |
| color: var(--lf-text-muted) !important; | |
| font-family: "Inter", sans-serif; | |
| font-size: 0.82rem; text-decoration: none !important; | |
| padding: 0.4rem 0.75rem; border-radius: 6px; | |
| border: 1px solid transparent; | |
| transition: background 0.12s, color 0.12s, border-color 0.12s; | |
| } | |
| .lf-link:hover { | |
| color: var(--lf-text) !important; | |
| background: var(--lf-surface); | |
| border-color: var(--lf-border); | |
| } | |
| /* Hero β modern dashboard banner, serif headline */ | |
| .lf-hero { | |
| margin-bottom: 1.5rem; | |
| padding: 0.25rem 0 1rem; | |
| } | |
| .lf-hero h1 { | |
| margin: 0 0 0.55rem 0 !important; | |
| font-family: "Source Serif 4", "Source Serif Pro", Georgia, serif !important; | |
| font-size: 2.1rem !important; | |
| font-weight: 500 !important; | |
| color: var(--lf-text) !important; | |
| max-width: 820px; | |
| line-height: 1.2 !important; | |
| letter-spacing: -0.018em !important; | |
| } | |
| .lf-hero p { | |
| margin: 0 !important; | |
| max-width: 720px; | |
| font-size: 0.98rem !important; | |
| line-height: 1.6 !important; | |
| color: var(--lf-text-muted) !important; | |
| } | |
| /* Tabs β Gradio 5 uses `.tab-container` with scoped `button` */ | |
| .gradio-container .tab-container { | |
| border-bottom: 1px solid var(--lf-border) !important; | |
| margin-bottom: 1.1rem !important; | |
| } | |
| .gradio-container .tab-container button, | |
| .gradio-container .tab-container button[role="tab"] { | |
| background: transparent !important; | |
| color: var(--lf-text-muted) !important; | |
| border: none !important; | |
| border-bottom: 2px solid transparent !important; | |
| font-family: "Inter", sans-serif !important; | |
| font-weight: 500 !important; | |
| font-size: 0.96rem !important; | |
| padding: 0.7rem 1.15rem !important; | |
| letter-spacing: -0.005em !important; | |
| transition: color 0.15s, border-color 0.15s !important; | |
| border-radius: 0 !important; | |
| } | |
| .gradio-container .tab-container button:hover:not(:disabled):not(.selected) { | |
| color: var(--lf-text) !important; | |
| background-color: transparent !important; | |
| } | |
| .gradio-container .tab-container button.selected { | |
| color: var(--lf-accent) !important; | |
| border-bottom: 2px solid var(--lf-accent) !important; | |
| font-weight: 600 !important; | |
| background: transparent !important; | |
| } | |
| /* Primary buttons β burnt sienna solid */ | |
| .gradio-container button.primary, | |
| .gradio-container .primary button, | |
| .gradio-container button.gradio-button.primary { | |
| background: var(--lf-accent) !important; | |
| color: #ffffff !important; | |
| border: none !important; | |
| font-family: "Inter", sans-serif !important; | |
| font-weight: 600 !important; | |
| font-size: 0.9rem !important; | |
| letter-spacing: -0.005em !important; | |
| border-radius: 8px !important; | |
| padding: 0.6rem 1.1rem !important; | |
| box-shadow: 0 1px 2px rgba(201,100,66,0.15) !important; | |
| transition: background 0.15s, box-shadow 0.15s !important; | |
| } | |
| .gradio-container button.primary:hover, | |
| .gradio-container .primary button:hover { | |
| background: var(--lf-accent-dk) !important; | |
| box-shadow: 0 2px 6px rgba(201,100,66,0.25) !important; | |
| } | |
| /* Secondary buttons */ | |
| .gradio-container button.secondary { | |
| background: var(--lf-surface) !important; | |
| color: var(--lf-text) !important; | |
| border: 1px solid var(--lf-border) !important; | |
| font-weight: 500 !important; | |
| border-radius: 8px !important; | |
| } | |
| /* Inputs + selects + textareas + dropdowns β clearly bordered */ | |
| .gradio-container input[type="text"], | |
| .gradio-container input[type="number"], | |
| .gradio-container input[type="password"], | |
| .gradio-container select, | |
| .gradio-container textarea, | |
| .gradio-container .wrap-inner, | |
| .gradio-container [role="combobox"], | |
| .gradio-container .dropdown > div, | |
| .gradio-container [data-testid="dropdown"] > div { | |
| border: 1px solid var(--lf-border) !important; | |
| background: var(--lf-surface-alt) !important; | |
| color: var(--lf-text) !important; | |
| border-radius: 8px !important; | |
| font-family: "Inter", sans-serif !important; | |
| font-size: 0.92rem !important; | |
| min-height: 38px !important; | |
| box-sizing: border-box !important; | |
| transition: border-color 0.15s, box-shadow 0.15s !important; | |
| } | |
| .gradio-container input[type="text"], | |
| .gradio-container input[type="number"], | |
| .gradio-container input[type="password"], | |
| .gradio-container textarea { | |
| padding: 0.55rem 0.75rem !important; | |
| } | |
| .gradio-container input:focus, | |
| .gradio-container textarea:focus, | |
| .gradio-container select:focus, | |
| .gradio-container [role="combobox"]:focus-within { | |
| border-color: var(--lf-accent) !important; | |
| outline: none !important; | |
| box-shadow: 0 0 0 3px rgba(226,135,99,0.18) !important; | |
| } | |
| /* Number input wrapper (Gradio renders a wrapper around input+reset) β | |
| give it enough room so "0.7" doesn't clip */ | |
| .gradio-container .number-input-container, | |
| .gradio-container input[type="number"] { | |
| min-width: 72px !important; | |
| text-align: left !important; | |
| } | |
| .gradio-container input[type="number"] { | |
| padding-right: 0.4rem !important; | |
| } | |
| /* Labels β kill the accent-coloured "chip" treatment Gradio 5 applies, | |
| make them plain inline text above the input */ | |
| .gradio-container label, | |
| .gradio-container .label, | |
| .gradio-container .block > .label-wrap, | |
| .gradio-container .block > .label-wrap > span, | |
| .gradio-container [data-testid="block-label"], | |
| .gradio-container [data-testid="block-label"] > *, | |
| .gradio-container .label > span { | |
| background: transparent !important; | |
| color: var(--lf-text-muted) !important; | |
| font-weight: 500 !important; | |
| font-size: 0.82rem !important; | |
| letter-spacing: 0.01em !important; | |
| text-transform: none !important; | |
| padding: 0 !important; | |
| margin-bottom: 0.3rem !important; | |
| border: none !important; | |
| border-radius: 0 !important; | |
| box-shadow: none !important; | |
| } | |
| /* The block-label "pill" that wraps the label + icon: flatten it */ | |
| .gradio-container .block .wrap > .label-wrap, | |
| .gradio-container .block > .wrap-inner > .label-wrap, | |
| .gradio-container .block > div > .label-wrap, | |
| .gradio-container .block > span[data-testid], | |
| .gradio-container .block > span.svelte-1gfkn6j, | |
| .gradio-container .block-label, | |
| .gradio-container div[aria-label][class*="label"] { | |
| background: transparent !important; | |
| padding: 0 0 0.25rem 0 !important; | |
| color: var(--lf-text-muted) !important; | |
| font-weight: 500 !important; | |
| border: none !important; | |
| border-radius: 0 !important; | |
| } | |
| /* For elements whose rendered icon-prefixed label (e.g. JSON "{...}" icon, | |
| Plot chart icon) is inside the label-wrap, keep them subtle */ | |
| .gradio-container .block-label svg, | |
| .gradio-container .label-wrap svg, | |
| .gradio-container [data-testid="block-label"] svg { | |
| color: var(--lf-text-subtle) !important; | |
| opacity: 0.7; | |
| } | |
| /* Reset / refresh icon buttons (the circular arrow next to number inputs) */ | |
| .gradio-container button[aria-label*="Reset"], | |
| .gradio-container button[title*="Reset"], | |
| .gradio-container .icon-button { | |
| background: transparent !important; | |
| color: var(--lf-text-subtle) !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| } | |
| .gradio-container button[aria-label*="Reset"]:hover, | |
| .gradio-container .icon-button:hover { | |
| color: var(--lf-accent) !important; | |
| background: var(--lf-accent-soft) !important; | |
| } | |
| /* Gradio block container (the outer "card" of each component) */ | |
| .gradio-container .block, | |
| .gradio-container .gr-box, | |
| .gradio-container .gr-panel, | |
| .gradio-container .form { | |
| background: var(--lf-surface) !important; | |
| border: 1px solid var(--lf-border-soft) !important; | |
| border-radius: 10px !important; | |
| padding: 1.1rem !important; | |
| } | |
| /* Slider colors */ | |
| .gradio-container input[type="range"]::-webkit-slider-thumb { | |
| background: var(--lf-accent) !important; | |
| } | |
| .gradio-container .svelte-range-slider .handle, | |
| .gradio-container .svelte-range-slider .rangeBar { | |
| background: var(--lf-accent) !important; | |
| } | |
| /* Code blocks */ | |
| .gradio-container pre, | |
| .gradio-container code, | |
| .gradio-container .cm-editor, | |
| .gradio-container .cm-content { | |
| font-family: "JetBrains Mono", ui-monospace, Menlo, Consolas, monospace !important; | |
| font-size: 0.84rem !important; | |
| } | |
| .gradio-container pre { | |
| background: var(--lf-surface-alt) !important; | |
| border: 1px solid var(--lf-border-soft) !important; | |
| border-radius: 8px !important; | |
| padding: 0.9rem 1.1rem !important; | |
| } | |
| /* Dataframes */ | |
| .gradio-container table { | |
| border-collapse: collapse !important; | |
| width: 100% !important; | |
| font-family: "Inter", sans-serif !important; | |
| } | |
| .gradio-container table th { | |
| background: var(--lf-bg) !important; | |
| color: var(--lf-text) !important; | |
| font-weight: 600 !important; | |
| font-size: 0.82rem !important; | |
| letter-spacing: 0.01em !important; | |
| text-transform: uppercase !important; | |
| border-bottom: 1px solid var(--lf-border) !important; | |
| padding: 0.6rem 0.85rem !important; | |
| } | |
| .gradio-container table td { | |
| border-bottom: 1px solid var(--lf-border-soft) !important; | |
| padding: 0.55rem 0.85rem !important; | |
| color: var(--lf-text) !important; | |
| font-size: 0.9rem !important; | |
| } | |
| /* JSON renderer β force warm ink for every node + muted for keys */ | |
| .gradio-container .json-holder, | |
| .gradio-container .json-container, | |
| .gradio-container .json-node { | |
| background: var(--lf-surface) !important; | |
| border: 1px solid var(--lf-border-soft) !important; | |
| border-radius: 8px !important; | |
| padding: 0.9rem !important; | |
| color: var(--lf-text) !important; | |
| font-family: "JetBrains Mono", ui-monospace, Menlo, monospace !important; | |
| font-size: 0.82rem !important; | |
| } | |
| .gradio-container .json-holder *, | |
| .gradio-container .json-container * { | |
| color: var(--lf-text) !important; | |
| } | |
| .gradio-container .json-holder .key, | |
| .gradio-container .json-container .key { | |
| color: var(--lf-accent-dk) !important; | |
| font-weight: 600 !important; | |
| } | |
| .gradio-container .json-holder .string-value { | |
| color: #3d6b4c !important; | |
| } | |
| .gradio-container .json-holder .number-value { | |
| color: #874123 !important; | |
| } | |
| /* Dropdown option list (open state) β Gradio defaults to white-on-white */ | |
| .gradio-container .options, | |
| .gradio-container .options .item, | |
| .gradio-container [role="listbox"], | |
| .gradio-container [role="option"] { | |
| background: var(--lf-surface) !important; | |
| color: var(--lf-text) !important; | |
| border-color: var(--lf-border) !important; | |
| } | |
| .gradio-container [role="option"]:hover, | |
| .gradio-container .options .item:hover { | |
| background: var(--lf-accent-soft) !important; | |
| color: var(--lf-text) !important; | |
| } | |
| .gradio-container [role="option"][aria-selected="true"] { | |
| background: var(--lf-accent) !important; | |
| color: #ffffff !important; | |
| } | |
| /* Markdown rendered inside blocks */ | |
| .gradio-container .prose, | |
| .gradio-container .markdown, | |
| .gradio-container [data-testid="markdown"] { | |
| color: var(--lf-text) !important; | |
| } | |
| .gradio-container .prose p, | |
| .gradio-container .markdown p, | |
| .gradio-container [data-testid="markdown"] p { | |
| color: var(--lf-text-muted) !important; | |
| } | |
| .gradio-container .prose strong, | |
| .gradio-container .markdown strong { | |
| color: var(--lf-text) !important; | |
| } | |
| .gradio-container .prose a, | |
| .gradio-container .markdown a { | |
| color: var(--lf-accent) !important; | |
| text-decoration: underline; | |
| text-underline-offset: 2px; | |
| } | |
| .gradio-container .prose code, | |
| .gradio-container .markdown code { | |
| background: var(--lf-bg) !important; | |
| color: var(--lf-accent-dk) !important; | |
| padding: 0.12em 0.4em !important; | |
| border-radius: 4px !important; | |
| font-size: 0.84em !important; | |
| } | |
| /* Inline label / info text under inputs */ | |
| .gradio-container .block-info, | |
| .gradio-container .info { | |
| color: var(--lf-text-muted) !important; | |
| font-size: 0.82rem !important; | |
| } | |
| /* Slider track+value labels */ | |
| .gradio-container .svelte-range-slider, | |
| .gradio-container .min-val, | |
| .gradio-container .max-val, | |
| .gradio-container .value { | |
| color: var(--lf-text) !important; | |
| } | |
| .gradio-container .value-text { | |
| color: var(--lf-accent-dk) !important; | |
| font-weight: 600 !important; | |
| } | |
| /* Radio buttons β labels should be visible */ | |
| .gradio-container .wrap label, | |
| .gradio-container [role="radio"] + label { | |
| color: var(--lf-text) !important; | |
| } | |
| /* Status badges inside obs.done etc */ | |
| .gradio-container .status-text { | |
| color: var(--lf-text) !important; | |
| } | |
| /* Accordion headers */ | |
| .gradio-container .label-wrap, | |
| .gradio-container .accordion-header { | |
| font-weight: 500 !important; | |
| color: var(--lf-text) !important; | |
| } | |
| /* Footer β hide "Built with Gradio" */ | |
| footer, .gradio-container footer { display: none !important; } | |
| /* Scrollbars */ | |
| .gradio-container ::-webkit-scrollbar { width: 10px; height: 10px; } | |
| .gradio-container ::-webkit-scrollbar-track { background: var(--lf-bg); } | |
| .gradio-container ::-webkit-scrollbar-thumb { | |
| background: var(--lf-border); | |
| border-radius: 5px; | |
| } | |
| .gradio-container ::-webkit-scrollbar-thumb:hover { background: var(--lf-text-subtle); } | |
| /* Sidebar column β one card; inside is flat */ | |
| .gradio-container .lf-sidebar { | |
| background: var(--lf-surface) !important; | |
| border: 1px solid var(--lf-border) !important; | |
| border-radius: 12px !important; | |
| padding: 1.5rem 1.35rem 1.35rem !important; | |
| box-shadow: 0 1px 0 rgba(20,20,19,0.02); | |
| } | |
| .gradio-container .lf-sidebar h3 { | |
| margin-top: 0.15rem !important; | |
| margin-bottom: 0.3rem !important; | |
| } | |
| .gradio-container .lf-sidebar p { | |
| font-size: 0.88rem !important; | |
| margin-bottom: 0.85rem !important; | |
| color: var(--lf-text-muted) !important; | |
| } | |
| /* Flatten ALL nested blocks inside the sidebar β no card-in-card */ | |
| .gradio-container .lf-sidebar .block, | |
| .gradio-container .lf-sidebar .form, | |
| .gradio-container .lf-sidebar .gr-box, | |
| .gradio-container .lf-sidebar .gr-panel, | |
| .gradio-container .lf-sidebar .wrap, | |
| .gradio-container .lf-sidebar fieldset { | |
| background: transparent !important; | |
| border: none !important; | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| border-radius: 0 !important; | |
| box-shadow: none !important; | |
| } | |
| /* Space between consecutive controls in the sidebar */ | |
| .gradio-container .lf-sidebar > div > *, | |
| .gradio-container .lf-sidebar > .form > * { | |
| margin-bottom: 0.85rem !important; | |
| } | |
| .gradio-container .lf-sidebar button { | |
| width: 100% !important; | |
| } | |
| .gradio-container .lf-sidebar hr, | |
| .gradio-container .lf-sidebar .prose hr { | |
| border: none !important; | |
| border-top: 1px solid var(--lf-border) !important; | |
| margin: 1.1rem 0 !important; | |
| } | |
| /* Hide ugly number-input spinner arrows (β²βΌ) */ | |
| .gradio-container input[type="number"]::-webkit-outer-spin-button, | |
| .gradio-container input[type="number"]::-webkit-inner-spin-button { | |
| -webkit-appearance: none !important; | |
| appearance: none !important; | |
| margin: 0 !important; | |
| } | |
| .gradio-container input[type="number"] { | |
| -moz-appearance: textfield !important; | |
| appearance: textfield !important; | |
| } | |
| /* Slider value-input on the right β align + size so "0.95" doesn't clip */ | |
| .gradio-container .slider-container, | |
| .gradio-container [data-testid="slider"] { | |
| display: flex !important; | |
| flex-direction: column !important; | |
| gap: 0.4rem !important; | |
| } | |
| .gradio-container [data-testid="slider"] .head, | |
| .gradio-container .tab-like-container { | |
| display: flex !important; | |
| align-items: center !important; | |
| justify-content: space-between !important; | |
| gap: 0.5rem !important; | |
| } | |
| .gradio-container [data-testid="slider"] input[type="number"] { | |
| width: 68px !important; | |
| min-width: 68px !important; | |
| max-width: 80px !important; | |
| text-align: right !important; | |
| padding: 0.3rem 0.5rem !important; | |
| min-height: 30px !important; | |
| font-size: 0.85rem !important; | |
| } | |
| /* Reset-button next to number inputs β make it transparent & subtle */ | |
| .gradio-container [data-testid="slider"] button, | |
| .gradio-container .reset-button { | |
| background: transparent !important; | |
| border: none !important; | |
| color: var(--lf-text-subtle) !important; | |
| padding: 0.15rem !important; | |
| min-width: 26px !important; | |
| width: 26px !important; | |
| height: 26px !important; | |
| } | |
| .gradio-container [data-testid="slider"] button:hover { | |
| color: var(--lf-accent) !important; | |
| background: var(--lf-accent-soft) !important; | |
| } | |
| /* Inline code tag β softer across the whole app, not only the sidebar */ | |
| .gradio-container .prose code, | |
| .gradio-container .markdown code, | |
| .gradio-container code { | |
| background: var(--lf-surface-alt) !important; | |
| border: 1px solid var(--lf-border) !important; | |
| color: var(--lf-text) !important; | |
| padding: 0.05em 0.42em !important; | |
| border-radius: 4px !important; | |
| font-size: 0.85em !important; | |
| font-weight: 400 !important; | |
| } | |
| /* Fenced code blocks β proper code-box with mono font + subtle bg */ | |
| .gradio-container .prose pre, | |
| .gradio-container .markdown pre { | |
| background: #14120f !important; | |
| border: 1px solid var(--lf-border) !important; | |
| border-radius: 8px !important; | |
| padding: 0.9rem 1rem !important; | |
| margin: 0.4rem 0 0.8rem 0 !important; | |
| overflow-x: auto !important; | |
| } | |
| .gradio-container .prose pre code, | |
| .gradio-container .markdown pre code { | |
| background: transparent !important; | |
| border: none !important; | |
| color: #e8e3d6 !important; | |
| font-size: 0.82rem !important; | |
| line-height: 1.55 !important; | |
| padding: 0 !important; | |
| } | |
| /* Chips β lightweight tags for action kind, model, endpoint */ | |
| .gradio-container .lf-chip { | |
| display: inline-block; | |
| padding: 0.08rem 0.5rem; | |
| border-radius: 5px; | |
| background: var(--lf-surface-alt); | |
| color: var(--lf-text); | |
| border: 1px solid var(--lf-border); | |
| font-family: "JetBrains Mono", ui-monospace, monospace; | |
| font-size: 0.78rem; | |
| font-weight: 500; | |
| letter-spacing: -0.01em; | |
| } | |
| .gradio-container .lf-chip-draft { color: var(--lf-accent); border-color: var(--lf-accent); } | |
| .gradio-container .lf-chip-run_baseline { color: #7ecfc5; border-color: #5a9c94; } | |
| .gradio-container .lf-chip-inspect { color: #b5a5e0; border-color: #7e6ea8; } | |
| .gradio-container .lf-chip-commit { color: #7ab68c; border-color: #4e7c5c; } | |
| /* Soft divider inside transcript */ | |
| .gradio-container .lf-hr-soft, | |
| .gradio-container hr.lf-hr-soft { | |
| border: none !important; | |
| border-top: 1px solid var(--lf-border-soft) !important; | |
| margin: 0.9rem 0 0.6rem !important; | |
| opacity: 0.6; | |
| } | |
| /* Turn card β one per REPL step. Clearly demarcates Action vs Output */ | |
| .gradio-container .lf-turn { | |
| background: var(--lf-surface); | |
| border: 1px solid var(--lf-border); | |
| border-radius: 10px; | |
| padding: 0.9rem 1rem; | |
| margin: 0.85rem 0; | |
| box-shadow: 0 1px 0 rgba(0,0,0,0.2); | |
| } | |
| .gradio-container .lf-turn-head { | |
| display: flex; align-items: center; gap: 0.55rem; | |
| margin-bottom: 0.7rem; | |
| padding-bottom: 0.55rem; | |
| border-bottom: 1px dashed var(--lf-border-soft); | |
| } | |
| .gradio-container .lf-turn-num { | |
| font-family: "Source Serif 4", Georgia, serif; | |
| font-weight: 600; | |
| font-size: 0.98rem; | |
| color: var(--lf-text); | |
| letter-spacing: -0.01em; | |
| } | |
| .gradio-container .lf-turn-meta { | |
| margin-left: auto; | |
| font-family: "JetBrains Mono", monospace; | |
| font-size: 0.76rem; | |
| color: var(--lf-text-subtle); | |
| } | |
| .gradio-container .lf-turn-meta b { | |
| color: var(--lf-text); | |
| font-weight: 600; | |
| } | |
| .gradio-container .lf-turn-row { | |
| display: grid; | |
| grid-template-columns: 70px 1fr; | |
| align-items: baseline; | |
| gap: 0.75rem; | |
| padding: 0.25rem 0; | |
| } | |
| .gradio-container .lf-section-label { | |
| font-family: "Inter", sans-serif; | |
| font-size: 0.68rem; | |
| font-weight: 600; | |
| letter-spacing: 0.1em; | |
| text-transform: uppercase; | |
| color: var(--lf-text-subtle); | |
| padding-top: 0.15rem; | |
| } | |
| .gradio-container .lf-section-content { | |
| color: var(--lf-text); | |
| font-size: 0.9rem; | |
| line-height: 1.55; | |
| font-family: "Inter", sans-serif; | |
| } | |
| .gradio-container .lf-section-content code { | |
| font-size: 0.82em !important; | |
| } | |
| .gradio-container .lf-section-content b { | |
| color: var(--lf-text); | |
| font-weight: 600; | |
| } | |
| /* Status chips inside the Output row */ | |
| .gradio-container .lf-status { | |
| display: inline-block; | |
| padding: 0.05rem 0.45rem; | |
| border-radius: 4px; | |
| font-size: 0.78rem; | |
| font-weight: 500; | |
| border: 1px solid; | |
| background: transparent; | |
| margin-right: 0.15rem; | |
| } | |
| .gradio-container .lf-status-good { | |
| color: #7ab68c; | |
| border-color: rgba(122,182,140,0.4); | |
| background: rgba(122,182,140,0.08); | |
| } | |
| .gradio-container .lf-status-warn { | |
| color: #e4b264; | |
| border-color: rgba(228,178,100,0.4); | |
| background: rgba(228,178,100,0.08); | |
| } | |
| .gradio-container .lf-status-bad { | |
| color: #d47d6a; | |
| border-color: rgba(212,125,106,0.4); | |
| background: rgba(212,125,106,0.08); | |
| } | |
| /* Code fence that follows a turn card β tighten top margin */ | |
| .gradio-container .lf-turn + pre, | |
| .gradio-container .prose pre:has(+ .lf-turn) { | |
| margin-top: -0.5rem !important; | |
| } | |
| /* Episode-done dashboard: KPI row with big metric cards */ | |
| .gradio-container .lf-done { | |
| background: linear-gradient(180deg, | |
| rgba(226,135,99,0.06) 0%, | |
| rgba(42,40,36,0) 60%); | |
| border: 1px solid var(--lf-border); | |
| border-radius: 12px; | |
| padding: 1.2rem 1.25rem; | |
| margin: 1.1rem 0 0.6rem; | |
| } | |
| .gradio-container .lf-done-head { | |
| display: flex; align-items: baseline; gap: 0.85rem; | |
| margin-bottom: 0.9rem; | |
| } | |
| .gradio-container .lf-done-flag { | |
| color: var(--lf-accent); | |
| font-family: "Inter", sans-serif; | |
| font-weight: 600; font-size: 0.75rem; | |
| letter-spacing: 0.11em; text-transform: uppercase; | |
| padding: 0.15rem 0.55rem; | |
| border: 1px solid var(--lf-accent); | |
| border-radius: 5px; | |
| } | |
| .gradio-container .lf-done-reason { | |
| color: var(--lf-text-subtle); | |
| font-size: 0.84rem; | |
| } | |
| .gradio-container .lf-done-reason code { | |
| font-family: "JetBrains Mono", monospace; | |
| background: transparent !important; | |
| border: none !important; | |
| color: var(--lf-text-muted) !important; | |
| padding: 0 !important; | |
| } | |
| .gradio-container .lf-kpi-row { | |
| display: grid; | |
| grid-template-columns: repeat(3, 1fr); | |
| gap: 0.8rem; | |
| } | |
| .gradio-container .lf-kpi { | |
| background: var(--lf-surface-alt); | |
| border: 1px solid var(--lf-border-soft); | |
| border-radius: 10px; | |
| padding: 0.9rem 1rem; | |
| min-width: 0; | |
| } | |
| .gradio-container .lf-kpi-label { | |
| color: var(--lf-text-subtle); | |
| font-family: "Inter", sans-serif; | |
| font-size: 0.7rem; | |
| font-weight: 600; | |
| letter-spacing: 0.1em; | |
| text-transform: uppercase; | |
| margin-bottom: 0.35rem; | |
| } | |
| .gradio-container .lf-kpi-value { | |
| font-family: "Source Serif 4", Georgia, serif; | |
| font-weight: 500; | |
| font-size: 1.9rem; | |
| color: var(--lf-text); | |
| letter-spacing: -0.025em; | |
| line-height: 1.1; | |
| } | |
| .gradio-container .lf-kpi-sub { | |
| color: var(--lf-text-subtle); | |
| font-size: 0.72rem; | |
| margin-top: 0.3rem; | |
| font-family: "JetBrains Mono", monospace; | |
| } | |
| .gradio-container .lf-kpi-good .lf-kpi-value { color: #7ab68c; } | |
| .gradio-container .lf-kpi-warn .lf-kpi-value { color: #e4b264; } | |
| .gradio-container .lf-kpi-bad .lf-kpi-value { color: #d47d6a; } | |
| .gradio-container .lf-kpi-good { border-color: rgba(122,182,140,0.35); } | |
| .gradio-container .lf-kpi-warn { border-color: rgba(228,178,100,0.35); } | |
| .gradio-container .lf-kpi-bad { border-color: rgba(212,125,106,0.35); } | |
| /* Responsive: stack KPIs on narrow */ | |
| @media (max-width: 720px) { | |
| .gradio-container .lf-kpi-row { | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| /* Main pane plots+outputs */ | |
| .gradio-container .gr-plot, .gradio-container .plot-wrap { | |
| background: var(--lf-surface-alt) !important; | |
| border-radius: 10px !important; | |
| } | |
| """ | |
# Shared Plotly layout applied to every figure in this module (dark, warm
# palette). The margin is kept in its own constant so a plot can pass its own
# ``margin=`` next to ``**_PLOTLY_LAYOUT`` without a duplicate-keyword clash.
_PLOTLY_LAYOUT = {
    "font": {
        "family": "Inter, -apple-system, system-ui, sans-serif",
        "color": "#f3f0e8",
        "size": 12,
    },
    "paper_bgcolor": "#2a2824",  # card surface
    "plot_bgcolor": "#1f1d1a",  # page background, slightly darker
    "hoverlabel": {
        "bgcolor": "#f3f0e8",
        "font_color": "#1f1d1a",
        "font_family": "Inter",
        "bordercolor": "#e28763",
    },
    "legend": {
        "bgcolor": "rgba(31,29,26,0.85)",
        "bordercolor": "#403b34",
        "borderwidth": 1,
        "font": {"color": "#f3f0e8"},
    },
}

# Margin used by every plot that does not need extra room for labels.
_DEFAULT_MARGIN = {"l": 60, "r": 30, "t": 60, "b": 55}

# Keyword arguments splatted into each xaxis/yaxis definition.
_AXIS_STYLE = {
    "gridcolor": "#403b34",
    "zerolinecolor": "#554e45",
    "showline": True,
    "linecolor": "#554e45",
    "tickfont": {"color": "#b5ada0"},
}

# Keyword arguments splatted into each figure title (left-aligned).
_TITLE_STYLE = {
    "x": 0.02,
    "xanchor": "left",
    "font": {"size": 14, "color": "#f3f0e8", "weight": 500},
}

# Trace colour per optimizer name; looked up by ``_color``.
OPT_COLORS = {
    "sgd": "#c05450",
    "momentum": "#d9865b",
    "adam": "#5b7a6b",
    "lbfgs": "#556b99",
    "custom": "#d97757",
}

# Bar colours for non-negative / negative reward components.
BAR_GOOD = "#4a7c59"
BAR_BAD = "#a85c4c"

# ----------------- plotting helpers (Plotly) -----------------

TEMPLATES_2D_SAFE = [
    "quadratic",
    "rosenbrock",
    "styblinski_tang",
    "huber",
    "gaussian_mix",
    "himmelblau",
    "plateau",
    "cliff",
]
def _color(name: str) -> str:
    """Return the trace colour registered for optimizer *name*.

    Everything from the first ``(`` onwards (for example a tuned-lr suffix)
    and any surrounding whitespace is ignored for the lookup. Names that are
    not in ``OPT_COLORS`` fall back to ``#2a2319``.
    """
    base_name, _, _ = name.partition("(")
    return OPT_COLORS.get(base_name.strip(), "#2a2319")
def _trajectory_diverged(arr: np.ndarray, clip: float = 8.0) -> bool:
    """Report whether any coordinate leaves the viewing window.

    A trajectory counts as diverged when at least one entry is NaN/inf or has
    an absolute value greater than *clip*. An empty array is not diverged.
    """
    inside_window = np.isfinite(arr) & (np.abs(arr) <= clip)
    return not bool(np.all(inside_window))
def _contour_plot(ls, trajectories=None, title=None, subtitle=None):
    """Draw a 2-D landscape as a filled contour with optional optimizer paths.

    Args:
        ls: Landscape object. This function reads ``ls.dim`` (must be 2) and
            ``ls.name``, and calls ``ls.f(point)`` with length-2 arrays.
        trajectories: Optional mapping ``name -> sequence of (x1, x2)``
            points. Points that are non-finite or outside the +/-8 window
            are not drawn; the trajectory is then flagged as diverged.
        title: Figure title; defaults to ``"<ls.name> (dim=2)"``.
        subtitle: Optional warning line below the title; defaults to the
            list of diverged trajectory names, if any.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    assert ls.dim == 2, "contour plot requires dim=2"
    CLIP = 8.0  # half-width of the largest window we are willing to draw

    # Split each trajectory ONCE into its drawable points, remembering the
    # original step index of every kept point so hover labels stay truthful
    # after out-of-window points have been filtered away.
    prepared = []  # (name, visible points, original step indices, diverged)
    xs_all, ys_all = [0.0], [0.0]
    for name, traj in (trajectories or {}).items():
        arr = np.array(traj)
        if arr.size == 0:
            continue
        mask = (np.abs(arr) <= CLIP).all(axis=1) & np.isfinite(arr).all(axis=1)
        visible = arr[mask]
        xs_all.extend(visible[:, 0].tolist())
        ys_all.extend(visible[:, 1].tolist())
        prepared.append((name, visible, np.flatnonzero(mask),
                         _trajectory_diverged(arr, clip=CLIP)))

    # View window: at least [-3.5, 3.5], padded around the visible points,
    # never larger than [-CLIP, CLIP].
    x_min = max(min(min(xs_all) - 1.5, -3.5), -CLIP)
    x_max = min(max(max(xs_all) + 1.5, 3.5), CLIP)
    y_min = max(min(min(ys_all) - 1.5, -3.5), -CLIP)
    y_max = min(max(max(ys_all) + 1.5, 3.5), CLIP)

    # Evaluate f on a g x g grid; row i corresponds to ys[i], column j to xs[j].
    g = 70
    xs = np.linspace(x_min, x_max, g)
    ys = np.linspace(y_min, y_max, g)
    Z = np.empty((g, g))
    for i, y in enumerate(ys):
        for j, x in enumerate(xs):
            Z[i, j] = ls.f(np.array([x, y]))

    # Clamp the colour range to the 2nd..95th percentile of finite values so
    # a few extreme cells do not wash out the picture. If nothing is finite,
    # let Plotly pick the range instead of calling percentile on an empty array.
    finite = Z[np.isfinite(Z)]
    z_range = {}
    if finite.size:
        lo, hi = np.percentile(finite, [2, 95])
        z_range = {"zmin": float(lo), "zmax": float(hi)}

    fig = go.Figure()
    fig.add_trace(go.Contour(
        x=xs, y=ys, z=Z,
        **z_range,
        # Dark-mode colorscale: deep warm valleys up to glowing sienna peaks.
        colorscale=[
            [0.0, "#1f1d1a"], [0.15, "#2f2a22"], [0.3, "#4a2f22"],
            [0.5, "#7a4229"], [0.7, "#c25a3a"], [0.85, "#e28763"],
            [1.0, "#f4d6c5"],
        ],
        contours=dict(coloring="heatmap", showlabels=False),
        line=dict(width=0.5, color="rgba(243,240,232,0.12)"),
        colorbar=dict(title=dict(text="f(x)",
                                 font=dict(size=11, color="#f3f0e8")),
                      thickness=12, len=0.85,
                      tickfont=dict(size=10, color="#b5ada0"),
                      outlinewidth=0),
        hovertemplate="xβ=%{x:.3f}<br>xβ=%{y:.3f}<br>f=%{z:.3f}<extra></extra>",
    ))

    divergent_names: list[str] = []
    for name, pts, step_ids, diverged in prepared:
        if diverged:
            divergent_names.append(name)
        if pts.shape[0] == 0:
            # Nothing drawable, but the name is still reported as diverged.
            continue
        color = _color(name)
        display_name = f"{name} Β· diverged" if diverged else name
        line_style = "dash" if diverged else "solid"
        # Use the ORIGINAL step numbers, not positions in the filtered array.
        hover = [f"step {i}<br>xβ={a[0]:.3f}<br>xβ={a[1]:.3f}"
                 for i, a in zip(step_ids.tolist(), pts)]
        # Path itself.
        fig.add_trace(go.Scatter(
            x=pts[:, 0], y=pts[:, 1],
            mode="lines+markers",
            name=display_name,
            line=dict(color=color, width=2.5, dash=line_style),
            marker=dict(size=4, color=color,
                        line=dict(color="#ffffff", width=0.8)),
            hovertemplate="%{text}<extra>" + display_name + "</extra>",
            text=hover,
        ))
        # Open circle at the first visible point.
        fig.add_trace(go.Scatter(
            x=[pts[0, 0]], y=[pts[0, 1]],
            mode="markers", showlegend=False,
            marker=dict(size=12, color=color, symbol="circle-open",
                        line=dict(color=color, width=2.5)),
            hovertemplate=f"start<extra>{display_name}</extra>",
        ))
        # Star at the last visible point, or a cross where a diverged run
        # was last seen inside the window.
        end_symbol = "x" if diverged else "star"
        end_size = 14 if diverged else 16
        fig.add_trace(go.Scatter(
            x=[pts[-1, 0]], y=[pts[-1, 1]],
            mode="markers", showlegend=False,
            marker=dict(size=end_size, color=color, symbol=end_symbol,
                        line=dict(color="#ffffff", width=1.2)),
            hovertemplate=(f"{'diverged-exit' if diverged else 'end'}"
                           f"<extra>{display_name}</extra>"),
        ))

    full_title = title or f"{ls.name} (dim=2)"
    sub_text = subtitle or (
        f"diverged: {', '.join(divergent_names)}" if divergent_names else None
    )
    if sub_text:
        full_title = f"{full_title}<br><span style='font-size:11px;color:#a85c4c'>β {sub_text}</span>"
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(text=full_title, **_TITLE_STYLE),
        height=480,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(title="xβ", range=[x_min, x_max], **_AXIS_STYLE),
        # Lock the aspect ratio so distances in x and y look the same.
        yaxis=dict(title="xβ", range=[y_min, y_max],
                   scaleanchor="x", scaleratio=1, **_AXIS_STYLE),
    )
    return fig
def _loss_curves(traj_map, title):
    """Plot objective value against optimizer step for several runs.

    Args:
        traj_map: Mapping ``name -> list of f values`` (one per step). Empty
            series are skipped; non-finite values become gaps in the line.
        title: Figure title.

    Returns:
        A ``plotly.graph_objects.Figure``. The y axis is logarithmic when
        every finite value is positive. Plotly has no symmetric-log axis and
        a log axis cannot display values <= 0, so if any series contains such
        a value the axis falls back to linear instead of silently hiding it.
    """
    fig = go.Figure()
    has_nonpositive = False
    for name, fs in traj_map.items():
        if not fs:
            continue
        color = _color(name)
        # Replace NaN/inf (diverged tail) with None so Plotly draws a gap.
        fs_clean = [v if np.isfinite(v) else None for v in fs]
        if any(v is not None and v <= 0 for v in fs_clean):
            has_nonpositive = True
        fig.add_trace(go.Scatter(
            x=list(range(len(fs_clean))), y=fs_clean,
            mode="lines+markers", name=name,
            line=dict(color=color, width=2.2, shape="spline"),
            marker=dict(size=4, color=color),
            hovertemplate="step=%{x}<br>f=%{y:.4g}<extra>" + name + "</extra>",
            connectgaps=False,
        ))

    scale = "linear" if has_nonpositive else "log"
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(text=title, **_TITLE_STYLE),
        height=360,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(title="optimizer step", **_AXIS_STYLE),
        # Label the scale that is actually in use.
        yaxis=dict(title=f"f(x) ({scale} scale)", type=scale, **_AXIS_STYLE),
    )
    return fig
def _bar_plot(values, title, ylabel):
    """Build a vertical bar chart with one bar per entry of *values*.

    Args:
        values: Mapping ``name -> number``; insertion order is the bar order.
        title: Figure title.
        ylabel: Y-axis title, also used in the hover text.

    Returns:
        A ``plotly.graph_objects.Figure`` without a legend.
    """
    labels = list(values)
    heights = [values[label] for label in labels]
    bar_colors = list(map(_color, labels))
    value_labels = [f"{h:.3g}" for h in heights]

    bars = go.Bar(
        x=labels,
        y=heights,
        marker=dict(color=bar_colors, line=dict(color="#ffffff", width=1)),
        text=value_labels,
        textposition="outside",
        textfont=dict(size=11),
        hovertemplate="%{x}<br>" + ylabel + "=%{y:.4g}<extra></extra>",
    )
    fig = go.Figure(bars)
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(text=title, **_TITLE_STYLE),
        height=280,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(**_AXIS_STYLE),
        yaxis=dict(title=ylabel, **_AXIS_STYLE),
        showlegend=False,
    )
    return fig
def _reward_breakdown_plot(components, total):
    """Build a horizontal bar chart of the signed reward components.

    Args:
        components: Mapping ``label -> weighted contribution``; insertion
            order is the top-to-bottom order of the bars.
        total: Total reward, shown in the title.

    Returns:
        A ``plotly.graph_objects.Figure``. Non-negative bars use ``BAR_GOOD``
        and negative bars use ``BAR_BAD``.
    """
    labels = list(components)
    contributions = [components[label] for label in labels]
    bar_colors = [BAR_BAD if c < 0 else BAR_GOOD for c in contributions]

    # The x-range always contains 0 and leaves 0.15 of padding on both sides
    # so the "outside" value labels have room.
    spanned = [*contributions, 0]
    x_range = [min(spanned) - 0.15, max(spanned) + 0.15]

    # Horizontal bars read better in a narrow column and align the values.
    bars = go.Bar(
        y=labels,
        x=contributions,
        orientation="h",
        marker=dict(color=bar_colors, line=dict(color="#1f1d1a", width=1)),
        text=[f"{c:+.3f}" for c in contributions],
        textposition="outside",
        textfont=dict(size=11, color="#f3f0e8"),
        cliponaxis=False,
        hovertemplate="%{y}<br>contribution=%{x:+.3f}<extra></extra>",
    )
    fig = go.Figure(bars)
    fig.add_vline(x=0, line_width=1, line_color="#554e45")
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        title=dict(
            text=f"Reward breakdown Β· total = {total:+.3f}", **_TITLE_STYLE),
        height=240,
        margin=dict(l=110, r=50, t=50, b=30),
        xaxis=dict(title="weighted contribution", range=x_range, **_AXIS_STYLE),
        # Reversed so the first component is drawn at the top.
        yaxis=dict(autorange="reversed", **_AXIS_STYLE),
        showlegend=False,
        bargap=0.25,
    )
    return fig
def _empty_plot(msg):
    """Return a blank figure that shows *msg* centred in the plot area.

    Args:
        msg: Text to display. Newlines are converted to ``<br>`` because
            Plotly annotation text only breaks lines on that tag; callers in
            this module pass two-line messages separated by ``"\\n"``.

    Returns:
        A ``plotly.graph_objects.Figure`` with hidden axes and no legend.
    """
    fig = go.Figure()
    fig.add_annotation(
        text=str(msg).replace("\n", "<br>"),
        x=0.5, y=0.5, xref="paper", yref="paper",
        showarrow=False, font=dict(size=14, color="#6b6258"),
    )
    fig.update_layout(
        **_PLOTLY_LAYOUT,
        height=480, showlegend=False,
        margin=_DEFAULT_MARGIN,
        xaxis=dict(visible=False), yaxis=dict(visible=False),
    )
    return fig
| # ----------------- tab 1: Landscape Explorer ----------------- | |
def _explore_landscape(template, dim, seed):
    """Build one landscape and return its plot plus a table of properties.

    Args:
        template: Landscape template name passed to ``build_landscape``.
        dim: Requested dimensionality (forced to 2 for ``"himmelblau"``).
        seed: Seed for the NumPy generator shared by the landscape builder
            and ``structural_hints``.

    Returns:
        ``(figure, rows)`` where *figure* is a contour plot for 2-D
        landscapes or a placeholder otherwise, and *rows* is a list of
        ``[name, value]`` pairs.
    """
    rng = np.random.default_rng(int(seed))

    # Template-specific construction parameters.
    params: dict[str, Any] = {}
    if template == "quadratic":
        params = {"cond": 10.0}
    elif template == "gaussian_mix":
        params = {"k": 3, "sigma": 0.5, "spread": 2.0}
    elif template == "himmelblau":
        dim = 2

    ls = build_landscape(template=template, dim=int(dim), params=params, rng=rng)
    hints = structural_hints(ls, rng=rng)

    if ls.dim != 2:
        fig = _empty_plot(f"{template} Β· dim={ls.dim}\nContour view is 2-D only")
    else:
        fig = _contour_plot(ls, title=f"{template} Β· dim=2")

    # Floats are shortened to 4 significant digits; everything else is str()'d.
    rows = [
        [key, f"{val:.4g}" if isinstance(val, float) else str(val)]
        for key, val in hints.items()
    ]
    rows.extend([
        ["dim", ls.dim],
        ["f_min (known)", f"{ls.f_min:.4g}"],
        ["description", ls.description],
    ])
    return fig, rows
| # ----------------- tab 2: Baseline Race ----------------- | |
def _baseline_race(template, seed):
    """Race the four reference optimizers on one 2-D landscape.

    Args:
        template: Landscape template name passed to ``build_landscape``.
        seed: Seed for the landscape; the shared start point is drawn from a
            separate generator seeded with ``seed + 999``.

    Returns:
        ``(contour_figure, loss_curve_figure, final_value_bar_figure,
        markdown_summary)``.
    """
    rng = np.random.default_rng(int(seed))
    params: dict[str, Any] = {}
    if template == "quadratic":
        params = {"cond": 10.0}
    if template == "gaussian_mix":
        params = {"k": 3, "sigma": 0.5, "spread": 2.0}
    ls = build_landscape(template=template, dim=2, params=params, rng=rng)
    x0 = np.random.default_rng(int(seed) + 999).normal(0.0, 0.5, size=2)

    # Tune EACH baseline's LR to the landscape (not just Adam). Without this,
    # SGD at a default lr diverges on stiff landscapes and the race compares
    # default hyper-parameters rather than algorithms.
    traj_2d: dict[str, list[tuple[float, float]]] = {}
    curves: dict[str, list[float]] = {}
    finals: dict[str, float] = {}
    tuned_lrs: dict[str, float] = {}
    for name in ["sgd", "momentum", "adam", "lbfgs"]:
        r = run_baseline_tuned(name, ls.f, ls.grad, x0, steps=50)
        tuned_lrs[name] = r["lr"]
        # Steps without a position cannot be drawn or scored; skip them.
        traj = [s for s in r["trajectory"] if s.get("x") is not None]
        traj_2d[name] = [(s["x"][0], s["x"][1]) for s in traj]
        curves[name] = [s["f"] for s in traj if s.get("f") is not None]
        finals[name] = curves[name][-1] if curves[name] else float("inf")

    contour = _contour_plot(ls, trajectories=traj_2d,
                            title=f"{template} β baselines racing (LR-tuned)")
    curves_fig = _loss_curves(curves, "f(x) vs step")
    finals_fig = _bar_plot(finals, "Final f after 50 steps",
                           ylabel="f(x) at step 50")

    # Pick the winner once, ranking NaN/inf finals as +inf. Plain min() is
    # order-dependent with NaN and could crown a diverged run as "best".
    best_name, best_f = min(
        finals.items(),
        key=lambda item: item[1] if np.isfinite(item[1]) else float("inf"),
    )

    lr_table = " Β· ".join(f"`{name}`: `{lr:g}`"
                           for name, lr in tuned_lrs.items())
    summary = (
        f"**{ls.description}**\n\n"
        f"Tuned LR per baseline (7-point sweep, 30 steps): {lr_table}\n\n"
        f"Best baseline: `{best_name}` at f = "
        f"`{best_f:.4f}`"
    )
    return contour, curves_fig, finals_fig, summary
| # ----------------- tab 3: Optimizer Arena ----------------- | |
# Example optimizer source for the Optimizer Arena tab: heavy-ball momentum.
# NOTE(review): the snippet uses ``np`` without importing it — presumably the
# sandbox namespace provides it; confirm against ``compile_optimizer``.
SAMPLE_OPTIMIZER = """
class Optimizer:
    def __init__(self, dim):
        self.lr = 0.05
        self.beta = 0.9
        self.v = np.zeros(dim)
    def step(self, x, f_val, grad):
        # SGD with heavy-ball momentum
        self.v = self.beta * self.v - self.lr * grad
        return x + self.v
""".strip()
# Adam written against the same ``Optimizer`` interface. ``{lr}`` is the only
# ``str.format`` placeholder and is filled in by ``_arena_compare``; keep any
# other literal braces out of this template or ``.format`` will fail.
ADAM_ARENA_TEMPLATE = """
class Optimizer:
    def __init__(self, dim):
        self.lr = {lr}
        self.b1, self.b2, self.eps = 0.9, 0.999, 1e-8
        self.m = np.zeros(dim); self.v = np.zeros(dim); self.t = 0
    def step(self, x, f_val, grad):
        self.t += 1
        self.m = self.b1*self.m + (1-self.b1)*grad
        self.v = self.b2*self.v + (1-self.b2)*grad*grad
        mh = self.m/(1-self.b1**self.t); vh = self.v/(1-self.b2**self.t)
        return x - self.lr * mh / (np.sqrt(vh) + self.eps)
""".strip()
# Seeds handed to ``run_arena`` for both the user optimizer and Adam.
ARENA_SEEDS = [101, 202, 303, 404, 505, 606, 707, 808, 909, 1010]
def _arena_compare(template, dim, seed, code):
    """Evaluate user-supplied optimizer source against LR-tuned Adam.

    Args:
        template: Landscape template name passed to ``build_landscape``.
        dim: Requested dimensionality (forced to 2 for ``"himmelblau"``).
        seed: Seed for the landscape and for the short auto-test run.
        code: Source text expected to define an ``Optimizer`` class.

    Returns:
        ``(contour, progress_figure, reward_figure, markdown_summary,
        breakdown_dict)``. If *code* fails to compile, the three figures are
        ``None``, the summary carries the error and the breakdown is ``{}``.
    """
    dim = int(dim)
    rng = np.random.default_rng(int(seed))

    params: dict[str, Any] = {}
    if template == "quadratic":
        params = {"cond": 10.0}
    elif template == "gaussian_mix":
        params = {"k": 3, "sigma": 0.5, "spread": 2.0}
    elif template == "himmelblau":
        dim = 2
    ls = build_landscape(template=template, dim=dim, params=params, rng=rng)

    # Tune Adam's learning rate from a fixed start point (seed 0) and bake it
    # into the Adam source that is run through the same arena as user code.
    tuning_start = np.random.default_rng(0).normal(0.0, 0.5, size=dim)
    best_lr = tune_adam_lr(ls.f, ls.grad, tuning_start, sweep_steps=30)
    adam_src = ADAM_ARENA_TEMPLATE.format(lr=best_lr)

    # Compile the user code; a sandbox failure is reported, not raised.
    try:
        opt = compile_optimizer(code, dim=dim)
    except SandboxError as e:
        error_md = f"### β Compile error\n\n```\n{e}\n```"
        return (None, None, None, error_md, {})

    test = auto_test_draft(opt, ls, seed=int(seed), steps=20)
    user_arena = run_arena(opt, ls, seeds=ARENA_SEEDS, steps=200)
    adam_opt = compile_optimizer(adam_src, dim=dim)
    adam_arena = run_arena(adam_opt, ls, seeds=ARENA_SEEDS, steps=200)

    reward = compute_optcoder_reward(
        arena=user_arena,
        adam_arena=adam_arena,
        actions_used_cost=0,
        budget_total=12,
        novelty_score=ast_novelty_score(code, [adam_src]),
        convergence_step=None,
        arena_steps=200,
    )

    if dim != 2:
        contour = _empty_plot(f"{template} Β· dim={dim}\nContour view is 2-D only")
    else:
        user_traj = [(s["x"][0], s["x"][1]) for s in test["detail"]]
        # NOTE(review): this path comes from ``run_baseline`` and is not given
        # ``best_lr``, while the title below says "tuned Adam" — confirm that
        # ``run_baseline`` is the intended call here.
        adam_start = np.random.default_rng(int(seed)).normal(0.0, 0.5, 2)
        adam_run = run_baseline("adam", ls.f, ls.grad, adam_start, steps=50)
        adam_traj = [(s["x"][0], s["x"][1]) for s in adam_run["trajectory"]
                     if s.get("x") is not None]
        contour = _contour_plot(
            ls,
            trajectories={"custom": user_traj, "adam": adam_traj},
            title=f"{template} β your optimizer vs tuned Adam",
        )

    progress_fig = _bar_plot(
        {"custom": user_arena.mean_progress,
         "adam (tuned)": adam_arena.mean_progress},
        "Arena mean progress", ylabel="mean(fβ β f_N) over 10 seeds",
    )

    bk = reward.breakdown
    # Bonuses keep their sign; the two penalties are negated for display.
    components = {
        key: bk[key]
        for key in ("r_regret", "r_convergence", "r_robustness", "r_novelty")
    }
    components["-r_budget"] = -bk["r_budget"]
    components["-r_eval_fail"] = -bk["r_eval_failures"]
    reward_fig = _reward_breakdown_plot(components, reward.r_total)

    summary = "".join([
        f"### Results\n\n",
        f"- Your mean progress: `{user_arena.mean_progress:.4g}`\n",
        f"- Tuned Adam progress: `{adam_arena.mean_progress:.4g}` ",
        f"(lr = `{best_lr:g}`)\n",
        f"- Speedup vs Adam: `{bk.get('speedup_vs_adam', 0):.3g}Γ`\n",
        f"- Your crash fraction: `{user_arena.crash_fraction:.0%}`\n",
        f"- **Total reward: `{reward.r_total:+.3f}`**",
    ])
    return contour, progress_fig, reward_fig, summary, dict(bk)
| # ----------------- OpenEnv API + LLM auto-run ----------------- | |
| # | |
| # Drives the env in-process (no HTTP round trip) so this tab works inside the | |
| # Space container without localhost access. | |
| # | |
# One process-wide env lives in the module-level _API_ENV_STATE dict (it is shared by every caller, not per browser session); reset/step mutate it.
| # Additionally, `_llm_auto_run` connects to any OpenAI-compatible endpoint | |
| # and drives a full episode end-to-end, streaming actions as they happen. | |
| import os as _os | |
| import time as _time | |
| import requests as _requests | |
# Holder for the single live environment used by the API tab and the LLM
# auto-run. It is a module-level dict, so the env is shared process-wide.
# "env" is None until ``_api_reset`` or ``_llm_auto_run`` stores one.
_API_ENV_STATE: dict[str, Any] = {"env": None}
def _make_env(tier: str, seed: int):
    """Create a fresh ``LandscapeforgeEnvironment`` for *tier* and *seed*.

    The class is imported lazily. The package-relative import is tried first
    and a top-level ``server`` import is used if that raises ImportError.
    """
    try:
        from ..server.landscapeforge_environment import (
            LandscapeforgeEnvironment as env_cls,
        )
    except ImportError:
        from server.landscapeforge_environment import (  # type: ignore
            LandscapeforgeEnvironment as env_cls,
        )
    return env_cls(tier=tier, seed=int(seed))
def _api_reset(tier, seed):
    """Start a new episode and make its env the active one.

    Returns:
        ``(observation_json, banner_markdown)`` for the initial observation.
    """
    fresh_env = _make_env(tier, seed)
    first_obs = fresh_env.reset()
    # Publish the env only after reset() has succeeded.
    _API_ENV_STATE["env"] = fresh_env

    dump = _fmt_obs(first_obs.model_dump(exclude_none=True))
    banner = (
        f"β Reset complete Β· landscape: **{first_obs.landscape_description}** Β· "
        f"dim = {first_obs.dim} Β· budget = {first_obs.budget_remaining}"
    )
    return (dump, banner)
def _api_step(kind, baseline_name, code, draft_idx, step_start, step_end):
    """Apply one action to the active env and report the new observation.

    Args:
        kind: Action kind; ``"run_baseline"``, ``"draft"`` and ``"inspect"``
            get extra fields, any other value is sent with ``kind`` only.
        baseline_name: Baseline for ``run_baseline`` (defaults to ``"adam"``).
        code: Optimizer source for ``draft`` (defaults to ``""``).
        draft_idx: Draft index for ``inspect`` (defaults to 0 when None).
        step_start: First step of the ``inspect`` range.
        step_end: Last step of the ``inspect`` range.

    Returns:
        ``(json_text, banner_markdown)``. The first element is always a JSON
        string, also on errors, so the same text component can show it.
    """
    env = _API_ENV_STATE.get("env")
    if env is None:
        return (_fmt_obs({"error": "call /reset first"}),
                "β No active env β hit **reset** first.")

    # Build and validate the action in one guarded step: a blank numeric
    # field (None) or a schema violation is reported to the user instead of
    # escaping as an unhandled exception.
    try:
        kwargs: dict[str, Any] = {"kind": kind}
        if kind == "run_baseline":
            kwargs["baseline_name"] = baseline_name or "adam"
        elif kind == "draft":
            kwargs["code"] = code or ""
        elif kind == "inspect":
            kwargs["draft_idx"] = int(draft_idx) if draft_idx is not None else 0
            kwargs["step_range_start"] = int(step_start)
            kwargs["step_range_end"] = int(step_end)
        action = LandscapeforgeAction(**kwargs)
    except Exception as e:
        return _fmt_obs({"error": str(e)}), f"β Invalid action: {e}"

    obs = env.step(action)
    dump = _fmt_obs(obs.model_dump(exclude_none=True))
    banner = (
        f"β {kind} executed Β· budget remaining = {obs.budget_remaining}"
        + (" Β· **episode done**" if obs.done else "")
    )
    return dump, banner
| # ---- LLM auto-run (OpenAI-compat endpoint) ---- | |
# Endpoint presets for the LLM auto-run: label -> (base URL, API key).
# The keys are read from the environment ONCE, when this module is imported.
# NOTE(review): these are server-side secrets; make sure they are only ever
# sent to the preset's own base URL, never to a user-typed one.
PRESET_ENDPOINTS = {
    "Ollama (localhost:11434)": ("http://localhost:11434/v1", ""),
    "Hugging Face Router": ("https://router.huggingface.co/v1",
                            _os.getenv("HF_TOKEN", "")),
    "OpenAI": ("https://api.openai.com/v1",
               _os.getenv("OPENAI_API_KEY", "")),
    "Custom": ("", ""),
}
# Model-name suggestions; the names follow the conventions of different
# backends, so each entry only works with a matching endpoint.
PRESET_MODELS = [
    "qwen2.5:3b",
    "qwen2.5:7b",
    "qwen2.5:1.5b",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "gpt-4o-mini",
]
def _llm_action_label(action) -> str:
    """Return the short markdown label shown in a turn card's Action row."""
    if action.kind == "draft":
        return f"draft *({len(action.code or '')} chars)*"
    if action.kind == "run_baseline":
        return f"run_baseline(`{action.baseline_name}`)"
    if action.kind == "inspect":
        return (f"inspect(draft={action.draft_idx}, "
                f"[{action.step_range_start},{action.step_range_end}])")
    return "commit"


def _llm_output_badges(action, lar: dict) -> str:
    """Render the env's ``last_action_result`` dict as HTML status badges."""
    badges: list[str] = []
    if lar.get("compile_error"):
        badges.append(
            "<span class='lf-status lf-status-bad'>compile error</span>")
    if lar.get("summary"):
        s = lar["summary"]
        if s.get("converged"):
            badges.append(
                "<span class='lf-status lf-status-good'>auto-test converged</span>")
        elif s.get("diverged"):
            badges.append(
                "<span class='lf-status lf-status-warn'>auto-test diverged</span>")
        if s.get("final_f") is not None:
            badges.append(
                f"<code>final_f</code> = <b>{s['final_f']:.3g}</b>")
    if action.kind == "run_baseline" and lar.get("final_f") is not None:
        badges.append(
            f"<code>final_f</code> = <b>{lar['final_f']:.3g}</b>")
    for k, v in (lar.get("feedback") or {}).items():
        cls = "lf-status-good" if v >= 0 else "lf-status-warn"
        badges.append(
            f"<span class='lf-status {cls}'><code>{k}</code> "
            f"<b>{v:+.3f}</b></span>")
    return " Β· ".join(badges) if badges else "ok"


def _llm_turn_card(turn, action, obs, dt) -> str:
    """Render one REPL turn (action plus env response) as an HTML card."""
    kind_chip = (f"<span class='lf-chip lf-chip-{action.kind}'>"
                 f"{action.kind}</span>")
    action_str = _llm_action_label(action)
    output_html = _llm_output_badges(action, obs.last_action_result or {})
    return (
        f"<div class='lf-turn'>"
        f" <div class='lf-turn-head'>"
        f" <span class='lf-turn-num'>Turn {turn}</span>"
        f" {kind_chip}"
        f" <span class='lf-turn-meta'>{dt:.1f}s Β· budget "
        f"<b>{obs.budget_remaining}</b></span>"
        f" </div>"
        f" <div class='lf-turn-row'>"
        f" <div class='lf-section-label'>Action</div>"
        f" <div class='lf-section-content'>{action_str}</div>"
        f" </div>"
        f" <div class='lf-turn-row'>"
        f" <div class='lf-section-label'>Output</div>"
        f" <div class='lf-section-content'>{output_html}</div>"
        f" </div>"
        f"</div>"
    )


def _llm_done_card(obs, bk: dict, reward_val: float) -> str:
    """Render the end-of-episode KPI dashboard as HTML."""
    my_prog = bk.get("my_progress", 0.0)
    adam_prog = bk.get("adam_progress", 0.0)
    speedup = bk.get("speedup_vs_adam", 0.0)
    reason = (obs.last_action_result or {}).get("reason", "?")
    # Reward tone: good from 0.5 up, warn from 0 up, bad when negative.
    reward_tone = ("lf-kpi-good" if reward_val >= 0.5 else
                   ("lf-kpi-warn" if reward_val >= 0 else "lf-kpi-bad"))
    speedup_display = (f"{speedup:.2f}Γ" if speedup < 100
                       else f"{speedup:.0f}Γ")
    speedup_tone = "lf-kpi-good" if speedup >= 1.0 else "lf-kpi-warn"
    # final_regret may be unset on the observation; show a placeholder
    # rather than crash the whole transcript on formatting None.
    regret = obs.final_regret
    regret_display = f"{regret:.3f}" if regret is not None else "n/a"
    return (
        "<div class='lf-done'>"
        " <div class='lf-done-head'>"
        " <span class='lf-done-flag'>Episode complete</span>"
        f" <span class='lf-done-reason'>ended by "
        f"<code>{reason}</code></span>"
        " </div>"
        " <div class='lf-kpi-row'>"
        f" <div class='lf-kpi {reward_tone}'>"
        " <div class='lf-kpi-label'>Terminal reward</div>"
        f" <div class='lf-kpi-value'>{reward_val:+.3f}</div>"
        " <div class='lf-kpi-sub'>GRPO training scalar</div>"
        " </div>"
        f" <div class='lf-kpi {speedup_tone}'>"
        " <div class='lf-kpi-label'>Speedup vs tuned Adam</div>"
        f" <div class='lf-kpi-value'>{speedup_display}</div>"
        f" <div class='lf-kpi-sub'>my {my_prog:.3g} Β· "
        f"adam {adam_prog:.3g}</div>"
        " </div>"
        " <div class='lf-kpi'>"
        " <div class='lf-kpi-label'>Adam shortfall</div>"
        f" <div class='lf-kpi-value'>{regret_display}</div>"
        " <div class='lf-kpi-sub'>0 = matched/beat Adam</div>"
        " </div>"
        " </div>"
        "</div>"
    )


def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name,
                  tier, seed, temperature, max_turns):
    """Drive a full episode end-to-end with an LLM, streaming the transcript.

    Args:
        endpoint_choice: Key of ``PRESET_ENDPOINTS``.
        custom_url: Optional base URL that overrides the preset's URL.
        api_key: Optional key typed by the user; takes precedence.
        model_name: Model identifier sent in the chat-completions request.
        tier: Difficulty tier for the environment.
        seed: Environment seed.
        temperature: Sampling temperature.
        max_turns: Maximum number of LLM turns before giving up.

    Yields:
        ``(markdown_transcript, observation_json, reward_plot_or_None)``
        after setup, after every turn and once more when the episode ends.
        The second element is always a JSON string.

    Security:
        Server-side keys (the preset key and the ``API_KEY`` environment
        variable) are only attached when the request goes to the preset's
        own base URL. A user-typed custom URL never receives them; the user
        has to supply a key for it explicitly.
    """
    import html as _html  # stdlib; only needed for escaping here

    try:
        from ..prompts import build_prompt, parse_action
    except ImportError:
        from prompts import build_prompt, parse_action  # type: ignore

    preset_base, preset_key = PRESET_ENDPOINTS.get(endpoint_choice, ("", ""))
    custom = (custom_url or "").strip()  # the textbox may deliver None
    base_url = (custom or preset_base).rstrip("/")
    key = (api_key or "").strip()
    if not key and base_url == preset_base.rstrip("/"):
        key = preset_key or _os.getenv("API_KEY", "")

    if not base_url:
        yield ("Pick a preset endpoint or type a custom URL.", _fmt_obs({}), None)
        return
    if not model_name:
        yield ("Pick or type a model name.", _fmt_obs({}), None)
        return

    url = base_url + "/chat/completions"
    headers = {"Content-Type": "application/json"}
    if key:
        headers["Authorization"] = f"Bearer {key}"

    env = _make_env(tier, int(seed))
    obs = env.reset()
    _API_ENV_STATE["env"] = env

    def _frame(plot=None):
        # Snapshot of the UI outputs for whatever ``obs`` currently is.
        return ("\n".join(log_lines),
                _fmt_obs(obs.model_dump(exclude_none=True)),
                plot)

    # Model name and URL are user-typed and land inside raw HTML: escape them.
    log_lines: list[str] = [
        "### Episode running",
        f"Model <span class='lf-chip'>{_html.escape(str(model_name))}</span> "
        f"via <span class='lf-chip'>{_html.escape(base_url)}</span>",
        "",
        # Two trailing spaces = markdown hard line break before the next line.
        f"**Landscape:** {obs.landscape_description}  ",
        f"**Dim:** {obs.dim} Β· **Initial budget:** {obs.budget_remaining}",
        "",
    ]
    yield _frame()

    for turn in range(1, int(max_turns) + 1):
        messages = build_prompt(obs)
        t0 = _time.perf_counter()  # monotonic; immune to clock adjustments
        try:
            r = _requests.post(url, headers=headers, json={
                "model": model_name,
                "messages": messages,
                "temperature": float(temperature),
                "max_tokens": 1200,
                "stream": False,
            }, timeout=180)
            if r.status_code >= 400:
                log_lines.append(f"**[LLM error {r.status_code}]** {r.text[:300]}")
                yield _frame()
                return
            # ``content`` can be null in a completion; normalise to "" so the
            # parse-error branch below can still slice it.
            raw = r.json()["choices"][0]["message"]["content"] or ""
        except Exception as e:
            log_lines.append(f"**[request failed]** `{type(e).__name__}: {e}`")
            yield _frame()
            return
        dt = _time.perf_counter() - t0

        try:
            action = parse_action(raw)
        except Exception as e:
            log_lines.append(
                f"**[turn {turn}] parse error:** `{e}`"
                f"\n```\n{raw[:500]}\n```\n"
            )
            yield _frame()
            return

        obs = env.step(action)
        _API_ENV_STATE["env"] = env

        log_lines.extend(["", _llm_turn_card(turn, action, obs, dt), ""])
        if action.kind == "draft" and action.code:
            log_lines.append(f"```python\n{action.code.strip()}\n```")
            log_lines.append("")
        yield _frame()

        if obs.done:
            bk = obs.r_optcoder_breakdown or {}
            reward_val = obs.r_optcoder or 0.0
            log_lines.extend(["", _llm_done_card(obs, bk, reward_val), ""])
            reward_plot = _reward_breakdown_plot({
                "r_regret": bk.get("r_regret", 0),
                "r_convergence": bk.get("r_convergence", 0),
                "r_robustness": bk.get("r_robustness", 0),
                "r_novelty": bk.get("r_novelty", 0),
                "-r_budget": -bk.get("r_budget", 0),
                "-r_eval_fail": -bk.get("r_eval_failures", 0),
            }, reward_val)
            yield _frame(reward_plot)
            return

    log_lines.append("\n**[!] Reached max turns without commit** β episode unfinished.")
    yield _frame()
| # ----------------- top-level UI ----------------- | |
# Static top-of-page markup rendered once via gr.HTML: web-font links, the
# brand/top bar with links to the Space and the API schema endpoints, and the
# hero headline. Non-ASCII punctuation is written as real UTF-8 characters
# ("·", "→") so it renders correctly in the browser.
HERO_HTML = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Source+Serif+4:opsz,wght@8..60,400;8..60,500;8..60,600&family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<header class="lf-topbar">
  <div class="lf-brand">
    <div class="lf-brand-mark"></div>
    <div class="lf-brand-text">
      <div class="lf-brand-name">LandscapeForge</div>
      <div class="lf-brand-sub">OpenEnv · Hackathon Apr '26</div>
    </div>
  </div>
  <div class="lf-topbar-actions">
    <a class="lf-link" href="https://huggingface.co/spaces/mnawfal29/landscapeforge" target="_blank" rel="noopener">Space</a>
    <a class="lf-link" href="/schema" target="_blank" rel="noopener">API schema</a>
    <a class="lf-link" href="/openapi.json" target="_blank" rel="noopener">OpenAPI</a>
  </div>
</header>
<section class="lf-hero">
  <h1>An LLM designs optimizers, through a probe→draft→commit REPL.</h1>
  <p>Two agents co-evolve: one writes optimizer code, the other picks adversarial landscapes.
  Connect any OpenAI-compatible endpoint and watch a model play,
  or explore the landscape library interactively.</p>
</section>
"""
# Markdown shown on the "About" tab. Blank lines around headings, lists and
# the code fence are significant to the Markdown renderer, as is the
# indentation of the nested action list and of the Python sample.
ABOUT_MD = """
### How the environment works

**OptCoder** (the LLM policy) designs an `Optimizer` class that minimizes a
hidden loss landscape. Each episode:

1. **LandscapeForge** (v1: internal template picker) chooses a landscape at a
   tier-appropriate difficulty — convex quadratic, Rosenbrock, Gaussian mix,
   Himmelblau, stiff quadratic, cliff.
2. **OptCoder runs a 4-action REPL** with a 12-unit budget:
   - `run_baseline(name)` — run SGD / Momentum / Adam / L-BFGS, see trajectory
     (cost: 2)
   - `draft(code)` — submit `Optimizer` class, env auto-tests 20 steps (cost: 2)
   - `inspect(draft_idx, step_range)` — per-step detail for a prior draft
     (cost: 1)
   - `commit` — run the full 10-seed × 200-step arena (cost: 0)
3. **Reward** is Adam-relative progress — `my_progress / tuned_adam_progress − 1`,
   clipped to `[−1, +1]`. No `f_min` dependency, so this extends to NN training
   as a drop-in.
4. **GRPO** trains the policy against this reward; arena cost is ~50 ms so
   ~36 k episodes/hour on one H100.

### Research anchors

- **Thread 1** · LLMs as optimizer designers: [Lion](https://arxiv.org/abs/2302.06675),
  [FunSearch](https://www.nature.com/articles/s41586-023-06924-6)
- **Thread 2** · Co-evolutionary LLM-env: Coevolve,
  [GenEnv](https://arxiv.org/html/2512.19682v1)
- **Thread 3** · Iterative code refinement:
  [Self-Refine](https://arxiv.org/abs/2303.17651)
- **Thread 4** · GRPO with measurable rewards:
  [HPC GFLOPS reward paper](https://arxiv.org/abs/2602.12049v1)
- **Thread 5** · Analytical landscape benchmarks:
  [BBOB/COCO](https://inria.hal.science/hal-00362649/document),
  [POET](https://arxiv.org/abs/1901.01753)

### Use from code

```python
from landscapeforge import LandscapeforgeEnv, LandscapeforgeAction

with LandscapeforgeEnv.from_docker_image("landscapeforge-env:latest") as env:
    env.reset()
    env.step(LandscapeforgeAction(kind="run_baseline", baseline_name="adam"))
    env.step(LandscapeforgeAction(kind="draft", code=MY_OPT_CLASS))
    print(env.step(LandscapeforgeAction(kind="commit")).observation.r_optcoder_breakdown)
```

API endpoints on this Space: `/reset`, `/step`, `/schema`, `/openapi.json`,
`/health`, WebSocket `/ws`. See **OpenEnv API** tab for a live playground.
"""
def build_ui(*args, **kwargs) -> gr.Blocks:
    """Entry point for the Gradio app. Ignores OpenEnv's builder args.

    Builds (but does not launch) the ``gr.Blocks`` application: the hero
    header followed by six tabs — "Run with LLM", "API playground",
    "Landscape", "Baseline Race", "Optimizer Arena" and "About" — and wires
    each tab's button to its module-level callback.

    Args:
        *args: Accepted for call-site compatibility; unused.
        **kwargs: Accepted for call-site compatibility; unused.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # Coral primary palette (c50 lightest .. c950 darkest) on a stone neutral.
    theme = gr.themes.Soft(
        primary_hue=gr.themes.Color(
            c50="#fbf0ea", c100="#f4d6c5", c200="#ebb69b",
            c300="#e09778", c400="#d37a58", c500="#c96442",
            c600="#a8522f", c700="#874123", c800="#623018",
            c900="#3f1e10", c950="#21100a",
        ),
        neutral_hue="stone",
        font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif",
              "system-ui", "sans-serif"],
        font_mono=[gr.themes.GoogleFont("JetBrains Mono"),
                   "ui-monospace", "monospace"],
    )

    with gr.Blocks(title="LandscapeForge", theme=theme, css=CLAUDE_CSS) as app:
        gr.HTML(HERO_HTML)

        with gr.Tabs():
            # --- Tab: Run with LLM (primary — auto-run) ---
            with gr.Tab("Run with LLM"):
                with gr.Row(equal_height=False):
                    # -------- MAIN PANE (left, wider) --------
                    with gr.Column(scale=4, min_width=640):
                        gr.Markdown("### Transcript")
                        transcript = gr.Markdown(
                            "*Configure the LLM on the right and hit "
                            "**▶ Run episode** — each turn streams here "
                            "as the model plays.*",
                        )
                        with gr.Row():
                            with gr.Column(scale=1):
                                llm_reward_plot = gr.Plot(
                                    label="Reward breakdown (on episode end)")
                            with gr.Column(scale=1):
                                latest_obs = gr.Code(
                                    language="json", interactive=False,
                                    label="Latest observation", lines=14)

                    # -------- SIDEBAR (right, narrower) --------
                    with gr.Column(scale=1, min_width=300,
                                   elem_classes="lf-sidebar"):
                        gr.Markdown("### Connect an LLM")
                        gr.Markdown(
                            "Point at any OpenAI-compatible "
                            "`/v1/chat/completions` endpoint."
                        )
                        ep_choice = gr.Dropdown(
                            list(PRESET_ENDPOINTS.keys()),
                            value="Ollama (localhost:11434)",
                            label="Endpoint",
                        )
                        model_name_in = gr.Dropdown(
                            PRESET_MODELS, value="qwen2.5:3b",
                            label="Model", allow_custom_value=True,
                        )
                        custom_url_in = gr.Textbox(
                            value="", label="Custom base URL",
                            placeholder="http://localhost:8080/v1",
                        )
                        key_in = gr.Textbox(
                            value="", label="API key",
                            placeholder="Bearer <key>",
                            type="password",
                        )
                        gr.Markdown("---")
                        gr.Markdown("### Episode config")
                        tier_llm = gr.Dropdown(["T0", "T1", "T2"], value="T0",
                                               label="Tier")
                        seed_llm = gr.Slider(0, 100, value=42, step=1,
                                             label="Seed")
                        temp_llm = gr.Slider(0, 1.5, value=0.7, step=0.05,
                                             label="Temperature")
                        max_turns_llm = gr.Slider(3, 15, value=10, step=1,
                                                  label="Max turns")
                        run_btn = gr.Button("▶ Run episode", variant="primary",
                                            size="lg")
                        # NOTE(review): accordion assumed to live in the
                        # sidebar under the run button — confirm placement.
                        with gr.Accordion("System prompt (sent to LLM)",
                                          open=False):
                            # Package-relative import first, flat-module
                            # fallback second.
                            try:
                                from ..prompts import SYSTEM as _SYS, ACTION_SPEC as _ACT
                            except ImportError:
                                from prompts import SYSTEM as _SYS, ACTION_SPEC as _ACT  # type: ignore
                            gr.Code(
                                value=f"# SYSTEM\n\n{_SYS}\n\n# ACTION_SPEC\n\n{_ACT}",
                                language="markdown", interactive=False,
                                lines=14,
                            )

                run_btn.click(
                    _llm_auto_run,
                    [ep_choice, custom_url_in, key_in, model_name_in,
                     tier_llm, seed_llm, temp_llm, max_turns_llm],
                    [transcript, latest_obs, llm_reward_plot],
                )

            # --- Tab: Manual stepping (raw /reset + /step) ---
            with gr.Tab("API playground"):
                with gr.Row(equal_height=False):
                    with gr.Column(scale=1, min_width=340,
                                   elem_classes="lf-sidebar"):
                        gr.Markdown("### Manual stepping")
                        gr.Markdown(
                            "Drive the env one action at a time — exactly "
                            "the same contract as the HTTP `/reset` + `/step` "
                            "endpoints. Useful for sanity-checking an action "
                            "or debugging."
                        )
                        tier4 = gr.Dropdown(["T0", "T1", "T2"], value="T0",
                                            label="Tier")
                        seed4 = gr.Slider(0, 100, value=42, step=1,
                                          label="Seed")
                        reset_btn = gr.Button("Reset env", variant="primary")
                        gr.Markdown("---")
                        kind4 = gr.Radio(
                            ["run_baseline", "draft", "inspect", "commit"],
                            value="run_baseline", label="Action kind")
                        with gr.Accordion("run_baseline args", open=True):
                            bname4 = gr.Dropdown(
                                ["sgd", "momentum", "adam", "lbfgs"],
                                value="adam", label="Reference optimizer")
                        with gr.Accordion("draft args", open=False):
                            code4 = gr.Code(value=SAMPLE_OPTIMIZER,
                                            language="python",
                                            label="Optimizer class", lines=10)
                        with gr.Accordion("inspect args", open=False):
                            didx4 = gr.Number(value=0, precision=0,
                                              label="draft_idx")
                            s4s = gr.Number(value=0, precision=0,
                                            label="step_range_start")
                            s4e = gr.Number(value=20, precision=0,
                                            label="step_range_end")
                        step_btn = gr.Button("Step", variant="primary")

                    with gr.Column(scale=2, min_width=580):
                        status4 = gr.Markdown(
                            "*No active env — hit **Reset env** to begin.*")
                        obs4_reset = gr.Code(
                            language="json", interactive=False,
                            label="Initial observation", lines=12)
                        status4b = gr.Markdown()
                        obs4 = gr.Code(
                            language="json", interactive=False,
                            label="Step observation", lines=14)

                reset_btn.click(_api_reset, [tier4, seed4],
                                [obs4_reset, status4])
                step_btn.click(
                    _api_step,
                    [kind4, bname4, code4, didx4, s4s, s4e],
                    [obs4, status4b],
                )

            # --- Tab: Landscape ---
            with gr.Tab("Landscape"):
                with gr.Row(equal_height=False):
                    with gr.Column(scale=1, min_width=320,
                                   elem_classes="lf-sidebar"):
                        gr.Markdown("### Landscape Explorer")
                        gr.Markdown(
                            "Pick a template and see what the agent sees "
                            "at reset — the 2-D contour plus env-computed "
                            "structural hints used to calibrate the optimizer."
                        )
                        tmpl1 = gr.Dropdown(TEMPLATES_2D_SAFE,
                                            value="rosenbrock",
                                            label="Template")
                        dim1 = gr.Slider(2, 10, value=2, step=1, label="Dim")
                        seed1 = gr.Slider(0, 100, value=0, step=1,
                                          label="Seed")
                        go1 = gr.Button("Build landscape", variant="primary",
                                        size="lg")

                    with gr.Column(scale=2, min_width=580):
                        plot1 = gr.Plot(label="Contour")
                        hints1 = gr.Dataframe(
                            headers=["property", "value"],
                            datatype=["str", "str"],
                            label="Structural hints (shown to the agent at reset)",
                            wrap=True, row_count=(8, "dynamic"),
                        )

                landscape_inputs = [tmpl1, dim1, seed1]
                go1.click(_explore_landscape, landscape_inputs,
                          [plot1, hints1])
                # Render the default landscape on page load. Reading the
                # controls themselves (defaults: rosenbrock / 2 / 0) keeps the
                # initial plot in sync with what the widgets display, instead
                # of duplicating those defaults in ad-hoc gr.State components.
                app.load(_explore_landscape, landscape_inputs,
                         [plot1, hints1])

            # --- Tab: Baseline Race ---
            with gr.Tab("Baseline Race"):
                with gr.Row(equal_height=False):
                    with gr.Column(scale=1, min_width=320,
                                   elem_classes="lf-sidebar"):
                        gr.Markdown("### Baseline Race")
                        gr.Markdown(
                            "Race SGD, Momentum, L-BFGS, and **Adam with "
                            "per-landscape LR tuning** from the same init. "
                            "The tuned Adam is the bar the trained OptCoder "
                            "has to beat."
                        )
                        tmpl2 = gr.Dropdown(TEMPLATES_2D_SAFE,
                                            value="rosenbrock",
                                            label="Template")
                        seed2 = gr.Slider(0, 100, value=1, step=1,
                                          label="Seed")
                        go2 = gr.Button("Race", variant="primary", size="lg")

                    with gr.Column(scale=2, min_width=580):
                        plot2a = gr.Plot(label="Contour + trajectories")
                        with gr.Row():
                            plot2b = gr.Plot(label="f(x) vs step")
                            plot2c = gr.Plot(label="Final f after 50 steps")
                        summary2 = gr.Markdown()

                go2.click(_baseline_race, [tmpl2, seed2],
                          [plot2a, plot2b, plot2c, summary2])

            # --- Tab: Optimizer Arena ---
            with gr.Tab("Optimizer Arena"):
                with gr.Row(equal_height=False):
                    with gr.Column(scale=1, min_width=340,
                                   elem_classes="lf-sidebar"):
                        gr.Markdown("### Optimizer Arena")
                        gr.Markdown(
                            "Paste or edit an `Optimizer` class. We run it "
                            "through the full Phase-D arena (10 seeds × 200 "
                            "steps) against tuned Adam and show the reward "
                            "breakdown.<br><small>`np` is pre-injected — "
                            "do not write import lines.</small>"
                        )
                        tmpl3 = gr.Dropdown(list(BUILDERS.keys()),
                                            value="quadratic",
                                            label="Template")
                        dim3 = gr.Slider(2, 10, value=5, step=1, label="Dim")
                        seed3 = gr.Slider(0, 100, value=42, step=1,
                                          label="Seed")
                        go3 = gr.Button("Run arena", variant="primary",
                                        size="lg")

                    with gr.Column(scale=2, min_width=580):
                        code3 = gr.Code(value=SAMPLE_OPTIMIZER,
                                        language="python",
                                        label="Your Optimizer class",
                                        lines=14)
                        with gr.Row():
                            plot3a = gr.Plot(label="2-D trajectory (if dim = 2)")
                            plot3b = gr.Plot(label="Mean arena progress")
                        # NOTE(review): reward plot assumed to sit on its own
                        # line below the two-plot row — confirm placement.
                        plot3c = gr.Plot(label="Reward breakdown")
                        summary3 = gr.Markdown()
                        breakdown3 = gr.JSON(label="Full reward dict",
                                             height=220)

                go3.click(_arena_compare, [tmpl3, dim3, seed3, code3],
                          [plot3a, plot3b, plot3c, summary3, breakdown3])

            # --- About ---
            with gr.Tab("About"):
                gr.Markdown(ABOUT_MD)

    return app