"""Gradio demo for LandscapeForge — Claude-inspired visual design. Four tabs: 1. Landscape — pick a template, see 2D contour + structural hints 2. Baseline Race — SGD / Momentum / tuned-Adam / L-BFGS racing, same init 3. Optimizer Arena — paste a custom Optimizer class, full-arena eval vs tuned-Adam, reward breakdown 4. OpenEnv API — live reset/step against the same container's FastAPI Design: warm off-white background, coral primary, generous spacing, minimal chrome, no heavy shadows. """ from __future__ import annotations import json from typing import Any import gradio as gr import numpy as np import plotly.graph_objects as go from plotly.subplots import make_subplots def _fmt_obs(obs_dict: dict) -> str: """Pretty-print an observation as indented JSON for gr.Code display. Shrinks very long arrays (baseline trajectories etc.) so the rendered view stays readable. `json.dumps(indent=2)` gives one value per line which looks much cleaner than gr.JSON's component-per-field tree. """ def _shrink(v): if isinstance(v, list): if len(v) > 8: return ( [_shrink(x) for x in v[:3]] + [f"... ({len(v)-6} more) ..."] + [_shrink(x) for x in v[-3:]] ) return [_shrink(x) for x in v] if isinstance(v, dict): return {k: _shrink(x) for k, x in v.items()} if isinstance(v, float): return round(v, 6) return v return json.dumps(_shrink(obs_dict), indent=2, default=str) try: from ..arena import auto_test_draft, run_arena from ..landscapes import BUILDERS, build_landscape, structural_hints from ..reference_optimizers import ( run_baseline, run_baseline_tuned, tune_adam_lr, ) from ..rewards import ast_novelty_score, compute_optcoder_reward from ..sandbox import SandboxError, compile_optimizer from ..models import LandscapeforgeAction except ImportError: # flat layout (HF Space container) from arena import auto_test_draft, run_arena # type: ignore from landscapes import BUILDERS, build_landscape, structural_hints # type: ignore from reference_optimizers import ( # type: ignore run_baseline, run_baseline_tuned, tune_adam_lr, ) from rewards import ast_novelty_score, compute_optcoder_reward # type: ignore from sandbox import SandboxError, compile_optimizer # type: ignore from models import LandscapeforgeAction # type: ignore # ----------------- Claude-inspired palette + CSS ----------------- # Mimics Anthropic's actual surface colors: warmer parchment background, # deep warm ink for text, Anthropic burnt-sienna as primary accent. CLAUDE_CSS = """ /* Variables — dark mode default, warm ink + sienna accent */ :root { --lf-bg: #1f1d1a; /* warm near-black page */ --lf-surface: #2a2824; /* card surface */ --lf-surface-alt: #332f2a; /* elevated surface (code, plots) */ --lf-border: #403b34; /* card edge */ --lf-border-soft: #332f2a; /* soft inner divider */ --lf-text: #f3f0e8; /* warm off-white */ --lf-text-muted: #b5ada0; /* muted body */ --lf-text-subtle: #857d72; /* labels, captions */ --lf-accent: #e28763; /* brighter sienna for dark bg */ --lf-accent-dk: #c96442; /* hover / pressed */ --lf-accent-soft: #4a2f22; /* accent-tinted dark for selected bg */ --lf-good: #7ab68c; --lf-bad: #d47d6a; } /* Page */ html, body, .gradio-container { background: var(--lf-bg) !important; } .gradio-container { font-family: "Inter", -apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif !important; color: var(--lf-text) !important; max-width: none !important; width: 100% !important; margin: 0 auto !important; padding: 1.5rem 2rem 3rem !important; /* Override Gradio's internal theme variables so every component inherits the warm palette instead of Gradio's blue-on-white defaults */ --body-text-color: var(--lf-text) !important; --body-text-color-subdued: var(--lf-text-muted) !important; --body-background-fill: var(--lf-bg) !important; --background-fill-primary: var(--lf-surface) !important; --background-fill-secondary: var(--lf-bg) !important; --border-color-primary: var(--lf-border) !important; --border-color-accent: var(--lf-accent) !important; --input-background-fill: var(--lf-surface) !important; --input-border-color: var(--lf-border) !important; --input-text-color: var(--lf-text) !important; --input-placeholder-color: var(--lf-text-subtle) !important; --block-background-fill: var(--lf-surface) !important; --block-border-color: var(--lf-border-soft) !important; --block-label-background-fill: transparent !important; --block-label-text-color: var(--lf-text) !important; --block-title-text-color: var(--lf-text) !important; --block-info-text-color: var(--lf-text-muted) !important; --neutral-50: var(--lf-surface) !important; --neutral-100: var(--lf-bg) !important; --neutral-200: var(--lf-border-soft) !important; --neutral-300: var(--lf-border) !important; --neutral-400: var(--lf-text-subtle) !important; --neutral-500: var(--lf-text-muted) !important; --neutral-600: var(--lf-text-muted) !important; --neutral-700: var(--lf-text) !important; --neutral-800: var(--lf-text) !important; --neutral-900: var(--lf-text) !important; --color-accent: var(--lf-accent) !important; --color-accent-soft: var(--lf-accent-soft) !important; --link-text-color: var(--lf-accent) !important; --link-text-color-hover: var(--lf-accent-dk) !important; --button-primary-background-fill: var(--lf-accent) !important; --button-primary-background-fill-hover: var(--lf-accent-dk) !important; --button-primary-text-color: #ffffff !important; --button-primary-border-color: var(--lf-accent) !important; /* Kill the `` pill that Gradio 5 uses for every component label — it was defaulting to the primary accent. We want labels to be plain muted text above the input. */ --block-title-background-fill: transparent !important; --block-title-border-color: transparent !important; --block-title-border-width: 0 !important; --block-title-radius: 0 !important; --block-title-padding: 0 0 0.3rem 0 !important; --block-title-text-color: var(--lf-text-muted) !important; --block-title-text-weight: 500 !important; --block-title-text-size: 0.8rem !important; /* Input outlines — dropdowns/text/number all need obvious borders */ --input-shadow: none !important; --input-shadow-focus: 0 0 0 3px rgba(226,135,99,0.18) !important; --input-border-color-focus: var(--lf-accent) !important; --input-background-fill-focus:var(--lf-surface) !important; /* Checkbox / radio variables */ --checkbox-background-color: var(--lf-surface) !important; --checkbox-background-color-hover: var(--lf-surface-alt) !important; --checkbox-background-color-focus: var(--lf-surface-alt) !important; --checkbox-background-color-selected: var(--lf-accent) !important; --checkbox-border-color: var(--lf-border) !important; --checkbox-border-color-hover: var(--lf-accent) !important; --checkbox-border-color-focus: var(--lf-accent) !important; --checkbox-border-color-selected: var(--lf-accent) !important; --checkbox-label-background-fill: transparent !important; --checkbox-label-background-fill-hover: var(--lf-surface-alt) !important; --checkbox-label-background-fill-selected:var(--lf-accent-soft) !important; --checkbox-label-text-color: var(--lf-text) !important; --checkbox-label-text-color-selected: var(--lf-accent) !important; --checkbox-label-border-color: var(--lf-border) !important; --checkbox-label-border-color-hover: var(--lf-accent) !important; --checkbox-label-border-color-selected:var(--lf-accent) !important; --checkbox-check: var(--lf-accent) !important; } /* Typography — serif for headings to match Claude's Tiempos-style hero */ .gradio-container h1, .gradio-container h2, .gradio-container h3, .gradio-container h4 { color: var(--lf-text) !important; font-family: "Source Serif 4", "Source Serif Pro", Georgia, "Times New Roman", serif !important; font-weight: 500 !important; letter-spacing: -0.015em !important; line-height: 1.2 !important; } .gradio-container h1 { font-size: 2.5rem !important; margin: 0.25rem 0 0.5rem !important; } .gradio-container h2 { font-size: 1.5rem !important; margin: 1.4rem 0 0.5rem !important; } .gradio-container h3 { font-size: 1.15rem !important; margin: 1.1rem 0 0.5rem !important; font-weight: 600 !important; } .gradio-container p, .gradio-container li { color: var(--lf-text-muted) !important; line-height: 1.65 !important; font-size: 0.97rem !important; } .gradio-container strong { color: var(--lf-text) !important; } /* Top bar — Linear/Vercel-style fixed header */ .lf-topbar { display: flex; align-items: center; justify-content: space-between; padding: 0.6rem 0.2rem 1.1rem; border-bottom: 1px solid var(--lf-border); margin-bottom: 1.25rem; } .lf-brand { display: flex; align-items: center; gap: 0.75rem; } .lf-brand-mark { width: 28px; height: 28px; border-radius: 7px; background: linear-gradient(135deg, var(--lf-accent) 0%, var(--lf-accent-dk) 100%); box-shadow: inset 0 0 0 1px rgba(255,255,255,0.08), 0 1px 3px rgba(0,0,0,0.3); position: relative; } .lf-brand-mark::after { /* little contour-ring motif inside the mark */ content: ""; position: absolute; inset: 5px; border: 1.5px solid rgba(255,255,255,0.55); border-radius: 4px; clip-path: polygon(0 0, 100% 0, 100% 70%, 30% 100%, 0 100%); } .lf-brand-name { font-family: "Inter", sans-serif; font-weight: 600; font-size: 0.95rem; color: var(--lf-text); letter-spacing: -0.01em; line-height: 1.1; } .lf-brand-sub { font-family: "Inter", sans-serif; font-size: 0.72rem; color: var(--lf-text-subtle); letter-spacing: 0.04em; text-transform: uppercase; margin-top: 1px; } .lf-topbar-actions { display: flex; gap: 0.25rem; align-items: center; } .lf-link { color: var(--lf-text-muted) !important; font-family: "Inter", sans-serif; font-size: 0.82rem; text-decoration: none !important; padding: 0.4rem 0.75rem; border-radius: 6px; border: 1px solid transparent; transition: background 0.12s, color 0.12s, border-color 0.12s; } .lf-link:hover { color: var(--lf-text) !important; background: var(--lf-surface); border-color: var(--lf-border); } /* Hero — modern dashboard banner, serif headline */ .lf-hero { margin-bottom: 1.5rem; padding: 0.25rem 0 1rem; } .lf-hero h1 { margin: 0 0 0.55rem 0 !important; font-family: "Source Serif 4", "Source Serif Pro", Georgia, serif !important; font-size: 2.1rem !important; font-weight: 500 !important; color: var(--lf-text) !important; max-width: 820px; line-height: 1.2 !important; letter-spacing: -0.018em !important; } .lf-hero p { margin: 0 !important; max-width: 720px; font-size: 0.98rem !important; line-height: 1.6 !important; color: var(--lf-text-muted) !important; } /* Tabs — Gradio 5 uses `.tab-container` with scoped `button` */ .gradio-container .tab-container { border-bottom: 1px solid var(--lf-border) !important; margin-bottom: 1.1rem !important; } .gradio-container .tab-container button, .gradio-container .tab-container button[role="tab"] { background: transparent !important; color: var(--lf-text-muted) !important; border: none !important; border-bottom: 2px solid transparent !important; font-family: "Inter", sans-serif !important; font-weight: 500 !important; font-size: 0.96rem !important; padding: 0.7rem 1.15rem !important; letter-spacing: -0.005em !important; transition: color 0.15s, border-color 0.15s !important; border-radius: 0 !important; } .gradio-container .tab-container button:hover:not(:disabled):not(.selected) { color: var(--lf-text) !important; background-color: transparent !important; } .gradio-container .tab-container button.selected { color: var(--lf-accent) !important; border-bottom: 2px solid var(--lf-accent) !important; font-weight: 600 !important; background: transparent !important; } /* Primary buttons — burnt sienna solid */ .gradio-container button.primary, .gradio-container .primary button, .gradio-container button.gradio-button.primary { background: var(--lf-accent) !important; color: #ffffff !important; border: none !important; font-family: "Inter", sans-serif !important; font-weight: 600 !important; font-size: 0.9rem !important; letter-spacing: -0.005em !important; border-radius: 8px !important; padding: 0.6rem 1.1rem !important; box-shadow: 0 1px 2px rgba(201,100,66,0.15) !important; transition: background 0.15s, box-shadow 0.15s !important; } .gradio-container button.primary:hover, .gradio-container .primary button:hover { background: var(--lf-accent-dk) !important; box-shadow: 0 2px 6px rgba(201,100,66,0.25) !important; } /* Secondary buttons */ .gradio-container button.secondary { background: var(--lf-surface) !important; color: var(--lf-text) !important; border: 1px solid var(--lf-border) !important; font-weight: 500 !important; border-radius: 8px !important; } /* Inputs + selects + textareas + dropdowns — clearly bordered */ .gradio-container input[type="text"], .gradio-container input[type="number"], .gradio-container input[type="password"], .gradio-container select, .gradio-container textarea, .gradio-container .wrap-inner, .gradio-container [role="combobox"], .gradio-container .dropdown > div, .gradio-container [data-testid="dropdown"] > div { border: 1px solid var(--lf-border) !important; background: var(--lf-surface-alt) !important; color: var(--lf-text) !important; border-radius: 8px !important; font-family: "Inter", sans-serif !important; font-size: 0.92rem !important; min-height: 38px !important; box-sizing: border-box !important; transition: border-color 0.15s, box-shadow 0.15s !important; } .gradio-container input[type="text"], .gradio-container input[type="number"], .gradio-container input[type="password"], .gradio-container textarea { padding: 0.55rem 0.75rem !important; } .gradio-container input:focus, .gradio-container textarea:focus, .gradio-container select:focus, .gradio-container [role="combobox"]:focus-within { border-color: var(--lf-accent) !important; outline: none !important; box-shadow: 0 0 0 3px rgba(226,135,99,0.18) !important; } /* Number input wrapper (Gradio renders a wrapper around input+reset) — give it enough room so "0.7" doesn't clip */ .gradio-container .number-input-container, .gradio-container input[type="number"] { min-width: 72px !important; text-align: left !important; } .gradio-container input[type="number"] { padding-right: 0.4rem !important; } /* Labels — kill the accent-coloured "chip" treatment Gradio 5 applies, make them plain inline text above the input */ .gradio-container label, .gradio-container .label, .gradio-container .block > .label-wrap, .gradio-container .block > .label-wrap > span, .gradio-container [data-testid="block-label"], .gradio-container [data-testid="block-label"] > *, .gradio-container .label > span { background: transparent !important; color: var(--lf-text-muted) !important; font-weight: 500 !important; font-size: 0.82rem !important; letter-spacing: 0.01em !important; text-transform: none !important; padding: 0 !important; margin-bottom: 0.3rem !important; border: none !important; border-radius: 0 !important; box-shadow: none !important; } /* The block-label "pill" that wraps the label + icon: flatten it */ .gradio-container .block .wrap > .label-wrap, .gradio-container .block > .wrap-inner > .label-wrap, .gradio-container .block > div > .label-wrap, .gradio-container .block > span[data-testid], .gradio-container .block > span.svelte-1gfkn6j, .gradio-container .block-label, .gradio-container div[aria-label][class*="label"] { background: transparent !important; padding: 0 0 0.25rem 0 !important; color: var(--lf-text-muted) !important; font-weight: 500 !important; border: none !important; border-radius: 0 !important; } /* For elements whose rendered icon-prefixed label (e.g. JSON "{...}" icon, Plot chart icon) is inside the label-wrap, keep them subtle */ .gradio-container .block-label svg, .gradio-container .label-wrap svg, .gradio-container [data-testid="block-label"] svg { color: var(--lf-text-subtle) !important; opacity: 0.7; } /* Reset / refresh icon buttons (the circular arrow next to number inputs) */ .gradio-container button[aria-label*="Reset"], .gradio-container button[title*="Reset"], .gradio-container .icon-button { background: transparent !important; color: var(--lf-text-subtle) !important; border: none !important; box-shadow: none !important; } .gradio-container button[aria-label*="Reset"]:hover, .gradio-container .icon-button:hover { color: var(--lf-accent) !important; background: var(--lf-accent-soft) !important; } /* Gradio block container (the outer "card" of each component) */ .gradio-container .block, .gradio-container .gr-box, .gradio-container .gr-panel, .gradio-container .form { background: var(--lf-surface) !important; border: 1px solid var(--lf-border-soft) !important; border-radius: 10px !important; padding: 1.1rem !important; } /* Slider colors */ .gradio-container input[type="range"]::-webkit-slider-thumb { background: var(--lf-accent) !important; } .gradio-container .svelte-range-slider .handle, .gradio-container .svelte-range-slider .rangeBar { background: var(--lf-accent) !important; } /* Code blocks */ .gradio-container pre, .gradio-container code, .gradio-container .cm-editor, .gradio-container .cm-content { font-family: "JetBrains Mono", ui-monospace, Menlo, Consolas, monospace !important; font-size: 0.84rem !important; } .gradio-container pre { background: var(--lf-surface-alt) !important; border: 1px solid var(--lf-border-soft) !important; border-radius: 8px !important; padding: 0.9rem 1.1rem !important; } /* Dataframes */ .gradio-container table { border-collapse: collapse !important; width: 100% !important; font-family: "Inter", sans-serif !important; } .gradio-container table th { background: var(--lf-bg) !important; color: var(--lf-text) !important; font-weight: 600 !important; font-size: 0.82rem !important; letter-spacing: 0.01em !important; text-transform: uppercase !important; border-bottom: 1px solid var(--lf-border) !important; padding: 0.6rem 0.85rem !important; } .gradio-container table td { border-bottom: 1px solid var(--lf-border-soft) !important; padding: 0.55rem 0.85rem !important; color: var(--lf-text) !important; font-size: 0.9rem !important; } /* JSON renderer — force warm ink for every node + muted for keys */ .gradio-container .json-holder, .gradio-container .json-container, .gradio-container .json-node { background: var(--lf-surface) !important; border: 1px solid var(--lf-border-soft) !important; border-radius: 8px !important; padding: 0.9rem !important; color: var(--lf-text) !important; font-family: "JetBrains Mono", ui-monospace, Menlo, monospace !important; font-size: 0.82rem !important; } .gradio-container .json-holder *, .gradio-container .json-container * { color: var(--lf-text) !important; } .gradio-container .json-holder .key, .gradio-container .json-container .key { color: var(--lf-accent-dk) !important; font-weight: 600 !important; } .gradio-container .json-holder .string-value { color: #3d6b4c !important; } .gradio-container .json-holder .number-value { color: #874123 !important; } /* Dropdown option list (open state) — Gradio defaults to white-on-white */ .gradio-container .options, .gradio-container .options .item, .gradio-container [role="listbox"], .gradio-container [role="option"] { background: var(--lf-surface) !important; color: var(--lf-text) !important; border-color: var(--lf-border) !important; } .gradio-container [role="option"]:hover, .gradio-container .options .item:hover { background: var(--lf-accent-soft) !important; color: var(--lf-text) !important; } .gradio-container [role="option"][aria-selected="true"] { background: var(--lf-accent) !important; color: #ffffff !important; } /* Markdown rendered inside blocks */ .gradio-container .prose, .gradio-container .markdown, .gradio-container [data-testid="markdown"] { color: var(--lf-text) !important; } .gradio-container .prose p, .gradio-container .markdown p, .gradio-container [data-testid="markdown"] p { color: var(--lf-text-muted) !important; } .gradio-container .prose strong, .gradio-container .markdown strong { color: var(--lf-text) !important; } .gradio-container .prose a, .gradio-container .markdown a { color: var(--lf-accent) !important; text-decoration: underline; text-underline-offset: 2px; } .gradio-container .prose code, .gradio-container .markdown code { background: var(--lf-bg) !important; color: var(--lf-accent-dk) !important; padding: 0.12em 0.4em !important; border-radius: 4px !important; font-size: 0.84em !important; } /* Inline label / info text under inputs */ .gradio-container .block-info, .gradio-container .info { color: var(--lf-text-muted) !important; font-size: 0.82rem !important; } /* Slider track+value labels */ .gradio-container .svelte-range-slider, .gradio-container .min-val, .gradio-container .max-val, .gradio-container .value { color: var(--lf-text) !important; } .gradio-container .value-text { color: var(--lf-accent-dk) !important; font-weight: 600 !important; } /* Radio buttons — labels should be visible */ .gradio-container .wrap label, .gradio-container [role="radio"] + label { color: var(--lf-text) !important; } /* Status badges inside obs.done etc */ .gradio-container .status-text { color: var(--lf-text) !important; } /* Accordion headers */ .gradio-container .label-wrap, .gradio-container .accordion-header { font-weight: 500 !important; color: var(--lf-text) !important; } /* Footer — hide "Built with Gradio" */ footer, .gradio-container footer { display: none !important; } /* Scrollbars */ .gradio-container ::-webkit-scrollbar { width: 10px; height: 10px; } .gradio-container ::-webkit-scrollbar-track { background: var(--lf-bg); } .gradio-container ::-webkit-scrollbar-thumb { background: var(--lf-border); border-radius: 5px; } .gradio-container ::-webkit-scrollbar-thumb:hover { background: var(--lf-text-subtle); } /* Sidebar column — one card; inside is flat */ .gradio-container .lf-sidebar { background: var(--lf-surface) !important; border: 1px solid var(--lf-border) !important; border-radius: 12px !important; padding: 1.5rem 1.35rem 1.35rem !important; box-shadow: 0 1px 0 rgba(20,20,19,0.02); } .gradio-container .lf-sidebar h3 { margin-top: 0.15rem !important; margin-bottom: 0.3rem !important; } .gradio-container .lf-sidebar p { font-size: 0.88rem !important; margin-bottom: 0.85rem !important; color: var(--lf-text-muted) !important; } /* Flatten ALL nested blocks inside the sidebar — no card-in-card */ .gradio-container .lf-sidebar .block, .gradio-container .lf-sidebar .form, .gradio-container .lf-sidebar .gr-box, .gradio-container .lf-sidebar .gr-panel, .gradio-container .lf-sidebar .wrap, .gradio-container .lf-sidebar fieldset { background: transparent !important; border: none !important; padding: 0 !important; margin: 0 !important; border-radius: 0 !important; box-shadow: none !important; } /* Space between consecutive controls in the sidebar */ .gradio-container .lf-sidebar > div > *, .gradio-container .lf-sidebar > .form > * { margin-bottom: 0.85rem !important; } .gradio-container .lf-sidebar button { width: 100% !important; } .gradio-container .lf-sidebar hr, .gradio-container .lf-sidebar .prose hr { border: none !important; border-top: 1px solid var(--lf-border) !important; margin: 1.1rem 0 !important; } /* Hide ugly number-input spinner arrows (▲▼) */ .gradio-container input[type="number"]::-webkit-outer-spin-button, .gradio-container input[type="number"]::-webkit-inner-spin-button { -webkit-appearance: none !important; appearance: none !important; margin: 0 !important; } .gradio-container input[type="number"] { -moz-appearance: textfield !important; appearance: textfield !important; } /* Slider value-input on the right — align + size so "0.95" doesn't clip */ .gradio-container .slider-container, .gradio-container [data-testid="slider"] { display: flex !important; flex-direction: column !important; gap: 0.4rem !important; } .gradio-container [data-testid="slider"] .head, .gradio-container .tab-like-container { display: flex !important; align-items: center !important; justify-content: space-between !important; gap: 0.5rem !important; } .gradio-container [data-testid="slider"] input[type="number"] { width: 68px !important; min-width: 68px !important; max-width: 80px !important; text-align: right !important; padding: 0.3rem 0.5rem !important; min-height: 30px !important; font-size: 0.85rem !important; } /* Reset-button next to number inputs — make it transparent & subtle */ .gradio-container [data-testid="slider"] button, .gradio-container .reset-button { background: transparent !important; border: none !important; color: var(--lf-text-subtle) !important; padding: 0.15rem !important; min-width: 26px !important; width: 26px !important; height: 26px !important; } .gradio-container [data-testid="slider"] button:hover { color: var(--lf-accent) !important; background: var(--lf-accent-soft) !important; } /* Inline code tag — softer across the whole app, not only the sidebar */ .gradio-container .prose code, .gradio-container .markdown code, .gradio-container code { background: var(--lf-surface-alt) !important; border: 1px solid var(--lf-border) !important; color: var(--lf-text) !important; padding: 0.05em 0.42em !important; border-radius: 4px !important; font-size: 0.85em !important; font-weight: 400 !important; } /* Fenced code blocks — proper code-box with mono font + subtle bg */ .gradio-container .prose pre, .gradio-container .markdown pre { background: #14120f !important; border: 1px solid var(--lf-border) !important; border-radius: 8px !important; padding: 0.9rem 1rem !important; margin: 0.4rem 0 0.8rem 0 !important; overflow-x: auto !important; } .gradio-container .prose pre code, .gradio-container .markdown pre code { background: transparent !important; border: none !important; color: #e8e3d6 !important; font-size: 0.82rem !important; line-height: 1.55 !important; padding: 0 !important; } /* Chips — lightweight tags for action kind, model, endpoint */ .gradio-container .lf-chip { display: inline-block; padding: 0.08rem 0.5rem; border-radius: 5px; background: var(--lf-surface-alt); color: var(--lf-text); border: 1px solid var(--lf-border); font-family: "JetBrains Mono", ui-monospace, monospace; font-size: 0.78rem; font-weight: 500; letter-spacing: -0.01em; } .gradio-container .lf-chip-draft { color: var(--lf-accent); border-color: var(--lf-accent); } .gradio-container .lf-chip-run_baseline { color: #7ecfc5; border-color: #5a9c94; } .gradio-container .lf-chip-inspect { color: #b5a5e0; border-color: #7e6ea8; } .gradio-container .lf-chip-commit { color: #7ab68c; border-color: #4e7c5c; } /* Soft divider inside transcript */ .gradio-container .lf-hr-soft, .gradio-container hr.lf-hr-soft { border: none !important; border-top: 1px solid var(--lf-border-soft) !important; margin: 0.9rem 0 0.6rem !important; opacity: 0.6; } /* Turn card — one per REPL step. Clearly demarcates Action vs Output */ .gradio-container .lf-turn { background: var(--lf-surface); border: 1px solid var(--lf-border); border-radius: 10px; padding: 0.9rem 1rem; margin: 0.85rem 0; box-shadow: 0 1px 0 rgba(0,0,0,0.2); } .gradio-container .lf-turn-head { display: flex; align-items: center; gap: 0.55rem; margin-bottom: 0.7rem; padding-bottom: 0.55rem; border-bottom: 1px dashed var(--lf-border-soft); } .gradio-container .lf-turn-num { font-family: "Source Serif 4", Georgia, serif; font-weight: 600; font-size: 0.98rem; color: var(--lf-text); letter-spacing: -0.01em; } .gradio-container .lf-turn-meta { margin-left: auto; font-family: "JetBrains Mono", monospace; font-size: 0.76rem; color: var(--lf-text-subtle); } .gradio-container .lf-turn-meta b { color: var(--lf-text); font-weight: 600; } .gradio-container .lf-turn-row { display: grid; grid-template-columns: 70px 1fr; align-items: baseline; gap: 0.75rem; padding: 0.25rem 0; } .gradio-container .lf-section-label { font-family: "Inter", sans-serif; font-size: 0.68rem; font-weight: 600; letter-spacing: 0.1em; text-transform: uppercase; color: var(--lf-text-subtle); padding-top: 0.15rem; } .gradio-container .lf-section-content { color: var(--lf-text); font-size: 0.9rem; line-height: 1.55; font-family: "Inter", sans-serif; } .gradio-container .lf-section-content code { font-size: 0.82em !important; } .gradio-container .lf-section-content b { color: var(--lf-text); font-weight: 600; } /* Status chips inside the Output row */ .gradio-container .lf-status { display: inline-block; padding: 0.05rem 0.45rem; border-radius: 4px; font-size: 0.78rem; font-weight: 500; border: 1px solid; background: transparent; margin-right: 0.15rem; } .gradio-container .lf-status-good { color: #7ab68c; border-color: rgba(122,182,140,0.4); background: rgba(122,182,140,0.08); } .gradio-container .lf-status-warn { color: #e4b264; border-color: rgba(228,178,100,0.4); background: rgba(228,178,100,0.08); } .gradio-container .lf-status-bad { color: #d47d6a; border-color: rgba(212,125,106,0.4); background: rgba(212,125,106,0.08); } /* Code fence that follows a turn card — tighten top margin */ .gradio-container .lf-turn + pre, .gradio-container .prose pre:has(+ .lf-turn) { margin-top: -0.5rem !important; } /* Episode-done dashboard: KPI row with big metric cards */ .gradio-container .lf-done { background: linear-gradient(180deg, rgba(226,135,99,0.06) 0%, rgba(42,40,36,0) 60%); border: 1px solid var(--lf-border); border-radius: 12px; padding: 1.2rem 1.25rem; margin: 1.1rem 0 0.6rem; } .gradio-container .lf-done-head { display: flex; align-items: baseline; gap: 0.85rem; margin-bottom: 0.9rem; } .gradio-container .lf-done-flag { color: var(--lf-accent); font-family: "Inter", sans-serif; font-weight: 600; font-size: 0.75rem; letter-spacing: 0.11em; text-transform: uppercase; padding: 0.15rem 0.55rem; border: 1px solid var(--lf-accent); border-radius: 5px; } .gradio-container .lf-done-reason { color: var(--lf-text-subtle); font-size: 0.84rem; } .gradio-container .lf-done-reason code { font-family: "JetBrains Mono", monospace; background: transparent !important; border: none !important; color: var(--lf-text-muted) !important; padding: 0 !important; } .gradio-container .lf-kpi-row { display: grid; grid-template-columns: repeat(3, 1fr); gap: 0.8rem; } .gradio-container .lf-kpi { background: var(--lf-surface-alt); border: 1px solid var(--lf-border-soft); border-radius: 10px; padding: 0.9rem 1rem; min-width: 0; } .gradio-container .lf-kpi-label { color: var(--lf-text-subtle); font-family: "Inter", sans-serif; font-size: 0.7rem; font-weight: 600; letter-spacing: 0.1em; text-transform: uppercase; margin-bottom: 0.35rem; } .gradio-container .lf-kpi-value { font-family: "Source Serif 4", Georgia, serif; font-weight: 500; font-size: 1.9rem; color: var(--lf-text); letter-spacing: -0.025em; line-height: 1.1; } .gradio-container .lf-kpi-sub { color: var(--lf-text-subtle); font-size: 0.72rem; margin-top: 0.3rem; font-family: "JetBrains Mono", monospace; } .gradio-container .lf-kpi-good .lf-kpi-value { color: #7ab68c; } .gradio-container .lf-kpi-warn .lf-kpi-value { color: #e4b264; } .gradio-container .lf-kpi-bad .lf-kpi-value { color: #d47d6a; } .gradio-container .lf-kpi-good { border-color: rgba(122,182,140,0.35); } .gradio-container .lf-kpi-warn { border-color: rgba(228,178,100,0.35); } .gradio-container .lf-kpi-bad { border-color: rgba(212,125,106,0.35); } /* Responsive: stack KPIs on narrow */ @media (max-width: 720px) { .gradio-container .lf-kpi-row { grid-template-columns: 1fr; } } /* Main pane plots+outputs */ .gradio-container .gr-plot, .gradio-container .plot-wrap { background: var(--lf-surface-alt) !important; border-radius: 10px !important; } """ # Plotly layout template — matches dark Claude palette. # Margin is intentionally factored out so per-plot overrides don't collide. _PLOTLY_LAYOUT = dict( font=dict(family="Inter, -apple-system, system-ui, sans-serif", color="#f3f0e8", size=12), paper_bgcolor="#2a2824", # card surface plot_bgcolor="#1f1d1a", # page background, slightly darker hoverlabel=dict(bgcolor="#f3f0e8", font_color="#1f1d1a", font_family="Inter", bordercolor="#e28763"), legend=dict(bgcolor="rgba(31,29,26,0.85)", bordercolor="#403b34", borderwidth=1, font=dict(color="#f3f0e8")), ) _DEFAULT_MARGIN = dict(l=60, r=30, t=60, b=55) _AXIS_STYLE = dict(gridcolor="#403b34", zerolinecolor="#554e45", showline=True, linecolor="#554e45", tickfont=dict(color="#b5ada0")) _TITLE_STYLE = dict(x=0.02, xanchor="left", font=dict(size=14, color="#f3f0e8", weight=500)) OPT_COLORS = { "sgd": "#c05450", "momentum": "#d9865b", "adam": "#5b7a6b", "lbfgs": "#556b99", "custom": "#d97757", } BAR_GOOD = "#4a7c59" BAR_BAD = "#a85c4c" # ----------------- plotting helpers (Plotly) ----------------- TEMPLATES_2D_SAFE = ["quadratic", "rosenbrock", "styblinski_tang", "huber", "gaussian_mix", "himmelblau", "plateau", "cliff"] def _color(name: str) -> str: """Look up a trajectory colour, stripping any `(tuned lr=…)` suffix.""" return OPT_COLORS.get(name.split("(")[0].strip(), "#2a2319") def _trajectory_diverged(arr: np.ndarray, clip: float = 8.0) -> bool: """True if trajectory escapes the viewing window (e.g. SGD on a stiff LP).""" return bool(np.any(np.abs(arr) > clip) or np.any(~np.isfinite(arr))) def _contour_plot(ls, trajectories=None, title=None, subtitle=None): assert ls.dim == 2, "contour plot requires dim=2" # Compute view extents from *finite, non-divergent* trajectory points only. # Divergent ones (e.g. SGD exploding to 1e6) are clipped/marked separately. CLIP = 8.0 xs_all, ys_all = [0.0], [0.0] for traj in (trajectories or {}).values(): arr = np.array(traj) if arr.size == 0: continue mask = (np.abs(arr) <= CLIP).all(axis=1) & np.isfinite(arr).all(axis=1) if mask.any(): good = arr[mask] xs_all.extend(good[:, 0].tolist()) ys_all.extend(good[:, 1].tolist()) x_min = min(min(xs_all) - 1.5, -3.5); x_max = max(max(xs_all) + 1.5, 3.5) y_min = min(min(ys_all) - 1.5, -3.5); y_max = max(max(ys_all) + 1.5, 3.5) x_min = max(x_min, -CLIP); x_max = min(x_max, CLIP) y_min = max(y_min, -CLIP); y_max = min(y_max, CLIP) g = 70 xs = np.linspace(x_min, x_max, g); ys = np.linspace(y_min, y_max, g) X, Y = np.meshgrid(xs, ys) Z = np.empty_like(X) for i in range(g): for j in range(g): Z[i, j] = ls.f(np.array([X[i, j], Y[i, j]])) finite = Z[np.isfinite(Z)] lo, hi = np.percentile(finite, [2, 95]) fig = go.Figure() fig.add_trace(go.Contour( x=xs, y=ys, z=Z, zmin=float(lo), zmax=float(hi), # Dark-mode colorscale: deep warm valleys → glowing sienna peaks colorscale=[ [0.0, "#1f1d1a"], [0.15, "#2f2a22"], [0.3, "#4a2f22"], [0.5, "#7a4229"], [0.7, "#c25a3a"], [0.85, "#e28763"], [1.0, "#f4d6c5"], ], contours=dict(coloring="heatmap", showlabels=False), line=dict(width=0.5, color="rgba(243,240,232,0.12)"), colorbar=dict(title=dict(text="f(x)", font=dict(size=11, color="#f3f0e8")), thickness=12, len=0.85, tickfont=dict(size=10, color="#b5ada0"), outlinewidth=0), hovertemplate="x₁=%{x:.3f}
x₂=%{y:.3f}
f=%{z:.3f}", )) divergent_names: list[str] = [] if trajectories: for name, traj in trajectories.items(): if not traj: continue color = _color(name) arr = np.array(traj) # Clip to view; mark divergent for annotation diverged = _trajectory_diverged(arr, clip=CLIP) if diverged: divergent_names.append(name) # Keep only finite, in-window points for plotting mask = (np.abs(arr) <= CLIP).all(axis=1) & np.isfinite(arr).all(axis=1) arr = arr[mask] if arr.shape[0] == 0: continue display_name = f"{name} · diverged" if diverged else name line_style = "dash" if diverged else "solid" hover = [f"step {i}
x₁={a[0]:.3f}
x₂={a[1]:.3f}" for i, a in enumerate(arr)] fig.add_trace(go.Scatter( x=arr[:, 0], y=arr[:, 1], mode="lines+markers", name=display_name, line=dict(color=color, width=2.5, dash=line_style), marker=dict(size=4, color=color, line=dict(color="#ffffff", width=0.8)), hovertemplate="%{text}" + display_name + "", text=hover, )) fig.add_trace(go.Scatter( x=[arr[0, 0]], y=[arr[0, 1]], mode="markers", showlegend=False, marker=dict(size=12, color=color, symbol="circle-open", line=dict(color=color, width=2.5)), hovertemplate=f"start{display_name}", )) end_symbol = "x" if diverged else "star" end_size = 14 if diverged else 16 fig.add_trace(go.Scatter( x=[arr[-1, 0]], y=[arr[-1, 1]], mode="markers", showlegend=False, marker=dict(size=end_size, color=color, symbol=end_symbol, line=dict(color="#ffffff", width=1.2)), hovertemplate=(f"{'diverged-exit' if diverged else 'end'}" f"{display_name}"), )) full_title = title or f"{ls.name} (dim=2)" sub_text = subtitle or ( f"diverged: {', '.join(divergent_names)}" if divergent_names else None ) if sub_text: full_title = f"{full_title}
⚠ {sub_text}" fig.update_layout( **_PLOTLY_LAYOUT, title=dict(text=full_title, **_TITLE_STYLE), height=480, margin=_DEFAULT_MARGIN, xaxis=dict(title="x₁", range=[x_min, x_max], **_AXIS_STYLE), yaxis=dict(title="x₂", range=[y_min, y_max], scaleanchor="x", scaleratio=1, **_AXIS_STYLE), ) return fig def _loss_curves(traj_map, title): fig = go.Figure() for name, fs in traj_map.items(): if not fs: continue color = _color(name) # Drop non-finite / negative-infty tail if optimiser diverged fs_clean = [v if np.isfinite(v) else None for v in fs] xs = list(range(len(fs_clean))) fig.add_trace(go.Scatter( x=xs, y=fs_clean, mode="lines+markers", name=name, line=dict(color=color, width=2.2, shape="spline"), marker=dict(size=4, color=color), hovertemplate="step=%{x}
f=%{y:.4g}" + name + "", connectgaps=False, )) fig.update_layout( **_PLOTLY_LAYOUT, title=dict(text=title, **_TITLE_STYLE), height=360, margin=_DEFAULT_MARGIN, xaxis=dict(title="optimizer step", **_AXIS_STYLE), yaxis=dict(title="f(x) (symlog)", type="log", **_AXIS_STYLE), ) return fig def _bar_plot(values, title, ylabel): names = list(values.keys()) vs = [values[n] for n in names] colors = [_color(n) for n in names] fig = go.Figure(go.Bar( x=names, y=vs, marker=dict(color=colors, line=dict(color="#ffffff", width=1)), text=[f"{v:.3g}" for v in vs], textposition="outside", textfont=dict(size=11), hovertemplate="%{x}
" + ylabel + "=%{y:.4g}", )) fig.update_layout( **_PLOTLY_LAYOUT, title=dict(text=title, **_TITLE_STYLE), height=280, margin=_DEFAULT_MARGIN, xaxis=dict(**_AXIS_STYLE), yaxis=dict(title=ylabel, **_AXIS_STYLE), showlegend=False, ) return fig def _reward_breakdown_plot(components, total): # Horizontal bars — more readable in a narrow column, aligns values nicely. names = list(components.keys()) vs = [components[n] for n in names] colors = [BAR_GOOD if v >= 0 else BAR_BAD for v in vs] fig = go.Figure(go.Bar( y=names, x=vs, orientation="h", marker=dict(color=colors, line=dict(color="#1f1d1a", width=1)), text=[f"{v:+.3f}" for v in vs], textposition="outside", textfont=dict(size=11, color="#f3f0e8"), cliponaxis=False, hovertemplate="%{y}
contribution=%{x:+.3f}", )) fig.add_vline(x=0, line_width=1, line_color="#554e45") fig.update_layout( **_PLOTLY_LAYOUT, title=dict( text=f"Reward breakdown · total = {total:+.3f}", **_TITLE_STYLE), height=240, margin=dict(l=110, r=50, t=50, b=30), xaxis=dict(title="weighted contribution", range=[min(vs + [0]) - 0.15, max(vs + [0]) + 0.15], **_AXIS_STYLE), yaxis=dict(autorange="reversed", **_AXIS_STYLE), showlegend=False, bargap=0.25, ) return fig def _empty_plot(msg): fig = go.Figure() fig.add_annotation( text=msg, x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=14, color="#6b6258"), ) fig.update_layout( **_PLOTLY_LAYOUT, height=480, showlegend=False, margin=_DEFAULT_MARGIN, xaxis=dict(visible=False), yaxis=dict(visible=False), ) return fig # ----------------- tab 1: Landscape Explorer ----------------- def _explore_landscape(template, dim, seed): rng = np.random.default_rng(int(seed)) params: dict[str, Any] = {} if template == "quadratic": params = {"cond": 10.0} if template == "gaussian_mix": params = {"k": 3, "sigma": 0.5, "spread": 2.0} if template == "himmelblau": dim = 2 ls = build_landscape(template=template, dim=int(dim), params=params, rng=rng) hints = structural_hints(ls, rng=rng) if ls.dim == 2: fig = _contour_plot(ls, title=f"{template} · dim=2") else: fig = _empty_plot(f"{template} · dim={ls.dim}\nContour view is 2-D only") rows = [] for k, v in hints.items(): rows.append([k, f"{v:.4g}" if isinstance(v, float) else str(v)]) rows.append(["dim", ls.dim]) rows.append(["f_min (known)", f"{ls.f_min:.4g}"]) rows.append(["description", ls.description]) return fig, rows # ----------------- tab 2: Baseline Race ----------------- def _baseline_race(template, seed): rng = np.random.default_rng(int(seed)) params: dict[str, Any] = {} if template == "quadratic": params = {"cond": 10.0} if template == "gaussian_mix": params = {"k": 3, "sigma": 0.5, "spread": 2.0} ls = build_landscape(template=template, dim=2, params=params, rng=rng) x0 = np.random.default_rng(int(seed) + 999).normal(0.0, 0.5, size=2) # Tune EACH baseline's LR to the landscape (not just Adam). Without this, # SGD at lr=0.01 diverges on stiff landscapes (Rosenbrock, high-cond # quadratics) and produces a monotone-up curve. Per-baseline tuning makes # the race compare *algorithms*, not "default hparams". traj_2d: dict[str, list[tuple[float, float]]] = {} curves: dict[str, list[float]] = {} finals: dict[str, float] = {} tuned_lrs: dict[str, float] = {} for name in ["sgd", "momentum", "adam", "lbfgs"]: r = run_baseline_tuned(name, ls.f, ls.grad, x0, steps=50) tuned_lrs[name] = r["lr"] traj = [s for s in r["trajectory"] if s.get("x") is not None] traj_2d[name] = [(s["x"][0], s["x"][1]) for s in traj] curves[name] = [s["f"] for s in traj if s.get("f") is not None] finals[name] = curves[name][-1] if curves[name] else float("inf") contour = _contour_plot(ls, trajectories=traj_2d, title=f"{template} — baselines racing (LR-tuned)") curves_fig = _loss_curves(curves, "f(x) vs step") finals_fig = _bar_plot(finals, "Final f after 50 steps", ylabel="f(x) at step 50") lr_table = " · ".join(f"`{name}`: `{lr:g}`" for name, lr in tuned_lrs.items()) summary = ( f"**{ls.description}**\n\n" f"Tuned LR per baseline (7-point sweep, 30 steps): {lr_table}\n\n" f"Best baseline: `{min(finals, key=finals.get)}` at f = " f"`{min(finals.values()):.4f}`" ) return contour, curves_fig, finals_fig, summary # ----------------- tab 3: Optimizer Arena ----------------- SAMPLE_OPTIMIZER = """ class Optimizer: def __init__(self, dim): self.lr = 0.05 self.beta = 0.9 self.v = np.zeros(dim) def step(self, x, f_val, grad): # SGD with heavy-ball momentum self.v = self.beta * self.v - self.lr * grad return x + self.v """.strip() ADAM_ARENA_TEMPLATE = """ class Optimizer: def __init__(self, dim): self.lr = {lr} self.b1, self.b2, self.eps = 0.9, 0.999, 1e-8 self.m = np.zeros(dim); self.v = np.zeros(dim); self.t = 0 def step(self, x, f_val, grad): self.t += 1 self.m = self.b1*self.m + (1-self.b1)*grad self.v = self.b2*self.v + (1-self.b2)*grad*grad mh = self.m/(1-self.b1**self.t); vh = self.v/(1-self.b2**self.t) return x - self.lr * mh / (np.sqrt(vh) + self.eps) """.strip() ARENA_SEEDS = [101, 202, 303, 404, 505, 606, 707, 808, 909, 1010] def _arena_compare(template, dim, seed, code): rng = np.random.default_rng(int(seed)) dim = int(dim) params: dict[str, Any] = {} if template == "quadratic": params = {"cond": 10.0} if template == "gaussian_mix": params = {"k": 3, "sigma": 0.5, "spread": 2.0} if template == "himmelblau": dim = 2 ls = build_landscape(template=template, dim=dim, params=params, rng=rng) tune_x0 = np.random.default_rng(0).normal(0.0, 0.5, size=dim) best_lr = tune_adam_lr(ls.f, ls.grad, tune_x0, sweep_steps=30) adam_src = ADAM_ARENA_TEMPLATE.format(lr=best_lr) # Compile user code try: opt = compile_optimizer(code, dim=dim) except SandboxError as e: return (None, None, None, f"### ❌ Compile error\n\n```\n{e}\n```", {}) test = auto_test_draft(opt, ls, seed=int(seed), steps=20) user_arena = run_arena(opt, ls, seeds=ARENA_SEEDS, steps=200) adam_opt = compile_optimizer(adam_src, dim=dim) adam_arena = run_arena(adam_opt, ls, seeds=ARENA_SEEDS, steps=200) reward = compute_optcoder_reward( arena=user_arena, adam_arena=adam_arena, actions_used_cost=0, budget_total=12, novelty_score=ast_novelty_score(code, [adam_src]), convergence_step=None, arena_steps=200, ) if dim == 2: user_traj = [(s["x"][0], s["x"][1]) for s in test["detail"]] adam_run = run_baseline("adam", ls.f, ls.grad, np.random.default_rng(int(seed)).normal(0.0, 0.5, 2), steps=50) adam_traj = [(s["x"][0], s["x"][1]) for s in adam_run["trajectory"] if s.get("x") is not None] contour = _contour_plot(ls, trajectories={"custom": user_traj, "adam": adam_traj}, title=f"{template} — your optimizer vs tuned Adam") else: contour = _empty_plot(f"{template} · dim={dim}\nContour view is 2-D only") progress_fig = _bar_plot( {"custom": user_arena.mean_progress, "adam (tuned)": adam_arena.mean_progress}, "Arena mean progress", ylabel="mean(f₀ − f_N) over 10 seeds", ) bk = reward.breakdown components = { "r_regret": bk["r_regret"], "r_convergence": bk["r_convergence"], "r_robustness": bk["r_robustness"], "r_novelty": bk["r_novelty"], "-r_budget": -bk["r_budget"], "-r_eval_fail": -bk["r_eval_failures"], } reward_fig = _reward_breakdown_plot(components, reward.r_total) summary = ( f"### Results\n\n" f"- Your mean progress: `{user_arena.mean_progress:.4g}`\n" f"- Tuned Adam progress: `{adam_arena.mean_progress:.4g}` " f"(lr = `{best_lr:g}`)\n" f"- Speedup vs Adam: `{bk.get('speedup_vs_adam', 0):.3g}×`\n" f"- Your crash fraction: `{user_arena.crash_fraction:.0%}`\n" f"- **Total reward: `{reward.r_total:+.3f}`**" ) return contour, progress_fig, reward_fig, summary, dict(bk) # ----------------- OpenEnv API + LLM auto-run ----------------- # # Drives the env in-process (no HTTP round trip) so this tab works inside the # Space container without localhost access. # # One session-scoped env lives in _API_ENV_STATE; reset/step mutate it. # Additionally, `_llm_auto_run` connects to any OpenAI-compatible endpoint # and drives a full episode end-to-end, streaming actions as they happen. import os as _os import time as _time import requests as _requests _API_ENV_STATE: dict[str, Any] = {"env": None} def _make_env(tier: str, seed: int): try: from ..server.landscapeforge_environment import LandscapeforgeEnvironment except ImportError: from server.landscapeforge_environment import LandscapeforgeEnvironment # type: ignore return LandscapeforgeEnvironment(tier=tier, seed=int(seed)) def _api_reset(tier, seed): env = _make_env(tier, seed) obs = env.reset() _API_ENV_STATE["env"] = env return ( _fmt_obs(obs.model_dump(exclude_none=True)), f"✓ Reset complete · landscape: **{obs.landscape_description}** · " f"dim = {obs.dim} · budget = {obs.budget_remaining}", ) def _api_step(kind, baseline_name, code, draft_idx, step_start, step_end): env = _API_ENV_STATE.get("env") if env is None: return {"error": "call /reset first"}, "⚠ No active env — hit **reset** first." kwargs: dict[str, Any] = {"kind": kind} if kind == "run_baseline": kwargs["baseline_name"] = baseline_name or "adam" elif kind == "draft": kwargs["code"] = code or "" elif kind == "inspect": kwargs["draft_idx"] = int(draft_idx) if draft_idx is not None else 0 kwargs["step_range_start"] = int(step_start) kwargs["step_range_end"] = int(step_end) try: action = LandscapeforgeAction(**kwargs) except Exception as e: return {"error": str(e)}, f"❌ Invalid action: {e}" obs = env.step(action) dump = _fmt_obs(obs.model_dump(exclude_none=True)) banner = ( f"✓ {kind} executed · budget remaining = {obs.budget_remaining}" + (" · **episode done**" if obs.done else "") ) return dump, banner # ---- LLM auto-run (OpenAI-compat endpoint) ---- PRESET_ENDPOINTS = { "Ollama (localhost:11434)": ("http://localhost:11434/v1", ""), "Hugging Face Router": ("https://router.huggingface.co/v1", _os.getenv("HF_TOKEN", "")), "OpenAI": ("https://api.openai.com/v1", _os.getenv("OPENAI_API_KEY", "")), "Custom": ("", ""), } PRESET_MODELS = [ "qwen2.5:3b", "qwen2.5:7b", "qwen2.5:1.5b", "Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "meta-llama/Llama-3.2-3B-Instruct", "gpt-4o-mini", ] def _llm_auto_run(endpoint_choice, custom_url, api_key, model_name, tier, seed, temperature, max_turns): """Drive a full episode end-to-end with an LLM. Yields progressive markdown transcripts so the UI updates live.""" try: from ..prompts import build_prompt, parse_action except ImportError: from prompts import build_prompt, parse_action # type: ignore base, preset_key = PRESET_ENDPOINTS.get(endpoint_choice, ("", "")) base_url = (custom_url.strip() or base).rstrip("/") key = (api_key or "").strip() or preset_key or _os.getenv("API_KEY", "") if not base_url: yield ("Pick a preset endpoint or type a custom URL.", {}, None) return if not model_name: yield ("Pick or type a model name.", {}, None) return url = base_url + "/chat/completions" headers = {"Content-Type": "application/json"} if key: headers["Authorization"] = f"Bearer {key}" env = _make_env(tier, int(seed)) obs = env.reset() _API_ENV_STATE["env"] = env log_lines: list[str] = [ f"### Episode running", f"Model {model_name} " f"via {base_url}", "", f"**Landscape:** {obs.landscape_description} ", f"**Dim:** {obs.dim} · **Initial budget:** {obs.budget_remaining}", "", ] yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), None) for turn in range(1, int(max_turns) + 1): messages = build_prompt(obs) t0 = _time.time() try: r = _requests.post(url, headers=headers, json={ "model": model_name, "messages": messages, "temperature": float(temperature), "max_tokens": 1200, "stream": False, }, timeout=180) if r.status_code >= 400: log_lines.append(f"**[LLM error {r.status_code}]** {r.text[:300]}") yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), None) return raw = r.json()["choices"][0]["message"]["content"] except Exception as e: log_lines.append(f"**[request failed]** `{type(e).__name__}: {e}`") yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), None) return dt = _time.time() - t0 try: action = parse_action(raw) except Exception as e: log_lines.append( f"**[turn {turn}] parse error:** `{e}`" f"\n```\n{raw[:500]}\n```\n" ) yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), None) return obs = env.step(action) _API_ENV_STATE["env"] = env # Pretty action line if action.kind == "draft": action_str = f"draft *({len(action.code or '')} chars)*" elif action.kind == "run_baseline": action_str = f"run_baseline(`{action.baseline_name}`)" elif action.kind == "inspect": action_str = (f"inspect(draft={action.draft_idx}, " f"[{action.step_range_start},{action.step_range_end}])") else: action_str = "commit" # Build a self-contained "turn card" with explicit Action / Output # demarcation. Rendered as HTML so we control the structure. kind_chip = (f"" f"{action.kind}") # Output status badges — colored chips + key/value pairs. output_badges: list[str] = [] lar = obs.last_action_result or {} if lar.get("compile_error"): output_badges.append( "compile error") if lar.get("summary"): s = lar["summary"] if s.get("converged"): output_badges.append( "auto-test converged") elif s.get("diverged"): output_badges.append( "auto-test diverged") if s.get("final_f") is not None: output_badges.append( f"final_f = {s['final_f']:.3g}") if action.kind == "run_baseline" and lar.get("final_f") is not None: output_badges.append( f"final_f = {lar['final_f']:.3g}") fb = lar.get("feedback") or {} for k, v in fb.items(): cls = "lf-status-good" if v >= 0 else "lf-status-warn" output_badges.append( f"{k} " f"{v:+.3f}") output_html = " · ".join(output_badges) if output_badges else "ok" turn_html = ( f"
" f"
" f" Turn {turn}" f" {kind_chip}" f" {dt:.1f}s · budget " f"{obs.budget_remaining}" f"
" f"
" f" " f"
{action_str}
" f"
" f"
" f" " f"
{output_html}
" f"
" f"
" ) log_lines.extend([f"", turn_html, f""]) if action.kind == "draft" and action.code: log_lines.append(f"```python\n{action.code.strip()}\n```") log_lines.append(f"") yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), None) if obs.done: bk = obs.r_optcoder_breakdown or {} reward_val = obs.r_optcoder or 0.0 my_prog = bk.get("my_progress", 0.0) adam_prog = bk.get("adam_progress", 0.0) speedup = bk.get("speedup_vs_adam", 0.0) reason = (obs.last_action_result or {}).get("reason", "?") # Tone of the reward KPI — green if positive, red if negative reward_tone = ("lf-kpi-good" if reward_val >= 0.5 else ("lf-kpi-warn" if reward_val >= 0 else "lf-kpi-bad")) speedup_display = (f"{speedup:.2f}×" if speedup < 100 else f"{speedup:.0f}×") speedup_tone = ("lf-kpi-good" if speedup >= 1.0 else "lf-kpi-warn") episode_done_html = ( "
" "
" " Episode complete" f" ended by " f"{reason}" "
" "
" "
" "
Terminal reward
" f"
{reward_val:+.3f}
" "
GRPO training scalar
" "
" "
" "
Speedup vs tuned Adam
" f"
{speedup_display}
" f"
my {my_prog:.3g} · " f"adam {adam_prog:.3g}
" "
" "
" "
Adam shortfall
" f"
{obs.final_regret:.3f}
" "
0 = matched/beat Adam
" "
" "
" "
" ) log_lines.extend([f"", episode_done_html, f""]) reward_plot = _reward_breakdown_plot({ "r_regret": bk.get("r_regret", 0), "r_convergence": bk.get("r_convergence", 0), "r_robustness": bk.get("r_robustness", 0), "r_novelty": bk.get("r_novelty", 0), "-r_budget": -bk.get("r_budget", 0), "-r_eval_fail": -bk.get("r_eval_failures", 0), }, reward_val) yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), reward_plot) return log_lines.append("\n**[!] Reached max turns without commit** — episode unfinished.") yield ("\n".join(log_lines), _fmt_obs(obs.model_dump(exclude_none=True)), None) # ----------------- top-level UI ----------------- HERO_HTML = """
LandscapeForge
OpenEnv · Hackathon Apr '26

An LLM designs optimizers, through a probe–draft–commit REPL.

Two agents co-evolve: one writes optimizer code, the other picks adversarial landscapes. Connect any OpenAI-compatible endpoint and watch a model play, or explore the landscape library interactively.

""" ABOUT_MD = """ ### How the environment works **OptCoder** (the LLM policy) designs an `Optimizer` class that minimizes a hidden loss landscape. Each episode: 1. **LandscapeForge** (v1: internal template picker) chooses a landscape at a tier-appropriate difficulty — convex quadratic, Rosenbrock, Gaussian mix, Himmelblau, stiff quadratic, cliff. 2. **OptCoder runs a 4-action REPL** with a 12-unit budget: - `run_baseline(name)` — run SGD / Momentum / Adam / L-BFGS, see trajectory (cost: 2) - `draft(code)` — submit `Optimizer` class, env auto-tests 20 steps (cost: 2) - `inspect(draft_idx, step_range)` — per-step detail for a prior draft (cost: 1) - `commit` — run the full 10-seed × 200-step arena (cost: 0) 3. **Reward** is Adam-relative progress — `my_progress / tuned_adam_progress − 1`, clipped to `[−1, +1]`. No `f_min` dependency, so this extends to NN training as a drop-in. 4. **GRPO** trains the policy against this reward; arena cost is ~50 ms so ~36 k episodes/hour on one H100. ### Research anchors - **Thread 1** · LLMs as optimizer designers: [Lion](https://arxiv.org/abs/2302.06675), [FunSearch](https://www.nature.com/articles/s41586-023-06924-6) - **Thread 2** · Co-evolutionary LLM-env: Coevolve, [GenEnv](https://arxiv.org/html/2512.19682v1) - **Thread 3** · Iterative code refinement: [Self-Refine](https://arxiv.org/abs/2303.17651) - **Thread 4** · GRPO with measurable rewards: [HPC GFLOPS reward paper](https://arxiv.org/abs/2602.12049v1) - **Thread 5** · Analytical landscape benchmarks: [BBOB/COCO](https://inria.hal.science/hal-00362649/document), [POET](https://arxiv.org/abs/1901.01753) ### Use from code ```python from landscapeforge import LandscapeforgeEnv, LandscapeforgeAction with LandscapeforgeEnv.from_docker_image("landscapeforge-env:latest") as env: env.reset() env.step(LandscapeforgeAction(kind="run_baseline", baseline_name="adam")) env.step(LandscapeforgeAction(kind="draft", code=MY_OPT_CLASS)) print(env.step(LandscapeforgeAction(kind="commit")).observation.r_optcoder_breakdown) ``` API endpoints on this Space: `/reset`, `/step`, `/schema`, `/openapi.json`, `/health`, WebSocket `/ws`. See **OpenEnv API** tab for a live playground. """ def build_ui(*args, **kwargs) -> gr.Blocks: """Entry point for the Gradio app. Ignores OpenEnv's builder args.""" with gr.Blocks( title="LandscapeForge", theme=gr.themes.Soft( primary_hue=gr.themes.Color( c50="#fbf0ea", c100="#f4d6c5", c200="#ebb69b", c300="#e09778", c400="#d37a58", c500="#c96442", c600="#a8522f", c700="#874123", c800="#623018", c900="#3f1e10", c950="#21100a", ), neutral_hue="stone", font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"], font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"], ), css=CLAUDE_CSS, ) as app: gr.HTML(HERO_HTML) with gr.Tabs(): # --- Tab 0: Run with LLM (primary — auto-run) --- with gr.Tab("Run with LLM"): with gr.Row(equal_height=False): # -------- MAIN PANE (left, wider) -------- with gr.Column(scale=4, min_width=640): gr.Markdown("### Transcript") transcript = gr.Markdown( "*Configure the LLM on the right and hit " "**▶ Run episode** — each turn streams here " "as the model plays.*", ) with gr.Row(): with gr.Column(scale=1): llm_reward_plot = gr.Plot( label="Reward breakdown (on episode end)") with gr.Column(scale=1): latest_obs = gr.Code( language="json", interactive=False, label="Latest observation", lines=14) # -------- SIDEBAR (right, narrower) -------- with gr.Column(scale=1, min_width=300, elem_classes="lf-sidebar"): gr.Markdown("### Connect an LLM") gr.Markdown( "Point at any OpenAI-compatible " "`/v1/chat/completions` endpoint." ) ep_choice = gr.Dropdown( list(PRESET_ENDPOINTS.keys()), value="Ollama (localhost:11434)", label="Endpoint", ) model_name_in = gr.Dropdown( PRESET_MODELS, value="qwen2.5:3b", label="Model", allow_custom_value=True, ) custom_url_in = gr.Textbox( value="", label="Custom base URL", placeholder="http://localhost:8080/v1", ) key_in = gr.Textbox( value="", label="API key", placeholder="Bearer ", type="password", ) gr.Markdown("---") gr.Markdown("### Episode config") tier_llm = gr.Dropdown(["T0", "T1", "T2"], value="T0", label="Tier") seed_llm = gr.Slider(0, 100, value=42, step=1, label="Seed") temp_llm = gr.Slider(0, 1.5, value=0.7, step=0.05, label="Temperature") max_turns_llm = gr.Slider(3, 15, value=10, step=1, label="Max turns") run_btn = gr.Button("▶ Run episode", variant="primary", size="lg") with gr.Accordion("System prompt (sent to LLM)", open=False): try: from ..prompts import SYSTEM as _SYS, ACTION_SPEC as _ACT except ImportError: from prompts import SYSTEM as _SYS, ACTION_SPEC as _ACT # type: ignore gr.Code( value=f"# SYSTEM\n\n{_SYS}\n\n# ACTION_SPEC\n\n{_ACT}", language="markdown", interactive=False, lines=14, ) run_btn.click( _llm_auto_run, [ep_choice, custom_url_in, key_in, model_name_in, tier_llm, seed_llm, temp_llm, max_turns_llm], [transcript, latest_obs, llm_reward_plot], ) # --- Tab: Manual stepping (raw /reset + /step) --- with gr.Tab("API playground"): with gr.Row(equal_height=False): with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"): gr.Markdown("### Manual stepping") gr.Markdown( "Drive the env one action at a time — exactly " "the same contract as the HTTP `/reset` + `/step` " "endpoints. Useful for sanity-checking an action " "or debugging." ) tier4 = gr.Dropdown(["T0", "T1", "T2"], value="T0", label="Tier") seed4 = gr.Slider(0, 100, value=42, step=1, label="Seed") reset_btn = gr.Button("Reset env", variant="primary") gr.Markdown("---") kind4 = gr.Radio( ["run_baseline", "draft", "inspect", "commit"], value="run_baseline", label="Action kind") with gr.Accordion("run_baseline args", open=True): bname4 = gr.Dropdown( ["sgd", "momentum", "adam", "lbfgs"], value="adam", label="Reference optimizer") with gr.Accordion("draft args", open=False): code4 = gr.Code(value=SAMPLE_OPTIMIZER, language="python", label="Optimizer class", lines=10) with gr.Accordion("inspect args", open=False): didx4 = gr.Number(value=0, precision=0, label="draft_idx") s4s = gr.Number(value=0, precision=0, label="step_range_start") s4e = gr.Number(value=20, precision=0, label="step_range_end") step_btn = gr.Button("Step", variant="primary") with gr.Column(scale=2, min_width=580): status4 = gr.Markdown( "*No active env — hit **Reset env** to begin.*") obs4_reset = gr.Code( language="json", interactive=False, label="Initial observation", lines=12) status4b = gr.Markdown() obs4 = gr.Code( language="json", interactive=False, label="Step observation", lines=14) reset_btn.click(_api_reset, [tier4, seed4], [obs4_reset, status4]) step_btn.click( _api_step, [kind4, bname4, code4, didx4, s4s, s4e], [obs4, status4b], ) # --- Tab 1: Landscape --- with gr.Tab("Landscape"): with gr.Row(equal_height=False): with gr.Column(scale=1, min_width=320, elem_classes="lf-sidebar"): gr.Markdown("### Landscape Explorer") gr.Markdown( "Pick a template and see what the agent sees " "at reset — the 2-D contour plus env-computed " "structural hints used to calibrate the optimizer." ) tmpl1 = gr.Dropdown(TEMPLATES_2D_SAFE, value="rosenbrock", label="Template") dim1 = gr.Slider(2, 10, value=2, step=1, label="Dim") seed1 = gr.Slider(0, 100, value=0, step=1, label="Seed") go1 = gr.Button("Build landscape", variant="primary", size="lg") with gr.Column(scale=2, min_width=580): plot1 = gr.Plot(label="Contour") hints1 = gr.Dataframe( headers=["property", "value"], datatype=["str", "str"], label="Structural hints (shown to the agent at reset)", wrap=True, row_count=(8, "dynamic"), ) go1.click(_explore_landscape, [tmpl1, dim1, seed1], [plot1, hints1]) app.load(_explore_landscape, [gr.State("rosenbrock"), gr.State(2), gr.State(0)], [plot1, hints1]) # --- Tab 2: Baseline Race --- with gr.Tab("Baseline Race"): with gr.Row(equal_height=False): with gr.Column(scale=1, min_width=320, elem_classes="lf-sidebar"): gr.Markdown("### Baseline Race") gr.Markdown( "Race SGD, Momentum, L-BFGS, and **Adam with " "per-landscape LR tuning** from the same init. " "The tuned Adam is the bar the trained OptCoder " "has to beat." ) tmpl2 = gr.Dropdown(TEMPLATES_2D_SAFE, value="rosenbrock", label="Template") seed2 = gr.Slider(0, 100, value=1, step=1, label="Seed") go2 = gr.Button("Race", variant="primary", size="lg") with gr.Column(scale=2, min_width=580): plot2a = gr.Plot(label="Contour + trajectories") with gr.Row(): plot2b = gr.Plot(label="f(x) vs step") plot2c = gr.Plot(label="Final f after 50 steps") summary2 = gr.Markdown() go2.click(_baseline_race, [tmpl2, seed2], [plot2a, plot2b, plot2c, summary2]) # --- Tab 3: Optimizer Arena --- with gr.Tab("Optimizer Arena"): with gr.Row(equal_height=False): with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"): gr.Markdown("### Optimizer Arena") gr.Markdown( "Paste or edit an `Optimizer` class. We run it " "through the full Phase-D arena (10 seeds × 200 " "steps) against tuned Adam and show the reward " "breakdown.
`np` is pre-injected — " "do not write import lines." ) tmpl3 = gr.Dropdown(list(BUILDERS.keys()), value="quadratic", label="Template") dim3 = gr.Slider(2, 10, value=5, step=1, label="Dim") seed3 = gr.Slider(0, 100, value=42, step=1, label="Seed") go3 = gr.Button("Run arena", variant="primary", size="lg") with gr.Column(scale=2, min_width=580): code3 = gr.Code(value=SAMPLE_OPTIMIZER, language="python", label="Your Optimizer class", lines=14) with gr.Row(): plot3a = gr.Plot(label="2-D trajectory (if dim = 2)") plot3b = gr.Plot(label="Mean arena progress") plot3c = gr.Plot(label="Reward breakdown") summary3 = gr.Markdown() breakdown3 = gr.JSON(label="Full reward dict", height=220) go3.click(_arena_compare, [tmpl3, dim3, seed3, code3], [plot3a, plot3b, plot3c, summary3, breakdown3]) # --- About --- with gr.Tab("About"): gr.Markdown(ABOUT_MD) return app