mnawfal29 committed on
Commit
ec5ebb7
·
verified ·
1 Parent(s): 3477236

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. demo/ui.py +368 -168
demo/ui.py CHANGED
@@ -42,32 +42,21 @@ except ImportError: # flat layout (HF Space container)
42
  # Mimics Anthropic's actual surface colors: warmer parchment background,
43
  # deep warm ink for text, Anthropic burnt-sienna as primary accent.
44
  CLAUDE_CSS = """
45
- /* Variables */
46
  :root {
47
- --lf-bg: #f0eee6; /* warm parchment background */
48
- --lf-surface: #faf9f5; /* card surface, lighter than bg */
49
- --lf-surface-alt: #ffffff;
50
- --lf-border: #dcd6c7; /* warm grey border */
51
- --lf-border-soft: #e8e2d3;
52
- --lf-text: #141413; /* deep warm ink */
53
- --lf-text-muted: #595550;
54
- --lf-text-subtle: #8a847c;
55
- --lf-accent: #c96442; /* Anthropic burnt sienna */
56
- --lf-accent-dk: #a8522f;
57
- --lf-accent-soft: #f4d6c5;
58
- --lf-good: #3d6b4c;
59
- --lf-bad: #a0483a;
60
- }
61
- @media (prefers-color-scheme: dark) {
62
- /* keep warm palette even in dark mode — Claude itself stays warm */
63
- :root {
64
- --lf-bg: #262420;
65
- --lf-surface: #2f2c26;
66
- --lf-surface-alt:#393630;
67
- --lf-border: #3d3a33;
68
- --lf-text: #f0eee6;
69
- --lf-text-muted:#a8a29a;
70
- }
71
  }
72
 
73
  /* Page */
@@ -81,6 +70,44 @@ html, body, .gradio-container {
81
  max-width: 1180px !important;
82
  margin: 0 auto !important;
83
  padding: 1.75rem 1.5rem 3rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  }
85
 
86
  /* Typography — serif for headings to match Claude's Tiempos-style hero */
@@ -287,12 +314,111 @@ html, body, .gradio-container {
287
  font-size: 0.9rem !important;
288
  }
289
 
290
- /* JSON renderer */
291
- .gradio-container .json-holder {
 
 
292
  background: var(--lf-surface) !important;
293
  border: 1px solid var(--lf-border-soft) !important;
294
  border-radius: 8px !important;
295
  padding: 0.9rem !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  }
297
 
298
  /* Accordion headers */
@@ -313,26 +439,59 @@ footer, .gradio-container footer { display: none !important; }
313
  border-radius: 5px;
314
  }
315
  .gradio-container ::-webkit-scrollbar-thumb:hover { background: var(--lf-text-subtle); }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  """
317
 
318
- # Plotly layout template — matches the Claude palette.
319
- # Axes are intentionally excluded so callers can override type/log/scale
320
- # without kwarg collisions.
321
  _PLOTLY_LAYOUT = dict(
322
  font=dict(family="Inter, -apple-system, system-ui, sans-serif",
323
- color="#2a2319", size=12),
324
- paper_bgcolor="#faf9f5",
325
- plot_bgcolor="#ffffff",
326
  margin=dict(l=60, r=30, t=60, b=55),
327
- hoverlabel=dict(bgcolor="#2a2319", font_color="#faf9f5",
328
- font_family="Inter", bordercolor="#2a2319"),
329
- legend=dict(bgcolor="rgba(255,255,255,0.92)",
330
- bordercolor="#e8e4dc", borderwidth=1),
 
331
  )
332
- _AXIS_STYLE = dict(gridcolor="#e8e4dc", zerolinecolor="#d5d0c6",
333
- showline=True, linecolor="#d5d0c6")
 
334
  _TITLE_STYLE = dict(x=0.02, xanchor="left",
335
- font=dict(size=14, color="#2a2319"))
336
 
337
  OPT_COLORS = {
338
  "sgd": "#c05450",
@@ -396,15 +555,19 @@ def _contour_plot(ls, trajectories=None, title=None, subtitle=None):
396
  fig.add_trace(go.Contour(
397
  x=xs, y=ys, z=Z,
398
  zmin=float(lo), zmax=float(hi),
 
399
  colorscale=[
400
- [0.0, "#fdf5f0"], [0.2, "#fbe5d6"], [0.4, "#f6c9a8"],
401
- [0.6, "#eea87b"], [0.8, "#d97757"], [1.0, "#b75d3f"],
 
402
  ],
403
  contours=dict(coloring="heatmap", showlabels=False),
404
- line=dict(width=0.5, color="rgba(255,255,255,0.35)"),
405
- colorbar=dict(title=dict(text="f(x)", font=dict(size=11)),
 
406
  thickness=12, len=0.85,
407
- tickfont=dict(size=10), outlinewidth=0),
 
408
  hovertemplate="x₁=%{x:.3f}<br>x₂=%{y:.3f}<br>f=%{z:.3f}<extra></extra>",
409
  ))
410
 
@@ -1092,56 +1255,71 @@ def build_ui(*args, **kwargs) -> gr.Blocks:
1092
  gr.HTML(HERO_HTML)
1093
 
1094
  with gr.Tabs():
1095
- # --- Tab 0: OpenEnv API (primary — LLM auto-run + manual) ---
1096
  with gr.Tab("OpenEnv"):
1097
- gr.Markdown(
1098
- "### Connect an LLM and let it play\n\n"
1099
- "Point at any OpenAI-compatible `/v1/chat/completions` "
1100
- "endpoint Ollama, HF Router, OpenAI, a vLLM server. "
1101
- "Click **Run episode** and the model drives one full "
1102
- "run of the env, streaming each REPL action here in real time."
1103
- )
1104
- with gr.Row():
1105
- ep_choice = gr.Dropdown(
1106
- list(PRESET_ENDPOINTS.keys()),
1107
- value="Ollama (localhost:11434)",
1108
- label="Endpoint preset",
1109
- )
1110
- model_name_in = gr.Dropdown(
1111
- PRESET_MODELS, value="qwen2.5:3b",
1112
- label="Model name", allow_custom_value=True,
1113
- )
1114
- with gr.Row():
1115
- custom_url_in = gr.Textbox(
1116
- value="", label="Custom base URL (optional)",
1117
- placeholder="e.g. http://localhost:8080/v1",
1118
- scale=2,
1119
- )
1120
- key_in = gr.Textbox(
1121
- value="", label="API key (optional)",
1122
- placeholder="Authorization: Bearer <key>",
1123
- type="password", scale=2,
1124
- )
1125
- with gr.Row():
1126
- tier_llm = gr.Dropdown(["T0", "T1", "T2"], value="T0",
1127
- label="Tier")
1128
- seed_llm = gr.Slider(0, 100, value=42, step=1, label="Seed")
1129
- temp_llm = gr.Slider(0, 1.5, value=0.7, step=0.05,
1130
- label="Temperature")
1131
- max_turns_llm = gr.Slider(3, 15, value=10, step=1,
1132
- label="Max turns")
1133
- run_btn = gr.Button("▶ Run episode", variant="primary")
1134
-
1135
- with gr.Row():
1136
- with gr.Column(scale=3):
 
 
 
 
 
 
 
 
 
 
 
 
 
1137
  transcript = gr.Markdown(
1138
- "*Click **Run episode** to drive the env with "
1139
- "the selected LLM. Transcript streams below.*",
1140
- label="Transcript",
1141
  )
1142
- with gr.Column(scale=2):
1143
- latest_obs = gr.JSON(label="Latest observation")
1144
- llm_reward_plot = gr.Plot(label="Reward breakdown (on episode end)")
 
 
1145
 
1146
  run_btn.click(
1147
  _llm_auto_run,
@@ -1150,38 +1328,51 @@ def build_ui(*args, **kwargs) -> gr.Blocks:
1150
  [transcript, latest_obs, llm_reward_plot],
1151
  )
1152
 
1153
- gr.Markdown("---\n### Manual stepping")
1154
- gr.Markdown(
1155
- "Drive the env one action at a time. Reset first, then "
1156
- "pick an action kind and hit **Step**. Observations "
1157
- "render as JSON — exactly what the HTTP `/step` endpoint returns."
1158
- )
1159
- with gr.Row():
1160
- tier4 = gr.Dropdown(["T0", "T1", "T2"], value="T0", label="Tier")
1161
- seed4 = gr.Slider(0, 100, value=42, step=1, label="Seed")
1162
- reset_btn = gr.Button("Reset", variant="primary")
1163
- status4 = gr.Markdown("*No active env — hit **reset** to begin.*")
1164
- obs4_reset = gr.JSON(label="Initial observation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  reset_btn.click(_api_reset, [tier4, seed4],
1166
  [obs4_reset, status4])
1167
-
1168
- gr.Markdown("---")
1169
- kind4 = gr.Radio(["run_baseline", "draft", "inspect", "commit"],
1170
- value="run_baseline", label="Action kind")
1171
- with gr.Accordion("run_baseline args", open=True):
1172
- bname4 = gr.Dropdown(["sgd", "momentum", "adam", "lbfgs"],
1173
- value="adam", label="Reference optimizer")
1174
- with gr.Accordion("draft args", open=False):
1175
- code4 = gr.Code(value=SAMPLE_OPTIMIZER, language="python",
1176
- label="Optimizer class", lines=10)
1177
- with gr.Accordion("inspect args", open=False):
1178
- didx4 = gr.Number(value=0, precision=0, label="draft_idx")
1179
- s4s = gr.Number(value=0, precision=0, label="step_range_start")
1180
- s4e = gr.Number(value=20, precision=0, label="step_range_end")
1181
-
1182
- step_btn = gr.Button("Step", variant="primary")
1183
- status4b = gr.Markdown()
1184
- obs4 = gr.JSON(label="Observation")
1185
  step_btn.click(
1186
  _api_step,
1187
  [kind4, bname4, code4, didx4, s4s, s4e],
@@ -1190,19 +1381,21 @@ def build_ui(*args, **kwargs) -> gr.Blocks:
1190
 
1191
  # --- Tab 1: Landscape ---
1192
  with gr.Tab("Landscape"):
1193
- gr.Markdown(
1194
- "Pick a landscape template and see what the agent sees "
1195
- "at reset — the contour plot plus env-computed structural "
1196
- "hints used to calibrate the optimizer."
1197
- )
1198
- with gr.Row():
1199
- with gr.Column(scale=1, min_width=260):
 
1200
  tmpl1 = gr.Dropdown(TEMPLATES_2D_SAFE,
1201
- value="rosenbrock", label="Template")
1202
  dim1 = gr.Slider(2, 10, value=2, step=1, label="Dim")
1203
  seed1 = gr.Slider(0, 100, value=0, step=1, label="Seed")
1204
- go1 = gr.Button("Build landscape", variant="primary")
1205
- with gr.Column(scale=2, min_width=520):
 
1206
  plot1 = gr.Plot(label="Contour")
1207
  hints1 = gr.Dataframe(
1208
  headers=["property", "value"],
@@ -1217,51 +1410,58 @@ def build_ui(*args, **kwargs) -> gr.Blocks:
1217
 
1218
  # --- Tab 2: Baseline Race ---
1219
  with gr.Tab("Baseline Race"):
1220
- gr.Markdown(
1221
- "Race SGD, Momentum, L-BFGS, and **Adam with per-landscape "
1222
- "LR tuning** from the same init. The tuned Adam is the "
1223
- "bar the trained OptCoder has to beat."
1224
- )
1225
- with gr.Row():
1226
- tmpl2 = gr.Dropdown(TEMPLATES_2D_SAFE,
1227
- value="rosenbrock", label="Template")
1228
- seed2 = gr.Slider(0, 100, value=1, step=1, label="Seed")
1229
- go2 = gr.Button("Race", variant="primary")
1230
-
1231
- plot2a = gr.Plot(label="Contour + trajectories")
1232
- with gr.Row():
1233
- plot2b = gr.Plot(label="f(x) vs step")
1234
- plot2c = gr.Plot(label="Final f after 50 steps")
1235
- summary2 = gr.Markdown()
 
 
 
1236
  go2.click(_baseline_race, [tmpl2, seed2],
1237
  [plot2a, plot2b, plot2c, summary2])
1238
 
1239
  # --- Tab 3: Optimizer Arena ---
1240
  with gr.Tab("Optimizer Arena"):
1241
- gr.Markdown(
1242
- "Paste or edit an `Optimizer` class. We run it through "
1243
- "the full Phase-D arena (10 seeds × 200 steps) against "
1244
- "tuned Adam on the chosen landscape and show the reward "
1245
- "breakdown. `np` is pre-injected no import lines."
1246
- )
1247
- with gr.Row():
1248
- with gr.Column(scale=1, min_width=260):
 
 
1249
  tmpl3 = gr.Dropdown(list(BUILDERS.keys()),
1250
- value="quadratic", label="Template")
1251
  dim3 = gr.Slider(2, 10, value=5, step=1, label="Dim")
1252
  seed3 = gr.Slider(0, 100, value=42, step=1, label="Seed")
1253
- go3 = gr.Button("Run arena", variant="primary")
1254
- with gr.Column(scale=2, min_width=520):
 
1255
  code3 = gr.Code(value=SAMPLE_OPTIMIZER,
1256
- language="python",
1257
- label="Optimizer class", lines=14)
1258
-
1259
- with gr.Row():
1260
- plot3a = gr.Plot(label="2-D trajectory (if dim = 2)")
1261
- plot3b = gr.Plot(label="Mean arena progress")
1262
- plot3c = gr.Plot(label="Reward breakdown")
1263
- summary3 = gr.Markdown()
1264
- breakdown3 = gr.JSON(label="Full reward dict")
 
1265
  go3.click(_arena_compare, [tmpl3, dim3, seed3, code3],
1266
  [plot3a, plot3b, plot3c, summary3, breakdown3])
1267
 
 
42
  # Mimics Anthropic's actual surface colors: warmer parchment background,
43
  # deep warm ink for text, Anthropic burnt-sienna as primary accent.
44
  CLAUDE_CSS = """
45
+ /* Variables — dark mode default, warm ink + sienna accent */
46
  :root {
47
+ --lf-bg: #1f1d1a; /* warm near-black page */
48
+ --lf-surface: #2a2824; /* card surface */
49
+ --lf-surface-alt: #332f2a; /* elevated surface (code, plots) */
50
+ --lf-border: #403b34; /* card edge */
51
+ --lf-border-soft: #332f2a; /* soft inner divider */
52
+ --lf-text: #f3f0e8; /* warm off-white */
53
+ --lf-text-muted: #b5ada0; /* muted body */
54
+ --lf-text-subtle: #857d72; /* labels, captions */
55
+ --lf-accent: #e28763; /* brighter sienna for dark bg */
56
+ --lf-accent-dk: #c96442; /* hover / pressed */
57
+ --lf-accent-soft: #4a2f22; /* accent-tinted dark for selected bg */
58
+ --lf-good: #7ab68c;
59
+ --lf-bad: #d47d6a;
 
 
 
 
 
 
 
 
 
 
 
60
  }
61
 
62
  /* Page */
 
70
  max-width: 1180px !important;
71
  margin: 0 auto !important;
72
  padding: 1.75rem 1.5rem 3rem !important;
73
+
74
+ /* Override Gradio's internal theme variables so every component
75
+ inherits the warm palette instead of Gradio's blue-on-white defaults */
76
+ --body-text-color: var(--lf-text) !important;
77
+ --body-text-color-subdued: var(--lf-text-muted) !important;
78
+ --body-background-fill: var(--lf-bg) !important;
79
+ --background-fill-primary: var(--lf-surface) !important;
80
+ --background-fill-secondary: var(--lf-bg) !important;
81
+ --border-color-primary: var(--lf-border) !important;
82
+ --border-color-accent: var(--lf-accent) !important;
83
+ --input-background-fill: var(--lf-surface) !important;
84
+ --input-border-color: var(--lf-border) !important;
85
+ --input-text-color: var(--lf-text) !important;
86
+ --input-placeholder-color: var(--lf-text-subtle) !important;
87
+ --block-background-fill: var(--lf-surface) !important;
88
+ --block-border-color: var(--lf-border-soft) !important;
89
+ --block-label-background-fill: transparent !important;
90
+ --block-label-text-color: var(--lf-text) !important;
91
+ --block-title-text-color: var(--lf-text) !important;
92
+ --block-info-text-color: var(--lf-text-muted) !important;
93
+ --neutral-50: var(--lf-surface) !important;
94
+ --neutral-100: var(--lf-bg) !important;
95
+ --neutral-200: var(--lf-border-soft) !important;
96
+ --neutral-300: var(--lf-border) !important;
97
+ --neutral-400: var(--lf-text-subtle) !important;
98
+ --neutral-500: var(--lf-text-muted) !important;
99
+ --neutral-600: var(--lf-text-muted) !important;
100
+ --neutral-700: var(--lf-text) !important;
101
+ --neutral-800: var(--lf-text) !important;
102
+ --neutral-900: var(--lf-text) !important;
103
+ --color-accent: var(--lf-accent) !important;
104
+ --color-accent-soft: var(--lf-accent-soft) !important;
105
+ --link-text-color: var(--lf-accent) !important;
106
+ --link-text-color-hover: var(--lf-accent-dk) !important;
107
+ --button-primary-background-fill: var(--lf-accent) !important;
108
+ --button-primary-background-fill-hover: var(--lf-accent-dk) !important;
109
+ --button-primary-text-color: #ffffff !important;
110
+ --button-primary-border-color: var(--lf-accent) !important;
111
  }
112
 
113
  /* Typography — serif for headings to match Claude's Tiempos-style hero */
 
314
  font-size: 0.9rem !important;
315
  }
316
 
317
+ /* JSON renderer — force warm ink for every node + muted for keys */
318
+ .gradio-container .json-holder,
319
+ .gradio-container .json-container,
320
+ .gradio-container .json-node {
321
  background: var(--lf-surface) !important;
322
  border: 1px solid var(--lf-border-soft) !important;
323
  border-radius: 8px !important;
324
  padding: 0.9rem !important;
325
+ color: var(--lf-text) !important;
326
+ font-family: "JetBrains Mono", ui-monospace, Menlo, monospace !important;
327
+ font-size: 0.82rem !important;
328
+ }
329
+ .gradio-container .json-holder *,
330
+ .gradio-container .json-container * {
331
+ color: var(--lf-text) !important;
332
+ }
333
+ .gradio-container .json-holder .key,
334
+ .gradio-container .json-container .key {
335
+ color: var(--lf-accent-dk) !important;
336
+ font-weight: 600 !important;
337
+ }
338
+ .gradio-container .json-holder .string-value {
339
+ color: #3d6b4c !important;
340
+ }
341
+ .gradio-container .json-holder .number-value {
342
+ color: #874123 !important;
343
+ }
344
+
345
+ /* Dropdown option list (open state) — Gradio defaults to white-on-white */
346
+ .gradio-container .options,
347
+ .gradio-container .options .item,
348
+ .gradio-container [role="listbox"],
349
+ .gradio-container [role="option"] {
350
+ background: var(--lf-surface) !important;
351
+ color: var(--lf-text) !important;
352
+ border-color: var(--lf-border) !important;
353
+ }
354
+ .gradio-container [role="option"]:hover,
355
+ .gradio-container .options .item:hover {
356
+ background: var(--lf-accent-soft) !important;
357
+ color: var(--lf-text) !important;
358
+ }
359
+ .gradio-container [role="option"][aria-selected="true"] {
360
+ background: var(--lf-accent) !important;
361
+ color: #ffffff !important;
362
+ }
363
+
364
+ /* Markdown rendered inside blocks */
365
+ .gradio-container .prose,
366
+ .gradio-container .markdown,
367
+ .gradio-container [data-testid="markdown"] {
368
+ color: var(--lf-text) !important;
369
+ }
370
+ .gradio-container .prose p,
371
+ .gradio-container .markdown p,
372
+ .gradio-container [data-testid="markdown"] p {
373
+ color: var(--lf-text-muted) !important;
374
+ }
375
+ .gradio-container .prose strong,
376
+ .gradio-container .markdown strong {
377
+ color: var(--lf-text) !important;
378
+ }
379
+ .gradio-container .prose a,
380
+ .gradio-container .markdown a {
381
+ color: var(--lf-accent) !important;
382
+ text-decoration: underline;
383
+ text-underline-offset: 2px;
384
+ }
385
+ .gradio-container .prose code,
386
+ .gradio-container .markdown code {
387
+ background: var(--lf-bg) !important;
388
+ color: var(--lf-accent-dk) !important;
389
+ padding: 0.12em 0.4em !important;
390
+ border-radius: 4px !important;
391
+ font-size: 0.84em !important;
392
+ }
393
+
394
+ /* Inline label / info text under inputs */
395
+ .gradio-container .block-info,
396
+ .gradio-container .info {
397
+ color: var(--lf-text-muted) !important;
398
+ font-size: 0.82rem !important;
399
+ }
400
+
401
+ /* Slider track+value labels */
402
+ .gradio-container .svelte-range-slider,
403
+ .gradio-container .min-val,
404
+ .gradio-container .max-val,
405
+ .gradio-container .value {
406
+ color: var(--lf-text) !important;
407
+ }
408
+ .gradio-container .value-text {
409
+ color: var(--lf-accent-dk) !important;
410
+ font-weight: 600 !important;
411
+ }
412
+
413
+ /* Radio buttons — labels should be visible */
414
+ .gradio-container .wrap label,
415
+ .gradio-container [role="radio"] + label {
416
+ color: var(--lf-text) !important;
417
+ }
418
+
419
+ /* Status badges inside obs.done etc */
420
+ .gradio-container .status-text {
421
+ color: var(--lf-text) !important;
422
  }
423
 
424
  /* Accordion headers */
 
439
  border-radius: 5px;
440
  }
441
  .gradio-container ::-webkit-scrollbar-thumb:hover { background: var(--lf-text-subtle); }
442
+
443
+ /* Sidebar column — subtle card treatment, sticky-feeling */
444
+ .gradio-container .lf-sidebar {
445
+ background: var(--lf-surface) !important;
446
+ border: 1px solid var(--lf-border) !important;
447
+ border-radius: 12px !important;
448
+ padding: 1.25rem 1.25rem 1.1rem !important;
449
+ box-shadow: 0 1px 0 rgba(20,20,19,0.02);
450
+ }
451
+ .gradio-container .lf-sidebar h3 {
452
+ margin-top: 0.15rem !important;
453
+ margin-bottom: 0.3rem !important;
454
+ }
455
+ .gradio-container .lf-sidebar p {
456
+ font-size: 0.9rem !important;
457
+ margin-bottom: 0.85rem !important;
458
+ }
459
+ .gradio-container .lf-sidebar .block {
460
+ /* Sidebars nest "blocks"; flatten them so the card feel is the outer one */
461
+ background: transparent !important;
462
+ border: none !important;
463
+ padding: 0.35rem 0 !important;
464
+ border-radius: 0 !important;
465
+ }
466
+ .gradio-container .lf-sidebar button {
467
+ width: 100% !important;
468
+ }
469
+
470
+ /* Main pane plots+outputs — a bit more breathing room */
471
+ .gradio-container .gr-plot, .gradio-container .plot-wrap {
472
+ background: var(--lf-surface-alt) !important;
473
+ border-radius: 10px !important;
474
+ }
475
  """
476
 
477
+ # Plotly layout template — matches dark Claude palette.
 
 
478
  _PLOTLY_LAYOUT = dict(
479
  font=dict(family="Inter, -apple-system, system-ui, sans-serif",
480
+ color="#f3f0e8", size=12),
481
+ paper_bgcolor="#2a2824", # card surface
482
+ plot_bgcolor="#1f1d1a", # page background, slightly darker
483
  margin=dict(l=60, r=30, t=60, b=55),
484
+ hoverlabel=dict(bgcolor="#f3f0e8", font_color="#1f1d1a",
485
+ font_family="Inter", bordercolor="#e28763"),
486
+ legend=dict(bgcolor="rgba(31,29,26,0.85)",
487
+ bordercolor="#403b34", borderwidth=1,
488
+ font=dict(color="#f3f0e8")),
489
  )
490
+ _AXIS_STYLE = dict(gridcolor="#403b34", zerolinecolor="#554e45",
491
+ showline=True, linecolor="#554e45",
492
+ tickfont=dict(color="#b5ada0"))
493
  _TITLE_STYLE = dict(x=0.02, xanchor="left",
494
+ font=dict(size=14, color="#f3f0e8", weight=500))
495
 
496
  OPT_COLORS = {
497
  "sgd": "#c05450",
 
555
  fig.add_trace(go.Contour(
556
  x=xs, y=ys, z=Z,
557
  zmin=float(lo), zmax=float(hi),
558
+ # Dark-mode colorscale: deep warm valleys → glowing sienna peaks
559
  colorscale=[
560
+ [0.0, "#1f1d1a"], [0.15, "#2f2a22"], [0.3, "#4a2f22"],
561
+ [0.5, "#7a4229"], [0.7, "#c25a3a"], [0.85, "#e28763"],
562
+ [1.0, "#f4d6c5"],
563
  ],
564
  contours=dict(coloring="heatmap", showlabels=False),
565
+ line=dict(width=0.5, color="rgba(243,240,232,0.12)"),
566
+ colorbar=dict(title=dict(text="f(x)",
567
+ font=dict(size=11, color="#f3f0e8")),
568
  thickness=12, len=0.85,
569
+ tickfont=dict(size=10, color="#b5ada0"),
570
+ outlinewidth=0),
571
  hovertemplate="x₁=%{x:.3f}<br>x₂=%{y:.3f}<br>f=%{z:.3f}<extra></extra>",
572
  ))
573
 
 
1255
  gr.HTML(HERO_HTML)
1256
 
1257
  with gr.Tabs():
1258
+ # --- Tab 0: OpenEnv (primary — LLM auto-run + manual stepping) ---
1259
  with gr.Tab("OpenEnv"):
1260
+ with gr.Row(equal_height=False):
1261
+ # -------- SIDEBAR (left) --------
1262
+ with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"):
1263
+ gr.Markdown("### Connect an LLM")
1264
+ gr.Markdown(
1265
+ "Point at any OpenAI-compatible "
1266
+ "`/v1/chat/completions` endpoint. Hit "
1267
+ "**▶ Run episode** and the model drives one "
1268
+ "full run of the env."
1269
+ )
1270
+
1271
+ ep_choice = gr.Dropdown(
1272
+ list(PRESET_ENDPOINTS.keys()),
1273
+ value="Ollama (localhost:11434)",
1274
+ label="Endpoint",
1275
+ )
1276
+ model_name_in = gr.Dropdown(
1277
+ PRESET_MODELS, value="qwen2.5:3b",
1278
+ label="Model", allow_custom_value=True,
1279
+ )
1280
+ custom_url_in = gr.Textbox(
1281
+ value="", label="Custom base URL",
1282
+ placeholder="http://localhost:8080/v1",
1283
+ )
1284
+ key_in = gr.Textbox(
1285
+ value="", label="API key",
1286
+ placeholder="Bearer <key>",
1287
+ type="password",
1288
+ )
1289
+
1290
+ gr.Markdown("---")
1291
+ gr.Markdown("### Episode config")
1292
+
1293
+ tier_llm = gr.Dropdown(["T0", "T1", "T2"], value="T0",
1294
+ label="Tier")
1295
+ seed_llm = gr.Slider(0, 100, value=42, step=1, label="Seed")
1296
+ temp_llm = gr.Slider(0, 1.5, value=0.7, step=0.05,
1297
+ label="Temperature")
1298
+ max_turns_llm = gr.Slider(3, 15, value=10, step=1,
1299
+ label="Max turns")
1300
+
1301
+ run_btn = gr.Button("▶ Run episode", variant="primary",
1302
+ size="lg")
1303
+
1304
+ gr.Markdown(
1305
+ "<br><small>Also available: manual stepping "
1306
+ "below. Reset first, then pick an action kind "
1307
+ "and step.</small>"
1308
+ )
1309
+
1310
+ # -------- MAIN PANE (right) --------
1311
+ with gr.Column(scale=2, min_width=580):
1312
+ gr.Markdown("### Transcript")
1313
  transcript = gr.Markdown(
1314
+ "*Configure the LLM on the left and hit "
1315
+ "**▶ Run episode** each turn streams here "
1316
+ "as the model plays.*",
1317
  )
1318
+ with gr.Row():
1319
+ latest_obs = gr.JSON(label="Latest observation",
1320
+ height=320)
1321
+ llm_reward_plot = gr.Plot(
1322
+ label="Reward breakdown (on episode end)")
1323
 
1324
  run_btn.click(
1325
  _llm_auto_run,
 
1328
  [transcript, latest_obs, llm_reward_plot],
1329
  )
1330
 
1331
+ # -------- Manual stepping section --------
1332
+ gr.Markdown("---")
1333
+ gr.Markdown("### Manual stepping")
1334
+ with gr.Row(equal_height=False):
1335
+ with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"):
1336
+ gr.Markdown(
1337
+ "Drive the env one action at a time. "
1338
+ "Reset first, then pick an action kind and step."
1339
+ )
1340
+ tier4 = gr.Dropdown(["T0", "T1", "T2"], value="T0",
1341
+ label="Tier")
1342
+ seed4 = gr.Slider(0, 100, value=42, step=1, label="Seed")
1343
+ reset_btn = gr.Button("Reset env", variant="primary")
1344
+
1345
+ gr.Markdown("---")
1346
+ kind4 = gr.Radio(
1347
+ ["run_baseline", "draft", "inspect", "commit"],
1348
+ value="run_baseline", label="Action kind")
1349
+ with gr.Accordion("run_baseline args", open=True):
1350
+ bname4 = gr.Dropdown(
1351
+ ["sgd", "momentum", "adam", "lbfgs"],
1352
+ value="adam", label="Reference optimizer")
1353
+ with gr.Accordion("draft args", open=False):
1354
+ code4 = gr.Code(value=SAMPLE_OPTIMIZER,
1355
+ language="python",
1356
+ label="Optimizer class", lines=10)
1357
+ with gr.Accordion("inspect args", open=False):
1358
+ didx4 = gr.Number(value=0, precision=0,
1359
+ label="draft_idx")
1360
+ s4s = gr.Number(value=0, precision=0,
1361
+ label="step_range_start")
1362
+ s4e = gr.Number(value=20, precision=0,
1363
+ label="step_range_end")
1364
+ step_btn = gr.Button("Step", variant="primary")
1365
+
1366
+ with gr.Column(scale=2, min_width=580):
1367
+ status4 = gr.Markdown(
1368
+ "*No active env — hit **Reset env** to begin.*")
1369
+ obs4_reset = gr.JSON(label="Initial observation",
1370
+ height=280)
1371
+ status4b = gr.Markdown()
1372
+ obs4 = gr.JSON(label="Step observation", height=320)
1373
+
1374
  reset_btn.click(_api_reset, [tier4, seed4],
1375
  [obs4_reset, status4])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1376
  step_btn.click(
1377
  _api_step,
1378
  [kind4, bname4, code4, didx4, s4s, s4e],
 
1381
 
1382
  # --- Tab 1: Landscape ---
1383
  with gr.Tab("Landscape"):
1384
+ with gr.Row(equal_height=False):
1385
+ with gr.Column(scale=1, min_width=320, elem_classes="lf-sidebar"):
1386
+ gr.Markdown("### Landscape Explorer")
1387
+ gr.Markdown(
1388
+ "Pick a template and see what the agent sees "
1389
+ "at reset — the 2-D contour plus env-computed "
1390
+ "structural hints used to calibrate the optimizer."
1391
+ )
1392
  tmpl1 = gr.Dropdown(TEMPLATES_2D_SAFE,
1393
+ value="rosenbrock", label="Template")
1394
  dim1 = gr.Slider(2, 10, value=2, step=1, label="Dim")
1395
  seed1 = gr.Slider(0, 100, value=0, step=1, label="Seed")
1396
+ go1 = gr.Button("Build landscape", variant="primary",
1397
+ size="lg")
1398
+ with gr.Column(scale=2, min_width=580):
1399
  plot1 = gr.Plot(label="Contour")
1400
  hints1 = gr.Dataframe(
1401
  headers=["property", "value"],
 
1410
 
1411
  # --- Tab 2: Baseline Race ---
1412
  with gr.Tab("Baseline Race"):
1413
+ with gr.Row(equal_height=False):
1414
+ with gr.Column(scale=1, min_width=320, elem_classes="lf-sidebar"):
1415
+ gr.Markdown("### Baseline Race")
1416
+ gr.Markdown(
1417
+ "Race SGD, Momentum, L-BFGS, and **Adam with "
1418
+ "per-landscape LR tuning** from the same init. "
1419
+ "The tuned Adam is the bar the trained OptCoder "
1420
+ "has to beat."
1421
+ )
1422
+ tmpl2 = gr.Dropdown(TEMPLATES_2D_SAFE,
1423
+ value="rosenbrock", label="Template")
1424
+ seed2 = gr.Slider(0, 100, value=1, step=1, label="Seed")
1425
+ go2 = gr.Button("Race", variant="primary", size="lg")
1426
+ with gr.Column(scale=2, min_width=580):
1427
+ plot2a = gr.Plot(label="Contour + trajectories")
1428
+ with gr.Row():
1429
+ plot2b = gr.Plot(label="f(x) vs step")
1430
+ plot2c = gr.Plot(label="Final f after 50 steps")
1431
+ summary2 = gr.Markdown()
1432
  go2.click(_baseline_race, [tmpl2, seed2],
1433
  [plot2a, plot2b, plot2c, summary2])
1434
 
1435
  # --- Tab 3: Optimizer Arena ---
1436
  with gr.Tab("Optimizer Arena"):
1437
+ with gr.Row(equal_height=False):
1438
+ with gr.Column(scale=1, min_width=340, elem_classes="lf-sidebar"):
1439
+ gr.Markdown("### Optimizer Arena")
1440
+ gr.Markdown(
1441
+ "Paste or edit an `Optimizer` class. We run it "
1442
+ "through the full Phase-D arena (10 seeds × 200 "
1443
+ "steps) against tuned Adam and show the reward "
1444
+ "breakdown.<br><small>`np` is pre-injected — "
1445
+ "do not write import lines.</small>"
1446
+ )
1447
  tmpl3 = gr.Dropdown(list(BUILDERS.keys()),
1448
+ value="quadratic", label="Template")
1449
  dim3 = gr.Slider(2, 10, value=5, step=1, label="Dim")
1450
  seed3 = gr.Slider(0, 100, value=42, step=1, label="Seed")
1451
+ go3 = gr.Button("Run arena", variant="primary",
1452
+ size="lg")
1453
+ with gr.Column(scale=2, min_width=580):
1454
  code3 = gr.Code(value=SAMPLE_OPTIMIZER,
1455
+ language="python",
1456
+ label="Your Optimizer class",
1457
+ lines=14)
1458
+ with gr.Row():
1459
+ plot3a = gr.Plot(label="2-D trajectory (if dim = 2)")
1460
+ plot3b = gr.Plot(label="Mean arena progress")
1461
+ plot3c = gr.Plot(label="Reward breakdown")
1462
+ summary3 = gr.Markdown()
1463
+ breakdown3 = gr.JSON(label="Full reward dict",
1464
+ height=220)
1465
  go3.click(_arena_compare, [tmpl3, dim3, seed3, code3],
1466
  [plot3a, plot3b, plot3c, summary3, breakdown3])
1467