"""Observation → prompt rendering + LLM response → action parsing. Keeps prompt format aligned with Appendix A of LANDSCAPEFORGE_DESIGN.md while trimming obs fields that bloat tokens (e.g. full trajectories get summarised). """ from __future__ import annotations import json import re from typing import Any try: from .models import LandscapeforgeAction, LandscapeforgeObservation except ImportError: # flat layout (HF Space container) from models import LandscapeforgeAction, LandscapeforgeObservation # type: ignore SYSTEM = """You are OptCoder. You will design an optimization algorithm for a hidden landscape f: R^n → R by iteratively: running reference optimizers to observe their behaviour, writing candidate `Optimizer` classes and seeing how they perform, inspecting past drafts to diagnose failures, and committing when you are satisfied. How the episode ends: - When you call `commit`, the env runs the full arena evaluation (10 seeds × 200 steps) on your MOST RECENT draft and that becomes your reward. This is the normal, preferred way to finish. - If you never call `commit`, when your budget runs out the env will automatically do the same thing — evaluate your most recent draft. Your last draft is always what gets evaluated, whether you commit explicitly or the budget runs out. - So: make sure your last draft is the one you actually want evaluated. If you improve a draft then change your mind, re-submit the good one before ending the episode. A typical good episode is ~4 turns: draft → (maybe) inspect → (maybe) refine → commit. Reply with a single JSON object — nothing else, no prose, no markdown. JSON formatting rules (important, models frequently get this wrong): - All strings use standard JSON double-quotes: "like this" - Do NOT use Python triple-quoted strings \"\"\"...\"\"\" — they are NOT valid JSON - For multi-line code, escape newlines as \\n inside the string value: {"kind": "draft", "code": "class Optimizer:\\n def __init__(self, dim): ..."} """.strip() ACTION_SPEC = """ Available actions (cost charged against your budget): run_baseline (cost 2) Run a reference optimizer on the hidden landscape. JSON: {"kind": "run_baseline", "baseline_name": "sgd"|"momentum"|"adam"|"lbfgs"} Returns a 30-step trajectory (x_t, f_t, grad_norm_t). Source code not revealed. draft (cost 2) Submit a full Optimizer class; env auto-tests it. JSON: {"kind": "draft", "code": ""} The code MUST be a standalone class with no base class: class Optimizer: def __init__(self, dim): ... def step(self, x, f_val, grad): ... return x_new Rules: - Top-level line must be exactly: class Optimizer: (no parent class — BaseOptimizer, nn.Module, object, etc. do NOT exist) - Use only numpy as `np` and math — both pre-injected; DO NOT write import lines - step(x, f_val, grad) must return a numpy array of shape (dim,) - No I/O, no globals, no file operations - Only the class definition is kept; demo code at module level is stripped inspect (cost 1) Zoom into a prior draft's per-step behaviour. JSON: {"kind": "inspect", "draft_idx": 0, "step_range_start": 10, "step_range_end": 20} Returns per-step (x, f, grad, update_norm, step_size_eff). commit (cost 0) Evaluate your most recent draft on the full arena. JSON: {"kind": "commit"} Preferred way to end the episode. Call it when you have a draft you trust. If you don't call it, budget exhaustion triggers the same evaluation on whatever your latest draft is — so your most recent draft should always be your best one. Committing explicitly just ends the episode sooner. """.strip() def render_observation(obs: LandscapeforgeObservation) -> str: """Turn an Observation into a compact prompt-friendly state summary.""" lines: list[str] = [] lines.append(f"Landscape: {obs.landscape_description}") lines.append(f"Dim: {obs.dim}") lines.append(f"Structural hints:") for k, v in (obs.structural_hints or {}).items(): lines.append(f" {k}: {_fmt(v)}") lines.append(f"Budget remaining: {obs.budget_remaining}") if obs.baseline_history: lines.append("\nBaseline runs (diagnostic trajectories):") for i, b in enumerate(obs.baseline_history): summary = _summarise_trajectory(b.get("trajectory", [])) lines.append(f" [{i}] {b['name']}: {summary}") if obs.draft_history: lines.append("\nDraft history:") for i, d in enumerate(obs.draft_history): if d.get("compile_error"): lines.append(f" [{i}] COMPILE ERROR: {d['compile_error']}") else: s = d["summary"] or {} status = "CONVERGED" if s.get("converged") else ( "DIVERGED" if s.get("diverged") else "partial" ) lines.append( f" [{i}] {status} | initial_f={_fmt(s.get('initial_f'))} " f"final_f={_fmt(s.get('final_f'))} " f"step_of_min={s.get('step_of_min')}" ) code = d.get("code") or "" lines.append(" code:") for cl in code.splitlines()[:40]: # first 40 lines only lines.append(f" {cl}") if obs.inspect_requests: lines.append("\nInspect results:") for r in obs.inspect_requests: detail = r.get("detail") or [] lines.append( f" draft={r.get('draft_idx')} range={r.get('step_range')} " f"({len(detail)} steps)" ) for d in detail[:8]: # first 8 of the slice lines.append( f" t={d.get('t'):>3} f={_fmt(d.get('f'))} " f"|g|={_fmt(d.get('grad_norm'))} " f"|Δx|={_fmt(d.get('update_norm'))} " f"η_eff={_fmt(d.get('step_size_eff'))}" ) if obs.current_draft: lines.append(f"\nCurrent draft ({len(obs.current_draft)} chars) — will be evaluated on commit.") if obs.last_action_kind: lines.append(f"\nLast action: {obs.last_action_kind}") feedback = (obs.last_action_result or {}).get("feedback") if feedback: parts = ", ".join(f"{k}={_fmt(v)}" for k, v in feedback.items()) lines.append(f"Step feedback: {parts} " "(signals for your reasoning; not added to final reward)") return "\n".join(lines) def build_prompt(obs: LandscapeforgeObservation) -> list[dict]: """Return OpenAI-style messages list for the chat completions endpoint.""" state_text = render_observation(obs) return [ {"role": "system", "content": SYSTEM}, {"role": "user", "content": f"{ACTION_SPEC}\n\nCurrent state:\n{state_text}\n\n" "Reply with a single JSON object for your next action."}, ] # ---------- response → action ---------- _JSON_RE = re.compile(r"\{.*\}", re.DOTALL) def parse_action(response_text: str) -> LandscapeforgeAction: """Extract the first JSON object from the LLM's reply and build an Action. Accepts code-fenced JSON, raw JSON, and JSON embedded in prose. Tolerates the common LLM failure mode of emitting unescaped newlines / tabs inside string values (especially for the `code` field of a `draft` action). Raises ValueError if no parseable object is found. """ text = response_text.strip() if text.startswith("```"): text = re.sub(r"^```(?:json)?\n?", "", text) text = re.sub(r"\n?```\s*$", "", text) match = _JSON_RE.search(text) if not match: raise ValueError(f"No JSON object in response: {response_text[:200]!r}") raw_json = match.group(0) # First pass: strict. try: data = json.loads(raw_json) except json.JSONDecodeError: # Second pass: escape raw control chars inside string literals. fixed = _escape_string_controls(raw_json) try: data = json.loads(fixed) except json.JSONDecodeError as e: raise ValueError(f"Invalid JSON even after control-char fix: {e}; " f"raw: {raw_json[:200]!r}") from e if "kind" not in data: raise ValueError(f"Missing `kind`: {data}") return LandscapeforgeAction(**data) def _escape_string_controls(s: str) -> str: """Escape raw newlines, carriage returns, and tabs inside JSON string literals. Walks character-by-character tracking whether we're inside a double-quoted string, and replaces raw control chars with their escaped forms. Handles the common case: `"code": "class Optimizer:\\n def __init__..."` where the LLM emitted literal newlines. """ out: list[str] = [] in_string = False escape_next = False for ch in s: if escape_next: out.append(ch) escape_next = False continue if ch == "\\": out.append(ch) escape_next = True continue if ch == '"': in_string = not in_string out.append(ch) continue if in_string: if ch == "\n": out.append("\\n"); continue if ch == "\r": out.append("\\r"); continue if ch == "\t": out.append("\\t"); continue out.append(ch) return "".join(out) # ---------- helpers ---------- def _fmt(v: Any) -> str: if v is None: return "None" if isinstance(v, float): if abs(v) < 1e-4 or abs(v) >= 1e4: return f"{v:.3e}" return f"{v:.4f}" if isinstance(v, list): if len(v) <= 4: return "[" + ", ".join(_fmt(x) for x in v) + "]" return f"[{_fmt(v[0])}, {_fmt(v[1])}, ..., {_fmt(v[-1])}] (len={len(v)})" return str(v) def _summarise_trajectory(traj: list[dict]) -> str: """Condense a 30-step baseline trajectory to head/tail snapshots.""" finite = [s for s in traj if s.get("f") is not None] if not finite: return "diverged immediately" head = finite[0] mid = finite[len(finite) // 2] if len(finite) > 2 else finite[-1] tail = finite[-1] diverged_mark = " (DIVERGED)" if len(finite) < len(traj) else "" return (f"t=0: f={_fmt(head['f'])}, |g|={_fmt(head['grad_norm'])} " f"→ t={mid['t']}: f={_fmt(mid['f'])} " f"→ t={tail['t']}: f={_fmt(tail['f'])}{diverged_mark}")