Spaces:
Sleeping
Sleeping
"""Observation → prompt rendering + LLM response → action parsing.
Keeps prompt format aligned with Appendix A of LANDSCAPEFORGE_DESIGN.md while
trimming obs fields that bloat tokens (e.g. full trajectories get summarised).
"""
| from __future__ import annotations | |
| import json | |
| import re | |
| from typing import Any | |
| try: | |
| from .models import LandscapeforgeAction, LandscapeforgeObservation | |
| except ImportError: # flat layout (HF Space container) | |
| from models import LandscapeforgeAction, LandscapeforgeObservation # type: ignore | |
# System message sent on every turn: the agent's role, how an episode ends
# (explicit `commit` or budget exhaustion, both evaluating the latest draft),
# and the strict JSON-only reply format. Surrounding whitespace is stripped.
# Inside this (non-raw) literal, `\\n` renders as a literal backslash-n and
# `\"\"\"` renders as three double quotes in the text the model sees.
SYSTEM = """You are OptCoder. You will design an optimization algorithm for a
hidden landscape f: R^n → R by iteratively: running reference optimizers to
observe their behaviour, writing candidate `Optimizer` classes and seeing how
they perform, inspecting past drafts to diagnose failures, and committing when
you are satisfied.
How the episode ends:
- When you call `commit`, the env runs the full arena evaluation
(10 seeds × 200 steps) on your MOST RECENT draft and that becomes your
reward. This is the normal, preferred way to finish.
- If you never call `commit`, when your budget runs out the env will
automatically do the same thing — evaluate your most recent draft.
Your last draft is always what gets evaluated, whether you commit
explicitly or the budget runs out.
- So: make sure your last draft is the one you actually want evaluated.
If you improve a draft then change your mind, re-submit the good one
before ending the episode.
A typical good episode is ~4 turns:
draft → (maybe) inspect → (maybe) refine → commit.
Reply with a single JSON object — nothing else, no prose, no markdown.
JSON formatting rules (important, models frequently get this wrong):
- All strings use standard JSON double-quotes: "like this"
- Do NOT use Python triple-quoted strings \"\"\"...\"\"\" — they are NOT valid JSON
- For multi-line code, escape newlines as \\n inside the string value:
{"kind": "draft", "code": "class Optimizer:\\n def __init__(self, dim): ..."}
""".strip()
# Action menu placed at the top of every user message: one entry per action
# kind with its budget cost, the exact JSON shape to emit, and what comes
# back. The embedded class skeleton is indented so it is valid Python as
# shown to the model. Surrounding whitespace is stripped.
ACTION_SPEC = """
Available actions (cost charged against your budget):
run_baseline (cost 2) Run a reference optimizer on the hidden landscape.
JSON: {"kind": "run_baseline", "baseline_name": "sgd"|"momentum"|"adam"|"lbfgs"}
Returns a 30-step trajectory (x_t, f_t, grad_norm_t). Source code not revealed.
draft (cost 2) Submit a full Optimizer class; env auto-tests it.
JSON: {"kind": "draft", "code": "<python source>"}
The code MUST be a standalone class with no base class:
class Optimizer:
    def __init__(self, dim):
        ...
    def step(self, x, f_val, grad):
        ...
        return x_new
Rules:
- Top-level line must be exactly: class Optimizer:
(no parent class — BaseOptimizer, nn.Module, object, etc. do NOT exist)
- Use only numpy as `np` and math — both pre-injected; DO NOT write import lines
- step(x, f_val, grad) must return a numpy array of shape (dim,)
- No I/O, no globals, no file operations
- Only the class definition is kept; demo code at module level is stripped
inspect (cost 1) Zoom into a prior draft's per-step behaviour.
JSON: {"kind": "inspect", "draft_idx": 0, "step_range_start": 10, "step_range_end": 20}
Returns per-step (x, f, grad, update_norm, step_size_eff).
commit (cost 0) Evaluate your most recent draft on the full arena.
JSON: {"kind": "commit"}
Preferred way to end the episode. Call it when you have a draft you
trust. If you don't call it, budget exhaustion triggers the same
evaluation on whatever your latest draft is — so your most recent
draft should always be your best one. Committing explicitly just
ends the episode sooner.
""".strip()
def render_observation(obs: LandscapeforgeObservation) -> str:
    """Turn an Observation into a compact, prompt-friendly state summary.

    Sections are emitted in a fixed order: header (landscape description,
    dim, structural hints, remaining budget), baseline runs, draft history,
    inspect results, current draft, and last action with its step feedback.
    Each optional section is skipped when the corresponding field is falsy.

    Args:
        obs: Observation to render. Only attribute access is used; history
            fields are read as lists of dicts via the keys visible below.

    Returns:
        The summary as one newline-joined string.

    Raises:
        KeyError: If a baseline history entry has no ``"name"`` key.
    """
    lines: list[str] = [
        f"Landscape: {obs.landscape_description}",
        f"Dim: {obs.dim}",
        "Structural hints:",
    ]
    lines.extend(
        f" {k}: {_fmt(v)}" for k, v in (obs.structural_hints or {}).items()
    )
    lines.append(f"Budget remaining: {obs.budget_remaining}")

    if obs.baseline_history:
        lines.append("\nBaseline runs (diagnostic trajectories):")
        for i, b in enumerate(obs.baseline_history):
            # Full trajectories are condensed to keep the prompt small.
            summary = _summarise_trajectory(b.get("trajectory", []))
            lines.append(f" [{i}] {b['name']}: {summary}")

    if obs.draft_history:
        lines.append("\nDraft history:")
        for i, d in enumerate(obs.draft_history):
            if d.get("compile_error"):
                lines.append(f" [{i}] COMPILE ERROR: {d['compile_error']}")
            else:
                # A draft without a summary is reported as "partial" rather
                # than aborting the whole render.
                s = d.get("summary") or {}
                status = "CONVERGED" if s.get("converged") else (
                    "DIVERGED" if s.get("diverged") else "partial"
                )
                lines.append(
                    f" [{i}] {status} | initial_f={_fmt(s.get('initial_f'))} "
                    f"final_f={_fmt(s.get('final_f'))} "
                    f"step_of_min={s.get('step_of_min')}"
                )
            # NOTE(review): the source is listed for every draft, including
            # ones that failed to compile — confirm this is the intended scope.
            code = d.get("code") or ""
            lines.append(" code:")
            for cl in code.splitlines()[:40]:  # first 40 lines only
                lines.append(f" {cl}")

    if obs.inspect_requests:
        lines.append("\nInspect results:")
        for r in obs.inspect_requests:
            detail = r.get("detail") or []
            lines.append(
                f" draft={r.get('draft_idx')} range={r.get('step_range')} "
                f"({len(detail)} steps)"
            )
            for d in detail[:8]:  # first 8 of the slice
                # str() before padding: None does not accept the `>3` format
                # spec, so a step without `t` would otherwise raise TypeError.
                lines.append(
                    f" t={str(d.get('t')):>3} f={_fmt(d.get('f'))} "
                    f"|g|={_fmt(d.get('grad_norm'))} "
                    f"|Δx|={_fmt(d.get('update_norm'))} "
                    f"η_eff={_fmt(d.get('step_size_eff'))}"
                )

    if obs.current_draft:
        lines.append(
            f"\nCurrent draft ({len(obs.current_draft)} chars) "
            "— will be evaluated on commit."
        )

    if obs.last_action_kind:
        lines.append(f"\nLast action: {obs.last_action_kind}")
        # NOTE(review): feedback is only reported alongside a last action —
        # confirm it can never be present without `last_action_kind`.
        feedback = (obs.last_action_result or {}).get("feedback")
        if feedback:
            parts = ", ".join(f"{k}={_fmt(v)}" for k, v in feedback.items())
            lines.append(f"Step feedback: {parts} "
                         "(signals for your reasoning; not added to final reward)")

    return "\n".join(lines)
def build_prompt(obs: LandscapeforgeObservation) -> list[dict]:
    """Assemble the OpenAI-style ``messages`` list for one chat-completions call.

    The system message carries ``SYSTEM``. The user message is the action
    spec, then the rendered state of ``obs``, then a one-line reminder to
    answer with a single JSON object.
    """
    user_content = (
        f"{ACTION_SPEC}\n\nCurrent state:\n{render_observation(obs)}\n\n"
        "Reply with a single JSON object for your next action."
    )
    system_message = {"role": "system", "content": SYSTEM}
    user_message = {"role": "user", "content": user_content}
    return [system_message, user_message]
# ---------- response → action ----------

# Greedy on purpose: spans from the first "{" to the last "}" so braces that
# appear inside string values (e.g. Python source in a draft) stay in the span.
_JSON_RE = re.compile(r"\{.*\}", re.DOTALL)


def parse_action(response_text: str) -> LandscapeforgeAction:
    """Extract the first JSON object from the LLM's reply and build an Action.

    Accepts code-fenced JSON, raw JSON, and JSON embedded in prose. Tolerates
    the common LLM failure mode of emitting unescaped newlines / tabs inside
    string values (especially for the `code` field of a `draft` action).
    Text after the first complete object is ignored, so trailing commentary
    that happens to contain braces does not invalidate the reply.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        ``LandscapeforgeAction(**data)`` for the decoded object.

    Raises:
        ValueError: If the text holds no ``{...}`` span, the span is not
            valid JSON even after escaping control characters, or the object
            has no ``kind`` key. Whatever ``LandscapeforgeAction`` raises for
            the decoded fields propagates unchanged.
    """
    text = response_text.strip()
    if text.startswith("```"):
        # Drop an opening ``` / ```json fence and a closing ``` fence.
        text = re.sub(r"^```(?:json)?\n?", "", text)
        text = re.sub(r"\n?```\s*$", "", text)

    match = _JSON_RE.search(text)
    if not match:
        raise ValueError(f"No JSON object in response: {response_text[:200]!r}")
    raw_json = match.group(0)

    # raw_decode stops at the end of the first complete value, unlike
    # json.loads, which rejects anything following it ("Extra data").
    decoder = json.JSONDecoder()
    try:
        # First pass: strict.
        data, _ = decoder.raw_decode(raw_json)
    except json.JSONDecodeError:
        # Second pass: escape raw control chars inside string literals.
        fixed = _escape_string_controls(raw_json)
        try:
            data, _ = decoder.raw_decode(fixed)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON even after control-char fix: {e}; "
                             f"raw: {raw_json[:200]!r}") from e

    if "kind" not in data:
        raise ValueError(f"Missing `kind`: {data}")
    return LandscapeforgeAction(**data)
| def _escape_string_controls(s: str) -> str: | |
| """Escape raw newlines, carriage returns, and tabs inside JSON string literals. | |
| Walks character-by-character tracking whether we're inside a double-quoted | |
| string, and replaces raw control chars with their escaped forms. Handles | |
| the common case: `"code": "class Optimizer:\\n def __init__..."` where the | |
| LLM emitted literal newlines. | |
| """ | |
| out: list[str] = [] | |
| in_string = False | |
| escape_next = False | |
| for ch in s: | |
| if escape_next: | |
| out.append(ch) | |
| escape_next = False | |
| continue | |
| if ch == "\\": | |
| out.append(ch) | |
| escape_next = True | |
| continue | |
| if ch == '"': | |
| in_string = not in_string | |
| out.append(ch) | |
| continue | |
| if in_string: | |
| if ch == "\n": | |
| out.append("\\n"); continue | |
| if ch == "\r": | |
| out.append("\\r"); continue | |
| if ch == "\t": | |
| out.append("\\t"); continue | |
| out.append(ch) | |
| return "".join(out) | |
| # ---------- helpers ---------- | |
| def _fmt(v: Any) -> str: | |
| if v is None: | |
| return "None" | |
| if isinstance(v, float): | |
| if abs(v) < 1e-4 or abs(v) >= 1e4: | |
| return f"{v:.3e}" | |
| return f"{v:.4f}" | |
| if isinstance(v, list): | |
| if len(v) <= 4: | |
| return "[" + ", ".join(_fmt(x) for x in v) + "]" | |
| return f"[{_fmt(v[0])}, {_fmt(v[1])}, ..., {_fmt(v[-1])}] (len={len(v)})" | |
| return str(v) | |
| def _summarise_trajectory(traj: list[dict]) -> str: | |
| """Condense a 30-step baseline trajectory to head/tail snapshots.""" | |
| finite = [s for s in traj if s.get("f") is not None] | |
| if not finite: | |
| return "diverged immediately" | |
| head = finite[0] | |
| mid = finite[len(finite) // 2] if len(finite) > 2 else finite[-1] | |
| tail = finite[-1] | |
| diverged_mark = " (DIVERGED)" if len(finite) < len(traj) else "" | |
| return (f"t=0: f={_fmt(head['f'])}, |g|={_fmt(head['grad_norm'])} " | |
| f"β t={mid['t']}: f={_fmt(mid['f'])} " | |
| f"β t={tail['t']}: f={_fmt(tail['f'])}{diverged_mark}") | |