Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- frontend/src/pages/OptimizerArena.jsx +13 -4
- frontend/src/pages/RunWithLlm.jsx +51 -16
- server/api_routes.py +1 -3
- server/app.py +4 -1
frontend/src/pages/OptimizerArena.jsx
CHANGED
|
@@ -31,10 +31,19 @@ export function OptimizerArena() {
|
|
| 31 |
const [err, setErr] = useState(null)
|
| 32 |
|
| 33 |
async function run() {
|
| 34 |
-
setLoading(true); setErr(null)
|
| 35 |
-
try {
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
}
|
| 39 |
|
| 40 |
return (
|
|
|
|
| 31 |
const [err, setErr] = useState(null)
|
| 32 |
|
| 33 |
async function run() {
|
| 34 |
+
setLoading(true); setErr(null); setData(null)
|
| 35 |
+
try {
|
| 36 |
+
const r = await runArena({ template, dim, seed, code })
|
| 37 |
+
if (r.error) {
|
| 38 |
+
setErr(r.error)
|
| 39 |
+
} else {
|
| 40 |
+
setData(r)
|
| 41 |
+
}
|
| 42 |
+
} catch (e) {
|
| 43 |
+
setErr(e.message)
|
| 44 |
+
} finally {
|
| 45 |
+
setLoading(false)
|
| 46 |
+
}
|
| 47 |
}
|
| 48 |
|
| 49 |
return (
|
frontend/src/pages/RunWithLlm.jsx
CHANGED
|
@@ -217,17 +217,56 @@ function EpisodeDone({ done }) {
|
|
| 217 |
|
| 218 |
const rewardTone =
|
| 219 |
reward >= 0.5 ? 'good' : reward >= 0 ? 'warn' : 'bad'
|
| 220 |
-
const speedupTone = speedup >= 1.0 ? 'good' : 'warn'
|
| 221 |
-
const speedupDisplay = speedup < 100
|
| 222 |
-
? `${speedup.toFixed(2)}×`
|
| 223 |
-
: `${Math.round(speedup)}×`
|
| 224 |
|
| 225 |
-
//
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
return (
|
| 233 |
<div className="card"
|
|
@@ -251,16 +290,12 @@ function EpisodeDone({ done }) {
|
|
| 251 |
<KpiCard
|
| 252 |
label="Speedup vs Adam"
|
| 253 |
value={speedupDisplay}
|
| 254 |
-
sub={
|
| 255 |
tone={speedupTone} />
|
| 256 |
<KpiCard
|
| 257 |
label="Verdict"
|
| 258 |
value={verdict}
|
| 259 |
-
sub={
|
| 260 |
-
? `lost ${(100 * (1 - speedup)).toFixed(0)}% of Adam's ground`
|
| 261 |
-
: verdict === 'Beats Adam'
|
| 262 |
-
? `${((speedup - 1) * 100).toFixed(0)}% further than Adam`
|
| 263 |
-
: 'within ±10% of Adam'}
|
| 264 |
tone={verdictTone} />
|
| 265 |
</div>
|
| 266 |
|
|
|
|
| 217 |
|
| 218 |
const rewardTone =
|
| 219 |
reward >= 0.5 ? 'good' : reward >= 0 ? 'warn' : 'bad'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
+
// --- Speedup display ---
|
| 222 |
+
// "Speedup" only makes sense when both optimizers descended. Handle the
|
| 223 |
+
// degenerate cases cleanly rather than showing "-0.44×" (mathematically
|
| 224 |
+
// correct, semantically nonsense).
|
| 225 |
+
let speedupDisplay, speedupTone, speedupSub
|
| 226 |
+
if (myProg < 0) {
|
| 227 |
+
// Our optimizer went uphill
|
| 228 |
+
speedupDisplay = 'diverged'
|
| 229 |
+
speedupTone = 'bad'
|
| 230 |
+
speedupSub = `f moved +${Math.abs(myProg).toFixed(2)} (wrong direction)`
|
| 231 |
+
} else if (adamProg <= 0) {
|
| 232 |
+
// Adam itself couldn't descend — unfair denominator
|
| 233 |
+
speedupDisplay = myProg > 0 ? '∞' : '—'
|
| 234 |
+
speedupTone = myProg > 0 ? 'good' : 'warn'
|
| 235 |
+
speedupSub = 'Adam made no progress on this landscape'
|
| 236 |
+
} else {
|
| 237 |
+
const f = speedup < 100 ? speedup.toFixed(2) : Math.round(speedup).toString()
|
| 238 |
+
speedupDisplay = `${f}×`
|
| 239 |
+
speedupTone = speedup >= 1.0 ? 'good' : 'warn'
|
| 240 |
+
speedupSub = `descent ${myProg.toFixed(2)} vs Adam ${adamProg.toFixed(2)}`
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
// --- Verdict ---
|
| 244 |
+
let verdict, verdictTone, verdictSub
|
| 245 |
+
if (myProg < 0) {
|
| 246 |
+
verdict = 'Diverged'
|
| 247 |
+
verdictTone = 'bad'
|
| 248 |
+
verdictSub = 'optimizer moved away from the minimum'
|
| 249 |
+
} else if (adamProg <= 0) {
|
| 250 |
+
verdict = myProg > 0 ? 'Succeeds where Adam fails' : 'Tied · both stuck'
|
| 251 |
+
verdictTone = myProg > 0 ? 'good' : 'warn'
|
| 252 |
+
verdictSub = `you: ${myProg.toFixed(2)}, Adam: ${adamProg.toFixed(2)}`
|
| 253 |
+
} else if (speedup >= 1.5) {
|
| 254 |
+
verdict = 'Beats Adam'
|
| 255 |
+
verdictTone = 'good'
|
| 256 |
+
verdictSub = `${((speedup - 1) * 100).toFixed(0)}% further than Adam`
|
| 257 |
+
} else if (speedup >= 1.1) {
|
| 258 |
+
verdict = 'Edges Adam'
|
| 259 |
+
verdictTone = 'good'
|
| 260 |
+
verdictSub = `${((speedup - 1) * 100).toFixed(0)}% further than Adam`
|
| 261 |
+
} else if (speedup >= 0.9) {
|
| 262 |
+
verdict = 'Matches Adam'
|
| 263 |
+
verdictTone = 'warn'
|
| 264 |
+
verdictSub = 'within ±10% of Adam'
|
| 265 |
+
} else {
|
| 266 |
+
verdict = 'Behind Adam'
|
| 267 |
+
verdictTone = 'bad'
|
| 268 |
+
verdictSub = `covered ${(speedup * 100).toFixed(0)}% of Adam's descent`
|
| 269 |
+
}
|
| 270 |
|
| 271 |
return (
|
| 272 |
<div className="card"
|
|
|
|
| 290 |
<KpiCard
|
| 291 |
label="Speedup vs Adam"
|
| 292 |
value={speedupDisplay}
|
| 293 |
+
sub={speedupSub}
|
| 294 |
tone={speedupTone} />
|
| 295 |
<KpiCard
|
| 296 |
label="Verdict"
|
| 297 |
value={verdict}
|
| 298 |
+
sub={verdictSub}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
tone={verdictTone} />
|
| 300 |
</div>
|
| 301 |
|
server/api_routes.py
CHANGED
|
@@ -360,13 +360,11 @@ def api_arena(req: ArenaReq):
|
|
| 360 |
# 2-D contour if applicable
|
| 361 |
contour = None
|
| 362 |
if dim == 2:
|
| 363 |
-
from .reference_optimizers import run_baseline
|
| 364 |
-
user_traj = [(s["x"][0], s["x"][1]) for s in test["detail"]]
|
| 365 |
-
adam_run_raw = []
|
| 366 |
try:
|
| 367 |
from ..reference_optimizers import run_baseline as _rb
|
| 368 |
except ImportError:
|
| 369 |
from reference_optimizers import run_baseline as _rb # type: ignore
|
|
|
|
| 370 |
adam_run = _rb("adam", ls.f, ls.grad,
|
| 371 |
np.random.default_rng(req.seed).normal(0.0, 0.5, 2),
|
| 372 |
steps=50)
|
|
|
|
| 360 |
# 2-D contour if applicable
|
| 361 |
contour = None
|
| 362 |
if dim == 2:
|
|
|
|
|
|
|
|
|
|
| 363 |
try:
|
| 364 |
from ..reference_optimizers import run_baseline as _rb
|
| 365 |
except ImportError:
|
| 366 |
from reference_optimizers import run_baseline as _rb # type: ignore
|
| 367 |
+
user_traj = [(s["x"][0], s["x"][1]) for s in test["detail"]]
|
| 368 |
adam_run = _rb("adam", ls.f, ls.grad,
|
| 369 |
np.random.default_rng(req.seed).normal(0.0, 0.5, 2),
|
| 370 |
steps=50)
|
server/app.py
CHANGED
|
@@ -30,7 +30,10 @@ app = create_app(
|
|
| 30 |
LandscapeforgeAction,
|
| 31 |
LandscapeforgeObservation,
|
| 32 |
env_name="landscapeforge",
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
| 34 |
)
|
| 35 |
|
| 36 |
# Frontend-facing API (landscape, baseline_race, arena, llm_run)
|
|
|
|
| 30 |
LandscapeforgeAction,
|
| 31 |
LandscapeforgeObservation,
|
| 32 |
env_name="landscapeforge",
|
| 33 |
+
# Single shared env for plain HTTP /reset + /step so the API playground
|
| 34 |
+
# (which doesn't carry session IDs) operates on consistent state.
|
| 35 |
+
# WebSocket clients still get fresh instances via SUPPORTS_CONCURRENT_SESSIONS.
|
| 36 |
+
max_concurrent_envs=1,
|
| 37 |
)
|
| 38 |
|
| 39 |
# Frontend-facing API (landscape, baseline_race, arena, llm_run)
|