Imaginephoenix committed on
Commit
47e777c
·
verified ·
1 Parent(s): 30509d6

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -775
app.py DELETED
@@ -1,775 +0,0 @@
1
- """Auxiliary server entrypoint required by OpenEnv local validation checks."""
2
-
3
- import os
4
-
5
- from flask import Flask, Response, jsonify, request
6
-
7
- from environment import EmailTriageEnv
8
- from tasks import get_task_scenario_count, list_task_ids
9
-
10
- FRONTEND_HTML = """<!doctype html>
11
- <html lang="en">
12
- <head>
13
- <meta charset="utf-8" />
14
- <meta name="viewport" content="width=device-width, initial-scale=1" />
15
- <title>Inbox Helper Practice</title>
16
- <style>
17
- @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;600;700&family=IBM+Plex+Mono:wght@400;500&display=swap');
18
-
19
- :root {
20
- --bg: #f5f1e9;
21
- --paper: #fffaf2;
22
- --ink: #102433;
23
- --accent: #ea6a2a;
24
- --accent-soft: #ffd6bf;
25
- --line: #d7cabb;
26
- --ok: #0f7b6c;
27
- --warn: #9a3a12;
28
- --radius: 14px;
29
- }
30
-
31
- * { box-sizing: border-box; }
32
-
33
- body {
34
- margin: 0;
35
- font-family: 'Space Grotesk', sans-serif;
36
- color: var(--ink);
37
- background:
38
- radial-gradient(1100px 460px at -10% -20%, #f2bc9f 0%, transparent 60%),
39
- radial-gradient(1100px 520px at 120% 115%, #b8d7cf 0%, transparent 62%),
40
- var(--bg);
41
- min-height: 100vh;
42
- }
43
-
44
- .wrap {
45
- max-width: 1100px;
46
- margin: 28px auto;
47
- padding: 0 16px;
48
- animation: reveal .45s ease-out;
49
- }
50
-
51
- @keyframes reveal {
52
- from { opacity: 0; transform: translateY(10px); }
53
- to { opacity: 1; transform: translateY(0); }
54
- }
55
-
56
- .title {
57
- display: flex;
58
- justify-content: space-between;
59
- align-items: baseline;
60
- gap: 14px;
61
- margin-bottom: 14px;
62
- }
63
-
64
- h1 {
65
- margin: 0;
66
- font-size: clamp(1.5rem, 2vw, 2.2rem);
67
- letter-spacing: .4px;
68
- }
69
-
70
- .subtitle {
71
- margin: 6px 0 0;
72
- font-size: .95rem;
73
- opacity: .8;
74
- }
75
-
76
- .badge {
77
- background: var(--accent-soft);
78
- border: 1px solid #f2b693;
79
- color: #7f2e0b;
80
- padding: 6px 10px;
81
- border-radius: 999px;
82
- font-size: .85rem;
83
- font-weight: 600;
84
- }
85
-
86
- .grid {
87
- display: grid;
88
- grid-template-columns: 1fr;
89
- gap: 14px;
90
- }
91
-
92
- @media (min-width: 900px) {
93
- .grid { grid-template-columns: 1fr 1fr; }
94
- .wide { grid-column: span 2; }
95
- }
96
-
97
- .card {
98
- background: var(--paper);
99
- border: 1px solid var(--line);
100
- border-radius: var(--radius);
101
- padding: 14px;
102
- box-shadow: 0 8px 28px rgba(16, 36, 51, 0.08);
103
- }
104
-
105
- .card h2 {
106
- margin: 0 0 10px;
107
- font-size: 1rem;
108
- text-transform: uppercase;
109
- letter-spacing: .08em;
110
- opacity: .86;
111
- }
112
-
113
- .row {
114
- display: flex;
115
- flex-wrap: wrap;
116
- gap: 8px;
117
- align-items: center;
118
- margin-bottom: 10px;
119
- }
120
-
121
- select, input, textarea, button {
122
- font-family: inherit;
123
- font-size: .95rem;
124
- }
125
-
126
- select, input, textarea {
127
- width: 100%;
128
- border: 1px solid #cdbba6;
129
- border-radius: 10px;
130
- padding: 9px 10px;
131
- background: #fff;
132
- color: var(--ink);
133
- }
134
-
135
- textarea {
136
- min-height: 92px;
137
- resize: vertical;
138
- }
139
-
140
- button {
141
- border: 0;
142
- border-radius: 10px;
143
- padding: 9px 12px;
144
- font-weight: 700;
145
- background: var(--ink);
146
- color: #fff;
147
- cursor: pointer;
148
- transition: transform .12s ease, opacity .12s ease;
149
- }
150
-
151
- button.secondary {
152
- background: #285066;
153
- }
154
-
155
- button.accent {
156
- background: var(--accent);
157
- }
158
-
159
- button:hover { transform: translateY(-1px); }
160
- button:active { transform: translateY(0); opacity: .92; }
161
-
162
- .status {
163
- padding: 8px 10px;
164
- border-radius: 10px;
165
- background: #eef7f5;
166
- border: 1px solid #c7e4de;
167
- color: var(--ok);
168
- font-weight: 600;
169
- min-height: 40px;
170
- display: flex;
171
- align-items: center;
172
- }
173
-
174
- .status.error {
175
- background: #fff1ea;
176
- border-color: #ffc8ae;
177
- color: var(--warn);
178
- }
179
-
180
- pre {
181
- margin: 0;
182
- white-space: pre-wrap;
183
- background: #0f1b24;
184
- color: #d9efe9;
185
- border-radius: 10px;
186
- padding: 12px;
187
- max-height: 340px;
188
- overflow: auto;
189
- font-family: 'IBM Plex Mono', monospace;
190
- font-size: .85rem;
191
- border: 1px solid #21313f;
192
- }
193
-
194
- .email-block {
195
- background: #fff;
196
- border: 1px solid #d9ccbc;
197
- border-radius: 10px;
198
- padding: 12px;
199
- }
200
-
201
- .email-row {
202
- margin-bottom: 8px;
203
- font-size: .95rem;
204
- line-height: 1.35;
205
- }
206
-
207
- .email-row strong {
208
- display: inline-block;
209
- min-width: 66px;
210
- }
211
-
212
- .help {
213
- margin: 0 0 10px;
214
- font-size: .9rem;
215
- opacity: .8;
216
- }
217
-
218
- .metric {
219
- display: flex;
220
- justify-content: space-between;
221
- align-items: center;
222
- margin-bottom: 8px;
223
- padding-bottom: 6px;
224
- border-bottom: 1px dashed #dbcfbe;
225
- font-size: .95rem;
226
- }
227
-
228
- .metric strong {
229
- font-weight: 700;
230
- }
231
-
232
- .coach {
233
- background: #fff7ed;
234
- border: 1px solid #f2caa9;
235
- border-radius: 10px;
236
- padding: 10px;
237
- min-height: 74px;
238
- line-height: 1.4;
239
- font-size: .92rem;
240
- }
241
-
242
- .chip-row {
243
- display: flex;
244
- flex-wrap: wrap;
245
- gap: 8px;
246
- margin-top: 10px;
247
- }
248
-
249
- .chip {
250
- background: #eaf3ff;
251
- border: 1px solid #b9d1ef;
252
- color: #184469;
253
- border-radius: 999px;
254
- padding: 6px 10px;
255
- font-size: .84rem;
256
- cursor: pointer;
257
- font-weight: 600;
258
- }
259
- </style>
260
- </head>
261
- <body>
262
- <div class="wrap">
263
- <div class="title">
264
- <div>
265
- <h1>Inbox Helper Practice</h1>
266
- <p class="subtitle">Practice deciding priority, category, and who should handle each email.</p>
267
- </div>
268
- <span class="badge" id="badge">connecting...</span>
269
- </div>
270
-
271
- <div class="grid">
272
- <section class="card">
273
- <h2>Start a Scenario</h2>
274
- <p class="help">Pick a difficulty, then click Start.</p>
275
- <div class="row">
276
- <select id="taskId">
277
- <option value="task_easy">Easy: one clear email</option>
278
- <option value="task_medium">Medium: mixed inbox</option>
279
- <option value="task_hard">Hard: high-risk complaint</option>
280
- <option value="task_production">Production: full inbox simulator</option>
281
- </select>
282
- </div>
283
- <div id="productionControls" style="display:none;">
284
- <div class="row">
285
- <select id="productionProfile">
286
- <option value="light">Workload: Light</option>
287
- <option value="standard" selected>Workload: Standard</option>
288
- <option value="heavy">Workload: Heavy</option>
289
- </select>
290
- </div>
291
- <div class="row">
292
- <select id="businessHoursMode">
293
- <option value="false" selected>Time Profile: 24x7 inbox</option>
294
- <option value="true">Time Profile: business hours focus</option>
295
- </select>
296
- </div>
297
- <div class="row">
298
- <select id="escalationMode">
299
- <option value="low">Escalation: Low</option>
300
- <option value="normal" selected>Escalation: Normal</option>
301
- <option value="high">Escalation: High</option>
302
- </select>
303
- </div>
304
- </div>
305
- <div class="row">
306
- <button class="accent" id="btnReset">Start</button>
307
- <button class="secondary" id="btnState">Check Progress</button>
308
- </div>
309
- <div class="status" id="status">Ready. Start a scenario.</div>
310
- </section>
311
-
312
- <section class="card">
313
- <h2>Your Decision</h2>
314
- <p class="help">Choose priority, who should handle it, and a short reason.</p>
315
- <div class="row">
316
- <select id="label">
317
- <option value="urgent">Urgent</option>
318
- <option value="normal" selected>Normal</option>
319
- <option value="spam">Spam</option>
320
- <option value="archive">Archive</option>
321
- </select>
322
- </div>
323
- <div class="row">
324
- <input id="routeTo" placeholder="Who should handle this? (billing, safety, engineering, support)" value="general" />
325
- </div>
326
- <div class="row">
327
- <textarea id="summary" placeholder="Write one clear sentence with key clues from the email.">Needs review.</textarea>
328
- </div>
329
- <div class="row">
330
- <button id="btnStep">Send Decision</button>
331
- </div>
332
- </section>
333
-
334
- <section class="card wide">
335
- <h2>Current Email</h2>
336
- <div class="email-block">
337
- <div class="email-row"><strong>Subject:</strong> <span id="mailSubject">No email loaded yet.</span></div>
338
- <div class="email-row"><strong>From:</strong> <span id="mailSender">-</span></div>
339
- <div class="email-row"><strong>Message:</strong> <span id="mailBody">Start a scenario to load an email.</span></div>
340
- </div>
341
- </section>
342
-
343
- <section class="card">
344
- <h2>Live Progress</h2>
345
- <div class="metric"><span>Task</span><strong id="insightTask">-</strong></div>
346
- <div class="metric"><span>Scenario</span><strong id="insightScenario">-</strong></div>
347
- <div class="metric"><span>Progress</span><strong id="insightProgress">0/0</strong></div>
348
- <div class="metric"><span>Last Reward</span><strong id="insightReward">-</strong></div>
349
- <div class="metric"><span>Base Score</span><strong id="insightBase">-</strong></div>
350
- </section>
351
-
352
- <section class="card">
353
- <h2>Coach Notes</h2>
354
- <p class="help">Use this to improve your next triage action.</p>
355
- <div class="coach" id="coachNotes">Start a scenario and submit one decision to get feedback.</div>
356
- <div class="chip-row">
357
- <button class="chip" id="chipSafety">Quick Fill: Urgent + Safety</button>
358
- <button class="chip" id="chipBilling">Quick Fill: Normal + Billing</button>
359
- <button class="chip" id="chipSpam">Quick Fill: Spam + General</button>
360
- </div>
361
- </section>
362
-
363
- <section class="card wide">
364
- <h2>Details (Advanced)</h2>
365
- <pre id="output">Waiting for your first action...</pre>
366
- </section>
367
- </div>
368
- </div>
369
-
370
- <script>
371
- const statusEl = document.getElementById('status');
372
- const badgeEl = document.getElementById('badge');
373
- const outEl = document.getElementById('output');
374
- const mailSubjectEl = document.getElementById('mailSubject');
375
- const mailSenderEl = document.getElementById('mailSender');
376
- const mailBodyEl = document.getElementById('mailBody');
377
- const taskIdEl = document.getElementById('taskId');
378
- const productionControlsEl = document.getElementById('productionControls');
379
- const insightTaskEl = document.getElementById('insightTask');
380
- const insightScenarioEl = document.getElementById('insightScenario');
381
- const insightProgressEl = document.getElementById('insightProgress');
382
- const insightRewardEl = document.getElementById('insightReward');
383
- const insightBaseEl = document.getElementById('insightBase');
384
- const coachNotesEl = document.getElementById('coachNotes');
385
-
386
- function setStatus(msg, isError = false) {
387
- statusEl.textContent = msg;
388
- statusEl.classList.toggle('error', isError);
389
- }
390
-
391
- function writeOutput(value) {
392
- outEl.textContent = typeof value === 'string' ? value : JSON.stringify(value, null, 2);
393
- }
394
-
395
- function updateEmailPanel(data) {
396
- if (!data || !data.observation) {
397
- return;
398
- }
399
- const obs = data.observation;
400
- mailSubjectEl.textContent = obs.subject || 'No subject';
401
- mailSenderEl.textContent = obs.sender || '-';
402
- mailBodyEl.textContent = obs.body || '';
403
- }
404
-
405
- function updateProductionControlsVisibility() {
406
- const isProduction = taskIdEl.value === 'task_production';
407
- productionControlsEl.style.display = isProduction ? 'block' : 'none';
408
- }
409
-
410
- function safeNumber(value) {
411
- return typeof value === 'number' && !Number.isNaN(value) ? value : null;
412
- }
413
-
414
- function updateInsights(data) {
415
- const info = (data && data.info) ? data.info : {};
416
- const taskValue = info.task_id || data.task_id || (data.observation && data.observation.task_id) || '-';
417
- const scenarioValue = info.scenario_id || '-';
418
-
419
- insightTaskEl.textContent = taskValue;
420
- insightScenarioEl.textContent = scenarioValue;
421
-
422
- const emailsProcessed = safeNumber(info.emails_processed);
423
- const emailsTotal = safeNumber(info.emails_total);
424
- if (emailsProcessed !== null && emailsTotal !== null) {
425
- insightProgressEl.textContent = `${emailsProcessed}/${emailsTotal}`;
426
- } else if (safeNumber(data.current_step) !== null && safeNumber(data.total_steps) !== null) {
427
- insightProgressEl.textContent = `${data.current_step}/${data.total_steps}`;
428
- }
429
-
430
- const rewardValue = safeNumber(data.reward);
431
- insightRewardEl.textContent = rewardValue !== null ? rewardValue.toFixed(2) : '-';
432
-
433
- const baseScoreValue = safeNumber(info.base_score);
434
- insightBaseEl.textContent = baseScoreValue !== null ? baseScoreValue.toFixed(2) : '-';
435
-
436
- const tips = [];
437
- if (info.validation_error) {
438
- tips.push('Action format is invalid. Keep label/summary/route_to filled correctly.');
439
- }
440
-
441
- const routeNoise = safeNumber(info.grade_route_noise_penalty);
442
- if (routeNoise !== null && routeNoise > 0.01) {
443
- tips.push('Route to one best owner team. Avoid sending to many teams at once.');
444
- }
445
-
446
- const summaryMatch = safeNumber(info.grade_summary_match);
447
- if (summaryMatch !== null && summaryMatch < 0.6) {
448
- tips.push('Summary is weak. Include concrete clues from subject/body/thread.');
449
- }
450
-
451
- const labelMatch = safeNumber(info.grade_label_match);
452
- if (labelMatch !== null && labelMatch < 1.0) {
453
- tips.push('Priority label may be off. Re-check urgency and risk signals.');
454
- }
455
-
456
- const routeMatch = safeNumber(info.grade_route_match);
457
- if (routeMatch !== null && routeMatch < 1.0) {
458
- tips.push('Routing looks off. Pick the team that directly owns this issue.');
459
- }
460
-
461
- const urgencyComponent = safeNumber(info.grade_urgency_component);
462
- if (urgencyComponent !== null && urgencyComponent < 0.2) {
463
- tips.push('For high-risk complaints, mark urgent and route to safety first.');
464
- }
465
-
466
- if (!tips.length && typeof info.grading_feedback === 'string' && info.grading_feedback) {
467
- tips.push(info.grading_feedback);
468
- }
469
-
470
- coachNotesEl.textContent = tips.length
471
- ? tips.join(' ')
472
- : 'Looks good. Keep your next route precise and your summary evidence-based.';
473
- }
474
-
475
- function prefillAction(label, routeTo, summary) {
476
- document.getElementById('label').value = label;
477
- document.getElementById('routeTo').value = routeTo;
478
- document.getElementById('summary').value = summary;
479
- }
480
-
481
- async function postJson(path, payload) {
482
- const response = await fetch(path, {
483
- method: 'POST',
484
- headers: { 'Content-Type': 'application/json' },
485
- body: JSON.stringify(payload || {}),
486
- });
487
- const text = await response.text();
488
- let data = text;
489
- try { data = JSON.parse(text); } catch (e) {}
490
- if (!response.ok) {
491
- throw new Error('HTTP ' + response.status + ' - ' + text);
492
- }
493
- return data;
494
- }
495
-
496
- async function warmup() {
497
- try {
498
- const res = await fetch('/meta');
499
- const data = await res.json();
500
- badgeEl.textContent = data.status === 'ok' ? 'ready' : 'check service';
501
- } catch (e) {
502
- badgeEl.textContent = 'offline';
503
- }
504
- }
505
-
506
- document.getElementById('btnReset').addEventListener('click', async () => {
507
- const taskId = taskIdEl.value;
508
- setStatus('Starting a new scenario...');
509
- try {
510
- const payload = { task_id: taskId };
511
- if (taskId === 'task_production') {
512
- payload.production_profile = document.getElementById('productionProfile').value;
513
- payload.business_hours_mode = document.getElementById('businessHoursMode').value === 'true';
514
- payload.escalation_mode = document.getElementById('escalationMode').value;
515
- }
516
- const data = await postJson('/reset', payload);
517
- setStatus('Scenario started. Read the email below.');
518
- updateEmailPanel(data);
519
- updateInsights(data);
520
- writeOutput(data);
521
- } catch (e) {
522
- setStatus('Could not start scenario. See details below.', true);
523
- writeOutput(String(e));
524
- }
525
- });
526
-
527
- document.getElementById('btnState').addEventListener('click', async () => {
528
- setStatus('Checking progress...');
529
- try {
530
- const data = await postJson('/state', {});
531
- setStatus('Progress updated.');
532
- updateInsights(data);
533
- writeOutput(data);
534
- } catch (e) {
535
- setStatus('Could not fetch progress. See details below.', true);
536
- writeOutput(String(e));
537
- }
538
- });
539
-
540
- document.getElementById('btnStep').addEventListener('click', async () => {
541
- const payload = {
542
- label: document.getElementById('label').value,
543
- summary: document.getElementById('summary').value,
544
- route_to: document.getElementById('routeTo').value,
545
- };
546
- setStatus('Sending your decision...');
547
- try {
548
- const data = await postJson('/step', payload);
549
- setStatus('Decision saved.');
550
- updateEmailPanel(data);
551
- updateInsights(data);
552
- writeOutput(data);
553
- } catch (e) {
554
- setStatus('Could not submit decision. See details below.', true);
555
- writeOutput(String(e));
556
- }
557
- });
558
-
559
- document.getElementById('chipSafety').addEventListener('click', () => {
560
- prefillAction('urgent', 'safety', 'Potential safety risk with immediate escalation needed.');
561
- });
562
-
563
- document.getElementById('chipBilling').addEventListener('click', () => {
564
- prefillAction('normal', 'billing', 'Customer billing issue needs finance team review and response.');
565
- });
566
-
567
- document.getElementById('chipSpam').addEventListener('click', () => {
568
- prefillAction('spam', 'general', 'Likely phishing or irrelevant message with suspicious external request.');
569
- });
570
-
571
- taskIdEl.addEventListener('change', updateProductionControlsVisibility);
572
-
573
- updateProductionControlsVisibility();
574
- warmup();
575
- </script>
576
- </body>
577
- </html>
578
- """
579
-
580
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Single environment instance shared by all requests. It starts on the easy
# task and is replaced whenever /reset is called.
current_env = EmailTriageEnv(task_id="task_easy")

# Per-task cursor that /reset advances to pick the next scenario when the
# client does not request a specific index.
SCENARIO_COUNTERS = dict.fromkeys(list_task_ids(), 0)

# Scenario split used by default; configurable through OPENENV_EVAL_SPLIT.
DEFAULT_EVAL_SPLIT = os.environ.get("OPENENV_EVAL_SPLIT", "public")

# Clients may pass eval_split/scenario_index only when this env var is "true"
# (compared case-insensitively, ignoring surrounding whitespace).
ALLOW_CLIENT_EVAL_OVERRIDE = (
    os.environ.get("OPENENV_ALLOW_CLIENT_EVAL_OVERRIDE", "false").strip().lower()
    == "true"
)
587
-
588
-
589
@app.get("/")
def root_page():
    """Serve the embedded single-page frontend as an HTML response."""
    page = Response(response=FRONTEND_HTML, mimetype="text/html")
    return page
593
-
594
-
595
@app.get("/meta")
def root_endpoint():
    """Describe the service for health checks and machine clients.

    The JSON body contains the service name, an "ok" status, the three POST
    endpoints, the public scenario count for every known task id, the default
    eval split, and the accepted values of the production runtime controls.
    """
    endpoints = {
        "reset": {"method": "POST", "path": "/reset"},
        "step": {"method": "POST", "path": "/step"},
        "state": {"method": "POST", "path": "/state"},
    }

    # Only the "public" pool sizes are advertised here.
    public_pool = {}
    for task_id in list_task_ids():
        public_pool[task_id] = get_task_scenario_count(task_id, "public")

    runtime_controls = {
        "production_profile": ["light", "standard", "heavy"],
        "business_hours_mode": [True, False],
        "escalation_mode": ["low", "normal", "high"],
    }

    metadata = {
        "name": "email-triage-env",
        "status": "ok",
        "endpoints": endpoints,
        "scenario_pools": {"public": public_pool},
        "eval_split": DEFAULT_EVAL_SPLIT,
        "production_runtime_controls": runtime_controls,
    }
    return jsonify(metadata)
621
-
622
-
623
def _bad_request(message):
    """Return the ``(json_body, 400)`` pair used for every rejected reset."""
    return jsonify({"error": message}), 400


@app.post("/reset")
def reset_endpoint():
    """Reset the environment with a selected task and return ResetResult JSON.

    Request body (JSON object, all fields optional):
        task_id: Task to load; defaults to "task_easy".
        production_profile: "light", "standard" or "heavy". Only read when
            task_id is "task_production"; defaults to "standard".
        escalation_mode: "low", "normal" or "high". Only read when task_id is
            "task_production"; defaults to "normal".
        business_hours_mode: Boolean, or a string where "1"/"true"/"yes"/"on"
            (case-insensitive) mean True and anything else means False. Only
            read when task_id is "task_production"; defaults to False.
        eval_split, scenario_index: Accepted only when
            ALLOW_CLIENT_EVAL_OVERRIDE is set; otherwise their presence is
            rejected with a 400.

    Returns:
        Flask response containing the reset payload, or a JSON error body
        with status 400 when validation fails or a KeyError is raised while
        building the environment.
    """
    global current_env

    payload = request.get_json(silent=True)
    if payload is None:
        # A missing or unparseable body means "use the defaults".
        payload = {}
    elif not isinstance(payload, dict):
        return _bad_request("Malformed JSON payload.")

    task_id = payload.get("task_id", "task_easy")
    if not isinstance(task_id, str):
        return _bad_request("Field 'task_id' must be a string.")

    runtime_options: dict[str, object] = {}
    if task_id == "task_production":
        # The isinstance checks come first so an unhashable JSON value
        # (list/dict) never reaches the set membership test.
        production_profile = payload.get("production_profile", "standard")
        if not isinstance(production_profile, str) or production_profile not in {
            "light",
            "standard",
            "heavy",
        }:
            return _bad_request(
                "Field 'production_profile' must be one of "
                "light/standard/heavy."
            )

        escalation_mode = payload.get("escalation_mode", "normal")
        if not isinstance(escalation_mode, str) or escalation_mode not in {
            "low",
            "normal",
            "high",
        }:
            return _bad_request(
                "Field 'escalation_mode' must be one of "
                "low/normal/high."
            )

        business_hours_mode = payload.get("business_hours_mode", False)
        if isinstance(business_hours_mode, str):
            # String flags are coerced leniently; unknown strings become False.
            business_hours_mode = business_hours_mode.strip().lower() in {
                "1",
                "true",
                "yes",
                "on",
            }
        elif not isinstance(business_hours_mode, bool):
            return _bad_request("Field 'business_hours_mode' must be boolean.")

        runtime_options = {
            "production_profile": production_profile,
            "business_hours_mode": business_hours_mode,
            "escalation_mode": escalation_mode,
        }

    if not ALLOW_CLIENT_EVAL_OVERRIDE and (
        "eval_split" in payload or "scenario_index" in payload
    ):
        return _bad_request(
            "Client overrides for eval_split/scenario_index are disabled "
            "by server policy."
        )

    eval_split = DEFAULT_EVAL_SPLIT
    if ALLOW_CLIENT_EVAL_OVERRIDE:
        requested_split = payload.get("eval_split", DEFAULT_EVAL_SPLIT)
        if not isinstance(requested_split, str):
            return _bad_request("Field 'eval_split' must be a string.")
        eval_split = requested_split

    requested_index = (
        payload.get("scenario_index") if ALLOW_CLIENT_EVAL_OVERRIDE else None
    )
    # bool is a subclass of int, so JSON true/false must be rejected
    # explicitly; otherwise `true` would silently select scenario 1.
    if requested_index is not None and (
        isinstance(requested_index, bool)
        or not isinstance(requested_index, int)
        or requested_index < 0
    ):
        return _bad_request("Field 'scenario_index' must be a non-negative integer.")

    try:
        scenario_count = get_task_scenario_count(task_id, eval_split)
        if requested_index is None:
            scenario_index = SCENARIO_COUNTERS.get(task_id, 0)
            if scenario_count > 0:
                # The counter is keyed by task only, so it may have been
                # advanced against a larger split; wrap it into this pool
                # before using it.
                scenario_index %= scenario_count
                SCENARIO_COUNTERS[task_id] = (scenario_index + 1) % scenario_count
        else:
            scenario_index = requested_index

        # Build and reset the replacement first so a failure leaves the
        # previously active environment in place.
        new_env = EmailTriageEnv(
            task_id=task_id,
            scenario_index=scenario_index,
            split=eval_split,
            runtime_options=runtime_options,
        )
        reset_result = new_env.reset()
    except KeyError as error:
        return _bad_request(str(error))

    current_env = new_env
    return jsonify(reset_result.model_dump())
741
-
742
-
743
@app.post("/step")
def step_endpoint():
    """Apply one action to the active environment and return StepResult JSON.

    The parsed JSON body is handed to ``current_env.step`` unchanged. When
    ``get_json(silent=True)`` yields ``None`` the request is answered with a
    400 error body instead.

    Returns:
        Flask response containing the step payload, or an error body with
        status 400.
    """
    action = request.get_json(silent=True)
    if action is not None:
        outcome = current_env.step(action)
        return jsonify(outcome.model_dump())
    return jsonify({"error": "Malformed JSON payload."}), 400
756
-
757
-
758
@app.post("/state")
def state_endpoint():
    """Return the active environment's state as JSON.

    Returns:
        Flask response containing the dumped result of ``current_env.state()``.
    """
    snapshot = current_env.state().model_dump()
    return jsonify(snapshot)
767
-
768
-
769
def main() -> None:
    """Start the Flask server on all interfaces, port 7860."""
    listen_on = {"host": "0.0.0.0", "port": 7860}
    app.run(**listen_on)


# Allow launching the server with `python app.py`.
if __name__ == "__main__":
    main()