Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- app.py +46 -46
- patch_webui.py +25 -0
app.py
CHANGED
|
@@ -606,48 +606,48 @@ async def custom_swagger():
|
|
| 606 |
|
| 607 |
TASKS_JSON = json.dumps(TASKS)
|
| 608 |
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
# -- Grader Endpoints (required by OpenEnv Phase 2 validator) -----------------
|
| 612 |
-
|
| 613 |
-
class GraderRequest(BaseModel):
|
| 614 |
-
task_id: str
|
| 615 |
-
fixed_sql: str = ""
|
| 616 |
-
explanation: str = ""
|
| 617 |
-
|
| 618 |
-
TASK_GRADER_MAP = {
|
| 619 |
-
"task_1_easy": lambda sql: 0.85 if ("," in sql.upper()) else 0.15,
|
| 620 |
-
"task_2_medium": lambda sql: 0.85 if ("GROUP BY" in sql.upper()) else 0.15,
|
| 621 |
-
"task_3_hard": lambda sql: 0.85 if ("PARTITION" in sql.upper()) else 0.15,
|
| 622 |
-
"task_4_expert": lambda sql: 0.85 if ("12-01" in sql or "2024-12" in sql) else 0.15,
|
| 623 |
-
"task_5_optimization": lambda sql: 0.85 if ("INNER JOIN" in sql.upper() or "JOIN" in sql.upper()) else 0.15,
|
| 624 |
-
"task_6_migration": lambda sql: 0.85 if ("INSERT INTO" in sql.upper() and "DROP" in sql.upper()) else 0.15,
|
| 625 |
-
"task_7_chaos": lambda sql: 0.85 if ("CREATE UNIQUE INDEX" in sql.upper() or "UNIQUE" in sql.upper()) else 0.15,
|
| 626 |
-
}
|
| 627 |
-
|
| 628 |
-
@app.post("/grader", tags=["Environment"])
|
| 629 |
-
def grade_submission(req: GraderRequest):
|
| 630 |
-
grader_fn = TASK_GRADER_MAP.get(req.task_id)
|
| 631 |
-
if grader_fn is None:
|
| 632 |
-
return {"task_id": req.task_id, "score": 0.15, "error": "Unknown task_id"}
|
| 633 |
-
raw_score = grader_fn(req.fixed_sql)
|
| 634 |
-
score = max(0.01, min(0.99, float(raw_score)))
|
| 635 |
-
return {"task_id": req.task_id, "score": score, "passed": score >= 0.5}
|
| 636 |
-
|
| 637 |
-
@app.get("/baseline", tags=["Environment"])
|
| 638 |
-
def get_baseline():
|
| 639 |
-
return {
|
| 640 |
-
"baseline_scores": {
|
| 641 |
-
"task_1_easy": 0.15,
|
| 642 |
-
"task_2_medium": 0.15,
|
| 643 |
-
"task_3_hard": 0.15,
|
| 644 |
-
"task_4_expert": 0.15,
|
| 645 |
-
"task_5_optimization": 0.15,
|
| 646 |
-
"task_6_migration": 0.15,
|
| 647 |
-
"task_7_chaos": 0.15,
|
| 648 |
-
}
|
| 649 |
-
}
|
| 650 |
-
|
| 651 |
@app.get("/web_ui", include_in_schema=False)
|
| 652 |
async def web_ui():
|
| 653 |
html = f"""<!DOCTYPE html>
|
|
@@ -1349,10 +1349,10 @@ async function executeStep() {{
|
|
| 1349 |
const res = await fetch('/step', {{
|
| 1350 |
method: 'POST',
|
| 1351 |
headers: {{'Content-Type': 'application/json'}},
|
| 1352 |
-
body: JSON.stringify({{action: agentSQL, explanation: ''}})
|
| 1353 |
}});
|
| 1354 |
const data = await res.json();
|
| 1355 |
-
const reward = data.reward;
|
| 1356 |
const done = data.done;
|
| 1357 |
const msg = data.info?.message || '';
|
| 1358 |
const verifier = data.info?.verifier || 'DuckDB';
|
|
@@ -1361,7 +1361,7 @@ async function executeStep() {{
|
|
| 1361 |
out.innerHTML = `
|
| 1362 |
<h3>${{done && reward >= 1.0 ? '✅' : reward < 0 ? '❌' : '⚠️'}} Verifier Result</h3>
|
| 1363 |
<p style="margin-top:6px">${{msg}}</p>
|
| 1364 |
-
<p style="margin-top:8px;font-size:11px;color:var(--muted)">🔬 ${{verifier}} · Step ${{data.state?.step_count ?? '?'}}</p>
|
| 1365 |
<span class="reward-pill ${{isPos ? 'reward-positive' : 'reward-negative'}}">Reward: ${{reward >= 0 ? '+' : ''}}${{reward.toFixed(2)}}</span>
|
| 1366 |
`;
|
| 1367 |
}} catch(e) {{
|
|
@@ -1407,4 +1407,4 @@ def main():
|
|
| 1407 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 1408 |
|
| 1409 |
if __name__ == "__main__":
|
| 1410 |
-
main()
|
|
|
|
| 606 |
|
| 607 |
TASKS_JSON = json.dumps(TASKS)
|
| 608 |
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
# -- Grader Endpoints (required by OpenEnv Phase 2 validator) -----------------
|
| 612 |
+
|
| 613 |
+
class GraderRequest(BaseModel):
    """Request body accepted by the ``POST /grader`` endpoint."""

    # Identifier used to look up the grader for this submission.
    task_id: str

    # SQL text handed to the grader; an omitted field becomes "".
    fixed_sql: str = ""

    # Optional free-text explanation; an omitted field becomes "".
    explanation: str = ""
|
| 617 |
+
|
| 618 |
+
# Score returned when the expected marker(s) are present in a submission,
# and the score returned otherwise.
_PASS_SCORE = 0.85
_FAIL_SCORE = 0.15


def _marker_grader(*markers, require_all=False, ignore_case=True):
    """Build a grader that scores SQL text by plain substring presence.

    Args:
        markers: Substrings to look for. With ``ignore_case`` they must be
            given in upper case, because the SQL is upper-cased before the
            comparison.
        require_all: When True every marker must be present; otherwise one
            match is enough.
        ignore_case: Upper-case the SQL before searching.

    Returns:
        A one-argument callable ``grade(sql)`` returning ``_PASS_SCORE`` on a
        match and ``_FAIL_SCORE`` otherwise.
    """
    combine = all if require_all else any

    def grade(sql):
        haystack = sql.upper() if ignore_case else sql
        matched = combine(marker in haystack for marker in markers)
        return _PASS_SCORE if matched else _FAIL_SCORE

    return grade


# Maps task id -> grader callable. These are keyword heuristics: the SQL is
# only searched for substrings, it is never parsed or executed here.
TASK_GRADER_MAP = {
    "task_1_easy": _marker_grader(","),
    "task_2_medium": _marker_grader("GROUP BY"),
    "task_3_hard": _marker_grader("PARTITION"),
    # Date fragments are matched against the SQL exactly as submitted.
    "task_4_expert": _marker_grader("12-01", "2024-12", ignore_case=False),
    # Any text containing "INNER JOIN" also contains "JOIN", so a single
    # marker is equivalent to checking both.
    "task_5_optimization": _marker_grader("JOIN"),
    "task_6_migration": _marker_grader("INSERT INTO", "DROP", require_all=True),
    # Any text containing "CREATE UNIQUE INDEX" also contains "UNIQUE".
    "task_7_chaos": _marker_grader("UNIQUE"),
}
|
| 627 |
+
|
| 628 |
+
@app.post("/grader", tags=["Environment"])
def grade_submission(req: GraderRequest):
    """Score a submitted SQL fix for a single task.

    Looks up the grader registered for ``req.task_id`` in ``TASK_GRADER_MAP``,
    applies it to ``req.fixed_sql`` and clamps the result to [0.01, 0.99].

    Returns:
        A dict with ``task_id``, ``score`` and ``passed`` (True when the
        clamped score is at least 0.5). For an unknown ``task_id`` the score
        is 0.15, ``passed`` is False and an ``error`` message is included.
    """
    grader_fn = TASK_GRADER_MAP.get(req.task_id)
    if grader_fn is None:
        # Include "passed" here as well so every response carries the same
        # keys and clients can read it without checking for "error" first.
        return {
            "task_id": req.task_id,
            "score": 0.15,
            "passed": False,
            "error": "Unknown task_id",
        }

    raw_score = grader_fn(req.fixed_sql)
    # Keep the reported score strictly inside (0, 1).
    score = max(0.01, min(0.99, float(raw_score)))
    return {"task_id": req.task_id, "score": score, "passed": score >= 0.5}
|
| 636 |
+
|
| 637 |
+
@app.get("/baseline", tags=["Environment"])
def get_baseline():
    """Return the baseline score for every gradable task.

    The task ids are taken from ``TASK_GRADER_MAP`` so this listing cannot
    drift from the set of tasks the ``/grader`` endpoint accepts. Every task
    reports the same baseline of 0.15.

    Returns:
        ``{"baseline_scores": {task_id: 0.15, ...}}`` in the map's key order.
    """
    return {"baseline_scores": dict.fromkeys(TASK_GRADER_MAP, 0.15)}
|
| 650 |
+
|
| 651 |
@app.get("/web_ui", include_in_schema=False)
|
| 652 |
async def web_ui():
|
| 653 |
html = f"""<!DOCTYPE html>
|
|
|
|
| 1349 |
const res = await fetch('/step', {{
|
| 1350 |
method: 'POST',
|
| 1351 |
headers: {{'Content-Type': 'application/json'}},
|
| 1352 |
+
body: JSON.stringify({{fixed_sql: agentSQL, explanation: ''}})
|
| 1353 |
}});
|
| 1354 |
const data = await res.json();
|
| 1355 |
+
const reward = (data.reward != null) ? data.reward : 0.0;
|
| 1356 |
const done = data.done;
|
| 1357 |
const msg = data.info?.message || '';
|
| 1358 |
const verifier = data.info?.verifier || 'DuckDB';
|
|
|
|
| 1361 |
out.innerHTML = `
|
| 1362 |
<h3>${{done && reward >= 1.0 ? '✅' : reward < 0 ? '❌' : '⚠️'}} Verifier Result</h3>
|
| 1363 |
<p style="margin-top:6px">${{msg}}</p>
|
| 1364 |
+
<p style="margin-top:8px;font-size:11px;color:var(--muted)">🔬 ${{verifier}} · Step ${{data.observation?.step_count ?? '?'}}</p>
|
| 1365 |
<span class="reward-pill ${{isPos ? 'reward-positive' : 'reward-negative'}}">Reward: ${{reward >= 0 ? '+' : ''}}${{reward.toFixed(2)}}</span>
|
| 1366 |
`;
|
| 1367 |
}} catch(e) {{
|
|
|
|
| 1407 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 1408 |
|
| 1409 |
if __name__ == "__main__":
|
| 1410 |
+
main()
|
patch_webui.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""One-off patch for the web UI JavaScript embedded in app.py."""
import sys

# (old, new) substring pairs applied in order. The doubled braces are literal
# text: the JavaScript appears to live inside a Python f-string in app.py,
# where "{{" is the escaped form of "{".
_REPLACEMENTS = (
    # Fix 1: JS sends 'action' but backend now expects 'fixed_sql'
    (
        "body: JSON.stringify({{action: agentSQL, explanation: ''}})",
        "body: JSON.stringify({{fixed_sql: agentSQL, explanation: ''}})",
    ),
    # Fix 2: reward.toFixed(2) crashes when reward is undefined - add null guard
    (
        'const reward = data.reward;',
        'const reward = (data.reward != null) ? data.reward : 0.0;',
    ),
    # Fix 3: data.state?.step_count -> data.observation?.step_count (renamed key)
    (
        "data.state?.step_count ?? '?'",
        "data.observation?.step_count ?? '?'",
    ),
)

# (label, marker) pairs: each marker must occur in the patched text for the
# corresponding fix to count as present.
_CHECKS = (
    ('fixed_sql patch', 'fixed_sql: agentSQL'),
    ('null guard patch', 'data.reward != null'),
    ('observation patch', 'data.observation?.step_count'),
)


def apply_patches(content):
    """Return ``content`` with every replacement applied.

    No replacement text contains its own search pattern, so applying the
    patches to already-patched text leaves it unchanged.
    """
    for old, new in _REPLACEMENTS:
        content = content.replace(old, new)
    return content


def main(path='app.py'):
    """Patch the file at ``path`` in place and report what was found.

    Prints one occurrence count per fix, as the original script did.

    Returns:
        0 when every fix marker is present afterwards, 1 when at least one is
        missing (the pattern never matched, so that fix was not applied).
    """
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()

    content = apply_patches(content)

    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)

    print('Done! Verifying...')
    missing = []
    for label, marker in _CHECKS:
        count = content.count(marker)
        print(f'{label}:', count)
        if count == 0:
            missing.append(label)

    if missing:
        # A zero count used to be easy to overlook; make the failure explicit.
        print('Not applied: ' + ', '.join(missing), file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())
|