Spaces:
Sleeping
Sleeping
Pramod Basavaraj Menasi committed on
Commit ·
6bb9ecc
1
Parent(s): 3cdafd1
fix: add 3 tasks with graders and grade endpoint
Browse files
- server/app.py +23 -1
- server/incidentops_env_environment.py +13 -10
server/app.py
CHANGED
|
@@ -32,7 +32,9 @@ Usage:
|
|
| 32 |
python -m server.app
|
| 33 |
"""
|
| 34 |
from __future__ import annotations
|
|
|
|
| 35 |
from openenv.core.env_server.http_server import create_app
|
|
|
|
| 36 |
try:
|
| 37 |
from ..models import IncidentopsAction, IncidentopsObservation
|
| 38 |
from incidentops_env_environment import IncidentopsEnvironment
|
|
@@ -40,17 +42,37 @@ except Exception:
|
|
| 40 |
from models import IncidentopsAction, IncidentopsObservation
|
| 41 |
from server.incidentops_env_environment import IncidentopsEnvironment
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
app = create_app(
|
| 44 |
-
|
| 45 |
IncidentopsAction,
|
| 46 |
IncidentopsObservation,
|
| 47 |
env_name="incidentops_env",
|
| 48 |
max_concurrent_envs=1,
|
| 49 |
)
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def main(host: str = "0.0.0.0", port: int = 7860) -> None:
    """Serve the module-level ``app`` with uvicorn.

    Args:
        host: Interface address passed to uvicorn.
        port: TCP port passed to uvicorn.
    """
    # uvicorn is imported lazily so merely importing this module does not need it.
    from uvicorn import run as serve

    serve(app, host=host, port=port)
|
| 54 |
|
|
|
|
| 55 |
# Script entry point: start the server with the default host and port when
# this module is executed directly (e.g. ``python -m server.app``).
if __name__ == "__main__":
    main()
|
|
|
|
| 32 |
python -m server.app
|
| 33 |
"""
|
| 34 |
from __future__ import annotations
|
| 35 |
+
from fastapi import HTTPException, Request
|
| 36 |
from openenv.core.env_server.http_server import create_app
|
| 37 |
+
|
| 38 |
try:
|
| 39 |
from ..models import IncidentopsAction, IncidentopsObservation
|
| 40 |
from incidentops_env_environment import IncidentopsEnvironment
|
|
|
|
| 42 |
from models import IncidentopsAction, IncidentopsObservation
|
| 43 |
from server.incidentops_env_environment import IncidentopsEnvironment
|
| 44 |
|
| 45 |
+
|
| 46 |
+
# Single environment instance shared by the create_app() routes and the
# /grade endpoint below, so both operate on the same object.
_shared_env = IncidentopsEnvironment()

app = create_app(
    lambda: _shared_env,  # factory that always returns the shared instance
    IncidentopsAction,
    IncidentopsObservation,
    env_name="incidentops_env",
    max_concurrent_envs=1,
)
|
| 56 |
|
| 57 |
+
|
| 58 |
+
@app.post("/grade")
@app.get("/grade")
async def grade_endpoint():
    """Return the grading result produced by the shared environment.

    Registered for both GET and POST on ``/grade``. The handler delegates to
    ``_shared_env.grade()`` and returns its result unchanged.

    Raises:
        HTTPException: 400 when ``grade()`` raises ``AssertionError``, which is
            reported to the client as "no active episode"; 500 carrying the
            exception text for any other failure.
    """
    try:
        return _shared_env.grade()
    except AssertionError as exc:
        # NOTE(review): this path depends on an ``assert`` inside grade()
        # (presumably the snapshot check). Asserts are stripped under
        # ``python -O``, in which case the failure would surface as a 500.
        raise HTTPException(
            status_code=400,
            detail="No active episode. Call /reset first.",
        ) from exc
    except Exception as exc:
        # Chain the original error so server-side tracebacks keep the cause.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
|
| 70 |
+
|
| 71 |
+
|
| 72 |
def main(host: str = "0.0.0.0", port: int = 7860) -> None:
    """Serve the module-level ``app`` with uvicorn.

    Args:
        host: Interface address passed to uvicorn.
        port: TCP port passed to uvicorn.
    """
    # uvicorn is imported lazily so merely importing this module does not need it.
    from uvicorn import run as serve

    serve(app, host=host, port=port)
|
| 75 |
|
| 76 |
+
|
| 77 |
# Script entry point: start the server with the default host and port when
# this module is executed directly (e.g. ``python -m server.app``).
if __name__ == "__main__":
    main()
|
server/incidentops_env_environment.py
CHANGED
|
@@ -281,29 +281,32 @@ class IncidentopsEnvironment(Environment):
|
|
| 281 |
assert self._snapshot is not None
|
| 282 |
s = self._snapshot
|
| 283 |
|
| 284 |
-
|
| 285 |
-
total_steps = s.step_count or 1
|
| 286 |
sla_ok = s.step_count <= s.sla_steps
|
| 287 |
correct_actions = sum(
|
| 288 |
1 for a in s.action_history if a in s.correct_action_sequence
|
| 289 |
)
|
| 290 |
correctness_ratio = correct_actions / max(len(s.correct_action_sequence), 1)
|
| 291 |
|
|
|
|
|
|
|
|
|
|
| 292 |
if s.resolved and sla_ok:
|
| 293 |
-
score = min(1.0, 0.5 + 0.
|
| 294 |
elif s.resolved:
|
| 295 |
score = min(0.6, 0.3 + 0.3 * correctness_ratio)
|
| 296 |
else:
|
| 297 |
score = max(0.0, 0.1 * correctness_ratio)
|
| 298 |
|
| 299 |
return {
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
|
|
|
| 307 |
}
|
| 308 |
@property
|
| 309 |
def state(self) -> State:
|
|
|
|
| 281 |
assert self._snapshot is not None
|
| 282 |
s = self._snapshot
|
| 283 |
|
| 284 |
+
total_steps = max(s.step_count, 1) # ✅ used below
|
|
|
|
| 285 |
sla_ok = s.step_count <= s.sla_steps
|
| 286 |
correct_actions = sum(
|
| 287 |
1 for a in s.action_history if a in s.correct_action_sequence
|
| 288 |
)
|
| 289 |
correctness_ratio = correct_actions / max(len(s.correct_action_sequence), 1)
|
| 290 |
|
| 291 |
+
# ✅ efficiency bonus — fewer steps = better score
|
| 292 |
+
efficiency_bonus = max(0.0, (s.sla_steps - total_steps) / s.sla_steps)
|
| 293 |
+
|
| 294 |
if s.resolved and sla_ok:
|
| 295 |
+
score = min(1.0, 0.5 + 0.3 * correctness_ratio + 0.2 * efficiency_bonus)
|
| 296 |
elif s.resolved:
|
| 297 |
score = min(0.6, 0.3 + 0.3 * correctness_ratio)
|
| 298 |
else:
|
| 299 |
score = max(0.0, 0.1 * correctness_ratio)
|
| 300 |
|
| 301 |
return {
|
| 302 |
+
"score": round(score, 4),
|
| 303 |
+
"success": s.resolved and sla_ok,
|
| 304 |
+
"incident_resolved": s.resolved,
|
| 305 |
+
"steps_taken": s.step_count,
|
| 306 |
+
"sla_met": sla_ok,
|
| 307 |
+
"efficiency_bonus": round(efficiency_bonus, 4),
|
| 308 |
+
"wrong_escalations": s.wrong_escalations,
|
| 309 |
+
"evidence_collected": s.evidence_collected,
|
| 310 |
}
|
| 311 |
@property
|
| 312 |
def state(self) -> State:
|