Spaces:

menasi11
/

incidentops-env

Sleeping

App Files Files Community

Pramod Basavaraj Menasi committed on Apr 11

Commit

2668702

1 Parent(s): e7cfcc2

updated app.py

Browse files

Files changed (5) hide show

inference.py +2 -2
models.py +1 -1
openenv.yaml +4 -6
server/app.py +39 -17
server/incidentops_env_environment.py +41 -13

inference.py CHANGED Viewed

@@ -21,7 +21,7 @@ BENCHMARK = os.getenv("INCIDENTOPS_BENCHMARK", "incidentops_env")
 MAX_STEPS = int(os.getenv("MAX_STEPS", "12"))
 TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
 ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
-DIFFICULTY = os.getenv("DIFFICULTY", "easy")
 SYSTEM_PROMPT = """
 You are an incident-response policy.
@@ -113,7 +113,7 @@ async def main() -> None:
     log_start(TASK_NAME, BENCHMARK, MODEL_NAME)
     try:
-        result = await env.reset(difficulty=DIFFICULTY)
         obs = result.observation
         for step in range(1, MAX_STEPS + 1):

 MAX_STEPS = int(os.getenv("MAX_STEPS", "12"))
 TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
 ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
+TASK_ID = os.getenv("TASK_ID", "incident_easy")
 SYSTEM_PROMPT = """
 You are an incident-response policy.
     log_start(TASK_NAME, BENCHMARK, MODEL_NAME)
     try:
+        result = await env.reset(task_id=TASK_ID)
         obs = result.observation
         for step in range(1, MAX_STEPS + 1):

models.py CHANGED Viewed

@@ -37,6 +37,6 @@ class IncidentopsObservation(Observation):
     metadata: Dict[str, Any] = Field(default_factory=dict, description="Extra debug metadata")
     reward: float = Field(default=0.0, description="Reward returned by the last step")
     done: bool = Field(default=False, description="Whether the episode is finished")
-    grader_score: float = Field(default=0.0, description="Grader score 0.0-1.0, set when done=True")

     metadata: Dict[str, Any] = Field(default_factory=dict, description="Extra debug metadata")
     reward: float = Field(default=0.0, description="Reward returned by the last step")
     done: bool = Field(default=False, description="Whether the episode is finished")

openenv.yaml CHANGED Viewed

@@ -3,14 +3,14 @@ name: incidentops_env
 type: space
 runtime: fastapi
 app: server.app:app
-port: 7860
 tasks:
   - id: incident_easy
     name: "Single Service Outage (Easy)"
     description: "Diagnose and resolve a payment-service latency spike caused by a bad deployment."
     reset_kwargs:
-      difficulty: easy
     grader:
       type: class
       module: graders
@@ -18,9 +18,8 @@ tasks:
   - id: incident_medium
     name: "Dependency Failure (Medium)"
-    description: "Identify a DB timeout causing API gateway failures with no logs initially available."
     reset_kwargs:
-      difficulty: medium
     grader:
       type: class
       module: graders
@@ -28,9 +27,8 @@ tasks:
   - id: incident_hard
     name: "Multi-Service Root Cause (Hard)"
-    description: "Trace EU checkout failures across auth, payment, checkout to a DNS issue."
     reset_kwargs:
-      difficulty: hard
     grader:
       type: class
       module: graders

 type: space
 runtime: fastapi
 app: server.app:app
+port: 8000
 tasks:
   - id: incident_easy
     name: "Single Service Outage (Easy)"
     description: "Diagnose and resolve a payment-service latency spike caused by a bad deployment."
     reset_kwargs:
+      task_id: incident_easy
     grader:
       type: class
       module: graders
   - id: incident_medium
     name: "Dependency Failure (Medium)"
     reset_kwargs:
+      task_id: incident_medium
     grader:
       type: class
       module: graders
   - id: incident_hard
     name: "Multi-Service Root Cause (Hard)"
     reset_kwargs:
+      task_id: incident_hard
     grader:
       type: class
       module: graders

server/app.py CHANGED Viewed

@@ -65,22 +65,44 @@ GRADERS = {
 @app.post("/grade")
 async def grade_endpoint(task_id: str = None, request: Request = None):
     try:
-        if task_id and task_id in GRADERS:
-            snapshot = _shared_env._snapshot
-            if snapshot is None:
-                # Return a zero score instead of erroring — validator just needs grader to respond
-                return {"score": 0.0, "success": False, "grader": task_id, "detail": "no active episode"}
-            trajectory = [
-                {"action": a, "observation": {"incident_resolved": snapshot.resolved}}
-                for a in snapshot.action_history
-            ]
-            score = GRADERS[task_id].grade(trajectory)
-            return {"score": score, "success": score >= 0.5, "grader": task_id}
-        # fallback to env's own grade()
-        return _shared_env.grade()
-    except AssertionError:
-        return {"score": 0.0, "success": False, "detail": "no active episode"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -94,7 +116,7 @@ async def list_tasks():
         ]
     }
-def main(host: str = "0.0.0.0", port: int = 7860) -> None:
     import uvicorn
     uvicorn.run(app, host=host, port=port)

 @app.post("/grade")
 async def grade_endpoint(task_id: str = None, request: Request = None):
     try:
+        # ✅ STRICT validation (important)
+        if not task_id or task_id not in GRADERS:
+            return {
+                "score": 0.0,
+                "success": False,
+                "detail": "invalid or missing task_id"
+            }
+        snapshot = _shared_env._snapshot
+        if snapshot is None:
+            return {
+                "score": 0.0,
+                "success": False,
+                "grader": task_id,
+                "detail": "no active episode"
+            }
+        # ✅ Build trajectory
+        trajectory = [
+            {
+                "action": a,
+                "observation": {
+                    "incident_resolved": snapshot.resolved
+                }
+            }
+            for a in snapshot.action_history
+        ]
+        # ✅ Call correct grader
+        score = GRADERS[task_id].grade(trajectory)
+        return {
+            "score": score,
+            "success": score >= 0.5,
+            "grader": task_id
+        }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
         ]
     }
+def main(host: str = "0.0.0.0", port: int = 8000) -> None:
     import uvicorn
     uvicorn.run(app, host=host, port=port)

server/incidentops_env_environment.py CHANGED Viewed

@@ -50,10 +50,10 @@ class IncidentSnapshot:
 SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
-    "easy": [
         {
             "scenario_id": "easy_001",
-            "task": "single_service_outage",
             "alert_text": "SEV-2: payment-service latency high after deploy.",
             "hidden_truth": "bad_deployment",
             "severity": "high",
@@ -62,15 +62,24 @@ SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
             "log_snippet": "deploy at 14:32 UTC caused connection pool exhaustion",
             "likely_cause": "bad_deployment",
             "hf_confidence": 0.92,
-            "available_actions": ["request_logs", "rollback_deploy", "restart_service", "resolve_incident"],
-            "correct_action_sequence": ["rollback_deploy", "resolve_incident"],
             "sla_steps": 5,
         }
     ],
-    "medium": [
         {
             "scenario_id": "medium_001",
-            "task": "dependency_failure",
             "alert_text": "SEV-1: api-gateway 5xx errors; user-profile-service slow; no logs available.",
             "hidden_truth": "db_timeout",
             "severity": "critical",
@@ -87,14 +96,21 @@ SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
                 "restart_service",
                 "resolve_incident",
             ],
-            "correct_action_sequence": ["request_logs", "query_dependencies", "escalate_db_team", "restart_service", "resolve_incident"],
             "sla_steps": 8,
         }
     ],
-    "hard": [
         {
             "scenario_id": "hard_001",
-            "task": "multi_service_root_cause",
             "alert_text": "SEV-1: EU checkout failures. Auth and payment degraded. Logs incomplete.",
             "hidden_truth": "dns_issue",
             "severity": "critical",
@@ -245,15 +261,27 @@ class IncidentopsEnvironment(Environment):
     def reset(
     self,
     episode_id: str = None,
-    difficulty: str = "easy",
     **kwargs
 ) -> IncidentopsObservation:
-        scenario = self._pick_scenario(difficulty)
-        self._difficulty = difficulty
-        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
         self._snapshot = IncidentSnapshot(**scenario)
         self._snapshot.action_history = []
         self._last_observation = self._build_observation()
         return self._last_observation
     def step(self, action: IncidentopsAction) -> IncidentopsObservation:  # type: ignore[override]

 SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
+    "incident_easy": [
         {
             "scenario_id": "easy_001",
+            "task": "incident_easy",
             "alert_text": "SEV-2: payment-service latency high after deploy.",
             "hidden_truth": "bad_deployment",
             "severity": "high",
             "log_snippet": "deploy at 14:32 UTC caused connection pool exhaustion",
             "likely_cause": "bad_deployment",
             "hf_confidence": 0.92,
+            "available_actions": [
+                "request_logs",
+                "rollback_deploy",
+                "restart_service",
+                "resolve_incident"
+            ],
+            "correct_action_sequence": [
+                "rollback_deploy",
+                "resolve_incident"
+            ],
             "sla_steps": 5,
         }
     ],
+    "incident_medium": [
         {
             "scenario_id": "medium_001",
+            "task": "incident_medium",
             "alert_text": "SEV-1: api-gateway 5xx errors; user-profile-service slow; no logs available.",
             "hidden_truth": "db_timeout",
             "severity": "critical",
                 "restart_service",
                 "resolve_incident",
             ],
+            "correct_action_sequence": [
+                "request_logs",
+                "query_dependencies",
+                "escalate_db_team",
+                "restart_service",
+                "resolve_incident"
+            ],
             "sla_steps": 8,
         }
     ],
+    "incident_hard": [
         {
             "scenario_id": "hard_001",
+            "task": "incident_hard",
             "alert_text": "SEV-1: EU checkout failures. Auth and payment degraded. Logs incomplete.",
             "hidden_truth": "dns_issue",
             "severity": "critical",
     def reset(
     self,
     episode_id: str = None,
+    task_id: str = "incident_easy",
     **kwargs
 ) -> IncidentopsObservation:
+        # ✅ Pick scenario based on task_id (not difficulty)
+        scenarios = SCENARIOS.get(task_id, SCENARIOS["incident_easy"])
+        scenario = scenarios[0]
+        # ✅ Initialize state
+        self._state = State(
+            episode_id=episode_id or str(uuid4()),
+            step_count=0
+        )
+        # ✅ Load scenario into snapshot
         self._snapshot = IncidentSnapshot(**scenario)
         self._snapshot.action_history = []
+        # ✅ Build first observation
         self._last_observation = self._build_observation()
         return self._last_observation
     def step(self, action: IncidentopsAction) -> IncidentopsObservation:  # type: ignore[override]