Spaces:

menasi11
/

incidentops-env

Sleeping

Pramod Basavaraj Menasi committed on Apr 7

Commit

3cdafd1

1 Parent(s): d574597

fix: add tasks with graders to openenv.yaml and grade() method

Files changed (2) hide show

openenv.yaml CHANGED Viewed

@@ -5,3 +5,33 @@ runtime: fastapi
 app: server.app:app
 port: 8000

 app: server.app:app
 port: 8000
+tasks:
+  - id: incident_easy
+    name: "Single Service Outage (Easy)"
+    description: "Diagnose and resolve a payment-service latency spike caused by a bad deployment."
+    difficulty: easy
+    reset_kwargs:
+      difficulty: easy
+    grader:
+      type: env_reward
+      success_threshold: 0.1
+  - id: incident_medium
+    name: "Dependency Failure (Medium)"
+    description: "Identify a DB timeout causing API gateway failures with no logs initially available."
+    difficulty: medium
+    reset_kwargs:
+      difficulty: medium
+    grader:
+      type: env_reward
+      success_threshold: 0.1
+  - id: incident_hard
+    name: "Multi-Service Root Cause (Hard)"
+    description: "Trace EU checkout failures across auth, payment, and checkout services to a DNS issue."
+    difficulty: hard
+    reset_kwargs:
+      difficulty: hard
+    grader:
+      type: env_reward
+      success_threshold: 0.1

server/incidentops_env_environment.py CHANGED Viewed

@@ -276,7 +276,35 @@ class IncidentopsEnvironment(Environment):
         }
         self._last_observation = obs
         return obs
     @property
     def state(self) -> State:
         """Return the object currently stored in ``self._state``."""
         return self._state

         }
         self._last_observation = obs
         return obs
+    def grade(self) -> dict:
+        """Called by the OpenEnv validator to score a completed episode."""
+        assert self._snapshot is not None
+        s = self._snapshot
+        # Compute normalized score 0.0 - 1.0
+        total_steps = s.step_count or 1
+        sla_ok = s.step_count <= s.sla_steps
+        correct_actions = sum(
+            1 for a in s.action_history if a in s.correct_action_sequence
+        )
+        correctness_ratio = correct_actions / max(len(s.correct_action_sequence), 1)
+        if s.resolved and sla_ok:
+            score = min(1.0, 0.5 + 0.5 * correctness_ratio)
+        elif s.resolved:
+            score = min(0.6, 0.3 + 0.3 * correctness_ratio)
+        else:
+            score = max(0.0, 0.1 * correctness_ratio)
+        return {
+        "score": round(score, 4),
+        "success": s.resolved and sla_ok,
+        "incident_resolved": s.resolved,
+        "steps_taken": s.step_count,
+        "sla_met": sla_ok,
+        "wrong_escalations": s.wrong_escalations,
+        "evidence_collected": s.evidence_collected,
+        }
     @property
     def state(self) -> State:
         """Return the object currently stored in ``self._state``."""
         return self._state