Pramod Basavaraj Menasi committed
Commit · 66ae73a
Parent(s): 2668702
fixed errors
Browse files

- __pycache__/client.cpython-313.pyc +0 -0
- __pycache__/graders.cpython-313.pyc +0 -0
- __pycache__/inference.cpython-313.pyc +0 -0
- __pycache__/models.cpython-313.pyc +0 -0
- client.py +17 -18
- debug.py +69 -0
- graders.py +79 -75
- inference.py +129 -97
- openenv.yaml +3 -1
- server/__pycache__/__init__.cpython-313.pyc +0 -0
- server/__pycache__/app.cpython-313.pyc +0 -0
- server/__pycache__/incidentops_env_environment.cpython-313.pyc +0 -0
- server/incidentops_env_environment.py +41 -52
- test_inference.py +57 -0
__pycache__/client.cpython-313.pyc
CHANGED
Binary files a/__pycache__/client.cpython-313.pyc and b/__pycache__/client.cpython-313.pyc differ

__pycache__/graders.cpython-313.pyc
ADDED
Binary file (3.65 kB).

__pycache__/inference.cpython-313.pyc
ADDED
Binary file (10.1 kB).

__pycache__/models.cpython-313.pyc
CHANGED
Binary files a/__pycache__/models.cpython-313.pyc and b/__pycache__/models.cpython-313.pyc differ
client.py
CHANGED

@@ -1,27 +1,27 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""Incidentops Env Environment Client."""
 from __future__ import annotations
-from typing import Dict
+from typing import Any, Dict
 from openenv.core import EnvClient
 from openenv.core.client_types import StepResult
 from openenv.core.env_server.types import State
 from models import IncidentopsAction, IncidentopsObservation
+
 
 class IncidentopsEnv(EnvClient[IncidentopsAction, IncidentopsObservation, State]):
+
     def _step_payload(self, action: IncidentopsAction) -> Dict:
+        # Return just the model_dump — NOT wrapped in {"action": ...}
+        # The server will do IncidentopsAction.model_validate(payload)
+        return action.model_dump()
+
+    def _reset_payload(self, **kwargs: Any) -> Dict:
+        return kwargs
 
     def _parse_result(self, payload: Dict) -> StepResult[IncidentopsObservation]:
-        obs_data = payload.get("observation",
+        obs_data = payload.get("observation", payload)
+        if isinstance(obs_data, str):
+            import json
+            obs_data = json.loads(obs_data)
+
         observation = IncidentopsObservation(
             alert_summary=obs_data.get("alert_summary", ""),
             severity=obs_data.get("severity", "low"),

@@ -38,8 +38,8 @@ class IncidentopsEnv(EnvClient[IncidentopsAction, IncidentopsObservation, State]
             incident_resolved=obs_data.get("incident_resolved", False),
             wrong_escalations=obs_data.get("wrong_escalations", 0),
             metadata=obs_data.get("metadata", {}),
-            reward=payload.get("reward", 0.0),
-            done=payload.get("done", False),
+            reward=payload.get("reward", obs_data.get("reward", 0.0)),
+            done=payload.get("done", obs_data.get("done", False)),
         )
         return StepResult(
             observation=observation,

@@ -51,5 +51,4 @@ class IncidentopsEnv(EnvClient[IncidentopsAction, IncidentopsObservation, State]
         return State(
             episode_id=payload.get("episode_id"),
             step_count=payload.get("step_count", 0),
-        )
-
+        )
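
For context, a minimal usage sketch of this client. It assumes the openenv EnvClient base class exposes public reset()/step() wrappers around the hooks defined above, and that models.IncidentopsAction takes an `action` field; neither is shown in this diff, so treat the snippet as a hypothetical driver rather than part of the commit.

    # Hypothetical driver; EnvClient.reset()/step() and the IncidentopsAction
    # constructor signature are assumptions, only the payload hooks are in this commit.
    from client import IncidentopsEnv
    from models import IncidentopsAction

    env = IncidentopsEnv(base_url="http://localhost:8000")
    result = env.reset(task_id="incident_easy")
    result = env.step(IncidentopsAction(action="rollback_deploy"))
    print(result.observation.incident_resolved, result.observation.reward)
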
debug.py
ADDED

@@ -0,0 +1,69 @@
+import sys
+print("1. Script started", flush=True)
+
+print("2. Testing imports...", flush=True)
+
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+    print("3. dotenv OK", flush=True)
+except Exception as e:
+    print(f"3. dotenv error: {e}", flush=True)
+
+try:
+    import httpx
+    print("4. httpx OK", flush=True)
+except Exception as e:
+    print(f"4. httpx FAILED: {e}", flush=True)
+    sys.exit(1)
+
+try:
+    from openai import OpenAI
+    print("5. openai OK", flush=True)
+except Exception as e:
+    print(f"5. openai FAILED: {e}", flush=True)
+    sys.exit(1)
+
+import os
+print(f"6. HF_TOKEN={'set' if os.getenv('HF_TOKEN') else 'missing'}", flush=True)
+
+print("7. Testing server...", flush=True)
+try:
+    r = httpx.get("http://localhost:8000/tasks", timeout=5.0)
+    print(f"8. Server response: {r.status_code}", flush=True)
+except Exception as e:
+    print(f"8. Server error: {e}", flush=True)
+    sys.exit(1)
+
+print("9. Testing reset...", flush=True)
+try:
+    r = httpx.post("http://localhost:8000/reset", json={"task_id": "incident_easy"}, timeout=5.0)
+    print(f"10. Reset status: {r.status_code}", flush=True)
+    data = r.json()
+    obs = data.get("observation", data)
+    print(f"11. Alert: {obs.get('alert_summary', 'N/A')[:50]}", flush=True)
+except Exception as e:
+    print(f"10. Reset error: {e}", flush=True)
+    sys.exit(1)
+
+print("12. Testing step...", flush=True)
+try:
+    r = httpx.post(
+        "http://localhost:8000/step",
+        json={"action": {"action": "rollback_deploy"}},
+        timeout=5.0,
+    )
+    print(f"13. Step status: {r.status_code}", flush=True)
+    print(f"14. Step body: {r.text[:200]}", flush=True)
+except Exception as e:
+    print(f"13. Step error: {e}", flush=True)
+
+print("15. Testing grade...", flush=True)
+try:
+    r = httpx.get("http://localhost:8000/grade", params={"task_id": "incident_easy"}, timeout=5.0)
+    print(f"16. Grade status: {r.status_code}", flush=True)
+    print(f"17. Grade body: {r.text[:200]}", flush=True)
+except Exception as e:
+    print(f"16. Grade error: {e}", flush=True)
+
+print("18. ALL DONE", flush=True)
graders.py
CHANGED

@@ -1,82 +1,86 @@
+"""Graders for the Incidentops environment."""
+
 from __future__ import annotations
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 
+class BaseIncidentGrader:
+    """Base grader with shared logic."""
+
+    task_id: str = ""
+    expected_actions: List[str] = []
+    sla_steps: int = 10
+
+    def grade(self, trajectory: List[Dict[str, Any]]) -> float:
+        """
+        Grade a trajectory of (action, observation) pairs.
+        Returns a score in [0.0, 1.0].
+        """
         if not trajectory:
             return 0.0
+
+        actions_taken = []
+        resolved = False
+        wrong_escalations = 0
+
+        for entry in trajectory:
+            action = entry.get("action", "")
+            if isinstance(action, dict):
+                action = action.get("action", "")
+            actions_taken.append(action)
+
+            obs = entry.get("observation", {})
+            if isinstance(obs, dict):
+                resolved = obs.get("incident_resolved", False)
+
+        total_steps = len(actions_taken)
+
+        # Correctness: how many correct actions were taken
+        correct_actions = sum(
+            1 for a in actions_taken if a in self.expected_actions
+        )
+        correctness_ratio = correct_actions / max(len(self.expected_actions), 1)
+
+        # Efficiency bonus
+        efficiency_bonus = max(0.0, (self.sla_steps - total_steps) / self.sla_steps)
+
+        sla_ok = total_steps <= self.sla_steps
+
+        if resolved and sla_ok:
+            score = min(1.0, 0.5 + 0.3 * correctness_ratio + 0.2 * efficiency_bonus)
+        elif resolved:
+            score = min(0.6, 0.3 + 0.3 * correctness_ratio)
+        else:
+            score = max(0.0, 0.1 * correctness_ratio)
+
+        return round(score, 4)
+
+
+class IncidentEasyGrader(BaseIncidentGrader):
+    task_id = "incident_easy"
+    expected_actions = ["rollback_deploy", "resolve_incident"]
+    sla_steps = 5
+
+
+class IncidentMediumGrader(BaseIncidentGrader):
     task_id = "incident_medium"
+    expected_actions = [
+        "request_logs",
+        "query_dependencies",
+        "escalate_db_team",
+        "restart_service",
+        "resolve_incident",
+    ]
+    sla_steps = 8
-
-    if not resolved:
-        return max(0.0, 0.1 * (len([a for a in actions if a in correct_seq]) / max(len(correct_seq), 1)))
-
-    sla_ok = steps <= sla
-    correctness = sum(1 for a in actions if a in correct_seq) / len(correct_seq)
-
-    if sla_ok:
-        return round(min(1.0, 0.5 + 0.5 * correctness), 4)
-    return round(min(0.6, 0.3 + 0.3 * correctness), 4)
-
-
-class IncidentHardGrader:
-    """Grader for hard task: multi_service_root_cause"""
-
+
+
+class IncidentHardGrader(BaseIncidentGrader):
     task_id = "incident_hard"
-    sla = 12
-    correct_seq = ["query_region_health", "query_dns_status", "escalate_network_team", "broadcast_status_page", "resolve_incident"]
-
-    if not resolved:
-        return max(0.0, 0.1 * (len([a for a in actions if a in correct_seq]) / max(len(correct_seq), 1)))
-
-    sla_ok = steps <= sla
-    correctness = sum(1 for a in actions if a in correct_seq) / len(correct_seq)
-
-    if sla_ok:
-        return round(min(1.0, 0.5 + 0.5 * correctness), 4)
-    return round(min(0.6, 0.3 + 0.3 * correctness), 4)
+    expected_actions = [
+        "query_region_health",
+        "query_dns_status",
+        "escalate_network_team",
+        "broadcast_status_page",
+        "resolve_incident",
+    ]
+    sla_steps = 12
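
The shared scoring formula is easiest to see on a concrete trajectory. A short self-contained check against the easy grader, using only the code introduced above (the numbers follow directly from the formula):

    # Worked example with IncidentEasyGrader: two correct actions, resolved within the SLA.
    from graders import IncidentEasyGrader

    trajectory = [
        {"action": {"action": "rollback_deploy"}, "observation": {"incident_resolved": False}},
        {"action": {"action": "resolve_incident"}, "observation": {"incident_resolved": True}},
    ]
    grader = IncidentEasyGrader()
    # correctness_ratio = 2/2 = 1.0, efficiency_bonus = (5 - 2)/5 = 0.6,
    # so score = min(1.0, 0.5 + 0.3*1.0 + 0.2*0.6) = 0.92
    print(grader.grade(trajectory))  # 0.92
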
inference.py
CHANGED

@@ -1,149 +1,181 @@
-from __future__ import annotations
-from dotenv import load_dotenv
-import os
-
-load_dotenv()
-import asyncio
 import json
 import os
+import sys
+import traceback
+
+print("[DEBUG] line 6", flush=True)
+
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+print("[DEBUG] line 14", flush=True)
+
+import httpx
+
+print("[DEBUG] line 18", flush=True)
 
 from openai import OpenAI
 
-from models import IncidentopsAction
+print("[DEBUG] line 22", flush=True)
 
 API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
-MAX_STEPS = int(os.getenv("MAX_STEPS", "12"))
-TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
+BENCHMARK = "incidentops_env"
+TASK_IDS = ["incident_easy", "incident_medium", "incident_hard"]
 ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
+MAX_STEPS = 12
+TEMPERATURE = 0.2
 
-You are an incident-response policy.
-Choose exactly one action from the environment's available actions.
-Prefer investigation when confidence is low.
-Prefer mitigation or escalation when evidence points to a cause.
-Return only the action string.
-""".strip()
+print("[DEBUG] line 33", flush=True)
 
 
-def log_start(task
+def log_start(task, env, model):
     print(f"[START] task={task} env={env} model={model}", flush=True)
 
 
-def log_step(step
-    )
+def log_step(step, action, reward, done, error):
+    err = error if error else "null"
+    d = str(done).lower()
+    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={d} error={err}", flush=True)
 
 
-def log_end(success
+def log_end(success, steps, score, rewards):
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
-    print(
-        f"[END] success={str(success).lower()} steps={steps} score={score:.2f} rewards={rewards_str}",
-        flush=True,
-    )
+    print(f"[END] success={str(success).lower()} steps={steps} score={score:.2f} rewards={rewards_str}", flush=True)
 
 
-def choose_action(
-    available = obs.available_actions
+def choose_action(obs):
+    available = obs.get("available_actions", [])
+    logs_available = obs.get("logs_available", False)
+    likely_cause = obs.get("likely_cause", "unknown")
+
     if not available:
         return "resolve_incident"
-
-    prompt = {
-        "alert_summary": obs.alert_summary,
-        "severity": obs.severity,
-        "likely_cause": obs.likely_cause,
-        "hf_confidence": obs.hf_confidence,
-        "logs_available": obs.logs_available,
-        "log_snippet": obs.log_snippet,
-        "services_affected": obs.services_affected,
-        "elapsed_steps": obs.elapsed_steps,
-        "sla_steps_remaining": obs.sla_steps_remaining,
-        "action_history": obs.action_history,
-        "available_actions": available,
-    }
-
-    response = client.chat.completions.create(
-        model=MODEL_NAME,
-        messages=[
-            {"role": "system", "content": SYSTEM_PROMPT},
-            {"role": "user", "content": json.dumps(prompt)},
-        ],
-        temperature=TEMPERATURE,
-        max_tokens=20,
-    )
-    text = (response.choices[0].message.content or "").strip().splitlines()[0].strip()
-
-    if text in available:
-        return text
-
-    # fallback heuristics
-    if not obs.logs_available and "request_logs" in available:
+    if not logs_available and "request_logs" in available:
         return "request_logs"
+    if likely_cause == "bad_deployment" and "rollback_deploy" in available:
+        return "rollback_deploy"
+    if likely_cause == "dependency_issue" and "query_dependencies" in available:
         return "query_dependencies"
+    if likely_cause == "ambiguous" and "query_region_health" in available:
        return "query_region_health"
+    if likely_cause == "dns_issue" and "query_dns_status" in available:
+        return "query_dns_status"
+    if likely_cause == "db_timeout" and "escalate_db_team" in available:
+        return "escalate_db_team"
+    if likely_cause == "dns_issue" and "escalate_network_team" in available:
+        return "escalate_network_team"
+    if likely_cause == "dns_issue" and "broadcast_status_page" in available:
+        return "broadcast_status_page"
+    if "restart_service" in available and likely_cause in ("db_timeout", "bad_deployment"):
+        return "restart_service"
+    if "resolve_incident" in available:
        return "resolve_incident"
-    return available[0]
+    return available[0] if available else "resolve_incident"
 
 
+def extract_obs(data):
+    if "observation" in data:
+        obs = data["observation"]
+    else:
+        obs = data
+    if isinstance(obs, str):
+        obs = json.loads(obs)
+    return obs
 
 
-client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
-env = await IncidentopsEnv.from_docker_image(os.getenv("IMAGE_NAME")) if os.getenv("IMAGE_NAME") else IncidentopsEnv(base_url=ENV_URL)
-
+def run_task(http, task_id):
+    print(f"[DEBUG] Starting task: {task_id}", flush=True)
+    rewards = []
     steps_taken = 0
     success = False
     score = 0.0
 
-    log_start(
+    log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
 
     try:
+        r = http.post(f"{ENV_URL}/reset", json={"task_id": task_id}, timeout=30.0)
+        r.raise_for_status()
+        obs = extract_obs(r.json())
+        print(f"[DEBUG] Reset OK: cause={obs.get('likely_cause')}", flush=True)
+
+        finished = obs.get("done", False) or obs.get("incident_resolved", False)
 
         for step in range(1, MAX_STEPS + 1):
-            if
+            if finished:
                 break
 
-            action_name = choose_action(
+            action_name = choose_action(obs)
+            print(f"[DEBUG] Step {step}: {action_name}", flush=True)
+
+            r = http.post(
+                f"{ENV_URL}/step",
+                json={"action": {"action": action_name}},
+                timeout=30.0,
+            )
+            r.raise_for_status()
+            step_data = r.json()
+            obs = extract_obs(step_data)
+
+            reward = float(step_data.get("reward", obs.get("reward", 0.0)))
+            finished = bool(
+                step_data.get("done", obs.get("done", False))
+                or obs.get("incident_resolved", False)
+            )
 
             rewards.append(reward)
             steps_taken = step
-            log_step(step, action_name, reward,
+            log_step(step, action_name, reward, finished, None)
 
+        r = http.get(f"{ENV_URL}/grade", params={"task_id": task_id}, timeout=30.0)
+        r.raise_for_status()
+        grade = r.json()
+        score = float(grade.get("score", 0.0))
+        success = bool(grade.get("success", False))
+        print(f"[DEBUG] Grade: {grade}", flush=True)
 
+    except Exception as e:
+        print(f"[DEBUG] Error: {e}", flush=True)
+        traceback.print_exc()
 
     finally:
-        try:
-            await env.close()
-        except Exception:
-            pass
         log_end(success, steps_taken, score, rewards)
 
 
+print("[DEBUG] line 137 - about to define main", flush=True)
+
+
+def main():
+    print(f"[DEBUG] main() called", flush=True)
+    print(f"[DEBUG] ENV_URL={ENV_URL}", flush=True)
+
+    http = httpx.Client()
+
+    try:
+        r = http.get(f"{ENV_URL}/tasks", timeout=10.0)
+        print(f"[DEBUG] Server OK: {r.status_code}", flush=True)
+    except Exception as e:
+        print(f"[ERROR] Server not running: {e}", flush=True)
+        return
+
+    for task_id in TASK_IDS:
+        run_task(http, task_id)
+
+    http.close()
+    print("[DEBUG] Done!", flush=True)
+
+
+print("[DEBUG] line 160 - about to check name", flush=True)
+print(f"[DEBUG] name = {__name__}", flush=True)
+
 if __name__ == "__main__":
+    print("[DEBUG] entering main()", flush=True)
+    try:
+        main()
+    except Exception as e:
+        print(f"[FATAL] {e}", flush=True)
+        traceback.print_exc()
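
The fallback policy in choose_action is purely rule-based, so it can be checked offline with a hand-written observation dict; no server or model call is needed. The only assumption is that httpx, openai, and (optionally) python-dotenv are installed, since importing inference pulls them in:

    # Quick offline check of the heuristic policy defined above.
    from inference import choose_action

    obs = {
        "available_actions": ["request_logs", "rollback_deploy", "resolve_incident"],
        "logs_available": False,
        "likely_cause": "bad_deployment",
    }
    print(choose_action(obs))   # "request_logs" (missing logs are fetched first)
    obs["logs_available"] = True
    print(choose_action(obs))   # "rollback_deploy" (matches likely_cause)
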
openenv.yaml
CHANGED

@@ -3,7 +3,7 @@ name: incidentops_env
 type: space
 runtime: fastapi
 app: server.app:app
-port: 8000
+port: 8000
 
 tasks:
   - id: incident_easy

@@ -18,6 +18,7 @@ tasks:
 
   - id: incident_medium
     name: "Dependency Failure (Medium)"
+    description: "Investigate and resolve cascading failures caused by database timeouts affecting multiple services."
     reset_kwargs:
       task_id: incident_medium
     grader:

@@ -27,6 +28,7 @@ tasks:
 
   - id: incident_hard
     name: "Multi-Service Root Cause (Hard)"
+    description: "Diagnose EU checkout failures with ambiguous signals across auth, payment, and checkout services caused by DNS issues."
     reset_kwargs:
       task_id: incident_hard
     grader:
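
Each task's reset_kwargs carry the task_id that the environment's reset() expects, so any of the three scenarios can be started against the FastAPI app on port 8000. A minimal sketch, using the same endpoint shapes as debug.py above:

    # Start the medium scenario over HTTP; mirrors the reset_kwargs in openenv.yaml.
    import httpx

    resp = httpx.post("http://localhost:8000/reset", json={"task_id": "incident_medium"}, timeout=10.0)
    data = resp.json()
    obs = data.get("observation", data)
    print(obs.get("alert_summary", ""), obs.get("severity", ""))
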
server/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (326 Bytes).

server/__pycache__/app.cpython-313.pyc
ADDED
Binary file (3.82 kB).

server/__pycache__/incidentops_env_environment.cpython-313.pyc
ADDED
Binary file (13.3 kB).
server/incidentops_env_environment.py
CHANGED

@@ -1,16 +1,3 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-"""
-Incidentops Env Environment Implementation.
-
-A simple test environment that echoes back messages sent to it.
-Perfect for testing HTTP server infrastructure.
-"""
-
 from __future__ import annotations
 
 from dataclasses import dataclass, field

@@ -21,8 +8,8 @@ from openenv.core.env_server.interfaces import Environment
 from openenv.core.env_server.types import State
 
 try:
-    from
-except
+    from models import IncidentopsAction, IncidentopsObservation
+except ImportError:
     from models import IncidentopsAction, IncidentopsObservation
 
 

@@ -66,16 +53,15 @@ SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
                 "request_logs",
                 "rollback_deploy",
                 "restart_service",
-                "resolve_incident"
+                "resolve_incident",
             ],
             "correct_action_sequence": [
                 "rollback_deploy",
-                "resolve_incident"
+                "resolve_incident",
             ],
             "sla_steps": 5,
         }
     ],
-
     "incident_medium": [
         {
             "scenario_id": "medium_001",

@@ -101,12 +87,11 @@ SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
                 "query_dependencies",
                 "escalate_db_team",
                 "restart_service",
-                "resolve_incident"
+                "resolve_incident",
             ],
             "sla_steps": 8,
         }
     ],
-
     "incident_hard": [
         {
             "scenario_id": "hard_001",

@@ -146,16 +131,12 @@ SCENARIOS: Dict[str, List[Dict[str, Any]]] = {
 class IncidentopsEnvironment(Environment):
     SUPPORTS_CONCURRENT_SESSIONS: bool = True
 
-    def
+    def __init__(self):
         self._state = State(episode_id=str(uuid4()), step_count=0)
         self._snapshot: Optional[IncidentSnapshot] = None
         self._difficulty = "easy"
         self._last_observation: Optional[IncidentopsObservation] = None
 
-    def _pick_scenario(self, difficulty: str) -> Dict[str, Any]:
-        scenarios = SCENARIOS.get(difficulty, SCENARIOS["easy"])
-        return scenarios[0]
-
     def _build_observation(self) -> IncidentopsObservation:
         assert self._snapshot is not None
         remaining = max(self._snapshot.sla_steps - self._snapshot.step_count, 0)

@@ -189,7 +170,7 @@ class IncidentopsEnvironment(Environment):
         assert self._snapshot is not None
         s = self._snapshot
 
-        reward = -0.05
+        reward = -0.05
 
         if s.action_history.count(action) > 1:
             reward -= 0.2

@@ -245,7 +226,9 @@ class IncidentopsEnvironment(Environment):
 
         if action == "resolve_incident":
             if s.resolved or s.hidden_truth in {"bad_deployment", "db_timeout", "dns_issue"}:
-                if s.step_count <= s.sla_steps and (
+                if s.step_count <= s.sla_steps and (
+                    s.evidence_collected or s.team_engaged is not None or s.hidden_truth == "bad_deployment"
+                ):
                     reward += 1.5
                     s.resolved = True
                 else:

@@ -258,38 +241,43 @@ class IncidentopsEnvironment(Environment):
 
         return reward
 
-    def reset(
-        episode_id: str = None,
-        task_id: str = "incident_easy",
-        **kwargs
-    ) -> IncidentopsObservation:
-
-        # ✅ Pick scenario based on task_id (not difficulty)
+    def reset(self, episode_id=None, task_id="incident_easy", **kwargs):
+        print(f"[ENV] reset called: task_id={task_id}", flush=True)
         scenarios = SCENARIOS.get(task_id, SCENARIOS["incident_easy"])
         scenario = scenarios[0]
 
-        self._state = State(
-            episode_id=episode_id or str(uuid4()),
-            step_count=0
-        )
-
-        # ✅ Load scenario into snapshot
+        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
         self._snapshot = IncidentSnapshot(**scenario)
         self._snapshot.action_history = []
 
-        # ✅ Build first observation
         self._last_observation = self._build_observation()
-
         return self._last_observation
 
-    def step(self, action
+    def step(self, action) -> IncidentopsObservation:
+        """Handle step - accept both IncidentopsAction objects and dicts."""
+        print(f"[ENV] step called: action={action}, type={type(action)}", flush=True)
+
+        # Extract action string from whatever format we receive
+        if isinstance(action, IncidentopsAction):
+            action_name = action.action
+        elif isinstance(action, dict):
+            action_name = action.get("action", "resolve_incident")
+        elif isinstance(action, str):
+            action_name = action
+        else:
+            action_name = str(action)
+
+        print(f"[ENV] action_name={action_name}", flush=True)
+
+        if self._snapshot is None:
+            print("[ENV] ERROR: No snapshot! Calling reset first.", flush=True)
+            self.reset()
+
         assert self._snapshot is not None
+
         self._snapshot.step_count += 1
         self._state.step_count = self._snapshot.step_count
 
-        action_name = action.action
         self._snapshot.action_history.append(action_name)
 
         reward = self._calc_reward(action_name)

@@ -303,25 +291,25 @@ class IncidentopsEnvironment(Environment):
             "last_action": action_name,
             "last_reward": reward,
         }
+
         if done:
             grade_result = self.grade()
-            obs.grader_score = grade_result["score"]
+            obs.metadata["grader_score"] = grade_result["score"]
 
         self._last_observation = obs
+        print(f"[ENV] step done: reward={reward:.2f}, done={done}", flush=True)
         return obs
+
     def grade(self) -> dict:
-        """Called by the OpenEnv validator to score a completed episode."""
         assert self._snapshot is not None
         s = self._snapshot
 
-        total_steps = max(s.step_count, 1)
+        total_steps = max(s.step_count, 1)
         sla_ok = s.step_count <= s.sla_steps
         correct_actions = sum(
             1 for a in s.action_history if a in s.correct_action_sequence
         )
         correctness_ratio = correct_actions / max(len(s.correct_action_sequence), 1)
-
-        # ✅ efficiency bonus — fewer steps = better score
         efficiency_bonus = max(0.0, (s.sla_steps - total_steps) / s.sla_steps)
 
         if s.resolved and sla_ok:

@@ -341,6 +329,7 @@ class IncidentopsEnvironment(Environment):
             "wrong_escalations": s.wrong_escalations,
             "evidence_collected": s.evidence_collected,
         }
+
     @property
     def state(self) -> State:
-        return self._state
+        return self._state
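
Because step() now normalizes IncidentopsAction objects, dicts, and bare strings, the environment can also be exercised in-process without the HTTP layer. A minimal sketch, assuming it is run from the repo root so the absolute `models` import resolves:

    # In-process smoke test of the environment; no FastAPI server involved.
    from server.incidentops_env_environment import IncidentopsEnvironment

    env = IncidentopsEnvironment()
    obs = env.reset(task_id="incident_easy")
    obs = env.step({"action": "rollback_deploy"})   # dict form, as sent by inference.py
    obs = env.step("resolve_incident")              # a bare string is accepted too
    print(env.grade())
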
test_inference.py
ADDED

@@ -0,0 +1,57 @@
+import sys
+print("Script started", flush=True)
+print(f"Python: {sys.executable}", flush=True)
+
+try:
+    import httpx
+    print("httpx imported OK", flush=True)
+except ImportError as e:
+    print(f"httpx import FAILED: {e}", flush=True)
+
+try:
+    from openai import OpenAI
+    print("openai imported OK", flush=True)
+except ImportError as e:
+    print(f"openai import FAILED: {e}", flush=True)
+
+try:
+    from models import IncidentopsAction, IncidentopsObservation
+    print("models imported OK", flush=True)
+except ImportError as e:
+    print(f"models import FAILED: {e}", flush=True)
+
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+    print("dotenv loaded OK", flush=True)
+except ImportError:
+    print("dotenv not available (OK)", flush=True)
+
+import os
+print(f"HF_TOKEN set: {bool(os.getenv('HF_TOKEN'))}", flush=True)
+print(f"API_KEY set: {bool(os.getenv('API_KEY'))}", flush=True)
+
+import httpx
+print("\nTesting server connection...", flush=True)
+try:
+    r = httpx.get("http://localhost:8000/tasks", timeout=5.0)
+    print(f" /tasks status: {r.status_code}", flush=True)
+    print(f" /tasks body: {r.text}", flush=True)
+except Exception as e:
+    print(f" Server error: {e}", flush=True)
+
+try:
+    r = httpx.post("http://localhost:8000/reset", json={"task_id": "incident_easy"}, timeout=5.0)
+    print(f" /reset status: {r.status_code}", flush=True)
+    print(f" /reset body: {r.text[:300]}", flush=True)
+except Exception as e:
+    print(f" Reset error: {e}", flush=True)
+
+try:
+    r = httpx.post("http://localhost:8000/step", json={"action": "rollback_deploy"}, timeout=5.0)
+    print(f" /step status: {r.status_code}", flush=True)
+    print(f" /step body: {r.text[:300]}", flush=True)
+except Exception as e:
+    print(f" Step error: {e}", flush=True)
+
+print("\nAll checks done!", flush=True)
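
Note that this script posts the action as a bare string ({"action": "rollback_deploy"}), while debug.py nests it ({"action": {"action": "rollback_deploy"}}). Assuming the FastAPI app forwards the request's action field straight to IncidentopsEnvironment.step(), both shapes are covered by the normalization added in this commit. A compact, hypothetical restatement of that rule (the helper name is illustrative, not part of the commit):

    # Mirrors the action handling in IncidentopsEnvironment.step(): dict, string, or action object.
    def extract_action_name(action):
        if isinstance(action, dict):
            return action.get("action", "resolve_incident")
        if isinstance(action, str):
            return action
        return getattr(action, "action", str(action))

    print(extract_action_name({"action": "rollback_deploy"}))  # "rollback_deploy"
    print(extract_action_name("rollback_deploy"))              # "rollback_deploy"
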