Jayant-Kernel committed on
Commit
f80bdfb
·
1 Parent(s): cdb2e06

fix: match exact [START]/[STEP]/[END] format, fix openenv.yaml app path

Browse files
Files changed (2) hide show
  1. inference.py +54 -50
  2. openenv.yaml +1 -1
inference.py CHANGED
@@ -1,36 +1,25 @@
1
  # inference.py — run this from the project root
2
- """
3
- LLM agent for the CI/CD Failure Diagnosis environment.
4
-
5
- Env vars:
6
- API_BASE_URL OpenAI-compatible base URL (e.g. https://api.openai.com/v1)
7
- MODEL_NAME model to call (e.g. gpt-4o-mini)
8
- HF_TOKEN HuggingFace token — used as API key when running on HF Spaces
9
- ENV_URL running server URL (default: http://localhost:8000)
10
- NUM_EPISODES how many episodes to run (default: 10)
11
- """
12
  import json
13
  import os
14
  import sys
15
- import time
16
 
17
  from openai import OpenAI
18
 
19
  from cicd_diagnosis_env.client import CICDEnv
20
  from cicd_diagnosis_env.models import DiagnoseAction
21
 
22
- API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
23
- MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
24
- HF_TOKEN = os.environ.get("HF_TOKEN") # no default — injected by hackathon runner
25
- ENV_URL = os.environ.get("ENV_URL", "http://localhost:8000")
26
- NUM_EPISODES = int(os.environ.get("NUM_EPISODES", "10"))
 
27
 
28
- # HF_TOKEN is the API key on Spaces; fall back to OPENAI_API_KEY for local dev
29
- _api_key = HF_TOKEN or os.environ.get("OPENAI_API_KEY", "no-key")
30
  llm = OpenAI(api_key=_api_key, base_url=API_BASE_URL)
31
 
32
  _SYSTEM = """You are an expert CI/CD engineer diagnosing pipeline failures.
33
- You will receive a failure log. Respond ONLY with valid JSON (no markdown):
34
  {
35
  "failure_category": "<dependency|config|flaky|code_bug|infra>",
36
  "root_cause": "<concise 1-2 sentence explanation>",
@@ -39,7 +28,21 @@ You will receive a failure log. Respond ONLY with valid JSON (no markdown):
39
  }"""
40
 
41
 
42
- def diagnose(log, summary):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  msg = f"Error summary: {summary}\n\nFull log:\n{log}"
44
  resp = llm.chat.completions.create(
45
  model=MODEL_NAME,
@@ -48,11 +51,8 @@ def diagnose(log, summary):
48
  max_tokens=300,
49
  )
50
  raw = resp.choices[0].message.content.strip()
51
- # strip markdown fences — some models add them even when told not to
52
- # this is a bit fragile but works for the models we're targeting
53
  if raw.startswith("```"):
54
  lines = raw.splitlines()
55
- # drop first line (```json or ```) and last line (```)
56
  raw = "\n".join(lines[1:-1]).strip()
57
  parsed = json.loads(raw)
58
  return DiagnoseAction(
@@ -63,50 +63,54 @@ def diagnose(log, summary):
63
  )
64
 
65
 
66
- def run_episode(env, ep):
67
  obs = env.reset()
68
- print(f"[STEP {ep}.0] reset task_id={obs.task_id} stage={obs.pipeline_stage}")
 
69
 
70
- total = 0.0
71
- for attempt in range(1, 4): # max 3 attempts per episode
 
 
 
 
 
72
  try:
73
- action = diagnose(obs.pipeline_log, obs.error_summary)
 
74
  except Exception as e:
75
- print(f"[STEP {ep}.{attempt}] LLM error: {e}", file=sys.stderr)
 
 
 
 
76
  break
77
 
78
  obs = env.step(action)
79
- total = obs.score
80
- print(
81
- f"[STEP {ep}.{attempt}] "
82
- f"cat={action.failure_category} "
83
- f"score={obs.score:.3f} "
84
- f"done={obs.done}"
85
- )
86
  if obs.done:
 
87
  break
88
 
89
- return total
 
90
 
91
 
92
  def main():
93
- print(f"[START] model={MODEL_NAME} episodes={NUM_EPISODES} env={ENV_URL}")
94
  env = CICDEnv(base_url=ENV_URL)
95
- scores = []
96
- t0 = time.time()
97
-
98
  for ep in range(1, NUM_EPISODES + 1):
99
  try:
100
- s = run_episode(env, ep)
101
- scores.append(s)
102
  except Exception as e:
103
- print(f"[STEP {ep}] episode error: {e}", file=sys.stderr)
104
- scores.append(0.0)
105
-
106
- elapsed = time.time() - t0
107
- avg = sum(scores) / len(scores) if scores else 0.0
108
- print(f"[END] episodes={NUM_EPISODES} avg_score={avg:.4f} elapsed={elapsed:.1f}s")
109
 
110
 
111
- if __name__ == "__main__":
112
  main()
 
1
  # inference.py — run this from the project root
 
 
 
 
 
 
 
 
 
 
2
  import json
3
  import os
4
  import sys
 
5
 
6
  from openai import OpenAI
7
 
8
  from cicd_diagnosis_env.client import CICDEnv
9
  from cicd_diagnosis_env.models import DiagnoseAction
10
 
11
# Runtime configuration, read once at import time from the environment.
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")  # OpenAI-compatible endpoint
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
HF_TOKEN = os.getenv("HF_TOKEN")  # injected on HF Spaces; None in local dev
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")  # CI/CD diagnosis env server
NUM_EPISODES = int(os.getenv("NUM_EPISODES", "10"))
MAX_STEPS = 3  # max diagnose attempts per episode

# HF_TOKEN doubles as the API key on Spaces; fall back to OPENAI_API_KEY locally.
_api_key = HF_TOKEN or os.getenv("OPENAI_API_KEY", "no-key")

llm = OpenAI(api_key=_api_key, base_url=API_BASE_URL)
20
 
21
  _SYSTEM = """You are an expert CI/CD engineer diagnosing pipeline failures.
22
+ Respond ONLY with valid JSON (no markdown):
23
  {
24
  "failure_category": "<dependency|config|flaky|code_bug|infra>",
25
  "root_cause": "<concise 1-2 sentence explanation>",
 
28
  }"""
29
 
30
 
31
def log_start(task, env, model):
    """Emit the ``[START]`` marker line expected by the episode-log parser."""
    line = "[START] task={} env={} model={}".format(task, env, model)
    print(line, flush=True)
33
+
34
+
35
def log_step(step, action, reward, done, error=None):
    """Emit one ``[STEP]`` line in the exact format the log parser matches.

    A falsy *error* (None/empty) is rendered as the literal string ``null``.
    """
    shown_error = error or "null"
    line = "[STEP] step={} action={} reward={:.2f} done={} error={}".format(
        step, action, reward, str(done).lower(), shown_error
    )
    print(line, flush=True)
38
+
39
+
40
def log_end(success, steps, score, rewards):
    """Emit the ``[END]`` summary line; rewards join as comma-separated 2-dp floats."""
    joined_rewards = ",".join("{:.2f}".format(r) for r in rewards)
    line = "[END] success={} steps={} score={:.2f} rewards={}".format(
        str(success).lower(), steps, score, joined_rewards
    )
    print(line, flush=True)
43
+
44
+
45
+ def call_llm(log, summary):
46
  msg = f"Error summary: {summary}\n\nFull log:\n{log}"
47
  resp = llm.chat.completions.create(
48
  model=MODEL_NAME,
 
51
  max_tokens=300,
52
  )
53
  raw = resp.choices[0].message.content.strip()
 
 
54
  if raw.startswith("```"):
55
  lines = raw.splitlines()
 
56
  raw = "\n".join(lines[1:-1]).strip()
57
  parsed = json.loads(raw)
58
  return DiagnoseAction(
 
63
  )
64
 
65
 
66
def run_episode(env, ep_num):
    """Run a single episode: reset the env, diagnose up to MAX_STEPS times.

    Emits the [START]/[STEP]/[END] lines via the log_* helpers and returns
    the episode's final score. *ep_num* is accepted for interface
    compatibility but does not appear in the log output.
    """
    obs = env.reset()
    log_start(task=f"task{obs.task_id}", env="cicd_diagnosis_env", model=MODEL_NAME)

    rewards = []
    steps_taken = 0
    succeeded = False
    final_score = 0.0

    for step_no in range(1, MAX_STEPS + 1):
        try:
            action = call_llm(obs.pipeline_log, obs.error_summary)
        except Exception as exc:
            # An LLM failure ends the episode with a single zero-reward step.
            log_step(step_no, "diagnose(error)", 0.0, True, str(exc))
            rewards.append(0.0)
            steps_taken = step_no
            break

        obs = env.step(action)
        # NOTE(review): assumes obs.reward may be None on some steps — treated as 0.
        step_reward = obs.reward if obs.reward is not None else 0.0
        rewards.append(step_reward)
        steps_taken = step_no
        final_score = obs.score

        log_step(
            step_no,
            f"diagnose(category={action.failure_category})",
            step_reward,
            obs.done,
            None,
        )

        if obs.done:
            # Episode counts as a success only if the final score clears 0.5.
            succeeded = final_score >= 0.5
            break

    log_end(success=succeeded, steps=steps_taken, score=final_score, rewards=rewards)
    return final_score
103
 
104
 
105
def main():
    """Run NUM_EPISODES episodes against the env server.

    A failure inside one episode is reported to stderr and logged as a
    zero-score [END] record so the run continues with the next episode.
    """
    env = CICDEnv(base_url=ENV_URL)
    for episode in range(1, NUM_EPISODES + 1):
        try:
            run_episode(env, episode)
        except Exception as exc:
            # Keep the harness alive: surface the error, emit an empty END line.
            print(f"[DEBUG] episode {episode} error: {exc}", file=sys.stderr, flush=True)
            log_end(success=False, steps=0, score=0.0, rewards=[])


if __name__ == '__main__':
    main()
openenv.yaml CHANGED
@@ -4,7 +4,7 @@ version: "0.1.0"
4
  description: "RL environment for diagnosing CI/CD pipeline failures"
5
  type: space
6
  runtime: fastapi
7
- app: server.app:app
8
  port: 7860
9
  action: DiagnoseAction
10
  observation: PipelineObservation
 
4
  description: "RL environment for diagnosing CI/CD pipeline failures"
5
  type: space
6
  runtime: fastapi
7
+ app: cicd_diagnosis_env.server.app:app
8
  port: 7860
9
  action: DiagnoseAction
10
  observation: PipelineObservation