Spaces:

agentDebugger
/

AgentDebugger-training-v3

Running

shank committed on Apr 6

Commit

9940e16

1 Parent(s): ade347f

docs: final professional polish and code sanitization

Files changed (7) hide show

env/environment.py CHANGED Viewed

@@ -1,10 +1,9 @@
 """
 AgentDebuggerEnv — Core Environment
 =====================================
-OpenEnv-compliant environment with reset(), step(), state() methods.
-Manages the full debugging episode lifecycle.
-NEVER crashes — all errors are returned in info["error"].
 """
 import re

 """
 AgentDebuggerEnv — Core Environment
 =====================================
+Implementation of the core OpenEnv-compliant environment, managing the
+debugging episode lifecycle including task initialization, action
+processing, and reward calculation.
 """
 import re

env/models.py CHANGED Viewed

@@ -1,8 +1,8 @@
 """
 AgentDebuggerEnv — Pydantic Data Models
 ========================================
-All models are Pydantic v2 BaseModel subclasses with exact field names
-required by the OpenEnv spec and hackathon validation pipeline.
 """
 from pydantic import BaseModel

 """
 AgentDebuggerEnv — Pydantic Data Models
 ========================================
+Pydantic v2 data models for structured interaction between the agent
+and the environment, ensuring strict type safety and schema compliance.
 """
 from pydantic import BaseModel

env/sandbox.py CHANGED Viewed

@@ -1,15 +1,9 @@
 """
 AgentDebuggerEnv — Sandboxed Code Execution
 ============================================
-ALL code execution in the environment must go through execute_code().
-Never call exec() or subprocess directly anywhere else.
-Security measures:
-  1. Hard execution timeout (10 seconds)
-  2. AST-based import blocking (not string matching)
-  3. Subprocess isolation
-  4. Clean temp file cleanup in finally block
-  5. Fresh namespace per attempt (no state leaks)
 """
 import subprocess

 """
 AgentDebuggerEnv — Sandboxed Code Execution
 ============================================
+Isolated execution environment for user-submitted code, providing
+security through AST-based import filtering, subprocess isolation,
+and runtime constraints.
 """
 import subprocess

env/server.py CHANGED Viewed

@@ -23,7 +23,7 @@ app = FastAPI(
     version="1.0.0",
 )
-# Single environment instance (single-session design as per hackathon constraints)
 env = DebuggerEnvironment()
@@ -33,7 +33,7 @@ class ResetRequest(BaseModel):
 @app.get("/health")
 async def health():
-    """Health check — must return HTTP 200 always. Critical for hackathon Phase 1."""
     return {"status": "ok", "environment": "agentdebugger-env", "version": "1.0.0"}

     version="1.0.0",
 )
+# Single environment instance to manage the debugging lifecycle.
 env = DebuggerEnvironment()
 @app.get("/health")
 async def health():
+    """Health check endpoint to verify server availability."""
     return {"status": "ok", "environment": "agentdebugger-env", "version": "1.0.0"}

env/tasks/task_hard.py CHANGED Viewed

@@ -1,13 +1,14 @@
 """
 Task Hard — Concurrency Race Condition
 ========================================
-Thread-safe counter with a classic race condition: the read-modify-write cycle
-is split across two separate lock acquisitions instead of being atomic.
-All 8 sequential tests pass. The bug only manifests under concurrent access.
-The agent must design a concurrent test to surface the race condition.
-allow_threading=True for this task.
 """
 TASK_DESCRIPTION = """A thread-safe connection counter used in a web server to track active connections.

 """
 Task Hard — Concurrency Race Condition
 ========================================
+Implementation of a thread-safe counter with a classic race condition.
+The read-modify-write cycle is non-atomic, leading to inconsistent
+states under heavy concurrent load.
+Task Configuration:
+- Type: Concurrency / Race Condition
+- Requirements: Proper synchronization and atomicity
+- Execution: Sandbox threading support enabled
 """
 TASK_DESCRIPTION = """A thread-safe connection counter used in a web server to track active connections.

inference.py CHANGED Viewed

@@ -1,16 +1,13 @@
 """
 AgentDebuggerEnv Baseline Inference Script
 ==========================================
-Filename: inference.py (ROOT directory — not in any subdirectory)
-Reads from environment variables (never hardcoded):
-  API_BASE_URL  — LLM API endpoint
-  MODEL_NAME    — Model identifier
-  HF_TOKEN      — API key / HuggingFace token
-Uses openai Python client for all LLM calls (hackathon requirement).
-Must complete all 3 tasks in under 20 minutes total.
-Saves results to baseline_results.json on completion.
 """
 import os
@@ -58,11 +55,15 @@ Give up (if you cannot find the bug):
   "final_diagnosis": "<your best guess at what the bug was>"
 }
-CRITICAL RULES:
-- hypothesis field is REQUIRED in submit_fix — missing it costs reward
-- Submit COMPLETE code files, not diffs or partial functions
-- Read the error output carefully before each attempt — it tells you what changed
-- For concurrent bugs, think about thread safety and atomic operations"""
 def parse_action(raw: str) -> dict:

 """
 AgentDebuggerEnv Baseline Inference Script
 ==========================================
+Baseline evaluation script for testing agent performance in the
+AgentDebugger environment.
+System Configuration:
+- API_BASE_URL: LLM API endpoint
+- MODEL_NAME:   Model identifier for evaluation
+- HF_TOKEN:     Authentication token
 """
 import os
   "final_diagnosis": "<your best guess at what the bug was>"
 }
+Analyze the error output carefully and provide a corrected version of the complete code.
+You must always include a hypothesis explaining the root cause of the bug before
+submitting your fix.
+Guidelines:
+- Submit complete source code files, not partial snippets or diffs.
+- Incorporate all feedback from previous execution attempts.
+- For concurrent tasks, ensure atomic operations and proper synchronization.
+"""
 def parse_action(raw: str) -> dict:

server/app.py CHANGED Viewed

@@ -1,16 +1,14 @@
 """
 Server Entry Point for AgentDebuggerEnv
 ========================================
-This file satisfies the OpenEnv validator requirement for 'server/app.py'.
-It imports the FastAPI app from 'env.server' and provides a main() function.
 """
 import uvicorn
 from env.server import app
 def main():
-    """Main function called by the 'server' script defined in pyproject.toml."""
-    # Runs the server on port 8000 as required by the hackathon spec
     uvicorn.run(app, host="0.0.0.0", port=8000, workers=1)
 if __name__ == "__main__":

 """
 Server Entry Point for AgentDebuggerEnv
 ========================================
+Main entry point to start the FastAPI server for the AgentDebugger environment.
 """
 import uvicorn
 from env.server import app
 def main():
+    """Main execution function to run the FastAPI server."""
     uvicorn.run(app, host="0.0.0.0", port=8000, workers=1)
 if __name__ == "__main__":