shank committed on
Commit
9940e16
·
1 Parent(s): ade347f

docs: final professional polish and code sanitization

Browse files
env/environment.py CHANGED
@@ -1,10 +1,9 @@
1
  """
2
  AgentDebuggerEnv — Core Environment
3
  =====================================
4
- OpenEnv-compliant environment with reset(), step(), state() methods.
5
- Manages the full debugging episode lifecycle.
6
-
7
- NEVER crashes — all errors are returned in info["error"].
8
  """
9
 
10
  import re
 
1
  """
2
  AgentDebuggerEnv — Core Environment
3
  =====================================
4
+ Implementation of the core OpenEnv-compliant environment, managing the
5
+ debugging episode lifecycle including task initialization, action
6
+ processing, and reward calculation.
 
7
  """
8
 
9
  import re
env/models.py CHANGED
@@ -1,8 +1,8 @@
1
  """
2
  AgentDebuggerEnv — Pydantic Data Models
3
  ========================================
4
- All models are Pydantic v2 BaseModel subclasses with exact field names
5
- required by the OpenEnv spec and hackathon validation pipeline.
6
  """
7
 
8
  from pydantic import BaseModel
 
1
  """
2
  AgentDebuggerEnv — Pydantic Data Models
3
  ========================================
4
+ Pydantic v2 data models for structured interaction between the agent
5
+ and the environment, ensuring strict type safety and schema compliance.
6
  """
7
 
8
  from pydantic import BaseModel
env/sandbox.py CHANGED
@@ -1,15 +1,9 @@
1
  """
2
  AgentDebuggerEnv — Sandboxed Code Execution
3
  ============================================
4
- ALL code execution in the environment must go through execute_code().
5
- Never call exec() or subprocess directly anywhere else.
6
-
7
- Security measures:
8
- 1. Hard execution timeout (10 seconds)
9
- 2. AST-based import blocking (not string matching)
10
- 3. Subprocess isolation
11
- 4. Clean temp file cleanup in finally block
12
- 5. Fresh namespace per attempt (no state leaks)
13
  """
14
 
15
  import subprocess
 
1
  """
2
  AgentDebuggerEnv — Sandboxed Code Execution
3
  ============================================
4
+ Isolated execution environment for user-submitted code, providing
5
+ security through AST-based import filtering, subprocess isolation,
6
+ and runtime constraints.
 
 
 
 
 
 
7
  """
8
 
9
  import subprocess
env/server.py CHANGED
@@ -23,7 +23,7 @@ app = FastAPI(
23
  version="1.0.0",
24
  )
25
 
26
- # Single environment instance (single-session design as per hackathon constraints)
27
  env = DebuggerEnvironment()
28
 
29
 
@@ -33,7 +33,7 @@ class ResetRequest(BaseModel):
33
 
34
  @app.get("/health")
35
  async def health():
36
- """Health check — must return HTTP 200 always. Critical for hackathon Phase 1."""
37
  return {"status": "ok", "environment": "agentdebugger-env", "version": "1.0.0"}
38
 
39
 
 
23
  version="1.0.0",
24
  )
25
 
26
+ # Single environment instance to manage the debugging lifecycle.
27
  env = DebuggerEnvironment()
28
 
29
 
 
33
 
34
  @app.get("/health")
35
  async def health():
36
+ """Health check endpoint to verify server availability."""
37
  return {"status": "ok", "environment": "agentdebugger-env", "version": "1.0.0"}
38
 
39
 
env/tasks/task_hard.py CHANGED
@@ -1,13 +1,14 @@
1
  """
2
  Task Hard — Concurrency Race Condition
3
  ========================================
4
- Thread-safe counter with a classic race condition: the read-modify-write cycle
5
- is split across two separate lock acquisitions instead of being atomic.
6
-
7
- All 8 sequential tests pass. The bug only manifests under concurrent access.
8
- The agent must design a concurrent test to surface the race condition.
9
-
10
- allow_threading=True for this task.
 
11
  """
12
 
13
  TASK_DESCRIPTION = """A thread-safe connection counter used in a web server to track active connections.
 
1
  """
2
  Task Hard — Concurrency Race Condition
3
  ========================================
4
+ Implementation of a thread-safe counter with a classic race condition.
5
+ The read-modify-write cycle is non-atomic, leading to inconsistent
6
+ states under heavy concurrent load.
7
+
8
+ Task Configuration:
9
+ - Type: Concurrency / Race Condition
10
+ - Requirements: Proper synchronization and atomicity
11
+ - Execution: Sandbox threading support enabled
12
  """
13
 
14
  TASK_DESCRIPTION = """A thread-safe connection counter used in a web server to track active connections.
inference.py CHANGED
@@ -1,16 +1,13 @@
1
  """
2
  AgentDebuggerEnv Baseline Inference Script
3
  ==========================================
4
- Filename: inference.py (ROOT directory — not in any subdirectory)
 
5
 
6
- Reads from environment variables (never hardcoded):
7
- API_BASE_URL — LLM API endpoint
8
- MODEL_NAME — Model identifier
9
- HF_TOKEN — API key / HuggingFace token
10
-
11
- Uses openai Python client for all LLM calls (hackathon requirement).
12
- Must complete all 3 tasks in under 20 minutes total.
13
- Saves results to baseline_results.json on completion.
14
  """
15
 
16
  import os
@@ -58,11 +55,15 @@ Give up (if you cannot find the bug):
58
  "final_diagnosis": "<your best guess at what the bug was>"
59
  }
60
 
61
- CRITICAL RULES:
62
- - hypothesis field is REQUIRED in submit_fix β€” missing it costs reward
63
- - Submit COMPLETE code files, not diffs or partial functions
64
- - Read the error output carefully before each attempt β€” it tells you what changed
65
- - For concurrent bugs, think about thread safety and atomic operations"""
 
 
 
 
66
 
67
 
68
  def parse_action(raw: str) -> dict:
 
1
  """
2
  AgentDebuggerEnv Baseline Inference Script
3
  ==========================================
4
+ Baseline evaluation script for testing agent performance in the
5
+ AgentDebugger environment.
6
 
7
+ System Configuration:
8
+ - API_BASE_URL: LLM API endpoint
9
+ - MODEL_NAME: Model identifier for evaluation
10
+ - HF_TOKEN: Authentication token
 
 
 
 
11
  """
12
 
13
  import os
 
55
  "final_diagnosis": "<your best guess at what the bug was>"
56
  }
57
 
58
+ Analyze the error output carefully and provide a corrected version of the complete code.
59
+ You must always include a hypothesis explaining the root cause of the bug before
60
+ submitting your fix.
61
+
62
+ Guidelines:
63
+ - Submit complete source code files, not partial snippets or diffs.
64
+ - Incorporate all feedback from previous execution attempts.
65
+ - For concurrent tasks, ensure atomic operations and proper synchronization.
66
+ """
67
 
68
 
69
  def parse_action(raw: str) -> dict:
server/app.py CHANGED
@@ -1,16 +1,14 @@
1
  """
2
  Server Entry Point for AgentDebuggerEnv
3
  ========================================
4
- This file satisfies the OpenEnv validator requirement for 'server/app.py'.
5
- It imports the FastAPI app from 'env.server' and provides a main() function.
6
  """
7
 
8
  import uvicorn
9
  from env.server import app
10
 
11
  def main():
12
- """Main function called by the 'server' script defined in pyproject.toml."""
13
- # Runs the server on port 8000 as required by the hackathon spec
14
  uvicorn.run(app, host="0.0.0.0", port=8000, workers=1)
15
 
16
  if __name__ == "__main__":
 
1
  """
2
  Server Entry Point for AgentDebuggerEnv
3
  ========================================
4
+ Main entry point to start the FastAPI server for the AgentDebugger environment.
 
5
  """
6
 
7
  import uvicorn
8
  from env.server import app
9
 
10
  def main():
11
+ """Main execution function to run the FastAPI server."""
 
12
  uvicorn.run(app, host="0.0.0.0", port=8000, workers=1)
13
 
14
  if __name__ == "__main__":