# Manifest header: environment identifier, version, and a prose summary.
name: python-bug-fixer
# Version is kept as a quoted string.
version: '1.0.0'
# Folded scalar: the wrapped lines below are joined with single spaces on load.
description: >
  A real-world environment where an AI agent must identify and fix bugs in
  Python code snippets. The agent receives buggy code along with a description
  of expected behavior and must return corrected code that runs without errors
  and produces the correct output. Simulates real-world software debugging and
  code review workflows.

# Observation handed to the agent: free-form text (contents described below).
# Bug categories are named in prose because Python has no built-in
# "LogicError" exception class; only SyntaxError and IndexError are real ones.
observation_space:
  type: text
  description: >
    A buggy Python code snippet with a description of the expected behavior
    and expected stdout output. May contain 1–3 bugs of varying types
    (syntax errors, index errors, logic errors).

# Action the agent returns: the corrected code as plain text.
action_space:
  type: text
  # One sentence per source line; folding joins them with single spaces.
  description: >
    The corrected Python code as a raw string.
    Must be valid Python that can be executed directly with python3.
    No markdown, no code fences.

# Task catalogue, listed from easiest to hardest.
# NOTE(review): the meaning of max_steps and reward_threshold is defined by
# whatever consumes this file; presumably a per-episode step cap and the
# minimum reward that counts as solved — confirm against the consumer.
tasks:
  # Easy: two off-by-one indexing bugs.
  - id: task_easy
    name: 'Fix Index Errors'
    difficulty: easy
    max_steps: 5
    reward_threshold: 0.5
    description: 'Fix 2 off-by-one index errors in a list-processing script.'

  # Medium: two bugs in a binary search.
  - id: task_medium
    name: 'Fix Binary Search Logic'
    difficulty: medium
    max_steps: 5
    reward_threshold: 0.7
    description: 'Fix 2 bugs in a binary search implementation (boundary + infinite loop).'

  # Hard: three bugs spread over three methods of one class.
  - id: task_hard
    name: 'Fix DataProcessor Class'
    difficulty: hard
    max_steps: 7
    reward_threshold: 0.8
    description: 'Fix 3 bugs across 3 methods of a DataProcessor class.'

# Scoring: [min, max] bounds of the reward, then how the value is derived.
reward_range: [0.0, 1.0]
# Folded scalar: line breaks below are joined with single spaces on load.
reward_description: >
  Reward is computed by running the agent's fixed code in a sandboxed
  subprocess and checking how many expected output strings appear in stdout.
  Each expected output line is worth an equal fraction of 1.0.
  Partial credit is awarded for partially correct fixes.

# HTTP routes of the environment, each written as "<METHOD> <path>" with
# exactly one space between the two parts so that every entry has the same
# shape and can be split on a single space.
endpoints:
  reset: "POST /reset"
  step: "POST /step"
  state: "GET /state"
  tasks: "GET /tasks"
  health: "GET /health"

# Time and resource ceilings for a run; the unit is part of each key name
# (minutes, vCPU count, gigabytes).
# NOTE(review): whether these limits are enforced or only advisory cannot be
# determined from this file — confirm against the consumer.
runtime:
  max_inference_minutes: 20
  max_vcpu: 2
  max_memory_gb: 8