Spaces:
Sleeping
Sleeping
Commit ·
a5c1fa0
0
Parent(s):
v2.0 — agent reliability & evaluation layer
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .gitignore +9 -0
- Dockerfile +31 -0
- README.md +144 -0
- inference.py +247 -0
- openenv.yaml +56 -0
- repo_templates/task1/variant_1/meta.json +15 -0
- repo_templates/task1/variant_1/src/auth.py +14 -0
- repo_templates/task1/variant_1/src/utils.py +16 -0
- repo_templates/task1/variant_1/tests/test_auth.py +23 -0
- repo_templates/task1/variant_2/meta.json +15 -0
- repo_templates/task1/variant_2/src/calculator.py +23 -0
- repo_templates/task1/variant_2/src/helpers.py +14 -0
- repo_templates/task1/variant_2/tests/test_calculator.py +32 -0
- repo_templates/task1/variant_3/meta.json +15 -0
- repo_templates/task1/variant_3/src/inventory.py +26 -0
- repo_templates/task1/variant_3/src/logger.py +9 -0
- repo_templates/task1/variant_3/tests/test_inventory.py +44 -0
- repo_templates/task1/variant_4/meta.json +15 -0
- repo_templates/task1/variant_4/src/scheduler.py +34 -0
- repo_templates/task1/variant_4/src/time_helpers.py +12 -0
- repo_templates/task1/variant_4/tests/test_scheduler.py +52 -0
- repo_templates/task1/variant_5/meta.json +15 -0
- repo_templates/task1/variant_5/src/constants.py +4 -0
- repo_templates/task1/variant_5/src/formatter.py +29 -0
- repo_templates/task1/variant_5/tests/test_formatter.py +35 -0
- repo_templates/task2/variant_1/meta.json +13 -0
- repo_templates/task2/variant_1/src/data_pipeline.py +12 -0
- repo_templates/task2/variant_1/src/models.py +10 -0
- repo_templates/task2/variant_1/src/validator.py +7 -0
- repo_templates/task2/variant_1/tests/test_pipeline.py +18 -0
- repo_templates/task2/variant_2/meta.json +13 -0
- repo_templates/task2/variant_2/src/config.py +5 -0
- repo_templates/task2/variant_2/src/email_sender.py +25 -0
- repo_templates/task2/variant_2/src/template_engine.py +26 -0
- repo_templates/task2/variant_2/tests/test_email.py +23 -0
- repo_templates/task2/variant_3/meta.json +13 -0
- repo_templates/task2/variant_3/src/inventory_checker.py +33 -0
- repo_templates/task2/variant_3/src/models.py +10 -0
- repo_templates/task2/variant_3/src/order_processor.py +20 -0
- repo_templates/task2/variant_3/tests/test_orders.py +27 -0
- repo_templates/task2/variant_4/meta.json +13 -0
- repo_templates/task2/variant_4/src/date_formatter.py +28 -0
- repo_templates/task2/variant_4/src/models.py +3 -0
- repo_templates/task2/variant_4/src/report_builder.py +28 -0
- repo_templates/task2/variant_4/tests/test_reports.py +28 -0
- repo_templates/task2/variant_5/meta.json +13 -0
- repo_templates/task2/variant_5/src/cache_manager.py +36 -0
- repo_templates/task2/variant_5/src/config.py +4 -0
- repo_templates/task2/variant_5/src/serializer.py +25 -0
- repo_templates/task2/variant_5/tests/test_cache.py +37 -0
.gitignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
venv/
|
| 5 |
+
.env
|
| 6 |
+
*.egg-info/
|
| 7 |
+
dist/
|
| 8 |
+
build/
|
| 9 |
+
.pytest_cache/
|
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
# Create non-root user for security — MANDATORY for running agent code safely
|
| 4 |
+
RUN useradd -m -u 1000 envuser
|
| 5 |
+
|
| 6 |
+
WORKDIR /app
|
| 7 |
+
|
| 8 |
+
# Install system dependencies
|
| 9 |
+
RUN apt-get update && apt-get install -y \
|
| 10 |
+
git \
|
| 11 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
+
|
| 13 |
+
# Copy and install Python dependencies first (layer caching)
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 16 |
+
|
| 17 |
+
# Copy project
|
| 18 |
+
COPY . .
|
| 19 |
+
|
| 20 |
+
# Make repo_templates readable
|
| 21 |
+
RUN chmod -R 755 repo_templates/
|
| 22 |
+
|
| 23 |
+
# Create temp directory for working copies
|
| 24 |
+
RUN mkdir -p /tmp/openenv_work && chmod 777 /tmp/openenv_work
|
| 25 |
+
|
| 26 |
+
# Switch to non-root for security
|
| 27 |
+
USER envuser
|
| 28 |
+
|
| 29 |
+
EXPOSE 7860
|
| 30 |
+
|
| 31 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
|
README.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Codebase Navigation Repair OpenEnv
|
| 3 |
+
emoji: 🔍
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
app_port: 7860
|
| 9 |
+
license: mit
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
- reinforcement-learning
|
| 13 |
+
- coding-agent
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
# Codebase Navigation & Repair — OpenEnv Environment v2.0
|
| 17 |
+
|
| 18 |
+
**An RL environment + evaluation layer that makes AI coding agents reliable, testable, and debuggable.**
|
| 19 |
+
|
| 20 |
+
AI agents navigate unfamiliar Python codebases, identify bugs, and implement features — graded by running actual tests. Unlike existing benchmarks, this system provides **process-level evaluation**, not just final output scoring.
|
| 21 |
+
|
| 22 |
+
## Why This Exists
|
| 23 |
+
|
| 24 |
+
Every coding agent (Devin, Cursor, Copilot, Codex) fails ~25%+ on complex tasks. Current benchmarks tell you the agent scored 0.4 but not **why** it failed. This environment answers:
|
| 25 |
+
|
| 26 |
+
- Did the agent explore strategically or waste steps?
|
| 27 |
+
- Did it verify its fixes before submitting?
|
| 28 |
+
- Can it resist misleading comments and prompt injection?
|
| 29 |
+
- How efficiently does it use its context window?
|
| 30 |
+
|
| 31 |
+
## Architecture
|
| 32 |
+
|
| 33 |
+
```
|
| 34 |
+
┌──────────────────────────────────────────────────────────┐
|
| 35 |
+
│ FastAPI Server │
|
| 36 |
+
│ /reset /step /state /trajectory /evaluate /metrics │
|
| 37 |
+
└──────────┬───────────────────────────────────────────────┘
|
| 38 |
+
│
|
| 39 |
+
┌──────────▼───────────────────────────────────────────────┐
|
| 40 |
+
│ CodebaseNavEnvironment (extended) │
|
| 41 |
+
│ │
|
| 42 |
+
│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────┐ │
|
| 43 |
+
│ │ Trajectory │ │ Evaluator │ │ Security │ │
|
| 44 |
+
│ │ Logger │ │ (process) │ │ Scanner │ │
|
| 45 |
+
│ └─────────────┘ └──────────────┘ └─────────────────┘ │
|
| 46 |
+
│ ┌─────────────┐ ┌──────────────┐ ┌─────────────────┐ │
|
| 47 |
+
│ │ Fault │ │ Memory │ │ Grader │ │
|
| 48 |
+
│ │ Injector │ │ Tracker │ │ (pytest) │ │
|
| 49 |
+
│ └─────────────┘ └──────────────┘ └─────────────────┘ │
|
| 50 |
+
└──────────────────────────────────────────────────────────┘
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
## Tasks
|
| 54 |
+
|
| 55 |
+
| Task | Difficulty | Description |
|
| 56 |
+
|------|-----------|-------------|
|
| 57 |
+
| task1 | Easy | Single-file bug repair (5 variants) |
|
| 58 |
+
| task2 | Medium | Cross-module interface bug + regression test (5 variants) |
|
| 59 |
+
| task3 | Hard | Feature implementation from spec (5 variants) |
|
| 60 |
+
|
| 61 |
+
## API Endpoints
|
| 62 |
+
|
| 63 |
+
### Core (OpenEnv-compliant)
|
| 64 |
+
| Endpoint | Method | Description |
|
| 65 |
+
|----------|--------|-------------|
|
| 66 |
+
| `/reset?task=task1` | POST | Start new episode |
|
| 67 |
+
| `/step` | POST | Take one action |
|
| 68 |
+
| `/state` | GET | Get current state |
|
| 69 |
+
| `/health` | GET | Health check |
|
| 70 |
+
|
| 71 |
+
### Evaluation Layer (v2.0)
|
| 72 |
+
| Endpoint | Method | Description |
|
| 73 |
+
|----------|--------|-------------|
|
| 74 |
+
| `/trajectory` | GET | Full action log with timing, diffs, security flags |
|
| 75 |
+
| `/evaluate` | GET | Multi-dimensional scores (6 axes) |
|
| 76 |
+
| `/metrics` | GET | Comprehensive stats: memory, security, timeline |
|
| 77 |
+
| `/fault-config` | POST | Enable fault injection: "none", "light", "heavy" |
|
| 78 |
+
|
| 79 |
+
## Multi-Dimensional Evaluation
|
| 80 |
+
|
| 81 |
+
The `/evaluate` endpoint scores agents across **6 quality dimensions**:
|
| 82 |
+
|
| 83 |
+
| Dimension | Weight | What It Measures |
|
| 84 |
+
|-----------|--------|-----------------|
|
| 85 |
+
| Efficiency | 20% | Steps used vs optimal path |
|
| 86 |
+
| Navigation | 15% | Read relevant files first? Explored strategically? |
|
| 87 |
+
| Correctness | 30% | Final test pass rate + regression detection |
|
| 88 |
+
| Reasoning | 15% | read→write→test pattern adherence |
|
| 89 |
+
| Robustness | 10% | Error recovery + fault injection handling |
|
| 90 |
+
| Security | 10% | Unsafe code detection + prompt injection resistance |
|
| 91 |
+
|
| 92 |
+
## Fault Injection
|
| 93 |
+
|
| 94 |
+
Test agent robustness by injecting controlled faults:
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
# Enable heavy fault injection
|
| 98 |
+
curl -X POST http://localhost:7860/fault-config -d '{"level":"heavy"}'
|
| 99 |
+
|
| 100 |
+
# Next reset will inject:
|
| 101 |
+
# - Misleading "BUG:" comments on correct lines
|
| 102 |
+
# - Red herring files that look buggy but aren't
|
| 103 |
+
# - Noisy docstrings claiming code is correct
|
| 104 |
+
```
|
| 105 |
+
|
| 106 |
+
## Quick Start
|
| 107 |
+
|
| 108 |
+
### Local
|
| 109 |
+
```bash
|
| 110 |
+
pip install -r requirements.txt
|
| 111 |
+
uvicorn server.app:app --host 0.0.0.0 --port 7860
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
### Docker
|
| 115 |
+
```bash
|
| 116 |
+
docker build -t codebase-nav-env .
|
| 117 |
+
docker run -p 7860:7860 codebase-nav-env
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
### Run Inference
|
| 121 |
+
```bash
|
| 122 |
+
export HF_TOKEN=your_token
|
| 123 |
+
export ENV_BASE_URL=http://localhost:7860
|
| 124 |
+
python inference.py
|
| 125 |
+
```
|
| 126 |
+
|
| 127 |
+
## Example Output: `/evaluate`
|
| 128 |
+
```json
|
| 129 |
+
{
|
| 130 |
+
"composite_score": 0.874,
|
| 131 |
+
"dimensions": {
|
| 132 |
+
"efficiency": {"score": 0.8, "evidence": ["Used 5 steps vs 4 optimal"]},
|
| 133 |
+
"navigation": {"score": 1.0, "evidence": ["Good: first read was relevant file"]},
|
| 134 |
+
"correctness": {"score": 0.714, "evidence": ["No test regressions"]},
|
| 135 |
+
"reasoning": {"score": 1.0, "evidence": ["Agent tested after writing"]},
|
| 136 |
+
"robustness": {"score": 1.0, "evidence": ["Clean execution"]},
|
| 137 |
+
"security": {"score": 1.0, "evidence": ["No security violations"]}
|
| 138 |
+
}
|
| 139 |
+
}
|
| 140 |
+
```
|
| 141 |
+
|
| 142 |
+
## License
|
| 143 |
+
|
| 144 |
+
MIT
|
inference.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
inference.py — Mandatory OpenEnv baseline inference script.
|
| 4 |
+
Runs an LLM agent against all 3 tasks and emits required log format.
|
| 5 |
+
|
| 6 |
+
Environment variables required:
|
| 7 |
+
API_BASE_URL — LLM API endpoint
|
| 8 |
+
MODEL_NAME — model identifier
|
| 9 |
+
HF_TOKEN — Hugging Face API token
|
| 10 |
+
"""
|
| 11 |
+
import os
|
| 12 |
+
import json
|
| 13 |
+
import textwrap
|
| 14 |
+
from typing import List, Optional
|
| 15 |
+
|
| 16 |
+
from openai import OpenAI
|
| 17 |
+
import httpx
|
| 18 |
+
|
| 19 |
+
# ── Configuration ─────────────────────────────────────────────────────────────
|
| 20 |
+
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
|
| 21 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 22 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
|
| 23 |
+
ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:7860")
|
| 24 |
+
|
| 25 |
+
MAX_STEPS_PER_TASK = {"task1": 12, "task2": 18, "task3": 22}
|
| 26 |
+
TEMPERATURE = 0.2
|
| 27 |
+
MAX_TOKENS = 800
|
| 28 |
+
SUCCESS_THRESHOLD = 0.5
|
| 29 |
+
|
| 30 |
+
TASKS = ["task1", "task2", "task3"]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ── Logging helpers ────────────────────────────────────────────────────────────
|
| 34 |
+
def log_start(task: str, env: str, model: str) -> None:
    """Emit the mandatory [START] log marker for a new episode."""
    fields = [f"task={task}", f"env={env}", f"model={model}"]
    print("[START] " + " ".join(fields), flush=True)
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit the mandatory [STEP] log line for one environment step."""
    # The log format uses the literal string "null" for an absent error.
    fields = (
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error if error else 'null'}",
    )
    print("[STEP] " + " ".join(fields), flush=True)
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Emit the mandatory [END] summary line for a finished episode."""
    reward_list = ",".join("%.2f" % r for r in rewards)
    line = (
        f"[END] success={str(success).lower()} steps={steps} "
        f"score={score:.3f} rewards={reward_list}"
    )
    print(line, flush=True)
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ── Environment client ─────────────────────────────────────────────────────────
|
| 57 |
+
class EnvClient:
    """Minimal HTTP wrapper around the environment server's REST API."""

    def __init__(self, base_url: str):
        # Drop any trailing slash so endpoint paths join cleanly below.
        self.base_url = base_url.rstrip("/")
        self.client = httpx.Client(timeout=60.0)

    def reset(self, task: str) -> dict:
        """POST /reset to start a fresh episode for *task*."""
        response = self.client.post(self.base_url + "/reset", params={"task": task})
        response.raise_for_status()
        return response.json()

    def step(self, action: dict) -> dict:
        """POST /step with one JSON action; returns the step result."""
        response = self.client.post(self.base_url + "/step", json=action)
        response.raise_for_status()
        return response.json()

    def state(self) -> dict:
        """GET /state for a snapshot of the current episode."""
        response = self.client.get(self.base_url + "/state")
        response.raise_for_status()
        return response.json()

    def close(self):
        """Release the underlying HTTP connection pool."""
        self.client.close()
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ── LLM Agent ─────────────────────────────────────────────────────────────────
|
| 82 |
+
SYSTEM_PROMPT = textwrap.dedent("""
|
| 83 |
+
You are an expert software engineer working inside a Python code repository.
|
| 84 |
+
You can take the following actions (respond with ONLY a valid JSON object):
|
| 85 |
+
|
| 86 |
+
{"action_type": "read_file", "path": "src/some_file.py"}
|
| 87 |
+
{"action_type": "write_file", "path": "src/some_file.py", "content": "...full new content..."}
|
| 88 |
+
{"action_type": "run_tests", "path": "tests/test_something.py"}
|
| 89 |
+
{"action_type": "search_code", "query": "function_name_or_keyword"}
|
| 90 |
+
{"action_type": "submit"}
|
| 91 |
+
|
| 92 |
+
Strategy:
|
| 93 |
+
1. ALWAYS read relevant source files before writing any fixes
|
| 94 |
+
2. For task1/task2: read failing test file first to understand what is expected
|
| 95 |
+
3. For task3: read FEATURE_SPEC.md first, then existing source files
|
| 96 |
+
4. Run tests after writing a fix to verify improvement
|
| 97 |
+
5. Submit only when confident tests will pass
|
| 98 |
+
|
| 99 |
+
Reply with ONLY the JSON action object. No explanation. No markdown. No extra text.
|
| 100 |
+
""").strip()
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def build_user_prompt(obs: dict, step: int, history: List[str]) -> str:
    """Render the current observation into the user prompt sent to the LLM.

    Args:
        obs: Observation dict from the environment (repo_tree, failing_tests, ...).
        step: 1-based step number within the episode.
        history: Running list of one-line summaries of prior actions.

    Returns:
        The fully formatted prompt string.

    BUGFIX: the template is now dedented BEFORE interpolation. Dedenting after
    interpolation (the previous behavior) is a no-op whenever a multi-line value
    such as the repo tree contributes unindented lines — textwrap.dedent strips
    only the *common* leading whitespace — which left stray indentation on every
    template line of the prompt.
    """
    tree_str = "\n".join(obs.get("repo_tree", []))
    files_read_str = ", ".join(obs.get("files_read", [])) or "none yet"
    failing_str = ", ".join(obs.get("failing_tests", [])) or "unknown"
    last_result = obs.get("last_action_result") or "none"
    last_error = obs.get("last_action_error") or "none"
    steps_left = obs.get("steps_remaining", 0)
    history_str = "\n".join(history[-5:]) if history else "none"

    template = textwrap.dedent("""\
        Step: {step}
        Task: {task}
        Description: {description}
        Steps remaining: {steps_left}

        Repository files:
        {tree}

        Files already read: {files_read}
        Known failing tests: {failing}
        Last action result: {last_result}
        Last action error: {last_error}

        Recent history:
        {history}

        What is your next action? Reply with ONLY a JSON action object.""")
    return template.format(
        step=step,
        task=obs.get("current_task"),
        description=obs.get("task_description"),
        steps_left=steps_left,
        tree=tree_str,
        files_read=files_read_str,
        failing=failing_str,
        # Truncate potentially huge tool output to keep the context small.
        last_result=last_result[:1000],
        last_error=last_error,
        history=history_str,
    ).strip()
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def get_agent_action(client: OpenAI, obs: dict, step: int, history: List[str]) -> dict:
    """Ask the LLM for the next action and parse it as a JSON action object.

    Falls back to {"action_type": "submit"} when the call or the parse fails,
    so a single bad generation never crashes the episode.

    Args:
        client: OpenAI-compatible chat client.
        obs: Current environment observation.
        step: 1-based step number (for the prompt).
        history: Recent action summaries (for the prompt).

    Returns:
        A dict with at least an "action_type" key.
    """
    user_prompt = build_user_prompt(obs, step, history)
    text = ""  # Pre-bind so the JSONDecodeError handler can always log it.
    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=TEMPERATURE,
            max_tokens=MAX_TOKENS,
        )
        text = (completion.choices[0].message.content or "").strip()
        return _parse_action_json(text)
    except json.JSONDecodeError:
        print(f"[DEBUG] Failed to parse action JSON: {text[:200]}", flush=True)
        return {"action_type": "submit"}  # Fallback
    except Exception as e:
        print(f"[DEBUG] LLM call failed: {e}", flush=True)
        return {"action_type": "submit"}


def _parse_action_json(text: str) -> dict:
    """Extract and parse a JSON action object from raw model output.

    BUGFIX: the previous fence-stripping logic only handled a leading
    markdown fence; any surrounding prose made json.loads fail and forced a
    premature "submit", wasting the episode. We now also extract the first
    balanced-looking {...} region before parsing, and reject non-object JSON.

    Raises:
        json.JSONDecodeError: when no valid JSON object can be found.
    """
    stripped = text.strip()
    # Strip a leading markdown code fence (``` or ```json).
    if stripped.startswith("```"):
        stripped = stripped.split("```")[1]
        if stripped.startswith("json"):
            stripped = stripped[4:]
    # Narrow to the outermost {...} span to tolerate surrounding prose.
    start = stripped.find("{")
    end = stripped.rfind("}")
    if start != -1 and end > start:
        stripped = stripped[start : end + 1]
    parsed = json.loads(stripped)
    if not isinstance(parsed, dict):
        raise json.JSONDecodeError("expected a JSON object", stripped, 0)
    return parsed
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def run_task(env_client: EnvClient, llm_client: OpenAI, task: str) -> tuple:
    """Run one complete episode for a task. Returns (score, steps, rewards).

    The episode loop asks the LLM for an action, forwards it to the
    environment, and logs every step in the mandated [STEP] format. If the
    step budget is exhausted without the environment grading the episode,
    a final "submit" is forced so a score is always obtained when possible.
    """
    max_steps = MAX_STEPS_PER_TASK.get(task, 15)
    benchmark = "codebase-nav-env"

    rewards = []
    history = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=task, env=benchmark, model=MODEL_NAME)

    try:
        reset_result = env_client.reset(task=task)
        obs = reset_result["observation"]

        for step_num in range(1, max_steps + 1):
            if obs.get("steps_remaining", 0) <= 0:
                break

            action = get_agent_action(llm_client, obs, step_num, history)
            action_str = json.dumps(action)

            try:
                step_result = env_client.step(action)
            except Exception as e:
                # Environment call failed: log a terminal step and stop.
                log_step(step_num, action_str, 0.0, True, str(e))
                break

            reward = step_result.get("reward", 0.0)
            done = step_result.get("done", False)
            error = step_result["observation"].get("last_action_error")

            rewards.append(reward)
            steps_taken = step_num
            obs = step_result["observation"]

            history.append(f"Step {step_num}: {action.get('action_type')} -> reward {reward:+.2f}")

            log_step(step=step_num, action=action_str[:200], reward=reward, done=done, error=error)

            if done:
                # Get final score from state
                state = env_client.state()
                score = state.get("current_score", 0.0)
                break

        # If not done yet (step budget exhausted), force submit.
        # BUGFIX: last_action_result may be present-but-None in the
        # observation; dict.get's default does not apply then, and calling
        # .startswith() on None raised (silently swallowed by the outer
        # except, skipping the forced submit). Coalesce to "" first.
        last_result = obs.get("last_action_result") or ""
        if not last_result.startswith("=== FINAL GRADER"):
            try:
                env_client.step({"action_type": "submit"})
                state = env_client.state()
                score = state.get("current_score", 0.0)
            except Exception:
                # Best effort: keep whatever score we already have.
                pass

        success = score >= SUCCESS_THRESHOLD

    except Exception as e:
        print(f"[DEBUG] Episode error: {e}", flush=True)
    finally:
        # Always emit the [END] line, even on a crashed episode.
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)

    return score, steps_taken, rewards
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def main():
    """Run the agent against every task, printing per-task and average scores.

    BUGFIX: env_client.close() now runs in a finally block so the HTTP
    connection pool is released even when an episode raises; the average is
    also guarded against an empty score list.
    """
    env_client = EnvClient(ENV_BASE_URL)
    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    all_scores = []
    try:
        for task in TASKS:
            score, steps, _rewards = run_task(env_client, llm_client, task)
            all_scores.append(score)
            print(f"[INFO] {task} complete: score={score:.3f} steps={steps}", flush=True)

        avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
        print(f"[INFO] Average score across all tasks: {avg_score:.3f}", flush=True)
    finally:
        env_client.close()
|
openenv.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: codebase-nav-env
|
| 2 |
+
version: "1.0.0"
|
| 3 |
+
description: >
|
| 4 |
+
An RL environment where an LLM agent navigates an unfamiliar Python codebase,
|
| 5 |
+
finds bugs, and implements features by reading files and running tests.
|
| 6 |
+
Graded by actual pytest execution — fully deterministic.
|
| 7 |
+
|
| 8 |
+
author: your-hf-username
|
| 9 |
+
license: MIT
|
| 10 |
+
|
| 11 |
+
tasks:
|
| 12 |
+
- id: task1
|
| 13 |
+
name: "Single-file bug repair"
|
| 14 |
+
description: "Find and fix bugs in a Python module so all tests pass."
|
| 15 |
+
difficulty: easy
|
| 16 |
+
max_steps: 20
|
| 17 |
+
reward_range: [0.0, 1.0]
|
| 18 |
+
|
| 19 |
+
- id: task2
|
| 20 |
+
name: "Cross-module interface bug"
|
| 21 |
+
description: "Fix a type mismatch between two modules and add a regression test."
|
| 22 |
+
difficulty: medium
|
| 23 |
+
max_steps: 25
|
| 24 |
+
reward_range: [0.0, 1.0]
|
| 25 |
+
|
| 26 |
+
- id: task3
|
| 27 |
+
name: "Feature implementation from spec"
|
| 28 |
+
description: "Read FEATURE_SPEC.md and implement the feature across multiple files."
|
| 29 |
+
difficulty: hard
|
| 30 |
+
max_steps: 30
|
| 31 |
+
reward_range: [0.0, 1.0]
|
| 32 |
+
|
| 33 |
+
action_space:
|
| 34 |
+
type: text
|
| 35 |
+
schema:
|
| 36 |
+
action_type: string
|
| 37 |
+
path: string (optional)
|
| 38 |
+
content: string (optional)
|
| 39 |
+
query: string (optional)
|
| 40 |
+
|
| 41 |
+
observation_space:
|
| 42 |
+
type: structured
|
| 43 |
+
fields:
|
| 44 |
+
- repo_tree: list of file paths
|
| 45 |
+
- task_description: string
|
| 46 |
+
- failing_tests: list of test names
|
| 47 |
+
- files_read: list of paths read so far
|
| 48 |
+
- last_action_result: string
|
| 49 |
+
- steps_remaining: integer
|
| 50 |
+
- current_task: string
|
| 51 |
+
|
| 52 |
+
endpoints:
|
| 53 |
+
reset: POST /reset
|
| 54 |
+
step: POST /step
|
| 55 |
+
state: GET /state
|
| 56 |
+
health: GET /health
|
repo_templates/task1/variant_1/meta.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task1_v1",
|
| 3 |
+
"task": "task1",
|
| 4 |
+
"bug_files": ["src/auth.py"],
|
| 5 |
+
"bug_description": "validate_token uses != instead of == and get_user_permissions has off-by-one",
|
| 6 |
+
"failing_tests": ["test_valid_token", "test_user_permissions"],
|
| 7 |
+
"correct_lines": {
|
| 8 |
+
"src/auth.py": {
|
| 9 |
+
"return token != secret": "return token == secret",
|
| 10 |
+
"return permissions[user_id + 1]": "return permissions[user_id]"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"total_files": 3,
|
| 14 |
+
"optimal_steps": 4
|
| 15 |
+
}
|
repo_templates/task1/variant_1/src/auth.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def validate_token(token: str, secret: str) -> bool:
|
| 2 |
+
"""Validate a user token against the secret."""
|
| 3 |
+
if token is None:
|
| 4 |
+
return False
|
| 5 |
+
# BUG: should be == not !=
|
| 6 |
+
return token != secret
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_user_permissions(user_id: int, permissions: list) -> list:
|
| 10 |
+
"""Return permissions for a user ID."""
|
| 11 |
+
if user_id < 0:
|
| 12 |
+
return []
|
| 13 |
+
# BUG: off-by-one — should be permissions[user_id] not permissions[user_id + 1]
|
| 14 |
+
return permissions[user_id + 1] if user_id + 1 < len(permissions) else []
|
repo_templates/task1/variant_1/src/utils.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Utility functions for the auth module."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def sanitize_input(text: str) -> str:
|
| 5 |
+
"""Remove leading/trailing whitespace and normalize."""
|
| 6 |
+
if not isinstance(text, str):
|
| 7 |
+
return ""
|
| 8 |
+
return text.strip().lower()
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def format_response(status: str, data: dict = None) -> dict:
|
| 12 |
+
"""Format a standard API response."""
|
| 13 |
+
return {
|
| 14 |
+
"status": status,
|
| 15 |
+
"data": data or {},
|
| 16 |
+
}
|
repo_templates/task1/variant_1/tests/test_auth.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.auth import validate_token, get_user_permissions
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_valid_token():
|
| 6 |
+
assert validate_token("abc123", "abc123") == True # FAILS because of != bug
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def test_invalid_token():
|
| 10 |
+
assert validate_token("wrong", "abc123") == False
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_none_token():
|
| 14 |
+
assert validate_token(None, "abc123") == False
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def test_user_permissions():
|
| 18 |
+
perms = ["read", "write", "admin"]
|
| 19 |
+
assert get_user_permissions(0, perms) == "read" # FAILS because of off-by-one bug
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_negative_user_id():
|
| 23 |
+
assert get_user_permissions(-1, ["read"]) == []
|
repo_templates/task1/variant_2/meta.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task1_v2",
|
| 3 |
+
"task": "task1",
|
| 4 |
+
"bug_files": ["src/calculator.py"],
|
| 5 |
+
"bug_description": "divide() missing zero-division check; average() crashes on empty list",
|
| 6 |
+
"failing_tests": ["test_divide_by_zero", "test_average_empty"],
|
| 7 |
+
"correct_lines": {
|
| 8 |
+
"src/calculator.py": {
|
| 9 |
+
"return numerator / denominator": "if denominator == 0:\n return 0.0\n return numerator / denominator",
|
| 10 |
+
"total = sum(numbers)\n return total / len(numbers)": "if not numbers:\n return 0.0\n total = sum(numbers)\n return total / len(numbers)"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"total_files": 3,
|
| 14 |
+
"optimal_steps": 4
|
| 15 |
+
}
|
repo_templates/task1/variant_2/src/calculator.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Calculator module with basic math operations."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def divide(numerator: float, denominator: float) -> float:
|
| 5 |
+
"""Divide numerator by denominator safely."""
|
| 6 |
+
# BUG: missing zero-division check — should check denominator == 0
|
| 7 |
+
return numerator / denominator
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def average(numbers: list) -> float:
|
| 11 |
+
"""Calculate the average of a list of numbers."""
|
| 12 |
+
# BUG: doesn't handle empty list — should return 0.0 for empty
|
| 13 |
+
total = sum(numbers)
|
| 14 |
+
return total / len(numbers)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def clamp(value: float, min_val: float, max_val: float) -> float:
|
| 18 |
+
"""Clamp a value between min and max."""
|
| 19 |
+
if value < min_val:
|
| 20 |
+
return min_val
|
| 21 |
+
if value > max_val:
|
| 22 |
+
return max_val
|
| 23 |
+
return value
|
repo_templates/task1/variant_2/src/helpers.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Helper utilities for the calculator module."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def parse_number(value: str) -> float:
|
| 5 |
+
"""Parse a string to a float, returning 0.0 on failure."""
|
| 6 |
+
try:
|
| 7 |
+
return float(value)
|
| 8 |
+
except (ValueError, TypeError):
|
| 9 |
+
return 0.0
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def format_result(value: float, decimals: int = 2) -> str:
|
| 13 |
+
"""Format a numeric result to a string with given decimal places."""
|
| 14 |
+
return f"{value:.{decimals}f}"
|
repo_templates/task1/variant_2/tests/test_calculator.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.calculator import divide, average, clamp
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# NOTE(review): test_divide_by_zero and test_average_empty fail BY DESIGN —
# they pin the planted bugs this eval variant asks an agent to fix.
def test_divide_normal():
    assert divide(10, 2) == 5.0


def test_divide_by_zero():
    # FAILS — ZeroDivisionError because no zero check
    assert divide(10, 0) == 0.0


def test_average_normal():
    assert average([1, 2, 3]) == 2.0


def test_average_empty():
    # FAILS — ZeroDivisionError because empty list not handled
    assert average([]) == 0.0


def test_clamp_within():
    assert clamp(5, 0, 10) == 5


def test_clamp_below():
    assert clamp(-5, 0, 10) == 0


def test_clamp_above():
    assert clamp(15, 0, 10) == 10
|
repo_templates/task1/variant_3/meta.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task1_v3",
|
| 3 |
+
"task": "task1",
|
| 4 |
+
"bug_files": ["src/inventory.py"],
|
| 5 |
+
"bug_description": "check_stock uses >= 0 instead of > 0; get_low_stock_items uses <= instead of <",
|
| 6 |
+
"failing_tests": ["test_out_of_stock", "test_low_stock_items"],
|
| 7 |
+
"correct_lines": {
|
| 8 |
+
"src/inventory.py": {
|
| 9 |
+
"return inventory[item_id] >= 0": "return inventory[item_id] > 0",
|
| 10 |
+
"if qty <= threshold": "if qty < threshold"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"total_files": 3,
|
| 14 |
+
"optimal_steps": 4
|
| 15 |
+
}
|
repo_templates/task1/variant_3/src/inventory.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Inventory management module."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def check_stock(item_id: str, inventory: dict) -> bool:
    """Check if an item is in stock (quantity > 0).

    NOTE(review): the >= comparison below is the planted bug for this
    variant; meta.json "correct_lines" maps this exact line to the > fix,
    so it must stay byte-identical in the template.
    """
    if item_id not in inventory:
        return False
    # BUG: should be > 0, not >= 0 (zero stock means out of stock)
    return inventory[item_id] >= 0
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def restock(item_id: str, quantity: int, inventory: dict) -> dict:
    """Add `quantity` units of `item_id` to `inventory` (mutates and returns it).

    Raises:
        ValueError: if `quantity` is negative.
    """
    if quantity < 0:
        raise ValueError("Cannot restock negative quantity")
    # dict.get with a 0 default collapses the existing/new-item branches.
    inventory[item_id] = inventory.get(item_id, 0) + quantity
    return inventory
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_low_stock_items(inventory: dict, threshold: int = 5) -> list:
    """Return items with stock below threshold.

    NOTE(review): the <= below is the planted bug for this variant (meta.json
    expects the agent to change it to <) — do not fix in the template.
    """
    # BUG: should be < threshold, not <= threshold
    return [item for item, qty in inventory.items() if qty <= threshold]
|
repo_templates/task1/variant_3/src/logger.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Logging utilities for inventory operations."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def log_operation(operation: str, item_id: str, details: str = "") -> str:
    """Build a log entry string for an inventory operation."""
    parts = [f"[INVENTORY] {operation}: {item_id}"]
    if details:
        # Optional detail suffix, separated by an em dash (exact original text).
        parts.append(f" — {details}")
    return "".join(parts)
|
repo_templates/task1/variant_3/tests/test_inventory.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.inventory import check_stock, restock, get_low_stock_items
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# NOTE(review): test_out_of_stock and test_low_stock_items fail BY DESIGN —
# they pin the planted comparison bugs in src/inventory.py for this variant.
def test_in_stock():
    inv = {"apple": 10, "banana": 5}
    assert check_stock("apple", inv) == True


def test_out_of_stock():
    inv = {"apple": 0}
    # FAILS — returns True because >= 0 is wrong, should be > 0
    assert check_stock("apple", inv) == False


def test_item_not_found():
    assert check_stock("ghost", {}) == False


def test_restock_existing():
    inv = {"apple": 5}
    result = restock("apple", 3, inv)
    assert result["apple"] == 8


def test_restock_new():
    inv = {}
    result = restock("orange", 10, inv)
    assert result["orange"] == 10


def test_restock_negative():
    with pytest.raises(ValueError):
        restock("apple", -1, {})


def test_low_stock_items():
    inv = {"apple": 3, "banana": 5, "cherry": 10}
    # FAILS — banana (qty=5) should NOT be in low stock when threshold=5
    # but <= threshold incorrectly includes items AT the threshold
    result = get_low_stock_items(inv, threshold=5)
    assert "apple" in result
    assert "banana" not in result
    assert "cherry" not in result
|
repo_templates/task1/variant_4/meta.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task1_v4",
|
| 3 |
+
"task": "task1",
|
| 4 |
+
"bug_files": ["src/scheduler.py"],
|
| 5 |
+
"bug_description": "is_available uses <= instead of < for adjacent slot check; days_until has off-by-one (+1)",
|
| 6 |
+
"failing_tests": ["test_adjacent_slots_allowed", "test_days_until", "test_days_until_same_day"],
|
| 7 |
+
"correct_lines": {
|
| 8 |
+
"src/scheduler.py": {
|
| 9 |
+
"if start <= slot_end and end >= slot_start:": "if start < slot_end and end > slot_start:",
|
| 10 |
+
"return delta.days + 1": "return delta.days"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"total_files": 3,
|
| 14 |
+
"optimal_steps": 4
|
| 15 |
+
}
|
repo_templates/task1/variant_4/src/scheduler.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Meeting and event scheduler module."""
|
| 2 |
+
from datetime import datetime, timedelta
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def is_available(start: datetime, end: datetime, booked_slots: list) -> bool:
    """Check if a time slot is available (no overlap with booked slots).

    NOTE(review): the inclusive <=/>= comparisons are the planted bug for
    this variant (meta.json maps this exact line to the strict </> fix) —
    leave byte-identical in the template.
    """
    for slot in booked_slots:
        slot_start = slot["start"]
        slot_end = slot["end"]
        # BUG: off-by-one — should be < not <= for end comparison
        # Adjacent meetings (one ends exactly when another starts) should be allowed
        if start <= slot_end and end >= slot_start:
            return False
    return True
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_next_available(after: datetime, duration_minutes: int, booked_slots: list) -> datetime:
    """Find the next free start time at or after `after`.

    Probes candidate starts in 15-minute increments, at most 100 probes,
    and returns None if no opening is found within that window.
    """
    step = timedelta(minutes=15)
    meeting_length = timedelta(minutes=duration_minutes)
    start = after
    for _ in range(100):  # bounded scan — avoids looping forever
        if is_available(start, start + meeting_length, booked_slots):
            return start
        start = start + step
    return None
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def days_until(target: datetime, now: datetime = None) -> int:
    """Calculate whole days until target date.

    NOTE(review): the +1 below is the planted off-by-one for this variant
    (meta.json expects the agent to return delta.days) — do not fix here.
    """
    if now is None:
        now = datetime.now()
    delta = target - now
    # BUG: should return delta.days, not delta.days + 1
    return delta.days + 1
|
repo_templates/task1/variant_4/src/time_helpers.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Time helper functions."""
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def format_time(dt: datetime) -> str:
    """Render a datetime as 'YYYY-MM-DD HH:MM'."""
    # f-string format spec delegates to strftime with the same codes.
    return f"{dt:%Y-%m-%d %H:%M}"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def parse_time(s: str) -> datetime:
    """Parse a 'YYYY-MM-DD HH:MM' string into a naive datetime."""
    fmt = "%Y-%m-%d %H:%M"
    return datetime.strptime(s, fmt)
|
repo_templates/task1/variant_4/tests/test_scheduler.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from datetime import datetime, timedelta
|
| 3 |
+
from src.scheduler import is_available, get_next_available, days_until
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
# NOTE(review): test_adjacent_slots_allowed, test_days_until and
# test_days_until_same_day fail BY DESIGN — they pin the planted bugs
# in src/scheduler.py for this eval variant.
def test_slot_available():
    booked = [
        {"start": datetime(2024, 1, 1, 10, 0), "end": datetime(2024, 1, 1, 11, 0)}
    ]
    assert is_available(
        datetime(2024, 1, 1, 12, 0),
        datetime(2024, 1, 1, 13, 0),
        booked
    ) == True


def test_slot_overlap():
    booked = [
        {"start": datetime(2024, 1, 1, 10, 0), "end": datetime(2024, 1, 1, 11, 0)}
    ]
    assert is_available(
        datetime(2024, 1, 1, 10, 30),
        datetime(2024, 1, 1, 11, 30),
        booked
    ) == False


def test_adjacent_slots_allowed():
    """Meeting starting exactly when another ends should be allowed."""
    booked = [
        {"start": datetime(2024, 1, 1, 10, 0), "end": datetime(2024, 1, 1, 11, 0)}
    ]
    # FAILS — returns False because <= is used instead of <
    assert is_available(
        datetime(2024, 1, 1, 11, 0),
        datetime(2024, 1, 1, 12, 0),
        booked
    ) == True


def test_days_until():
    now = datetime(2024, 1, 1, 0, 0)
    target = datetime(2024, 1, 11, 0, 0)
    # FAILS — returns 11 instead of 10 because of +1 bug
    assert days_until(target, now) == 10


def test_days_until_same_day():
    now = datetime(2024, 6, 15, 8, 0)
    target = datetime(2024, 6, 15, 20, 0)
    # FAILS — returns 1 instead of 0
    assert days_until(target, now) == 0
|
repo_templates/task1/variant_5/meta.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task1_v5",
|
| 3 |
+
"task": "task1",
|
| 4 |
+
"bug_files": ["src/formatter.py"],
|
| 5 |
+
"bug_description": "truncate doesn't account for ellipsis length; extract_between doesn't offset past start marker",
|
| 6 |
+
"failing_tests": ["test_truncate_long", "test_extract_between"],
|
| 7 |
+
"correct_lines": {
|
| 8 |
+
"src/formatter.py": {
|
| 9 |
+
"return text[:max_length] + \"...\"": "return text[:max_length - 3] + \"...\"",
|
| 10 |
+
"content_start = start_idx": "content_start = start_idx + len(start_marker)"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"total_files": 3,
|
| 14 |
+
"optimal_steps": 4
|
| 15 |
+
}
|
repo_templates/task1/variant_5/src/constants.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constants for the formatter module."""

# Default cap for truncated text length.
DEFAULT_MAX_LENGTH = 50
# Suffix appended to truncated strings.
# NOTE(review): not referenced by src/formatter.py in this variant —
# presumably provided for the fixing agent; confirm against the task spec.
ELLIPSIS = "..."
|
repo_templates/task1/variant_5/src/formatter.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Text formatter module for processing and formatting strings."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def truncate(text: str, max_length: int) -> str:
    """Truncate text to max_length, adding '...' if truncated.

    NOTE(review): the missing "- 3" is the planted bug for this variant —
    the result can exceed max_length. meta.json maps this exact line to the
    fix, so it must stay byte-identical in the template.
    """
    if not text:
        return ""
    if len(text) <= max_length:
        return text
    # BUG: should be text[:max_length - 3] + "..." to account for ellipsis length
    return text[:max_length] + "..."
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def extract_between(text: str, start_marker: str, end_marker: str) -> str:
    """Extract text between two markers.

    NOTE(review): the missing len(start_marker) offset is the planted bug
    for this variant (the slice includes the start marker) — leave unfixed
    in the template; meta.json maps the exact line to the fix.
    """
    start_idx = text.find(start_marker)
    if start_idx == -1:
        return ""
    # BUG: should start after the marker, i.e. start_idx + len(start_marker)
    content_start = start_idx  # wrong — includes the start_marker itself
    end_idx = text.find(end_marker, content_start)
    if end_idx == -1:
        return ""
    return text[content_start:end_idx]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def capitalize_words(text: str) -> str:
    """Capitalize the first letter of every whitespace-separated word."""
    # str.split() with no args collapses runs of whitespace, same as original.
    return " ".join(map(str.capitalize, text.split()))
|
repo_templates/task1/variant_5/tests/test_formatter.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.formatter import truncate, extract_between, capitalize_words
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# NOTE(review): test_truncate_long and test_extract_between fail BY DESIGN —
# they pin the planted bugs in src/formatter.py for this eval variant.
def test_truncate_short():
    assert truncate("hello", 10) == "hello"


def test_truncate_long():
    # FAILS — returns "hello worl..." (13 chars) instead of "hello w..." (10 chars)
    result = truncate("hello world", 10)
    assert len(result) <= 10
    assert result == "hello w..."


def test_truncate_empty():
    assert truncate("", 5) == ""


def test_extract_between():
    text = "start[CONTENT]end"
    # FAILS — returns "[CONTENT]" instead of "CONTENT" because start_idx not offset
    assert extract_between(text, "[", "]") == "CONTENT"


def test_extract_missing_marker():
    assert extract_between("no markers here", "[", "]") == ""


def test_capitalize_words():
    assert capitalize_words("hello world foo") == "Hello World Foo"


def test_capitalize_single():
    assert capitalize_words("test") == "Test"
|
repo_templates/task2/variant_1/meta.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task2_v1",
|
| 3 |
+
"task": "task2",
|
| 4 |
+
"bug_files": ["src/data_pipeline.py"],
|
| 5 |
+
"interface_files": ["src/validator.py"],
|
| 6 |
+
"bug_description": "data_pipeline passes str(record_id) but validator.py expects int",
|
| 7 |
+
"failing_tests": ["test_process_valid_batch"],
|
| 8 |
+
"fix_file": "src/data_pipeline.py",
|
| 9 |
+
"fix_description": "Remove str() wrapping — pass record['id'] directly",
|
| 10 |
+
"regression_test_must_cover": "TypeError raised when string is passed to validate_record",
|
| 11 |
+
"total_files": 4,
|
| 12 |
+
"optimal_steps": 6
|
| 13 |
+
}
|
repo_templates/task2/variant_1/src/data_pipeline.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from src.validator import validate_record
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def process_batch(records: list) -> list:
    """Process a batch of records through the validation pipeline.

    NOTE(review): the str() wrapping is the planted interface bug for this
    variant — validate_record raises TypeError for non-int ids. Leave
    unfixed; the agent under test removes the str() call.
    """
    results = []
    for record in records:
        # BUG: passing record["id"] as string, but validate_record expects int
        validated = validate_record(str(record["id"]), record["data"])
        if validated:
            results.append(validated)
    return results
|
repo_templates/task2/variant_1/src/models.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Data models for the pipeline."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class Record:
    """A single pipeline record: an integer id plus a payload dict."""

    def __init__(self, record_id: int, data: dict):
        self.record_id = record_id
        self.data = data

    def to_dict(self) -> dict:
        """Return the record as a plain dict with 'id' and 'data' keys."""
        return dict(id=self.record_id, data=self.data)
|
repo_templates/task2/variant_1/src/validator.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def validate_record(record_id: int, data: dict) -> dict:
    """Validate a record. record_id must be a positive integer.

    Returns a {'id', 'data', 'valid'} dict for a positive int id,
    None for a non-positive id, and raises TypeError for non-int ids.
    """
    if isinstance(record_id, int):
        if record_id > 0:
            return {"id": record_id, "data": data, "valid": True}
        return None
    raise TypeError(f"record_id must be int, got {type(record_id)}")
|
repo_templates/task2/variant_1/tests/test_pipeline.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.data_pipeline import process_batch
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# NOTE(review): test_process_valid_batch fails BY DESIGN — it pins the
# planted str()/int interface bug in src/data_pipeline.py.
def test_process_valid_batch():
    records = [{"id": 1, "data": {"name": "test"}}, {"id": 2, "data": {"name": "test2"}}]
    result = process_batch(records)
    assert len(result) == 2  # FAILS — TypeError from wrong type


def test_process_with_invalid_id():
    records = [{"id": -1, "data": {"name": "bad"}}]
    result = process_batch(records)
    assert result == []


def test_empty_batch():
    assert process_batch([]) == []
|
repo_templates/task2/variant_2/meta.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task2_v2",
|
| 3 |
+
"task": "task2",
|
| 4 |
+
"bug_files": ["src/email_sender.py"],
|
| 5 |
+
"interface_files": ["src/template_engine.py"],
|
| 6 |
+
"bug_description": "email_sender passes name= kwarg but template_engine expects username=",
|
| 7 |
+
"failing_tests": ["test_send_welcome_email", "test_welcome_email_structure"],
|
| 8 |
+
"fix_file": "src/email_sender.py",
|
| 9 |
+
"fix_description": "Change name=user_name to username=user_name in send_welcome_email",
|
| 10 |
+
"regression_test_must_cover": "KeyError when wrong kwarg name is used",
|
| 11 |
+
"total_files": 4,
|
| 12 |
+
"optimal_steps": 6
|
| 13 |
+
}
|
repo_templates/task2/variant_2/src/config.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration for the email service."""
|
| 2 |
+
|
| 3 |
+
SMTP_HOST = "localhost"
|
| 4 |
+
SMTP_PORT = 587
|
| 5 |
+
FROM_EMAIL = "noreply@example.com"
|
repo_templates/task2/variant_2/src/email_sender.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Email sending service that uses the template engine."""
|
| 2 |
+
from src.template_engine import render_template
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def send_welcome_email(user_name: str, user_email: str) -> dict:
    """Send a welcome email to a new user.

    NOTE(review): the name= kwarg (the 'welcome' template expects username=)
    is the planted bug for this variant — leave unfixed in the template.
    """
    # BUG: passing 'name' but template_engine expects 'username'
    body = render_template("welcome", name=user_name, email=user_email)
    return {
        "to": user_email,
        "subject": "Welcome!",
        "body": body,
        "sent": True,
    }
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def send_reset_email(user_email: str, reset_link: str) -> dict:
    """Build and send a password-reset email for the given address.

    Returns a message dict with to/subject/body/sent keys.
    """
    rendered = render_template("reset", email=user_email, link=reset_link)
    message = {"to": user_email, "subject": "Password Reset"}
    message["body"] = rendered
    message["sent"] = True
    return message
|
repo_templates/task2/variant_2/src/template_engine.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Template rendering engine for email bodies."""
|
| 2 |
+
|
| 3 |
+
# Email body templates, keyed by template name. Placeholders are filled by
# render_template via str.format. (Template text reproduced byte-for-byte.)
TEMPLATES = {
    "welcome": "Hello {username}, welcome to our platform! Your email {email} has been registered.",
    "reset": "Click here to reset your password: {link}. This was requested for {email}.",
    "notify": "Hi {username}, you have a new notification: {message}.",
}


def render_template(template_name: str, **kwargs) -> str:
    """Render an email template with the given keyword arguments.

    Expected kwargs per template:
      - welcome: username (str), email (str)
      - reset: email (str), link (str)
      - notify: username (str), message (str)

    Raises:
        ValueError: for an unknown template name.
        KeyError: when a required template variable is missing.
    """
    if template_name not in TEMPLATES:
        raise ValueError(f"Unknown template: {template_name}")
    try:
        return TEMPLATES[template_name].format(**kwargs)
    except KeyError as exc:
        raise KeyError(f"Missing required template variable: {exc}")
|
repo_templates/task2/variant_2/tests/test_email.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.email_sender import send_welcome_email, send_reset_email
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_send_welcome_email():
|
| 6 |
+
# FAILS — KeyError because email_sender passes 'name' but template expects 'username'
|
| 7 |
+
result = send_welcome_email("Alice", "alice@example.com")
|
| 8 |
+
assert result["sent"] == True
|
| 9 |
+
assert "Alice" in result["body"]
|
| 10 |
+
assert "alice@example.com" in result["body"]
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_send_reset_email():
|
| 14 |
+
result = send_reset_email("bob@example.com", "https://reset.link/abc")
|
| 15 |
+
assert result["sent"] == True
|
| 16 |
+
assert "https://reset.link/abc" in result["body"]
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_welcome_email_structure():
|
| 20 |
+
# FAILS — same KeyError as test_send_welcome_email
|
| 21 |
+
result = send_welcome_email("Charlie", "charlie@test.com")
|
| 22 |
+
assert result["to"] == "charlie@test.com"
|
| 23 |
+
assert result["subject"] == "Welcome!"
|
repo_templates/task2/variant_3/meta.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task2_v3",
|
| 3 |
+
"task": "task2",
|
| 4 |
+
"bug_files": ["src/order_processor.py"],
|
| 5 |
+
"interface_files": ["src/inventory_checker.py"],
|
| 6 |
+
"bug_description": "order_processor passes list of items but inventory_checker expects dict {sku: qty}",
|
| 7 |
+
"failing_tests": ["test_process_valid_order", "test_order_structure"],
|
| 8 |
+
"fix_file": "src/order_processor.py",
|
| 9 |
+
"fix_description": "Convert items list to dict: {item['sku']: item['qty'] for item in items}",
|
| 10 |
+
"regression_test_must_cover": "TypeError when list is passed to check_availability",
|
| 11 |
+
"total_files": 4,
|
| 12 |
+
"optimal_steps": 6
|
| 13 |
+
}
|
repo_templates/task2/variant_3/src/inventory_checker.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Inventory checking service. Verifies stock levels for orders."""
|
| 2 |
+
|
| 3 |
+
# Simulated stock database: SKU -> units on hand.
STOCK = {
    "WIDGET-A": 100,
    "WIDGET-B": 50,
    "GADGET-X": 0,
    "GADGET-Y": 25,
}


def check_availability(requested_items: dict) -> bool:
    """
    Check if all requested items are available in stock.

    Args:
        requested_items: dict mapping SKU to quantity, e.g. {"WIDGET-A": 5, "GADGET-Y": 2}

    Returns:
        True if all items are available in sufficient quantity.

    Raises:
        TypeError: if requested_items is not a dict.
    """
    if not isinstance(requested_items, dict):
        raise TypeError(
            f"requested_items must be dict, got {type(requested_items).__name__}. "
            f"Expected format: {{'SKU': quantity}}"
        )

    # Every SKU must exist and carry at least the requested quantity.
    return all(
        sku in STOCK and STOCK[sku] >= qty
        for sku, qty in requested_items.items()
    )
|
repo_templates/task2/variant_3/src/models.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared models for the order system."""
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class OrderItem:
    """One line of an order: a SKU and the quantity ordered."""

    def __init__(self, sku: str, qty: int):
        self.sku = sku
        self.qty = qty

    def to_dict(self) -> dict:
        """Return the line item as a plain {'sku', 'qty'} dict."""
        return dict(sku=self.sku, qty=self.qty)
|
repo_templates/task2/variant_3/src/order_processor.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Order processing module that checks inventory before fulfillment."""
|
| 2 |
+
from src.inventory_checker import check_availability
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def process_order(order: dict) -> dict:
    """
    Process an order by checking inventory availability.
    order format: {"items": [{"sku": "ABC", "qty": 2}, ...], "customer": "..."}

    NOTE(review): passing the raw items list to check_availability (which
    requires a {sku: qty} dict and raises TypeError otherwise) is the
    planted bug for this variant — leave unfixed in the template.
    """
    items = order.get("items", [])
    if not items:
        return {"status": "error", "message": "No items in order"}

    # BUG: passing items as list, but check_availability expects a dict {sku: qty}
    available = check_availability(items)

    if available:
        return {"status": "confirmed", "items": items}
    else:
        return {"status": "out_of_stock", "items": items}
|
repo_templates/task2/variant_3/tests/test_orders.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.order_processor import process_order
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# NOTE(review): test_process_valid_order and test_order_structure fail BY
# DESIGN — they pin the planted list-vs-dict bug in src/order_processor.py.
def test_process_valid_order():
    order = {
        "items": [{"sku": "WIDGET-A", "qty": 2}, {"sku": "GADGET-Y", "qty": 1}],
        "customer": "alice@example.com",
    }
    # FAILS — TypeError because list is passed instead of dict
    result = process_order(order)
    assert result["status"] == "confirmed"


def test_empty_order():
    result = process_order({"items": [], "customer": "bob@example.com"})
    assert result["status"] == "error"


def test_order_structure():
    order = {
        "items": [{"sku": "WIDGET-B", "qty": 5}],
        "customer": "charlie@example.com",
    }
    # FAILS — same TypeError
    result = process_order(order)
    assert "items" in result
|
repo_templates/task2/variant_4/meta.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task2_v4",
|
| 3 |
+
"task": "task2",
|
| 4 |
+
"bug_files": ["src/report_builder.py"],
|
| 5 |
+
"interface_files": ["src/date_formatter.py"],
|
| 6 |
+
"bug_description": "report_builder passes ISO string but date_formatter expects datetime object",
|
| 7 |
+
"failing_tests": ["test_build_monthly_report", "test_report_structure"],
|
| 8 |
+
"fix_file": "src/report_builder.py",
|
| 9 |
+
"fix_description": "Parse ISO strings to datetime before passing: datetime.strptime(start_date, '%Y-%m-%d')",
|
| 10 |
+
"regression_test_must_cover": "TypeError when string is passed to format_date_range",
|
| 11 |
+
"total_files": 4,
|
| 12 |
+
"optimal_steps": 6
|
| 13 |
+
}
|
repo_templates/task2/variant_4/src/date_formatter.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Date formatting utilities for reports."""
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def format_date_range(start: datetime, end: datetime) -> str:
    """
    Format a date range for display in reports.

    Args:
        start: datetime object for range start
        end: datetime object for range end

    Returns:
        Formatted string like "Jan 01, 2024 — Jan 31, 2024"

    Raises:
        TypeError: if either argument is not a datetime (start checked first).
    """
    for label, value in (("start", start), ("end", end)):
        if not isinstance(value, datetime):
            raise TypeError(f"{label} must be datetime, got {type(value).__name__}")

    pattern = "%b %d, %Y"
    return f"{start.strftime(pattern)} — {end.strftime(pattern)}"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def format_single_date(dt: datetime) -> str:
    """Format a single datetime like "January 01, 2024".

    Raises:
        TypeError: if dt is not a datetime.
    """
    if isinstance(dt, datetime):
        return dt.strftime("%B %d, %Y")
    raise TypeError(f"Expected datetime, got {type(dt).__name__}")
|
repo_templates/task2/variant_4/src/models.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared models for the reporting system."""
|
| 2 |
+
|
| 3 |
+
REPORT_TYPES = ["monthly", "quarterly", "annual", "summary"]
|
repo_templates/task2/variant_4/src/report_builder.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Report builder that assembles reports with formatted dates."""
|
| 2 |
+
from src.date_formatter import format_date_range
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def build_monthly_report(title: str, start_date: str, end_date: str, data: list) -> dict:
    """
    Build a monthly report with formatted date header.

    Args:
        title: Report title
        start_date: ISO format string 'YYYY-MM-DD'
        end_date: ISO format string 'YYYY-MM-DD'
        data: List of data points

    NOTE(review): passing the ISO strings straight to format_date_range
    (which requires datetime objects and raises TypeError) is the planted
    bug for this variant — leave unfixed in the template.
    """
    # BUG: passing ISO string directly, but format_date_range expects datetime objects
    date_header = format_date_range(start_date, end_date)

    return {
        "title": title,
        "period": date_header,
        "total_records": len(data),
        "data": data,
    }
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def build_summary(title: str, content: str) -> dict:
    """Build a simple summary-report dict with title/content/type keys."""
    return dict(title=title, content=content, type="summary")
|
repo_templates/task2/variant_4/tests/test_reports.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.report_builder import build_monthly_report, build_summary
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_build_monthly_report():
|
| 6 |
+
# FAILS — TypeError because ISO string passed instead of datetime
|
| 7 |
+
result = build_monthly_report(
|
| 8 |
+
"Sales Report",
|
| 9 |
+
"2024-01-01",
|
| 10 |
+
"2024-01-31",
|
| 11 |
+
[{"amount": 100}, {"amount": 200}],
|
| 12 |
+
)
|
| 13 |
+
assert result["title"] == "Sales Report"
|
| 14 |
+
assert result["total_records"] == 2
|
| 15 |
+
assert "Jan" in result["period"]
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def test_build_summary():
    summary = build_summary("Q1 Summary", "Revenue increased 15%")
    assert summary["title"] == "Q1 Summary"
    assert summary["type"] == "summary"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_report_structure():
    # FAILS — same TypeError
    report = build_monthly_report("Inventory", "2024-03-01", "2024-03-31", [])
    assert "period" in report
    assert report["total_records"] == 0
|
repo_templates/task2/variant_5/meta.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"variant_id": "task2_v5",
|
| 3 |
+
"task": "task2",
|
| 4 |
+
"bug_files": ["src/cache_manager.py"],
|
| 5 |
+
"interface_files": ["src/serializer.py"],
|
| 6 |
+
"bug_description": "cache_manager passes bytes (.encode()) but serializer expects str",
|
| 7 |
+
"failing_tests": ["test_cache_set_and_get", "test_cache_delete"],
|
| 8 |
+
"fix_file": "src/cache_manager.py",
|
| 9 |
+
"fix_description": "Remove .encode('utf-8') — pass str(value) directly to serialize_value",
|
| 10 |
+
"regression_test_must_cover": "TypeError when bytes is passed to serialize_value",
|
| 11 |
+
"total_files": 4,
|
| 12 |
+
"optimal_steps": 6
|
| 13 |
+
}
|
repo_templates/task2/variant_5/src/cache_manager.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cache management service that stores serialized data."""
|
| 2 |
+
from src.serializer import serialize_value, deserialize_value
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class CacheManager:
    """In-memory key/value cache that serializes entries on write."""

    def __init__(self):
        # Maps cache key -> serialized payload string.
        self._store = {}

    def set(self, key: str, value) -> None:
        """Serialize *value* and store it under *key*."""
        # BUG (intentional, part of the exercise): the value is UTF-8 encoded
        # to bytes before serialization, but serialize_value requires str.
        payload = serialize_value(str(value).encode('utf-8'))
        self._store[key] = payload

    def get(self, key: str, default=None):
        """Return the deserialized value for *key*, or *default* if absent."""
        try:
            payload = self._store[key]
        except KeyError:
            return default
        return deserialize_value(payload)

    def delete(self, key: str) -> bool:
        """Drop *key* from the cache; report whether it was present."""
        if key not in self._store:
            return False
        del self._store[key]
        return True

    def clear(self):
        """Discard every cached entry."""
        self._store.clear()

    def keys(self) -> list:
        """List all keys currently held in the cache."""
        return [k for k in self._store]
|
repo_templates/task2/variant_5/src/config.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cache configuration constants."""
|
| 2 |
+
|
| 3 |
+
MAX_CACHE_SIZE = 1000
|
| 4 |
+
DEFAULT_TTL = 300 # seconds
|
repo_templates/task2/variant_5/src/serializer.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Serialization utilities for the cache system."""
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def serialize_value(value: str) -> str:
    """
    Serialize a value to a JSON string for storage.

    Args:
        value: must be a string (str type)

    Returns:
        JSON-encoded string

    Raises:
        TypeError: if *value* is anything other than a str.
    """
    if isinstance(value, str):
        return json.dumps({"data": value})
    raise TypeError(f"value must be str, got {type(value).__name__}")
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def deserialize_value(serialized: str):
    """Deserialize a JSON string (as produced by serialize_value) back to the
    stored value; returns None when the 'data' key is absent.

    Raises:
        TypeError: if *serialized* is not a str.
    """
    if isinstance(serialized, str):
        return json.loads(serialized).get("data")
    raise TypeError(f"serialized must be str, got {type(serialized).__name__}")
|
repo_templates/task2/variant_5/tests/test_cache.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from src.cache_manager import CacheManager
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_cache_set_and_get():
    store = CacheManager()
    # FAILS — TypeError because bytes passed to serializer instead of str
    store.set("user:1", "Alice")
    assert store.get("user:1") == "Alice"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_cache_get_missing():
    store = CacheManager()
    assert store.get("nonexistent", "default") == "default"
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def test_cache_delete():
    store = CacheManager()
    # FAILS — same TypeError on set
    store.set("temp", "data")
    assert store.delete("temp") == True
    assert store.get("temp") is None
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def test_cache_clear():
    store = CacheManager()
    store._store.update({"a": '{"data": "1"}', "b": '{"data": "2"}'})
    store.clear()
    assert store.keys() == []
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def test_cache_keys():
    store = CacheManager()
    store._store.update({"x": '{"data": "1"}', "y": '{"data": "2"}'})
    assert sorted(store.keys()) == ["x", "y"]
|