{
"$schema": "autocode-verification-input-v1",
"feature_id": "F007",
"spec_path": "specs/F007-IMPLEMENTATION_SPEC.md",
"generated": "2026-03-27T12:00:00Z",
"verification_mode": "mvp",
"overview": {
"summary": "Competition submission package for HuggingFace deployment: Dockerfile hardened for HF Spaces free tier with bundled Spider databases, polished README, blog post outline, and Colab-ready training notebook.",
"goal": "Judges can visit the HF Space, read the blog, run the training notebook, and reproduce results. Someone outside the team can understand, use, and build on SQLEnv."
},
"interfaces": {
"types": [
{
"name": "Dockerfile",
"description": "Docker container specification for HF Spaces deployment. Must bundle Spider SQLite databases, support PORT env variable, run as non-root user, and build successfully on CPU-only free tier.",
"fields": [
{"name": "BASE_IMAGE", "type": "ARG", "description": "openenv-base image from GHCR"},
{"name": "PORT", "type": "ENV", "description": "Server port; defaults to 8000, and HF Spaces overrides it to 7860"},
{"name": "data/databases/", "type": "COPY", "description": "Bundled Spider SQLite databases (~50MB)"},
{"name": "appuser", "type": "USER", "description": "Non-root user for HF Spaces security"}
]
},
{
"name": "openenv.yaml",
"description": "OpenEnv environment manifest for HF Hub compatibility.",
"fields": [
{"name": "spec_version", "type": "int", "description": "Must be 1"},
{"name": "name", "type": "str", "description": "Environment name: sql_env"},
{"name": "type", "type": "str", "description": "Must be 'space'"},
{"name": "runtime", "type": "str", "description": "Must be 'fastapi'"},
{"name": "app", "type": "str", "description": "Must be 'server.app:app'"},
{"name": "port", "type": "int", "description": "Server port: 8000"}
]
},
{
"name": "BlogOutline",
"description": "Structured blog post outline at docs/blog-outline.md with narrative sections: hook, problem, solution, how-it-works, results placeholder, technical highlights, try-it-yourself.",
"fields": [
{"name": "hook", "type": "str", "description": "Compelling opening that draws readers in"},
{"name": "problem", "type": "str", "description": "Why static benchmarks are insufficient"},
{"name": "solution", "type": "str", "description": "SQLEnv architecture overview"},
{"name": "results", "type": "str", "description": "Placeholder for F006 training results"},
{"name": "try_it", "type": "str", "description": "Links to HF Space, notebook, GitHub"}
]
},
{
"name": "TrainingNotebook",
"description": "Jupyter notebook at notebooks/train_grpo.ipynb. Must be Colab-compatible with setup, connect, train, evaluate, and plot cells.",
"fields": [
{"name": "setup_cell", "type": "code", "description": "pip install dependencies, one-click setup"},
{"name": "connect_cell", "type": "code", "description": "SQLEnvClient connect and test episode"},
{"name": "train_cell", "type": "code", "description": "GRPO training loop"},
{"name": "eval_cell", "type": "code", "description": "Evaluation on held-out questions"},
{"name": "plot_cell", "type": "code", "description": "matplotlib learning curves"}
]
}
],
"functions": [],
"api_endpoints": []
},
"data_flow": {
"primary_flow": [
"Developer runs openenv validate to check manifest and Dockerfile locally",
"Developer runs openenv build to create Docker image with bundled Spider databases",
"Developer runs openenv push to deploy to HuggingFace Spaces",
"Judge visits HF Space URL, connects via WebSocket, plays an episode (reset + steps)",
"Judge opens Colab notebook, runs all cells, sees training results"
],
"alternative_flows": [
{
"name": "Local Docker test",
"steps": [
"docker build -t sql-env:latest -f server/Dockerfile .",
"docker run -p 8000:8000 sql-env:latest",
"curl http://localhost:8000/health returns healthy status",
"WebSocket client connects and plays episode"
]
}
]
},
"error_handling": {
"error_types": [
{
"name": "DockerBuildFailure",
"when": "Missing dependencies, incorrect COPY paths, or an unavailable base image",
"resolution": "Check .dockerignore, verify file paths, test locally first"
},
{
"name": "DatabaseNotFound",
"when": "Spider SQLite databases not bundled correctly in Docker image",
"resolution": "Verify COPY data/databases/ path in Dockerfile"
},
{
"name": "PortMismatch",
"when": "HF Spaces sets PORT=7860 but server binds to 8000",
"resolution": "CMD reads PORT env variable with fallback to 8000"
},
{
"name": "MemoryExceeded",
"when": "Container exceeds HF Spaces free tier memory limit",
"resolution": "Reduce bundled databases to essential set only"
}
],
"retry_strategy": null
},
"dependencies": {
"external": [
{"name": "HuggingFace Spaces", "version": "free tier", "usage": "Docker container hosting"},
{"name": "openenv CLI", "version": "latest", "usage": "validate, build, push commands"},
{"name": "Google Colab", "version": "free tier", "usage": "Training notebook execution"}
],
"internal": [
{"name": "F001", "usage": "Core environment loop (server must work)"},
{"name": "F002", "usage": "Multi-DB support (databases to bundle)"},
{"name": "F003", "usage": "Reward computation (used in training)"},
{"name": "F004", "usage": "Answer verification (used in training)"},
{"name": "F005", "usage": "Token-level rewards (used in training)"},
{"name": "F006", "usage": "GRPO training (notebook references training scripts)"}
]
}
}