#!/usr/bin/env bash
#
# validate-submission.sh — OpenEnv Submission Validator
#
# Validates a submission by running these checks from the script's directory:
#   1. docker, curl and a Python interpreter are installed
#   2. Docker build passes
#   3. Server container responds to /health and /reset
#   4. Baseline inference script (inference.py) prints a "score=" line
#
# NOTE(review): the competition requirements also list `openenv validate` and
# "3+ tasks with graders producing scores in 0.0-1.0"; this script does not
# perform those two checks.
#
# Environment:
#   HF_TOKEN  token exported to inference.py (defaults to "test")

set -euo pipefail

readonly DOCKER_BUILD_TIMEOUT=600   # seconds; enforced only if `timeout` exists
readonly HEALTH_TIMEOUT=30          # seconds to wait for /health to come up
readonly IMAGE_NAME="mlops-debug-env"
readonly CONTAINER_NAME="mlops-test"
readonly SERVER_URL="http://localhost:7860"

# Only emit colour escape sequences when stdout is a terminal.
if [[ -t 1 ]]; then
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[1;33m'
  BOLD=$'\033[1m'
  NC=$'\033[0m'
else
  RED=''
  GREEN=''
  YELLOW=''
  BOLD=''
  NC=''
fi

# Print a success / progress message.
log() { printf '%s %s\n' "${GREEN}[OK]${NC}" "$*"; }

# Print a non-fatal warning.
warn() { printf '%s %s\n' "${YELLOW}[WARN]${NC}" "$*"; }

# Print an error message without exiting.
err() { printf '%s %s\n' "${RED}[ERROR]${NC}" "$*"; }

# Print a failure message and abort the script with status 1.
# The EXIT trap installed below still runs, so the container is cleaned up.
fail() {
  printf '%s %s\n' "${RED}[FAIL]${NC}" "$*"
  exit 1
}

#######################################
# Run a command string silently and report the outcome.
# Arguments:
#   $1 - command string to evaluate (must be a trusted literal: it is eval'd)
#   $2 - human-readable name of the check
# Outputs:
#   "Checking <name>... " followed by an [OK] or [FAIL] line on stdout
# Returns:
#   0 on success; exits the script with status 1 on failure
#######################################
check() {
  local cmd="$1"
  local name="$2"
  printf 'Checking %s... ' "$name"
  if eval "$cmd" &>/dev/null; then
    log "$name"
  else
    fail "$name"
  fi
}

# Set to 1 once the test container has been started, so cleanup only touches
# a container this run created.
container_started=0

# Stop and remove the test container. Safe to call more than once.
cleanup() {
  if (( container_started )); then
    docker stop "$CONTAINER_NAME" >/dev/null 2>&1 || true
    docker rm "$CONTAINER_NAME" >/dev/null 2>&1 || true
    container_started=0
  fi
}
# Guarantee cleanup on every exit path, including fail() and set -e aborts.
trap cleanup EXIT

# Check prerequisites
check "command -v docker" "Docker installed"
check "command -v curl" "curl installed"
check "command -v python || command -v python3" "Python installed"
# Prefer `python`, fall back to `python3` on systems that only ship the latter.
PYTHON_BIN=$(command -v python || command -v python3)

# Build Docker
log "Building Docker image..."
cd "$(dirname "$0")" || fail "Cannot change to script directory"
build_cmd=(docker build -t "$IMAGE_NAME" .)
# `timeout` (GNU coreutils) is not available everywhere; bound the build
# duration only when it is installed.
if command -v timeout >/dev/null 2>&1; then
  build_cmd=(timeout "$DOCKER_BUILD_TIMEOUT" "${build_cmd[@]}")
fi
if "${build_cmd[@]}"; then
  log "Docker build passed"
else
  fail "Docker build failed"
fi

# Start container in background
log "Starting server..."
# Remove a stale container left behind by an interrupted previous run;
# otherwise `docker run --name` fails with a name conflict.
docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
if docker run -d -p 7860:7860 --name "$CONTAINER_NAME" "$IMAGE_NAME" >/dev/null; then
  container_started=1
else
  fail "Server container failed to start"
fi

# Check health endpoint: poll instead of a fixed sleep, and use -f so that
# HTTP error responses (>= 400) count as "not healthy".
printf 'Checking %s... ' "Server /health responds"
healthy=0
for (( attempt = 0; attempt < HEALTH_TIMEOUT; attempt++ )); do
  if curl -sf "${SERVER_URL}/health" >/dev/null 2>&1; then
    healthy=1
    break
  fi
  sleep 1
done
if (( healthy )); then
  log "Server /health responds"
else
  fail "Server /health responds"
fi

# Test reset endpoint
log "Testing /reset..."
RESET_RESP=$(curl -s -X POST "${SERVER_URL}/reset" \
  -H "Content-Type: application/json" \
  -d '{"task_id":"easy","seed":42}') || RESET_RESP=""
if grep -q "task_id" <<<"$RESET_RESP"; then
  log "/reset works"
else
  fail "/reset failed"
fi

# Test inference script
log "Running baseline inference..."
export API_BASE_URL="https://api.openai.com/v1"
export MODEL_NAME="gemini-2.5-flash"
export HF_TOKEN="${HF_TOKEN:-test}"
export ENV_BASE_URL="http://localhost:7860"
# Capture the output first rather than piping into `grep -q`: under pipefail,
# grep exiting on the first match can make the writer fail with a broken pipe
# and turn a correct run into a spurious warning.
inference_status=0
inference_output=$("$PYTHON_BIN" inference.py --task easy --seed 42 2>&1) \
  || inference_status=$?
if (( inference_status == 0 )) && grep -q "score=" <<<"$inference_output"; then
  log "Inference script format correct"
else
  warn "Inference script may have format issues"
fi

# Cleanup
log "Cleaning up..."
cleanup

echo ""
echo "========================================="
log "All validation checks passed!"
echo "========================================="