Buckets:
set -euo pipefail

# ── Quant name ──────────────────────────────────────────────────────────────
# Label for this quantization run; namespaces the /output/<name>/logs directory.
QUANT_NAME="autoround-intel"

# ── Logging ─────────────────────────────────────────────────────────────────
LOG_DIR="/tmp/logs"
mkdir -p "$LOG_DIR"
# Mirror all subsequent stdout+stderr into job.log while still printing to the
# console; process substitution keeps this shell as the single writer.
exec > >(tee -a "$LOG_DIR/job.log") 2>&1
| log() { echo "[$(date -u +%H:%M:%S)] $*"; } | |
| die() { log "FATAL: $*"; sync_logs; exit 1; } | |
# Best-effort copy of everything under $LOG_DIR into the /output bucket.
# Failures are downgraded to warnings so a missing/readonly /output mount
# never aborts the job.
sync_logs() {
  local dest="/output/${QUANT_NAME}/logs"
  log "Syncing logs to ${dest}/..."
  mkdir -p "$dest" 2>/dev/null || true
  cp -r "$LOG_DIR"/* "$dest/" 2>/dev/null || log "Warning: could not copy logs to /output"
}
log "=== Job started: $(date -u) ==="
log "ACCELERATOR=${ACCELERATOR:-unknown} | CPU_CORES=${CPU_CORES:-?} | MEMORY=${MEMORY:-?}"

# ── Verify model volume ────────────────────────────────────────────────────
log "Checking model volume..."
# Capture the listing first instead of `ls | head`: under `set -o pipefail`,
# head exiting after 20 lines sends ls a SIGPIPE (status 141), which would make
# the pipeline fail and trigger a spurious die even though /model exists.
model_listing=$(ls -lh /model/ 2>/dev/null) || die "Model directory /model/ not found"
printf '%s\n' "$model_listing" | head -20
# Warn (don't die) if no weight shards are visible; some layouts name them differently.
ls /model/*.safetensors >/dev/null 2>&1 || ls /model/model*.safetensors >/dev/null 2>&1 || log "Warning: no safetensors files found"
log "Model directory contents: $(ls /model/ | wc -l) files"

# ── Install auto-round ──────────────────────────────────────────────────────
# tail keeps the log readable; unlike head it consumes all input, so no
# SIGPIPE issue here. A pip failure still aborts via set -e + pipefail.
log "Installing auto-round + upgrading transformers..."
pip install auto-round 2>&1 | tail -5
pip install -U transformers 2>&1 | tail -3
log "auto-round + transformers installed"
# ── Start vLLM server ──────────────────────────────────────────────────────
# Launch the OpenAI-compatible API server in the background so the Node.js/pi
# installation below can proceed while the model weights load.
log "Starting vLLM server..."
python3 -m vllm.entrypoints.openai.api_server \
  --model /model \
  --trust-remote-code \
  --dtype bfloat16 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.95 \
  --enforce-eager \
  --port 8080 \
  --host 127.0.0.1 \
  > "$LOG_DIR/vllm-server.log" 2>&1 &
# PID of the background server; used by the health-check loop and final cleanup.
VLLM_PID=$!
log "vLLM server PID: $VLLM_PID"
# ── Install Node.js + pi while vLLM loads ──────────────────────────────────
# These installs deliberately overlap with the background model load to save
# wall-clock time. `tail -n` keeps the noisy apt/npm output short in job.log.
log "Installing Node.js 22 (while model loads in background)..."
apt-get update -qq 2>&1 | tail -3
apt-get install -y -qq curl git ca-certificates gnupg 2>&1 | tail -3
# NodeSource setup script registers the apt repository for Node.js 22.
curl -fsSL https://deb.nodesource.com/setup_22.x | bash - 2>&1 | tail -3
apt-get install -y -qq nodejs 2>&1 | tail -3
log "Node.js $(node --version) installed, npm $(npm --version)"
log "Installing pi coding agent..."
npm install -g @mariozechner/pi-coding-agent 2>&1 | tail -5
# pi may not support --version; the fallback keeps this log line from aborting
# the script under set -e.
log "pi installed: $(pi --version 2>&1 || echo 'version check done')"
# ── Health check ────────────────────────────────────────────────────────────
# Poll until the server answers, the process dies, or MAX_WAIT elapses.
log "Waiting for vLLM server to be ready..."
MAX_WAIT=600   # up to 10 minutes for model load
ELAPSED=0
while true; do
  # Primary readiness probe.
  if curl -sf http://127.0.0.1:8080/health 2>/dev/null; then
    break
  fi
  # Also try /v1/models as fallback health check
  if curl -sf http://127.0.0.1:8080/v1/models 2>/dev/null | grep -q '"id"'; then
    break
  fi
  sleep 5
  ELAPSED=$((ELAPSED + 5))
  if [[ $ELAPSED -ge $MAX_WAIT ]]; then
    log "vLLM server failed to start within ${MAX_WAIT}s. Last logs:"
    tail -50 "$LOG_DIR/vllm-server.log" || true
    die "Health check timeout"
  fi
  # If the server process died early, restart once with more conservative
  # flags. The ELAPSED < 30 guard bounds the retries: ELAPSED keeps growing
  # after the restart, so a second death falls through to die.
  kill -0 $VLLM_PID 2>/dev/null || {
    log "vLLM server died. Last logs:"
    tail -50 "$LOG_DIR/vllm-server.log" || true
    # Retry with fallback flags if first attempt failed
    if [[ $ELAPSED -lt 30 ]]; then
      log "Retrying vLLM with --enforce-eager and reduced context..."
      # NOTE(review): --quantization gptq assumes the checkpoint under /model
      # is GPTQ-format; confirm this matches the artifact before relying on
      # the fallback path.
      python3 -m vllm.entrypoints.openai.api_server \
        --model /model \
        --trust-remote-code \
        --dtype auto \
        --quantization gptq \
        --max-model-len 4096 \
        --gpu-memory-utilization 0.95 \
        --enforce-eager \
        --port 8080 \
        --host 127.0.0.1 \
        > "$LOG_DIR/vllm-server.log" 2>&1 &
      VLLM_PID=$!
      log "vLLM server restarted with fallback flags, PID: $VLLM_PID"
      sleep 10
      ELAPSED=$((ELAPSED + 10))
      continue
    fi
    die "vLLM server process exited"
  }
  # Heartbeat every 30s so the log shows progress during long loads.
  [[ $((ELAPSED % 30)) -eq 0 ]] && log "  ...still waiting (${ELAPSED}s elapsed)"
done
log "vLLM server is ready!"
# Detect the model name vLLM is serving
# (the server registers the model under an id of its choosing; fall back to
# "/model" if the endpoint or the JSON parse fails).
MODEL_NAME=$(curl -sf http://127.0.0.1:8080/v1/models 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['data'][0]['id'])" 2>/dev/null || echo "/model")
log "vLLM serving model as: $MODEL_NAME"
# Quick sanity check
# One tiny completion proves the server can actually run inference, not just
# answer health probes.
log "Testing inference..."
SANITY=$(curl -sf http://127.0.0.1:8080/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d "{\"model\":\"${MODEL_NAME}\",\"messages\":[{\"role\":\"user\",\"content\":\"Say hello in one word.\"}],\"max_tokens\":128}" 2>/dev/null || echo "FAILED")
# Persist a truncated copy of the response for post-mortem debugging.
echo "$SANITY" | head -c 500 > "$LOG_DIR/sanity-check.json"
log "Sanity check response: $(echo "$SANITY" | head -c 200)"
# ── Configure pi ────────────────────────────────────────────────────────────
# Point the pi agent at the local vLLM server via its OpenAI-compatible API.
log "Writing pi configuration..."
mkdir -p ~/.pi/agent
# Unquoted heredoc delimiter: ${MODEL_NAME}/${QUANT_NAME} expand into the JSON.
# NOTE(review): MODEL_NAME is interpolated verbatim — it must not contain
# characters needing JSON escaping (quotes/backslashes); the path-like ids
# vLLM reports are safe in practice, but confirm for exotic model ids.
cat > ~/.pi/agent/models.json << EOF
{
  "providers": {
    "vllm-local": {
      "baseUrl": "http://127.0.0.1:8080/v1",
      "api": "openai-completions",
      "apiKey": "none",
      "compat": {
        "supportsDeveloperRole": true,
        "supportsReasoningEffort": false,
        "supportsUsageInStreaming": true,
        "supportsStrictMode": false,
        "thinkingFormat": "qwen-chat-template"
      },
      "models": [
        {
          "id": "${MODEL_NAME}",
          "name": "Qwen3.5-27B-${QUANT_NAME}",
          "reasoning": true,
          "input": ["text"],
          "contextWindow": 8192,
          "maxTokens": 4096,
          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
        }
      ]
    }
  }
}
EOF
# Make the local provider/model the default so pi needs no CLI overrides.
cat > ~/.pi/agent/settings.json << EOF
{
  "defaultProvider": "vllm-local",
  "defaultModel": "${MODEL_NAME}",
  "defaultThinkingLevel": "medium",
  "hideThinkingBlock": false
}
EOF
log "pi config written"
| # ── Clone test repo ───────────────────────────────────────────────────────── | |
| WORKDIR="/workspace" | |
| mkdir -p "$WORKDIR" | |
| log "Cloning test repository..." | |
| git clone --depth 1 https://github.com/sindresorhus/slugify "$WORKDIR/slugify" | |
| cd "$WORKDIR/slugify" | |
| log "Repo cloned: $(git log --oneline -1)" | |
| log "Installing repo dependencies..." | |
| npm install 2>&1 | tail -5 | |
| log "Dependencies installed" | |
# ── Run pi agentic task ─────────────────────────────────────────────────────
# Quoted delimiter ('TASK_EOF') keeps the prompt literal: no parameter or
# command expansion, so backticks and $ in the task text reach the agent intact.
TASK=$(cat << 'TASK_EOF'
You are working in a Node.js project called @sindresorhus/slugify — a string slugification library.
Your task: Add a new `--interactive` mode to the CLI (cli.js) that reads lines from stdin, slugifies each line, and prints the result. Requirements:
1. First, read and understand the existing codebase — look at the main module (index.js), the CLI (cli.js), and the test files.
2. Modify cli.js to accept a `--interactive` / `-i` flag. When set, the CLI should:
   - Read lines from stdin (one string per line)
   - Slugify each line using the library
   - Print each slugified result to stdout
   - Exit cleanly when stdin closes (EOF)
3. The existing CLI behavior (passing a string as an argument) must continue to work unchanged.
4. Add tests for the new interactive mode in the test file. The tests should:
   - Test that piping multiple lines produces correct slugified output
   - Test that existing argument-based usage still works
5. Run the existing test suite with `npm test` to make sure nothing is broken.
6. If tests fail, debug and fix until they pass.
Report what you did and whether tests pass.
TASK_EOF
)
log "Starting pi coding task..."
log "Task: Add --interactive stdin mode to slugify CLI"
# Capture pi's real exit code. The previous `... || true` pattern reset
# PIPESTATUS to the status of `true`, so PI_EXIT was always 0 even when pi
# failed; `|| PI_EXIT=$?` records the genuine status without tripping set -e.
PI_EXIT=0
pi \
  --print \
  --no-session \
  --provider vllm-local \
  --model "$MODEL_NAME" \
  --thinking medium \
  "$TASK" \
  > "$LOG_DIR/pi-output.txt" 2>&1 || PI_EXIT=$?
log "pi exited with code: $PI_EXIT"
# ── Capture results ─────────────────────────────────────────────────────────
log "Collecting results..."
cd "$WORKDIR/slugify"
# Each artifact capture is best-effort (|| true): a failure here must not
# abort the summary or the final log sync.
git diff > "$LOG_DIR/pi-changes.patch" 2>/dev/null || true
git diff --stat > "$LOG_DIR/pi-changes-stat.txt" 2>/dev/null || true
git status > "$LOG_DIR/git-status.txt" 2>/dev/null || true
# Try running tests one final time to get clean output
npm test > "$LOG_DIR/test-output.txt" 2>&1 || true
# ── Summary ─────────────────────────────────────────────────────────────────
log "=== RESULTS SUMMARY ==="
log "Pi exit code: $PI_EXIT"
log ""
log "--- Files changed ---"
cat "$LOG_DIR/pi-changes-stat.txt" 2>/dev/null || log "(no changes)"
log ""
log "--- Test results ---"
tail -20 "$LOG_DIR/test-output.txt" 2>/dev/null || log "(no test output)"
log ""
log "--- Log files ---"
ls -lh "$LOG_DIR/"
log ""
# ── Sync logs to output bucket ──────────────────────────────────────────────
sync_logs
log "=== Job complete: $(date -u) ==="
# ── Cleanup ─────────────────────────────────────────────────────────────────
# Stop the background vLLM server (ignore errors if it already exited) and
# reap it so the script leaves no zombie behind.
kill "$VLLM_PID" 2>/dev/null || true
wait "$VLLM_PID" 2>/dev/null || true
Xet Storage Details
- Size: 10.3 kB
- Xet hash: 2969a3dee0766aecb2309e35e772e1f91ee5f7a452411d15fe0863a77fbcfe27

Xet efficiently stores files by intelligently splitting them into unique chunks, accelerating uploads and downloads. More info.