victor/qwen35-test-scripts / entrypoint-autoround.sh
download
raw
10.3 kB
#!/usr/bin/env bash
set -euo pipefail

# ── Quant name ──────────────────────────────────────────────────────────────
QUANT_NAME="autoround-intel"

# ── Logging ─────────────────────────────────────────────────────────────────
LOG_DIR="/tmp/logs"
mkdir -p "$LOG_DIR"
# Mirror every byte of stdout/stderr into the persistent job log.
exec > >(tee -a "$LOG_DIR/job.log") 2>&1

# log MSG... — print a UTC-timestamped line.
log() { printf '[%s] %s\n' "$(date -u +%H:%M:%S)" "$*"; }
# die MSG... — log a fatal error, flush logs to /output, and abort the job.
die() { log "FATAL: $*"; sync_logs; exit 1; }
# sync_logs — best-effort copy of $LOG_DIR into the /output bucket.
# Never aborts the job: mkdir/cp failures are swallowed or logged as warnings.
sync_logs() {
  local dest="/output/${QUANT_NAME}/logs"
  log "Syncing logs to ${dest}/..."
  mkdir -p "$dest" 2>/dev/null || true
  if ! cp -r "$LOG_DIR"/* "$dest/" 2>/dev/null; then
    log "Warning: could not copy logs to /output"
  fi
}
log "=== Job started: $(date -u) ==="
# Record the runtime environment (values come from the job scheduler's env).
log "ACCELERATOR=${ACCELERATOR:-unknown} | CPU_CORES=${CPU_CORES:-?} | MEMORY=${MEMORY:-?}"
# ── Verify model volume ────────────────────────────────────────────────────
# A missing /model mount is fatal; missing safetensors files only warn,
# so the run proceeds and fails later with a clearer vLLM error if needed.
log "Checking model volume..."
ls -lh /model/ 2>/dev/null | head -20 || die "Model directory /model/ not found"
ls /model/*.safetensors >/dev/null 2>&1 || ls /model/model*.safetensors >/dev/null 2>&1 || log "Warning: no safetensors files found"
log "Model directory contents: $(ls /model/ | wc -l) files"
# ── Install auto-round ──────────────────────────────────────────────────────
# `tail` keeps the job log short; with `pipefail` a failing pip still makes
# the pipeline non-zero, so `set -e` aborts the job on install failure.
log "Installing auto-round + upgrading transformers..."
pip install auto-round 2>&1 | tail -5
pip install -U transformers 2>&1 | tail -3
log "auto-round + transformers installed"
# ── Start vLLM server ──────────────────────────────────────────────────────
# Launch the OpenAI-compatible vLLM API server in the background; readiness
# is verified later by the health-check loop.
log "Starting vLLM server..."
vllm_args=(
  --model /model
  --trust-remote-code
  --dtype bfloat16
  --max-model-len 8192
  --gpu-memory-utilization 0.95
  --enforce-eager
  --port 8080
  --host 127.0.0.1
)
python3 -m vllm.entrypoints.openai.api_server "${vllm_args[@]}" \
  > "$LOG_DIR/vllm-server.log" 2>&1 &
VLLM_PID=$!
log "vLLM server PID: $VLLM_PID"
# ── Install Node.js + pi while vLLM loads ──────────────────────────────────
# Runs in parallel with the background vLLM model load to save wall-clock time.
log "Installing Node.js 22 (while model loads in background)..."
apt-get update -qq 2>&1 | tail -3
apt-get install -y -qq curl git ca-certificates gnupg 2>&1 | tail -3
# NodeSource setup script registers the Node 22 apt repository.
curl -fsSL https://deb.nodesource.com/setup_22.x | bash - 2>&1 | tail -3
apt-get install -y -qq nodejs 2>&1 | tail -3
log "Node.js $(node --version) installed, npm $(npm --version)"
log "Installing pi coding agent..."
npm install -g @mariozechner/pi-coding-agent 2>&1 | tail -5
# `|| echo ...` keeps the command substitution non-failing under `set -e`
# in case the installed pi binary does not support --version.
log "pi installed: $(pi --version 2>&1 || echo 'version check done')"
# ── Health check ────────────────────────────────────────────────────────────
# Poll the server every 5s until /health (or /v1/models) answers, the server
# process dies, or MAX_WAIT seconds elapse.  If the process dies within the
# first ~30s, one restart is attempted with fallback flags (gptq
# quantization, 4096 context); a later death — or the retry dying again
# after 30s total — is fatal.
log "Waiting for vLLM server to be ready..."
MAX_WAIT=600
ELAPSED=0
while true; do
if curl -sf http://127.0.0.1:8080/health 2>/dev/null; then
break
fi
# Also try /v1/models as fallback health check
if curl -sf http://127.0.0.1:8080/v1/models 2>/dev/null | grep -q '"id"'; then
break
fi
sleep 5
ELAPSED=$((ELAPSED + 5))
if [[ $ELAPSED -ge $MAX_WAIT ]]; then
log "vLLM server failed to start within ${MAX_WAIT}s. Last logs:"
tail -50 "$LOG_DIR/vllm-server.log" || true
die "Health check timeout"
fi
# kill -0 only tests that the PID is still alive; the block runs when it isn't.
kill -0 $VLLM_PID 2>/dev/null || {
log "vLLM server died. Last logs:"
tail -50 "$LOG_DIR/vllm-server.log" || true
# Retry with fallback flags if first attempt failed
if [[ $ELAPSED -lt 30 ]]; then
log "Retrying vLLM with --enforce-eager and reduced context..."
python3 -m vllm.entrypoints.openai.api_server \
--model /model \
--trust-remote-code \
--dtype auto \
--quantization gptq \
--max-model-len 4096 \
--gpu-memory-utilization 0.95 \
--enforce-eager \
--port 8080 \
--host 127.0.0.1 \
> "$LOG_DIR/vllm-server.log" 2>&1 &
VLLM_PID=$!
log "vLLM server restarted with fallback flags, PID: $VLLM_PID"
# Give the restarted server a head start before re-polling.
sleep 10
ELAPSED=$((ELAPSED + 10))
continue
fi
die "vLLM server process exited"
}
# Progress heartbeat roughly every 30s of waiting.
[[ $((ELAPSED % 30)) -eq 0 ]] && log "  ...still waiting (${ELAPSED}s elapsed)"
done
log "vLLM server is ready!"
# Detect the model name vLLM is serving
# (falls back to the literal "/model" if the endpoint or JSON parsing fails).
MODEL_NAME=$(curl -sf http://127.0.0.1:8080/v1/models 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['data'][0]['id'])" 2>/dev/null || echo "/model")
log "vLLM serving model as: $MODEL_NAME"
# Quick sanity check
# One short chat completion proves end-to-end inference before the long
# agentic task.  NOTE(review): MODEL_NAME is interpolated into the JSON body
# unescaped — safe for path-style ids like "/model", would break on quotes.
log "Testing inference..."
SANITY=$(curl -sf http://127.0.0.1:8080/v1/chat/completions \
-H 'Content-Type: application/json' \
-d "{\"model\":\"${MODEL_NAME}\",\"messages\":[{\"role\":\"user\",\"content\":\"Say hello in one word.\"}],\"max_tokens\":128}" 2>/dev/null || echo "FAILED")
# Persist a truncated copy for post-run inspection.
echo "$SANITY" | head -c 500 > "$LOG_DIR/sanity-check.json"
log "Sanity check response: $(echo "$SANITY" | head -c 200)"
# ── Configure pi ────────────────────────────────────────────────────────────
# Point the pi agent at the local vLLM endpoint.  Both heredocs use an
# UNQUOTED delimiter on purpose: ${MODEL_NAME} and ${QUANT_NAME} must expand
# into the generated JSON.
log "Writing pi configuration..."
mkdir -p ~/.pi/agent
cat > ~/.pi/agent/models.json << EOF
{
"providers": {
"vllm-local": {
"baseUrl": "http://127.0.0.1:8080/v1",
"api": "openai-completions",
"apiKey": "none",
"compat": {
"supportsDeveloperRole": true,
"supportsReasoningEffort": false,
"supportsUsageInStreaming": true,
"supportsStrictMode": false,
"thinkingFormat": "qwen-chat-template"
},
"models": [
{
"id": "${MODEL_NAME}",
"name": "Qwen3.5-27B-${QUANT_NAME}",
"reasoning": true,
"input": ["text"],
"contextWindow": 8192,
"maxTokens": 4096,
"cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
}
]
}
}
}
EOF
# Make the locally served model pi's default.
cat > ~/.pi/agent/settings.json << EOF
{
"defaultProvider": "vllm-local",
"defaultModel": "${MODEL_NAME}",
"defaultThinkingLevel": "medium",
"hideThinkingBlock": false
}
EOF
log "pi config written"
# ── Clone test repo ─────────────────────────────────────────────────────────
# Shallow clone of the workspace repo the agent will modify; any failure
# here aborts the job via `set -e`.
WORKDIR="/workspace"
mkdir -p "$WORKDIR"
log "Cloning test repository..."
git clone --depth 1 https://github.com/sindresorhus/slugify "$WORKDIR/slugify"
cd "$WORKDIR/slugify"
log "Repo cloned: $(git log --oneline -1)"
log "Installing repo dependencies..."
npm install 2>&1 | tail -5
log "Dependencies installed"
# ── Run pi agentic task ─────────────────────────────────────────────────────
# The quoted delimiter ('TASK_EOF') keeps the prompt fully literal — no
# variable or command expansion inside the task text.
TASK=$(cat << 'TASK_EOF'
You are working in a Node.js project called @sindresorhus/slugify — a string slugification library.
Your task: Add a new `--interactive` mode to the CLI (cli.js) that reads lines from stdin, slugifies each line, and prints the result. Requirements:
1. First, read and understand the existing codebase — look at the main module (index.js), the CLI (cli.js), and the test files.
2. Modify cli.js to accept a `--interactive` / `-i` flag. When set, the CLI should:
- Read lines from stdin (one string per line)
- Slugify each line using the library
- Print each slugified result to stdout
- Exit cleanly when stdin closes (EOF)
3. The existing CLI behavior (passing a string as an argument) must continue to work unchanged.
4. Add tests for the new interactive mode in the test file. The tests should:
- Test that piping multiple lines produces correct slugified output
- Test that existing argument-based usage still works
5. Run the existing test suite with `npm test` to make sure nothing is broken.
6. If tests fail, debug and fix until they pass.
Report what you did and whether tests pass.
TASK_EOF
)
log "Starting pi coding task..."
log "Task: Add --interactive stdin mode to slugify CLI"
# Run the agent headless and capture its real exit status.
# BUGFIX: the previous `pi ... || true; PI_EXIT=${PIPESTATUS[0]:-$?}` always
# produced 0 — after an `|| true` list, PIPESTATUS reflects the last executed
# pipeline (the `true` fallback), not pi itself.  Capturing via `|| PI_EXIT=$?`
# records pi's status while still preventing `set -e` from aborting the job.
PI_EXIT=0
pi \
--print \
--no-session \
--provider vllm-local \
--model "$MODEL_NAME" \
--thinking medium \
"$TASK" \
> "$LOG_DIR/pi-output.txt" 2>&1 || PI_EXIT=$?
log "pi exited with code: $PI_EXIT"
# ── Capture results ─────────────────────────────────────────────────────────
# Everything below is best-effort (`|| true` / `2>/dev/null`): partial
# results should still be summarized and synced even if a step fails.
log "Collecting results..."
cd "$WORKDIR/slugify"
git diff > "$LOG_DIR/pi-changes.patch" 2>/dev/null || true
git diff --stat > "$LOG_DIR/pi-changes-stat.txt" 2>/dev/null || true
git status > "$LOG_DIR/git-status.txt" 2>/dev/null || true
# Try running tests one final time to get clean output
npm test > "$LOG_DIR/test-output.txt" 2>&1 || true
# ── Summary ─────────────────────────────────────────────────────────────────
log "=== RESULTS SUMMARY ==="
log "Pi exit code: $PI_EXIT"
log ""
log "--- Files changed ---"
cat "$LOG_DIR/pi-changes-stat.txt" 2>/dev/null || log "(no changes)"
log ""
log "--- Test results ---"
tail -20 "$LOG_DIR/test-output.txt" 2>/dev/null || log "(no test output)"
log ""
log "--- Log files ---"
ls -lh "$LOG_DIR/"
log ""
# ── Sync logs to output bucket ─────────────────────────────────────────────
sync_logs
log "=== Job complete: $(date -u) ==="
# Clean up vLLM server
# Plain TERM first; `wait` reaps the child and tolerates an already-gone PID.
kill $VLLM_PID 2>/dev/null || true
wait $VLLM_PID 2>/dev/null || true

Xet Storage Details

Size:
10.3 kB
·
Xet hash:
2969a3dee0766aecb2309e35e772e1f91ee5f7a452411d15fe0863a77fbcfe27

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.