victor/qwen35-test-scripts / entrypoint-autoround.sh
download
raw
10.3 kB
#!/usr/bin/env bash
set -euo pipefail

# ── Quant name ──────────────────────────────────────────────────────────────
QUANT_NAME="autoround-intel"

# ── Logging ─────────────────────────────────────────────────────────────────
LOG_DIR="/tmp/logs"
mkdir -p "$LOG_DIR"
# Mirror every byte of stdout/stderr into the persistent job log.
exec > >(tee -a "$LOG_DIR/job.log") 2>&1

# log MSG... — print a UTC-timestamped line.
log() { printf '[%s] %s\n' "$(date -u +%H:%M:%S)" "$*"; }
# die MSG... — log a fatal error, flush logs to /output, and abort the job.
die() { log "FATAL: $*"; sync_logs; exit 1; }
# sync_logs — best-effort copy of $LOG_DIR into the /output bucket.
# Never aborts the job: mkdir/cp failures are swallowed or logged as warnings.
sync_logs() {
  local dest="/output/${QUANT_NAME}/logs"
  log "Syncing logs to ${dest}/..."
  mkdir -p "$dest" 2>/dev/null || true
  if ! cp -r "$LOG_DIR"/* "$dest/" 2>/dev/null; then
    log "Warning: could not copy logs to /output"
  fi
}
log "=== Job started: $(date -u) ==="
# Record the runtime environment (values come from the job scheduler's env).
log "ACCELERATOR=${ACCELERATOR:-unknown} | CPU_CORES=${CPU_CORES:-?} | MEMORY=${MEMORY:-?}"
# ── Verify model volume ────────────────────────────────────────────────────
# A missing /model mount is fatal; missing safetensors files only warn,
# so the run proceeds and fails later with a clearer vLLM error if needed.
log "Checking model volume..."
ls -lh /model/ 2>/dev/null | head -20 || die "Model directory /model/ not found"
ls /model/*.safetensors >/dev/null 2>&1 || ls /model/model*.safetensors >/dev/null 2>&1 || log "Warning: no safetensors files found"
log "Model directory contents: $(ls /model/ | wc -l) files"
# ── Install auto-round ──────────────────────────────────────────────────────
# `tail` keeps the job log short; with `pipefail` a failing pip still makes
# the pipeline non-zero, so `set -e` aborts the job on install failure.
log "Installing auto-round + upgrading transformers..."
pip install auto-round 2>&1 | tail -5
pip install -U transformers 2>&1 | tail -3
log "auto-round + transformers installed"
# ── Start vLLM server ──────────────────────────────────────────────────────
# Launch the OpenAI-compatible vLLM API server in the background; readiness
# is verified later by the health-check loop.
log "Starting vLLM server..."
vllm_args=(
  --model /model
  --trust-remote-code
  --dtype bfloat16
  --max-model-len 8192
  --gpu-memory-utilization 0.95
  --enforce-eager
  --port 8080
  --host 127.0.0.1
)
python3 -m vllm.entrypoints.openai.api_server "${vllm_args[@]}" \
  > "$LOG_DIR/vllm-server.log" 2>&1 &
VLLM_PID=$!
log "vLLM server PID: $VLLM_PID"
# ── Install Node.js + pi while vLLM loads ──────────────────────────────────
# Runs in parallel with the background vLLM model load to save wall-clock time.
log "Installing Node.js 22 (while model loads in background)..."
apt-get update -qq 2>&1 | tail -3
apt-get install -y -qq curl git ca-certificates gnupg 2>&1 | tail -3
# NodeSource setup script registers the Node 22 apt repository.
curl -fsSL https://deb.nodesource.com/setup_22.x | bash - 2>&1 | tail -3
apt-get install -y -qq nodejs 2>&1 | tail -3
log "Node.js $(node --version) installed, npm $(npm --version)"
log "Installing pi coding agent..."
npm install -g @mariozechner/pi-coding-agent 2>&1 | tail -5
# `|| echo ...` keeps the command substitution non-failing under `set -e`
# in case the installed pi binary does not support --version.
log "pi installed: $(pi --version 2>&1 || echo 'version check done')"
# ── Health check ────────────────────────────────────────────────────────────
# Poll the server every 5s until /health (or /v1/models) answers, the server
# process dies, or MAX_WAIT seconds elapse.  If the process dies within the
# first ~30s, one restart is attempted with fallback flags (gptq
# quantization, 4096 context); a later death — or the retry dying again
# after 30s total — is fatal.
log "Waiting for vLLM server to be ready..."
MAX_WAIT=600
ELAPSED=0
while true; do
if curl -sf http://127.0.0.1:8080/health 2>/dev/null; then
break
fi
# Also try /v1/models as fallback health check
if curl -sf http://127.0.0.1:8080/v1/models 2>/dev/null | grep -q '"id"'; then
break
fi
sleep 5
ELAPSED=$((ELAPSED + 5))
if [[ $ELAPSED -ge $MAX_WAIT ]]; then
log "vLLM server failed to start within ${MAX_WAIT}s. Last logs:"
tail -50 "$LOG_DIR/vllm-server.log" || true
die "Health check timeout"
fi
# kill -0 only tests that the PID is still alive; the block runs when it isn't.
kill -0 $VLLM_PID 2>/dev/null || {
log "vLLM server died. Last logs:"
tail -50 "$LOG_DIR/vllm-server.log" || true
# Retry with fallback flags if first attempt failed
if [[ $ELAPSED -lt 30 ]]; then
log "Retrying vLLM with --enforce-eager and reduced context..."
python3 -m vllm.entrypoints.openai.api_server \
--model /model \
--trust-remote-code \
--dtype auto \
--quantization gptq \
--max-model-len 4096 \
--gpu-memory-utilization 0.95 \
--enforce-eager \
--port 8080 \
--host 127.0.0.1 \
> "$LOG_DIR/vllm-server.log" 2>&1 &
VLLM_PID=$!
log "vLLM server restarted with fallback flags, PID: $VLLM_PID"
# Give the restarted server a head start before re-polling.
sleep 10
ELAPSED=$((ELAPSED + 10))
continue
fi
die "vLLM server process exited"
}
# Progress heartbeat roughly every 30s of waiting.
[[ $((ELAPSED % 30)) -eq 0 ]] && log "  ...still waiting (${ELAPSED}s elapsed)"
done
log "vLLM server is ready!"
# Detect the model name vLLM is serving
# (falls back to the literal "/model" if the endpoint or JSON parsing fails).
MODEL_NAME=$(curl -sf http://127.0.0.1:8080/v1/models 2>/dev/null | python3 -c "import sys,json; print(json.load(sys.stdin)['data'][0]['id'])" 2>/dev/null || echo "/model")
log "vLLM serving model as: $MODEL_NAME"
# Quick sanity check
# One short chat completion proves end-to-end inference before the long
# agentic task.  NOTE(review): MODEL_NAME is interpolated into the JSON body
# unescaped — safe for path-style ids like "/model", would break on quotes.
log "Testing inference..."
SANITY=$(curl -sf http://127.0.0.1:8080/v1/chat/completions \
-H 'Content-Type: application/json' \
-d "{\"model\":\"${MODEL_NAME}\",\"messages\":[{\"role\":\"user\",\"content\":\"Say hello in one word.\"}],\"max_tokens\":128}" 2>/dev/null || echo "FAILED")
# Persist a truncated copy for post-run inspection.
echo "$SANITY" | head -c 500 > "$LOG_DIR/sanity-check.json"
log "Sanity check response: $(echo "$SANITY" | head -c 200)"
# ── Configure pi ────────────────────────────────────────────────────────────
# Point the pi agent at the local vLLM endpoint.  Both heredocs use an
# UNQUOTED delimiter on purpose: ${MODEL_NAME} and ${QUANT_NAME} must expand
# into the generated JSON.
log "Writing pi configuration..."
mkdir -p ~/.pi/agent
cat > ~/.pi/agent/models.json << EOF
{
"providers": {
"vllm-local": {
"baseUrl": "http://127.0.0.1:8080/v1",
"api": "openai-completions",
"apiKey": "none",
"compat": {
"supportsDeveloperRole": true,
"supportsReasoningEffort": false,
"supportsUsageInStreaming": true,
"supportsStrictMode": false,
"thinkingFormat": "qwen-chat-template"
},
"models": [
{
"id": "${MODEL_NAME}",
"name": "Qwen3.5-27B-${QUANT_NAME}",
"reasoning": true,
"input": ["text"],
"contextWindow": 8192,
"maxTokens": 4096,
"cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
}
]
}
}
}
EOF
# Make the locally served model pi's default.
cat > ~/.pi/agent/settings.json << EOF
{
"defaultProvider": "vllm-local",
"defaultModel": "${MODEL_NAME}",
"defaultThinkingLevel": "medium",
"hideThinkingBlock": false
}
EOF
log "pi config written"
# ── Clone test repo ─────────────────────────────────────────────────────────
# Shallow clone of the workspace repo the agent will modify; any failure
# here aborts the job via `set -e`.
WORKDIR="/workspace"
mkdir -p "$WORKDIR"
log "Cloning test repository..."
git clone --depth 1 https://github.com/sindresorhus/slugify "$WORKDIR/slugify"
cd "$WORKDIR/slugify"
log "Repo cloned: $(git log --oneline -1)"
log "Installing repo dependencies..."
npm install 2>&1 | tail -5
log "Dependencies installed"
# ── Run pi agentic task ─────────────────────────────────────────────────────
# The quoted delimiter ('TASK_EOF') keeps the prompt fully literal — no
# variable or command expansion inside the task text.
TASK=$(cat << 'TASK_EOF'
You are working in a Node.js project called @sindresorhus/slugify — a string slugification library.
Your task: Add a new `--interactive` mode to the CLI (cli.js) that reads lines from stdin, slugifies each line, and prints the result. Requirements:
1. First, read and understand the existing codebase — look at the main module (index.js), the CLI (cli.js), and the test files.
2. Modify cli.js to accept a `--interactive` / `-i` flag. When set, the CLI should:
- Read lines from stdin (one string per line)
- Slugify each line using the library
- Print each slugified result to stdout
- Exit cleanly when stdin closes (EOF)
3. The existing CLI behavior (passing a string as an argument) must continue to work unchanged.
4. Add tests for the new interactive mode in the test file. The tests should:
- Test that piping multiple lines produces correct slugified output
- Test that existing argument-based usage still works
5. Run the existing test suite with `npm test` to make sure nothing is broken.
6. If tests fail, debug and fix until they pass.
Report what you did and whether tests pass.
TASK_EOF
)
log "Starting pi coding task..."
log "Task: Add --interactive stdin mode to slugify CLI"
# Run the agent headless and capture its real exit status.
# BUGFIX: the previous `pi ... || true; PI_EXIT=${PIPESTATUS[0]:-$?}` always
# produced 0 — after an `|| true` list, PIPESTATUS reflects the last executed
# pipeline (the `true` fallback), not pi itself.  Capturing via `|| PI_EXIT=$?`
# records pi's status while still preventing `set -e` from aborting the job.
PI_EXIT=0
pi \
--print \
--no-session \
--provider vllm-local \
--model "$MODEL_NAME" \
--thinking medium \
"$TASK" \
> "$LOG_DIR/pi-output.txt" 2>&1 || PI_EXIT=$?
log "pi exited with code: $PI_EXIT"
# ── Capture results ─────────────────────────────────────────────────────────
# Everything below is best-effort (`|| true` / `2>/dev/null`): partial
# results should still be summarized and synced even if a step fails.
log "Collecting results..."
cd "$WORKDIR/slugify"
git diff > "$LOG_DIR/pi-changes.patch" 2>/dev/null || true
git diff --stat > "$LOG_DIR/pi-changes-stat.txt" 2>/dev/null || true
git status > "$LOG_DIR/git-status.txt" 2>/dev/null || true
# Try running tests one final time to get clean output
npm test > "$LOG_DIR/test-output.txt" 2>&1 || true
# ── Summary ─────────────────────────────────────────────────────────────────
log "=== RESULTS SUMMARY ==="
log "Pi exit code: $PI_EXIT"
log ""
log "--- Files changed ---"
cat "$LOG_DIR/pi-changes-stat.txt" 2>/dev/null || log "(no changes)"
log ""
log "--- Test results ---"
tail -20 "$LOG_DIR/test-output.txt" 2>/dev/null || log "(no test output)"
log ""
log "--- Log files ---"
ls -lh "$LOG_DIR/"
log ""
# ── Sync logs to output bucket ─────────────────────────────────────────────
sync_logs
log "=== Job complete: $(date -u) ==="
# Clean up vLLM server
# Plain TERM first; `wait` reaps the child and tolerates an already-gone PID.
kill $VLLM_PID 2>/dev/null || true
wait $VLLM_PID 2>/dev/null || true

Xet Storage Details

Size:
10.3 kB
·
Xet hash:
2969a3dee0766aecb2309e35e772e1f91ee5f7a452411d15fe0863a77fbcfe27

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.