#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────
# Agent Q-Q (QLAWED-Q) — HF Space Startup v3.1
# MODE: Ollama direct on :7860
#
# VRAM budget (T4-small, 16GB):
#   qwen3:1.7b         1.10GB   triage + chat
#   nomic-embed-text   0.27GB   embeddings / pgvector
#   ──────────────────────────────────────────────
#   Total resident:   ~1.37GB / 16GB
#
# Requires bash (brace expansion / arithmetic loops), plus `ollama`
# and `curl` on PATH.
# ─────────────────────────────────────────────────────────────
set -euo pipefail

echo "╔═════════════════════════════════════════════╗"
echo "║ Agent Q-Q — Ollama Direct Mode ║"
echo "║ Serving on :7860 (OpenAI-compatible) ║"
echo "╚═════════════════════════════════════════════╝"
echo ""

# ── Persistent disk (HF Pro) ──────────────────────────────────
# Keep models under /data when it is present AND usable. A /data mount
# that cannot be written must not abort startup under `set -e`; fall
# back to the default (ephemeral) model location instead.
MODELS_DIR="/data/ollama-models"
if [[ -d "/data" ]] && mkdir -p "$MODELS_DIR" 2> /dev/null && [[ -w "$MODELS_DIR" ]]; then
  echo "✅ Persistent disk at /data"
  export OLLAMA_MODELS="$MODELS_DIR"
else
  echo "⚠️ Ephemeral storage (models re-download on restart)"
fi

# ── Start Ollama on port 7860 ─────────────────────────────────
# The server binds directly to the port this Space serves (:7860).
export OLLAMA_HOST=0.0.0.0:7860
export OLLAMA_KEEP_ALIVE=10m
export OLLAMA_NUM_PARALLEL=2

ollama serve &
OLLAMA_PID=$!

# Poll the tags endpoint until the server answers: at most
# READY_ATTEMPTS probes, READY_INTERVAL seconds apart (40 x 2s = 80s).
READY_ATTEMPTS=40
READY_INTERVAL=2

echo "⏳ Waiting for Ollama on :7860..."
for (( attempt = 1; attempt <= READY_ATTEMPTS; attempt++ )); do
  # --max-time keeps a hung connection from stalling the whole loop.
  if curl -sf --max-time 5 http://localhost:7860/api/tags > /dev/null 2>&1; then
    echo "✅ Ollama ready"
    break
  fi
  # If the server process is already gone there is nothing to wait for;
  # fail immediately rather than burning the remaining attempts.
  if ! kill -0 "$OLLAMA_PID" 2> /dev/null; then
    echo "❌ Ollama exited before becoming ready" >&2
    exit 1
  fi
  if (( attempt == READY_ATTEMPTS )); then
    echo "❌ Ollama failed to start" >&2
    exit 1
  fi
  sleep "$READY_INTERVAL"
done

# ── Pull models ───────────────────────────────────────────────
echo ""
echo "📦 Pulling models..."
#######################################
# Ensure a model is available locally, pulling it only when needed.
# A failed pull is reported but deliberately non-fatal, so one
# unavailable model does not abort the whole startup under `set -e`.
# Arguments: $1 - model name/tag (e.g. "qwen3:1.7b")
# Outputs:   progress lines on stdout; failure warning on stderr
# Returns:   0 when the model is cached, pulled, or the pull failed;
#            2 when no model name was given
#######################################
pull_if_missing() {
  local model="${1:-}"
  if [[ -z "$model" ]]; then
    echo "pull_if_missing: model name required" >&2
    return 2
  fi

  # `ollama show` succeeds only when the model already exists locally.
  if ollama show "$model" > /dev/null 2>&1; then
    echo " ✓ cached: $model"
    return 0
  fi

  echo " ↓ pulling: $model"
  # Explicit if/else: with `a && b || c`, `c` would also run whenever
  # `b` (the success message) itself failed.
  if ollama pull "$model"; then
    echo " ✅ $model"
  else
    echo " ⚠️ failed: $model (non-fatal)" >&2
  fi
  return 0
}
# Models this Space serves: chat/triage model first, then the embedder.
for model_name in "qwen3:1.7b" "nomic-embed-text"; do
  pull_if_missing "$model_name"
done

echo ""
echo "📊 Loaded models:"
# The listing is informational only; a failure here must not take down
# an otherwise healthy server under `set -e`.
ollama list || echo " ⚠️ could not list models (non-fatal)" >&2

# Quoted heredoc: printed verbatim, no expansion.
cat << 'BANNER'

═══════════════════════════════════════════════════════════
✅ Agent Q-Q LIVE

 Public API: http://0.0.0.0:7860

 Endpoints:
 GET /api/tags → list models
 GET /v1/models → OpenAI model list
 POST /v1/chat/completions → OpenAI chat
 POST /api/generate → Ollama native
 POST /api/embeddings → embeddings

 Models resident:
 qwen3:1.7b ~1.1GB chat / triage
 nomic-embed-text ~0.3GB embeddings
═══════════════════════════════════════════════════════════
BANNER

# Block on the background server so the script's lifetime and exit
# status follow the `ollama serve` process started earlier.
wait "$OLLAMA_PID"