#!/bin/bash
# ─────────────────────────────────────────────────────────────
# Agent Q-Q (QLAWED-Q) — HF Space Startup v3.1
# MODE: Ollama direct on :7860
#
# VRAM budget (T4-small, 16GB):
# qwen3:1.7b 1.10GB triage + chat
# nomic-embed-text 0.27GB embeddings / pgvector
# ──────────────────────────────────────────────
# Total resident: ~1.37GB / 16GB
# ─────────────────────────────────────────────────────────────
# Abort the script as soon as any unguarded command exits non-zero.
set -e

# Startup banner, followed by one blank line.
printf '%s\n' \
  "╔═════════════════════════════════════════════╗" \
  "║ Agent Q-Q — Ollama Direct Mode ║" \
  "║ Serving on :7860 (OpenAI-compatible) ║" \
  "╚═════════════════════════════════════════════╝" \
  ""
# ── Persistent disk (HF Pro) ──────────────────────────────────
# With a /data directory present, the Ollama model store is created there and
# exported via OLLAMA_MODELS; without it, only a warning is printed.
if [[ ! -d /data ]]; then
  echo "⚠️ Ephemeral storage (models re-download on restart)"
else
  echo "✅ Persistent disk at /data"
  mkdir -p /data/ollama-models
  export OLLAMA_MODELS="/data/ollama-models"
fi
# ── Start Ollama on port 7860 ─────────────────────────────────
# Server settings are handed to `ollama serve` through the environment.
export OLLAMA_HOST=0.0.0.0:7860
export OLLAMA_KEEP_ALIVE=10m
export OLLAMA_NUM_PARALLEL=2

# Launch the server in the background; OLLAMA_PID is kept so the script can
# wait on the server process at the end.
ollama serve &
OLLAMA_PID=$!

# Poll /api/tags until it answers: at most 40 attempts, 2 s apart.
echo "⏳ Waiting for Ollama on :7860..."
ollama_ready=0
for attempt in {1..40}; do
  if curl -sf http://localhost:7860/api/tags > /dev/null 2>&1; then
    ollama_ready=1
    break
  fi
  # Fail fast: if the server process is already gone there is nothing left
  # to wait for, so do not burn the remaining attempts.
  if ! kill -0 "$OLLAMA_PID" 2> /dev/null; then
    echo "❌ Ollama exited before becoming ready"
    exit 1
  fi
  # No sleep after the final attempt; fall through to the timeout handling.
  if (( attempt < 40 )); then
    sleep 2
  fi
done

if (( ollama_ready == 0 )); then
  echo "❌ Ollama failed to start"
  # Do not leave a half-started server behind when giving up.
  kill "$OLLAMA_PID" 2> /dev/null || true
  exit 1
fi
echo "✅ Ollama ready"
# ── Pull models ───────────────────────────────────────────────
echo ""
echo "📦 Pulling models..."

#######################################
# Make sure an Ollama model is available locally, pulling it when
# `ollama show` does not find it. A failed pull is reported but does not
# abort the caller.
# Arguments: $1 - model name/tag handed to `ollama show` / `ollama pull`
# Outputs:   status lines on stdout (plus whatever `ollama pull` prints)
# Returns:   status of the final echo, i.e. 0 even when the pull fails
#######################################
pull_if_missing() {
  local model="$1"
  if ollama show "$model" > /dev/null 2>&1; then
    echo " ✓ cached: $model"
    return
  fi
  echo " ↓ pulling: $model"
  # Explicit if/else instead of `pull && ok || warn`, so the failure line is
  # printed only when the pull itself fails.
  if ollama pull "$model"; then
    echo " ✅ $model"
  else
    echo " ⚠️ failed: $model (non-fatal)"
  fi
}
# Fetch every model the service needs (each call is a no-op when cached).
for required_model in "qwen3:1.7b" "nomic-embed-text"; do
  pull_if_missing "$required_model"
done

# Show what the server currently has available.
printf '\n%s\n' "📊 Loaded models:"
ollama list

# Final status banner: public address, endpoints and model summary.
cat <<'EOF'

═══════════════════════════════════════════════════════════
✅ Agent Q-Q LIVE

 Public API: http://0.0.0.0:7860

 Endpoints:
 GET /api/tags → list models
 GET /v1/models → OpenAI model list
 POST /v1/chat/completions → OpenAI chat
 POST /api/generate → Ollama native
 POST /api/embeddings → embeddings

 Models resident:
 qwen3:1.7b ~1.1GB chat / triage
 nomic-embed-text ~0.3GB embeddings
═══════════════════════════════════════════════════════════
EOF

# Block on the background server so the script lives as long as Ollama does.
wait "$OLLAMA_PID"
|