File size: 3,658 Bytes
fdb2c4a
 
a8bb2ab
 
fdb2c4a
 
a8bb2ab
fdb2c4a
 
a8bb2ab
fdb2c4a
 
 
 
 
a8bb2ab
 
fdb2c4a
 
 
8a35e8e
a8bb2ab
 
 
 
8a35e8e
a8bb2ab
8a35e8e
 
a8bb2ab
 
 
fdb2c4a
 
 
 
 
a8bb2ab
fdb2c4a
a8bb2ab
fdb2c4a
 
 
 
 
 
 
 
 
 
a8bb2ab
fdb2c4a
a8bb2ab
fdb2c4a
 
 
 
 
 
 
a8bb2ab
fdb2c4a
 
 
 
 
 
 
a8bb2ab
fdb2c4a
 
 
 
a8bb2ab
fdb2c4a
a8bb2ab
fdb2c4a
a8bb2ab
 
 
 
 
 
fdb2c4a
a8bb2ab
 
 
fdb2c4a
 
a8bb2ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/bash
# ─────────────────────────────────────────────────────────────
# Agent Q-Q (QLAWED-Q) — HF Space Startup v3.1
# MODE: Ollama direct on :7860
#
# VRAM budget (T4-small, 16GB):
#   qwen3:1.7b       1.10GB  triage + chat
#   nomic-embed-text 0.27GB  embeddings / pgvector
#   ──────────────────────────────────────────────
#   Total resident:  ~1.37GB / 16GB
# ─────────────────────────────────────────────────────────────

# Abort the whole startup as soon as any unguarded command fails.
set -e

# Startup banner: four box lines followed by one blank line.
printf '%s\n' \
    "╔═════════════════════════════════════════════╗" \
    "║   Agent Q-Q — Ollama Direct Mode           ║" \
    "║   Serving on :7860 (OpenAI-compatible)     ║" \
    "╚═════════════════════════════════════════════╝" \
    ""

# ── Persistent disk (HF Pro) ──────────────────────────────────
# Store models under /data when that directory exists; otherwise leave
# OLLAMA_MODELS untouched and warn that downloads will not persist.
if [[ ! -d /data ]]; then
    echo "⚠️  Ephemeral storage (models re-download on restart)"
else
    echo "✅ Persistent disk at /data"
    export OLLAMA_MODELS="/data/ollama-models"
    mkdir -p "$OLLAMA_MODELS"
fi

# ── Start Ollama on port 7860 ─────────────────────────────────
# Settings handed to `ollama serve` through the environment.
export OLLAMA_HOST=0.0.0.0:7860
export OLLAMA_KEEP_ALIVE=10m
export OLLAMA_NUM_PARALLEL=2

# Run the server in the background; its PID is needed for the final `wait`.
ollama serve &
OLLAMA_PID=$!

# Readiness poll: up to max_attempts probes, poll_interval seconds apart.
max_attempts=40
poll_interval=2
probe_timeout=5
ollama_ready=0

echo "⏳ Waiting for Ollama on :7860..."
for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    # --max-time bounds each probe so a connection that is accepted but never
    # answered cannot stall startup indefinitely.
    if curl -sf --max-time "$probe_timeout" http://localhost:7860/api/tags > /dev/null 2>&1; then
        ollama_ready=1
        break
    fi
    # If the server process is already gone there is nothing left to wait for.
    if ! kill -0 "$OLLAMA_PID" 2> /dev/null; then
        break
    fi
    # No point sleeping after the final probe.
    if (( attempt < max_attempts )); then
        sleep "$poll_interval"
    fi
done

if (( ollama_ready == 0 )); then
    echo "❌ Ollama failed to start" >&2
    # Do not leave a half-started server behind when giving up.
    kill "$OLLAMA_PID" 2> /dev/null || true
    exit 1
fi
echo "✅ Ollama ready"

# ── Pull models ───────────────────────────────────────────────
# Blank separator line followed by the section heading.
printf '\n%s\n' "📦 Pulling models..."

#######################################
# Make sure an Ollama model is available locally, pulling it when
# `ollama show` does not find it.
# Arguments: $1 - model name/tag passed to `ollama show` and `ollama pull`
# Outputs:   status lines on stdout (cached / pulling / success / failure)
# Returns:   0 in every case; a failed pull is reported but non-fatal
#######################################
pull_if_missing() {
    local model="$1"

    if ollama show "$model" > /dev/null 2>&1; then
        echo "  ✓ cached: $model"
        return 0
    fi

    echo "  ↓ pulling: $model"
    # Explicit if/else instead of `a && b || c`: with the chained form the
    # failure branch would also run whenever the success echo itself failed.
    if ollama pull "$model"; then
        echo "  ✅ $model"
    else
        echo "  ⚠️  failed: $model (non-fatal)"
    fi
    return 0
}

# Fetch each required model in turn, then print the output of `ollama list`
# framed by blank lines.
for required_model in "qwen3:1.7b" "nomic-embed-text"; do
    pull_if_missing "$required_model"
done

printf '\n%s\n' "📊 Loaded models:"
ollama list
printf '\n'

# Final status banner. The heredoc delimiter is quoted so the text is emitted
# literally, with no expansion.
cat <<'BANNER'
═══════════════════════════════════════════════════════════
✅ Agent Q-Q LIVE

   Public API:  http://0.0.0.0:7860

   Endpoints:
     GET  /api/tags                  → list models
     GET  /v1/models                 → OpenAI model list
     POST /v1/chat/completions       → OpenAI chat
     POST /api/generate              → Ollama native
     POST /api/embeddings            → embeddings

   Models resident:
     qwen3:1.7b        ~1.1GB  chat / triage
     nomic-embed-text  ~0.3GB  embeddings
═══════════════════════════════════════════════════════════
BANNER

# Bash does not pass signals on to background children, so forward
# termination requests to Ollama explicitly.
trap 'kill -TERM "$OLLAMA_PID" 2> /dev/null || true' TERM INT

# Block for the lifetime of the server and exit with its status.
exit_code=0
wait "$OLLAMA_PID" || exit_code=$?

# `wait` returns early when a trapped signal arrives. If Ollama is still
# running at that point, wait again to collect its real exit status.
if kill -0 "$OLLAMA_PID" 2> /dev/null; then
    exit_code=0
    wait "$OLLAMA_PID" || exit_code=$?
fi
exit "$exit_code"