Spaces:
Paused
Paused
fix: remove litellm[proxy], Ollama direct on :7860
Browse files
- Dockerfile +13 -26
- startup.sh +32 -71
Dockerfile
CHANGED
|
@@ -1,15 +1,14 @@
|
|
| 1 |
# ─────────────────────────────────────────────────────────────
|
| 2 |
-
# Agent Q-Q (Agent Q-QAJAQS) — QLAWED-Q HF Space
|
| 3 |
-
# SDK: Docker | Public Port: 7860 (
|
| 4 |
#
|
| 5 |
# Architecture:
|
| 6 |
-
# Ollama
|
| 7 |
-
# LiteLLM → public :7860 (Claude-compatible API)
|
| 8 |
#
|
| 9 |
-
# Endpoints
|
| 10 |
# POST /v1/chat/completions (OpenAI SDK compatible)
|
| 11 |
-
# POST /
|
| 12 |
-
# GET /
|
| 13 |
# GET /v1/models
|
| 14 |
# ─────────────────────────────────────────────────────────────
|
| 15 |
|
|
@@ -21,43 +20,31 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|
| 21 |
RUN apt-get update && apt-get install -y \
|
| 22 |
curl \
|
| 23 |
ca-certificates \
|
| 24 |
-
python3 \
|
| 25 |
-
python3-pip \
|
| 26 |
-
git \
|
| 27 |
-
git-lfs \
|
| 28 |
&& rm -rf /var/lib/apt/lists/*
|
| 29 |
|
| 30 |
# ── Ollama ────────────────────────────────────────────────────
|
| 31 |
RUN curl -fsSL https://ollama.com/install.sh | sh
|
| 32 |
|
| 33 |
-
# ── Python packages (system-wide, available to all users) ─────
|
| 34 |
-
RUN pip3 install --no-cache-dir \
|
| 35 |
-
"litellm[proxy]>=1.40.0" \
|
| 36 |
-
"huggingface_hub[cli]>=0.23.0" \
|
| 37 |
-
PyYAML \
|
| 38 |
-
uvicorn
|
| 39 |
-
|
| 40 |
# ── HF Spaces: non-root user uid 1000 ─────────────────────────
|
| 41 |
RUN useradd -m -u 1000 user
|
| 42 |
USER user
|
| 43 |
|
| 44 |
# ── Environment ───────────────────────────────────────────────
|
|
|
|
| 45 |
ENV OLLAMA_MODELS=/home/user/.ollama/models
|
| 46 |
-
ENV OLLAMA_HOST=0.0.0.0:
|
| 47 |
-
ENV
|
|
|
|
| 48 |
ENV HOME=/home/user
|
| 49 |
ENV PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 50 |
|
| 51 |
WORKDIR /home/user
|
| 52 |
|
| 53 |
-
# ── Copy
|
| 54 |
-
COPY --chown=user startup.sh
|
| 55 |
-
COPY --chown=user litellm_config.yaml /home/user/litellm_config.yaml
|
| 56 |
-
COPY --chown=user Modelfile.qlawed-frontend /home/user/Modelfile.qlawed-frontend
|
| 57 |
-
|
| 58 |
RUN chmod +x /home/user/startup.sh
|
| 59 |
|
| 60 |
-
# ── Public port:
|
| 61 |
EXPOSE 7860
|
| 62 |
|
| 63 |
CMD ["/home/user/startup.sh"]
|
|
|
|
| 1 |
# ─────────────────────────────────────────────────────────────
|
| 2 |
+
# Agent Q-Q (Agent Q-QAJAQS) — QLAWED-Q HF Space v3.1
|
| 3 |
+
# SDK: Docker | Public Port: 7860 (Ollama direct)
|
| 4 |
#
|
| 5 |
# Architecture:
|
| 6 |
+
# Ollama → public :7860 (native OpenAI-compatible API)
|
|
|
|
| 7 |
#
|
| 8 |
+
# Endpoints at :7860:
|
| 9 |
# POST /v1/chat/completions (OpenAI SDK compatible)
|
| 10 |
+
# POST /api/generate (Ollama native)
|
| 11 |
+
# GET /api/tags (model list)
|
| 12 |
# GET /v1/models
|
| 13 |
# ─────────────────────────────────────────────────────────────
|
| 14 |
|
|
|
|
| 20 |
RUN apt-get update && apt-get install -y \
|
| 21 |
curl \
|
| 22 |
ca-certificates \
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
&& rm -rf /var/lib/apt/lists/*
|
| 24 |
|
| 25 |
# ── Ollama ────────────────────────────────────────────────────
|
| 26 |
RUN curl -fsSL https://ollama.com/install.sh | sh
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# ── HF Spaces: non-root user uid 1000 ─────────────────────────
|
| 29 |
RUN useradd -m -u 1000 user
|
| 30 |
USER user
|
| 31 |
|
| 32 |
# ── Environment ───────────────────────────────────────────────
|
| 33 |
+
# Serve directly on the HF public port
|
| 34 |
ENV OLLAMA_MODELS=/home/user/.ollama/models
|
| 35 |
+
ENV OLLAMA_HOST=0.0.0.0:7860
|
| 36 |
+
ENV OLLAMA_KEEP_ALIVE=10m
|
| 37 |
+
ENV OLLAMA_NUM_PARALLEL=2
|
| 38 |
ENV HOME=/home/user
|
| 39 |
ENV PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 40 |
|
| 41 |
WORKDIR /home/user
|
| 42 |
|
| 43 |
+
# ── Copy startup script ───────────────────────────────────────
|
| 44 |
+
COPY --chown=user startup.sh /home/user/startup.sh
|
|
|
|
|
|
|
|
|
|
| 45 |
RUN chmod +x /home/user/startup.sh
|
| 46 |
|
| 47 |
+
# ── Public port: Ollama API ───────────────────────────────────
|
| 48 |
EXPOSE 7860
|
| 49 |
|
| 50 |
CMD ["/home/user/startup.sh"]
|
startup.sh
CHANGED
|
@@ -1,57 +1,43 @@
|
|
| 1 |
#!/bin/bash
|
| 2 |
# ─────────────────────────────────────────────────────────────
|
| 3 |
-
# Agent Q-Q (QLAWED-Q) — HF Space Startup v3.
|
| 4 |
-
# MODE:
|
| 5 |
-
#
|
| 6 |
-
# When laptop is online: laptop handles all generative load.
|
| 7 |
-
# This Space runs only when laptop is offline — triage + embeddings.
|
| 8 |
#
|
| 9 |
# VRAM budget (T4-small, 16GB):
|
| 10 |
-
# qwen3:1.7b 1.10GB triage
|
| 11 |
# nomic-embed-text 0.27GB embeddings / pgvector
|
| 12 |
# ──────────────────────────────────────────────
|
| 13 |
-
# Total resident: ~1.37GB / 16GB
|
| 14 |
# ─────────────────────────────────────────────────────────────
|
| 15 |
|
| 16 |
set -e
|
| 17 |
|
| 18 |
echo "╔═════════════════════════════════════════════╗"
|
| 19 |
-
echo "║ Agent Q-Q —
|
| 20 |
-
echo "║
|
| 21 |
-
echo "║ LiteLLM :7860 (Claude-compat proxy) ║"
|
| 22 |
echo "╚═════════════════════════════════════════════╝"
|
| 23 |
echo ""
|
| 24 |
|
| 25 |
# ── Persistent disk (HF Pro) ──────────────────────────────────
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
OLLAMA_MODELS_DIR="$PERSISTENT_DISK/ollama-models"
|
| 31 |
-
|
| 32 |
-
if [ -d "$PERSISTENT_DISK" ]; then
|
| 33 |
-
echo "✅ Persistent disk detected at /data"
|
| 34 |
-
mkdir -p "$GGUF_DIR" "$OLLAMA_MODELS_DIR"
|
| 35 |
-
export OLLAMA_MODELS="$OLLAMA_MODELS_DIR"
|
| 36 |
else
|
| 37 |
-
echo "⚠️
|
| 38 |
-
echo " Enable in: Space Settings → Persistent Storage → mount /data"
|
| 39 |
-
GGUF_DIR="$HOME/.ollama/gguf"
|
| 40 |
-
mkdir -p "$GGUF_DIR"
|
| 41 |
fi
|
| 42 |
|
| 43 |
-
# ──
|
| 44 |
-
export OLLAMA_HOST=0.0.0.0:
|
| 45 |
-
export OLLAMA_KEEP_ALIVE=
|
| 46 |
export OLLAMA_NUM_PARALLEL=2
|
| 47 |
-
export OLLAMA_MAX_LOADED_MODELS=2
|
| 48 |
|
| 49 |
ollama serve &
|
| 50 |
OLLAMA_PID=$!
|
| 51 |
|
| 52 |
-
echo "⏳ Waiting for Ollama on :
|
| 53 |
for i in {1..40}; do
|
| 54 |
-
if curl -sf http://localhost:
|
| 55 |
echo "✅ Ollama ready"
|
| 56 |
break
|
| 57 |
fi
|
|
@@ -62,10 +48,9 @@ for i in {1..40}; do
|
|
| 62 |
sleep 2
|
| 63 |
done
|
| 64 |
|
| 65 |
-
# ──
|
| 66 |
-
echo ""
|
| 67 |
-
echo "📦 Setting up passthrough models..."
|
| 68 |
echo ""
|
|
|
|
| 69 |
|
| 70 |
pull_if_missing() {
|
| 71 |
local model="$1"
|
|
@@ -73,57 +58,33 @@ pull_if_missing() {
|
|
| 73 |
echo " ✓ cached: $model"
|
| 74 |
else
|
| 75 |
echo " ↓ pulling: $model"
|
| 76 |
-
ollama pull "$model" && echo " ✅
|
| 77 |
fi
|
| 78 |
}
|
| 79 |
|
| 80 |
-
# Triage agent — always resident, ~1.1GB
|
| 81 |
pull_if_missing "qwen3:1.7b"
|
| 82 |
-
|
| 83 |
-
# Embeddings — always resident, ~0.27GB
|
| 84 |
pull_if_missing "nomic-embed-text"
|
| 85 |
|
| 86 |
echo ""
|
| 87 |
-
echo "📊
|
| 88 |
ollama list
|
| 89 |
echo ""
|
| 90 |
|
| 91 |
-
# ── 3. Start LiteLLM proxy ───────────────────────────────────
|
| 92 |
-
echo "🔀 Starting LiteLLM proxy on :7860..."
|
| 93 |
-
|
| 94 |
-
litellm \
|
| 95 |
-
--config "$HOME/litellm_config.yaml" \
|
| 96 |
-
--port 7860 \
|
| 97 |
-
--host 0.0.0.0 \
|
| 98 |
-
--telemetry False &
|
| 99 |
-
LITELLM_PID=$!
|
| 100 |
-
|
| 101 |
-
echo "⏳ Waiting for LiteLLM..."
|
| 102 |
-
for i in {1..30}; do
|
| 103 |
-
if curl -sf http://localhost:7860/health > /dev/null 2>&1; then
|
| 104 |
-
echo "✅ LiteLLM ready"
|
| 105 |
-
break
|
| 106 |
-
fi
|
| 107 |
-
if [ $i -eq 30 ]; then
|
| 108 |
-
echo "⚠️ LiteLLM slow to start — may still be initializing"
|
| 109 |
-
break
|
| 110 |
-
fi
|
| 111 |
-
sleep 2
|
| 112 |
-
done
|
| 113 |
-
|
| 114 |
-
echo ""
|
| 115 |
echo "═══════════════════════════════════════════════════════════"
|
| 116 |
-
echo "✅ Agent Q-Q
|
| 117 |
echo ""
|
| 118 |
-
echo "
|
| 119 |
-
echo " Public LiteLLM: http://0.0.0.0:7860"
|
| 120 |
echo ""
|
| 121 |
-
echo "
|
| 122 |
-
echo "
|
| 123 |
-
echo "
|
|
|
|
|
|
|
|
|
|
| 124 |
echo ""
|
| 125 |
-
echo "
|
| 126 |
-
echo "
|
|
|
|
| 127 |
echo "═══════════════════════════════════════════════════════════"
|
| 128 |
|
| 129 |
-
wait $
|
|
|
|
| 1 |
#!/bin/bash
|
| 2 |
# ─────────────────────────────────────────────────────────────
|
| 3 |
+
# Agent Q-Q (QLAWED-Q) — HF Space Startup v3.1
|
| 4 |
+
# MODE: Ollama direct on :7860
|
|
|
|
|
|
|
|
|
|
| 5 |
#
|
| 6 |
# VRAM budget (T4-small, 16GB):
|
| 7 |
+
# qwen3:1.7b 1.10GB triage + chat
|
| 8 |
# nomic-embed-text 0.27GB embeddings / pgvector
|
| 9 |
# ──────────────────────────────────────────────
|
| 10 |
+
# Total resident: ~1.37GB / 16GB
|
| 11 |
# ─────────────────────────────────────────────────────────────
|
| 12 |
|
| 13 |
set -e
|
| 14 |
|
| 15 |
echo "╔═════════════════════════════════════════════╗"
|
| 16 |
+
echo "║ Agent Q-Q — Ollama Direct Mode ║"
|
| 17 |
+
echo "║ Serving on :7860 (OpenAI-compatible) ║"
|
|
|
|
| 18 |
echo "╚═════════════════════════════════════════════╝"
|
| 19 |
echo ""
|
| 20 |
|
| 21 |
# ── Persistent disk (HF Pro) ──────────────────────────────────
|
| 22 |
+
if [ -d "/data" ]; then
|
| 23 |
+
echo "✅ Persistent disk at /data"
|
| 24 |
+
mkdir -p /data/ollama-models
|
| 25 |
+
export OLLAMA_MODELS="/data/ollama-models"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
else
|
| 27 |
+
echo "⚠️ Ephemeral storage (models re-download on restart)"
|
|
|
|
|
|
|
|
|
|
| 28 |
fi
|
| 29 |
|
| 30 |
+
# ── Start Ollama on port 7860 ─────────────────────────────────
|
| 31 |
+
export OLLAMA_HOST=0.0.0.0:7860
|
| 32 |
+
export OLLAMA_KEEP_ALIVE=10m
|
| 33 |
export OLLAMA_NUM_PARALLEL=2
|
|
|
|
| 34 |
|
| 35 |
ollama serve &
|
| 36 |
OLLAMA_PID=$!
|
| 37 |
|
| 38 |
+
echo "⏳ Waiting for Ollama on :7860..."
|
| 39 |
for i in {1..40}; do
|
| 40 |
+
if curl -sf http://localhost:7860/api/tags > /dev/null 2>&1; then
|
| 41 |
echo "✅ Ollama ready"
|
| 42 |
break
|
| 43 |
fi
|
|
|
|
| 48 |
sleep 2
|
| 49 |
done
|
| 50 |
|
| 51 |
+
# ── Pull models ───────────────────────────────────────────────
|
|
|
|
|
|
|
| 52 |
echo ""
|
| 53 |
+
echo "📦 Pulling models..."
|
| 54 |
|
| 55 |
pull_if_missing() {
|
| 56 |
local model="$1"
|
|
|
|
| 58 |
echo " ✓ cached: $model"
|
| 59 |
else
|
| 60 |
echo " ↓ pulling: $model"
|
| 61 |
+
ollama pull "$model" && echo " ✅ $model" || echo " ⚠️ failed: $model (non-fatal)"
|
| 62 |
fi
|
| 63 |
}
|
| 64 |
|
|
|
|
| 65 |
pull_if_missing "qwen3:1.7b"
|
|
|
|
|
|
|
| 66 |
pull_if_missing "nomic-embed-text"
|
| 67 |
|
| 68 |
echo ""
|
| 69 |
+
echo "📊 Loaded models:"
|
| 70 |
ollama list
|
| 71 |
echo ""
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
echo "═══════════════════════════════════════════════════════════"
|
| 74 |
+
echo "✅ Agent Q-Q LIVE"
|
| 75 |
echo ""
|
| 76 |
+
echo " Public API: http://0.0.0.0:7860"
|
|
|
|
| 77 |
echo ""
|
| 78 |
+
echo " Endpoints:"
|
| 79 |
+
echo " GET /api/tags → list models"
|
| 80 |
+
echo " GET /v1/models → OpenAI model list"
|
| 81 |
+
echo " POST /v1/chat/completions → OpenAI chat"
|
| 82 |
+
echo " POST /api/generate → Ollama native"
|
| 83 |
+
echo " POST /api/embeddings → embeddings"
|
| 84 |
echo ""
|
| 85 |
+
echo " Models resident:"
|
| 86 |
+
echo " qwen3:1.7b ~1.1GB chat / triage"
|
| 87 |
+
echo " nomic-embed-text ~0.3GB embeddings"
|
| 88 |
echo "═══════════════════════════════════════════════════════════"
|
| 89 |
|
| 90 |
+
wait $OLLAMA_PID
|