Spaces:
Paused
Paused
feat: passthrough mode v3 - qwen3:1.7b triage only
Browse files- Dockerfile +63 -0
- Modelfile.qlawed-frontend +89 -0
- litellm_config.yaml +70 -0
- startup.sh +111 -0
Dockerfile
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ─────────────────────────────────────────────────────────────
|
| 2 |
+
# Agent Q-Q (Agent Q-QAJAQS) — QLAWED-Q HF Space v2.0
|
| 3 |
+
# SDK: Docker | Public Port: 7860 (LiteLLM proxy)
|
| 4 |
+
#
|
| 5 |
+
# Architecture:
|
| 6 |
+
# Ollama → internal :11434 (model inference)
|
| 7 |
+
# LiteLLM → public :7860 (Claude-compatible API)
|
| 8 |
+
#
|
| 9 |
+
# Endpoints exposed at :7860:
|
| 10 |
+
# POST /v1/chat/completions (OpenAI SDK compatible)
|
| 11 |
+
# POST /v1/messages (Anthropic SDK compatible)
|
| 12 |
+
# GET /health
|
| 13 |
+
# GET /v1/models
|
| 14 |
+
# ─────────────────────────────────────────────────────────────
|
| 15 |
+
|
| 16 |
+
FROM ubuntu:22.04
|
| 17 |
+
|
| 18 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
| 19 |
+
|
| 20 |
+
# ── System dependencies ───────────────────────────────────────
|
| 21 |
+
RUN apt-get update && apt-get install -y \
|
| 22 |
+
curl \
|
| 23 |
+
ca-certificates \
|
| 24 |
+
python3 \
|
| 25 |
+
python3-pip \
|
| 26 |
+
git \
|
| 27 |
+
git-lfs \
|
| 28 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 29 |
+
|
| 30 |
+
# ── Ollama ────────────────────────────────────────────────────
# Run the install pipeline under bash with pipefail: with the default
# `sh -c`, only the exit status of the trailing `sh` counts, so a failed
# download would hand `sh` an empty script and the build step would pass.
RUN bash -o pipefail -c 'curl -fsSL https://ollama.com/install.sh | sh'
|
| 32 |
+
|
| 33 |
+
# ── Python packages (system-wide, available to all users) ─────
|
| 34 |
+
RUN pip3 install --no-cache-dir \
|
| 35 |
+
"litellm[proxy]>=1.40.0" \
|
| 36 |
+
"huggingface_hub[cli]>=0.23.0" \
|
| 37 |
+
PyYAML \
|
| 38 |
+
uvicorn
|
| 39 |
+
|
| 40 |
+
# ── HF Spaces: non-root user uid 1000 ─────────────────────────
|
| 41 |
+
RUN useradd -m -u 1000 user
|
| 42 |
+
USER user
|
| 43 |
+
|
| 44 |
+
# ── Environment ───────────────────────────────────────────────
|
| 45 |
+
ENV OLLAMA_MODELS=/home/user/.ollama/models
|
| 46 |
+
ENV OLLAMA_HOST=0.0.0.0:11434
|
| 47 |
+
ENV HF_HOME=/home/user/.cache/huggingface
|
| 48 |
+
ENV HOME=/home/user
|
| 49 |
+
ENV PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:$PATH
|
| 50 |
+
|
| 51 |
+
WORKDIR /home/user
|
| 52 |
+
|
| 53 |
+
# ── Copy config files ─────────────────────────────────────────
|
| 54 |
+
COPY --chown=user startup.sh /home/user/startup.sh
|
| 55 |
+
COPY --chown=user litellm_config.yaml /home/user/litellm_config.yaml
|
| 56 |
+
COPY --chown=user Modelfile.qlawed-frontend /home/user/Modelfile.qlawed-frontend
|
| 57 |
+
|
| 58 |
+
RUN chmod +x /home/user/startup.sh
|
| 59 |
+
|
| 60 |
+
# ── Public port: LiteLLM proxy ────────────────────────────────
|
| 61 |
+
EXPOSE 7860
|
| 62 |
+
|
| 63 |
+
CMD ["/home/user/startup.sh"]
|
Modelfile.qlawed-frontend
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ─────────────────────────────────────────────────────────────
|
| 2 |
+
# Agent Q-Q — Frontend Instructor Agent Modelfile
|
| 3 |
+
#
|
| 4 |
+
# Base: bartowski/Qwen_Qwen3-14B-GGUF IQ4_XS (8.11GB)
|
| 5 |
+
# imatrix calibrated IQ-quant · llama.cpp b5200
|
| 6 |
+
#
|
| 7 |
+
# Role: Primary voice/persona/memory instructional agent
|
| 8 |
+
# for the QLAWED-Q Agent OS and MAD Gambit platform
|
| 9 |
+
# ─────────────────────────────────────────────────────────────
|
| 10 |
+
|
| 11 |
+
FROM /home/user/.ollama/gguf/Qwen_Qwen3-14B-IQ4_XS.gguf
|
| 12 |
+
|
| 13 |
+
# ── Inference parameters ──────────────────────────────────────
|
| 14 |
+
PARAMETER temperature 0.72
|
| 15 |
+
PARAMETER top_p 0.90
|
| 16 |
+
PARAMETER top_k 40
|
| 17 |
+
PARAMETER repeat_penalty 1.08
|
| 18 |
+
PARAMETER num_ctx 32768
|
| 19 |
+
PARAMETER num_predict -1
|
| 20 |
+
|
| 21 |
+
# Stop tokens (ChatML format — Qwen3 native)
|
| 22 |
+
PARAMETER stop "<|im_end|>"
|
| 23 |
+
PARAMETER stop "<|im_start|>"
|
| 24 |
+
PARAMETER stop "<|endoftext|>"
|
| 25 |
+
|
| 26 |
+
# ── System persona ────────────────────────────────────────────
|
| 27 |
+
SYSTEM """
|
| 28 |
+
You are Agent Q-Q (full name: Agent Q-QAJAQS), the primary interface for the QLAWED-Q Agent OS — built by MAD Gambit.
|
| 29 |
+
|
| 30 |
+
## Identity
|
| 31 |
+
You are a 300IQ confidant and expert assistant. You hold deep expertise across:
|
| 32 |
+
- Prediction markets, conviction trading, and DeFi mechanics
|
| 33 |
+
- Web3 / Web2 full-stack engineering and blockchain development
|
| 34 |
+
- AI, machine learning, and multi-agent systems
|
| 35 |
+
- Lean Six Sigma, product management, and system architecture
|
| 36 |
+
- Research synthesis, ghostwriting, and strategic analysis
|
| 37 |
+
|
| 38 |
+
## MAD Gambit Platform Context (these numbers never change)
|
| 39 |
+
- Platform name: MAD Gambit
|
| 40 |
+
- Platform fee: 1.88% (applied globally to all markets)
|
| 41 |
+
- Community profit share: 28.8% (display as 28% in presentations)
|
| 42 |
+
- Creator revenue share: 40% (creator-made markets only — not general community)
|
| 43 |
+
- Seed raise: $1–2M at $12M pre-money valuation
|
| 44 |
+
- Token: $MADx (ERC-20)
|
| 45 |
+
- Card game product line: MADHATs (NFT cards within MAD Gambit)
|
| 46 |
+
|
| 47 |
+
## Tech Stack (always current)
|
| 48 |
+
- Frontend: React 18, TypeScript, Hono/HonoX, TailwindCSS
|
| 49 |
+
- Backend: Supabase (Postgres + Edge Functions), Node.js
|
| 50 |
+
- Blockchain: Solidity, Foundry, OpenZeppelin — Arbitrum One, HyperEVM
|
| 51 |
+
- Oracles: Chainlink VRF + Price Feeds, Pyth Network
|
| 52 |
+
- Account Abstraction: Alchemy AA-SDK (ERC-4337)
|
| 53 |
+
- AI: Claude Opus 4.6, MCP servers, GraphRAG, Instructor
|
| 54 |
+
|
| 55 |
+
## Response Rules
|
| 56 |
+
- Skip preamble — answer first, context second
|
| 57 |
+
- Use specific numbers, not adjectives
|
| 58 |
+
- Active voice. Short sentences. No filler.
|
| 59 |
+
- Match energy: casual prompt = casual reply, formal request = formal output
|
| 60 |
+
- Never use: leverage, synergy, ecosystem play, unlock value, game-changing, utilize
|
| 61 |
+
- When asked for choices: give 6 ranked options with recommendation clear
|
| 62 |
+
- When uncertain: say so clearly, then offer to research
|
| 63 |
+
|
| 64 |
+
## Memory Protocol
|
| 65 |
+
You have access to a memory layer. When users share important context:
|
| 66 |
+
- Acknowledge it: "Got it — I'll remember that."
|
| 67 |
+
- Reference prior context naturally in follow-up responses
|
| 68 |
+
- Flag recalled memories: "You mentioned earlier that..."
|
| 69 |
+
- If memory seems stale or conflicting, ask before assuming
|
| 70 |
+
|
| 71 |
+
## Voice Mode
|
| 72 |
+
When responding to voice input (marked with [VOICE] or in audio context):
|
| 73 |
+
- Keep answers under 3 sentences for simple questions
|
| 74 |
+
- Use natural spoken language — no markdown, no bullet points, no code blocks
|
| 75 |
+
- Confirm before long answers: "Got it. Here's what I found..."
|
| 76 |
+
- Spell out numbers and abbreviations for text-to-speech clarity
|
| 77 |
+
|
| 78 |
+
## Persona Capability
|
| 79 |
+
You can take on specific personas when instructed. When a persona is set:
|
| 80 |
+
- Maintain it consistently throughout the session
|
| 81 |
+
- Stay in character unless the user explicitly breaks the frame
|
| 82 |
+
- A persona overrides default tone but never overrides factual accuracy or safety
|
| 83 |
+
|
| 84 |
+
## Output Format Defaults
|
| 85 |
+
- Prose for explanations, not bullets (unless explicitly requested)
|
| 86 |
+
- Code in fenced blocks with language labels
|
| 87 |
+
- Tables for comparisons
|
| 88 |
+
- Never start a response with "Certainly", "Absolutely", or "Great question"
|
| 89 |
+
"""
|
litellm_config.yaml
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ─────────────────────────────────────────────────────────────
|
| 2 |
+
# Agent Q-Q (QLAWED-Q) — HF Space LiteLLM Config
|
| 3 |
+
# Version: 3.0 | MODE: Passthrough / Triage only
|
| 4 |
+
#
|
| 5 |
+
# MODEL ROUTING (passthrough mode):
|
| 6 |
+
# claude-haiku-4-5 → qwen3:1.7b triage, non-generative
|
| 7 |
+
# text-embedding-* → nomic-embed-text memory embeddings
|
| 8 |
+
#
|
| 9 |
+
# Generative tasks (claude-sonnet-4-6) handled by laptop stack.
|
| 10 |
+
# This Space activates only when laptop is offline.
|
| 11 |
+
# COST: ~1.37GB VRAM resident — T4-small sleeps when idle.
|
| 12 |
+
# ─────────────────────────────────────────────────────────────
|
| 13 |
+
|
| 14 |
+
model_list:
|
| 15 |
+
|
| 16 |
+
# ── Triage / Passthrough (always resident ~1.1GB) ──────────
|
| 17 |
+
- model_name: claude-haiku-4-5
|
| 18 |
+
litellm_params:
|
| 19 |
+
model: ollama/qwen3:1.7b
|
| 20 |
+
api_base: http://localhost:11434
|
| 21 |
+
model_info:
|
| 22 |
+
description: "qwen3:1.7b — triage, routing, non-generative Q&A"
|
| 23 |
+
max_input_tokens: 8192
|
| 24 |
+
max_output_tokens: 4096
|
| 25 |
+
input_cost_per_token: 0
|
| 26 |
+
output_cost_per_token: 0
|
| 27 |
+
|
| 28 |
+
# ── Embeddings (always resident ~0.27GB) ───────────────────
|
| 29 |
+
- model_name: text-embedding-ada-002
|
| 30 |
+
litellm_params:
|
| 31 |
+
model: ollama/nomic-embed-text
|
| 32 |
+
api_base: http://localhost:11434
|
| 33 |
+
model_info:
|
| 34 |
+
description: "nomic-embed-text — pgvector / Supabase memory"
|
| 35 |
+
mode: embedding
|
| 36 |
+
input_cost_per_token: 0
|
| 37 |
+
output_cost_per_token: 0
|
| 38 |
+
|
| 39 |
+
- model_name: text-embedding-3-small
|
| 40 |
+
litellm_params:
|
| 41 |
+
model: ollama/nomic-embed-text
|
| 42 |
+
api_base: http://localhost:11434
|
| 43 |
+
model_info:
|
| 44 |
+
mode: embedding
|
| 45 |
+
input_cost_per_token: 0
|
| 46 |
+
output_cost_per_token: 0
|
| 47 |
+
|
| 48 |
+
# ── Global settings ──────────────────────────────────────────
|
| 49 |
+
litellm_settings:
|
| 50 |
+
drop_params: true
|
| 51 |
+
request_timeout: 120
|
| 52 |
+
telemetry: false
|
| 53 |
+
set_verbose: false
|
| 54 |
+
json_logs: true
|
| 55 |
+
num_retries: 2
|
| 56 |
+
|
| 57 |
+
# ── Router ───────────────────────────────────────────────────
|
| 58 |
+
router_settings:
|
| 59 |
+
routing_strategy: least-busy
|
| 60 |
+
num_retries: 2
|
| 61 |
+
allowed_fails: 1
|
| 62 |
+
cooldown_time: 30
|
| 63 |
+
retry_after: 5
|
| 64 |
+
|
| 65 |
+
# ── Auth ─────────────────────────────────────────────────────
|
| 66 |
+
general_settings:
|
| 67 |
+
master_key: os.environ/LITELLM_MASTER_KEY
|
| 68 |
+
store_model_in_db: false
|
| 69 |
+
allow_user_auth: false
|
| 70 |
+
disable_spend_logs: true
|
startup.sh
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# ─────────────────────────────────────────────────────────────
|
| 3 |
+
# Agent Q-Q (QLAWED-Q) — HF Space Startup v3.0
|
| 4 |
+
# MODE: Passthrough / Triage only
|
| 5 |
+
#
|
| 6 |
+
# When laptop is online: laptop handles all generative load.
|
| 7 |
+
# This Space runs only when laptop is offline — triage + embeddings.
|
| 8 |
+
#
|
| 9 |
+
# VRAM budget (T4-small, 16GB):
|
| 10 |
+
# qwen3:1.7b 1.10GB triage, non-generative
|
| 11 |
+
# nomic-embed-text 0.27GB embeddings / pgvector
|
| 12 |
+
# ──────────────────────────────────────────────
|
| 13 |
+
# Total resident: ~1.37GB / 16GB (minimal cost)
|
| 14 |
+
# ─────────────────────────────────────────────────────────────
|
| 15 |
+
|
| 16 |
+
set -e

# Startup banner. A single printf writes one line per argument; the final
# empty argument produces the trailing blank line.
printf '%s\n' \
  "╔═════════════════════════════════════════════╗" \
  "║ Agent Q-Q — Passthrough Mode ║" \
  "║ qwen3:1.7b :11434 (triage) ║" \
  "║ LiteLLM :7860 (Claude-compat proxy) ║" \
  "╚═════════════════════════════════════════════╝" \
  ""
|
| 24 |
+
|
| 25 |
+
# ── 1. Start Ollama ───────────────────────────────────────────
export OLLAMA_HOST=0.0.0.0:11434
# NOTE(review): a 5m keep-alive presumably lets Ollama unload a model after
# five idle minutes, while other comments in this script describe the models
# as "always resident" — confirm which behavior is intended.
export OLLAMA_KEEP_ALIVE=5m
export OLLAMA_NUM_PARALLEL=2
export OLLAMA_MAX_LOADED_MODELS=2

ollama serve &
OLLAMA_PID=$!

# Poll the tags endpoint up to 40 times, sleeping 2 s between attempts.
echo "⏳ Waiting for Ollama on :11434..."
ollama_ready=0
for attempt in {1..40}; do
  if curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; then
    ollama_ready=1
    break
  fi
  # Give up immediately when the server process itself has already exited;
  # further polling could never succeed.
  if ! kill -0 "$OLLAMA_PID" 2> /dev/null; then
    break
  fi
  # No sleep after the final attempt.
  if (( attempt < 40 )); then
    sleep 2
  fi
done

if (( ollama_ready )); then
  echo "✅ Ollama ready"
else
  echo "❌ Ollama failed to start"
  exit 1
fi
|
| 46 |
+
|
| 47 |
+
# ── 2. Pull triage models ─────────────────────────────────────
echo ""
echo "📦 Setting up passthrough models..."
echo ""

#######################################
# Make sure one model is available to the local Ollama server.
# Arguments: $1 - model name/tag to look up and, when absent, pull
# Outputs:   progress and result messages on stdout
# Returns:   0 in every case — a failed pull is reported but deliberately
#            does not abort the script
#######################################
pull_if_missing() {
  local model="$1"

  if ollama show "$model" > /dev/null 2>&1; then
    echo " ✓ cached: $model"
    return 0
  fi

  echo " ↓ pulling: $model"
  # Explicit if/else rather than `pull && ok || warn`: in the chained form the
  # warning branch also fires when the success message itself fails.
  if ollama pull "$model"; then
    echo " ✅ ready: $model"
  else
    echo " ⚠️ failed: $model"
  fi
  return 0
}
|
| 61 |
+
|
| 62 |
+
# Models fetched for passthrough mode, in order:
#   qwen3:1.7b        triage agent — always resident, ~1.1GB
#   nomic-embed-text  embeddings   — always resident, ~0.27GB
for required_model in "qwen3:1.7b" "nomic-embed-text"; do
  pull_if_missing "$required_model"
done

# Show what the Ollama server now has registered.
printf '%s\n' "" "📊 Registered models:"
ollama list
echo ""
|
| 72 |
+
|
| 73 |
+
# ── 3. Start LiteLLM proxy ───────────────────────────────────
echo "🔀 Starting LiteLLM proxy on :7860..."

litellm \
  --config "$HOME/litellm_config.yaml" \
  --port 7860 \
  --host 0.0.0.0 \
  --telemetry False &
LITELLM_PID=$!

# Probe /health/liveliness rather than /health: the proxy config sets a
# master key, and /health then expects credentials (and runs checks against
# the configured models), so an anonymous `curl -f` against it would keep
# failing until the loop times out. The liveliness route answers without
# credentials once the server process is up.
# Up to 30 attempts, 2 s apart; a timeout only warns and startup continues.
echo "⏳ Waiting for LiteLLM..."
for attempt in {1..30}; do
  if curl -sf http://localhost:7860/health/liveliness > /dev/null 2>&1; then
    echo "✅ LiteLLM ready"
    break
  fi
  if (( attempt == 30 )); then
    echo "⚠️ LiteLLM slow to start — may still be initializing"
    break
  fi
  sleep 2
done
|
| 95 |
+
|
| 96 |
+
# Summary banner: endpoints and model routing. One printf argument per
# output line; empty arguments are the blank separator lines.
printf '%s\n' \
  "" \
  "═══════════════════════════════════════════════════════════" \
  "✅ Agent Q-Q HF Space — PASSTHROUGH MODE LIVE" \
  "" \
  " Internal Ollama: http://localhost:11434" \
  " Public LiteLLM: http://0.0.0.0:7860" \
  "" \
  " Routing (passthrough mode):" \
  " claude-haiku-4-5 → qwen3:1.7b (triage/non-generative)" \
  " text-embedding-* → nomic-embed-text (embeddings)" \
  "" \
  " NOTE: Generative tasks (claude-sonnet-4-6) handled by laptop." \
  " Start laptop stack to enable full generative capability." \
  "═══════════════════════════════════════════════════════════"
|
| 110 |
+
|
| 111 |
+
# Keep the script in the foreground until the FIRST background service
# (LiteLLM or Ollama) exits. Waiting on both PIDs at once would block until
# both are gone, leaving the container running with one service dead.
service_status=0
wait -n || service_status=$?
echo "❌ A service exited (status ${service_status}) — shutting down" >&2
# Best-effort stop of whichever service is still running; one of the two PIDs
# is already gone, so kill's own failure is expected and ignored.
kill "$LITELLM_PID" "$OLLAMA_PID" 2> /dev/null || true
exit "$service_status"
|