#!/usr/bin/env python3
"""
Ultra-lightweight entrypoint for Hugging Face Spaces (Docker SDK).

✅ Downloads the model, then launches llama_cpp.server
✅ OpenAI-compatible API | ✅ No authentication | ✅ Resilient to dropped connections
"""
import os
import sys
import signal
import logging
import subprocess
from huggingface_hub import hf_hub_download
# Root logging: INFO and above, rendered as "<LEVEL>: <message>".
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# Hugging Face repository holding the GGUF weights (fixed, not overridable).
MODEL_REPO = "unsloth/granite-4.1-3b-GGUF"

# Settings below can each be overridden through an environment variable of the
# same name. They are deliberately kept as strings: the values are passed
# verbatim as command-line arguments to the server process.
MODEL_FILE = os.getenv("MODEL_FILE", "granite-4.1-3b-UD-IQ2_M.gguf")
PORT = os.getenv("PORT", "7860")
N_CTX = os.getenv("N_CTX", "2048")
N_THREADS = os.getenv("N_THREADS", "2")
N_BATCH = os.getenv("N_BATCH", "512")
def graceful_shutdown(signum, frame):
    """Signal handler: log the shutdown request and exit with status 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame that was active when the signal arrived (unused).

    Raises:
        SystemExit: Always, with exit code 0.
    """
    logger.info("📡 Otrzymano sygnał zakończenia. Zamykanie...")
    # Equivalent to sys.exit(0): unwinds the main thread via SystemExit.
    raise SystemExit(0)
# Route both Ctrl+C (SIGINT) and a termination request (SIGTERM) through the
# same handler so either one results in a logged, clean exit.
signal.signal(signal.SIGINT, graceful_shutdown)
signal.signal(signal.SIGTERM, graceful_shutdown)
if __name__ == "__main__":
    # Script entry point: fetch the model file, launch llama_cpp.server as a
    # child process, wait for it, and exit with the child's status.
    #
    # ``process`` stays None until the server is spawned so the cleanup in
    # ``finally`` can tell whether there is a child to stop.
    process = None
    try:
        logger.info(f"⬇️ Pobieranie/weryfikacja: {MODEL_REPO}/{MODEL_FILE}")
        # Interrupted downloads are resumed by huggingface_hub on its own; the
        # deprecated ``resume_download`` argument is therefore not passed.
        model_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=MODEL_FILE,
        )
        logger.info(f"✅ Model gotowy: {model_path}")

        # Launch command for llama_cpp.server. Its CLI takes every boolean
        # setting as "--name <value>", so mmap and flash attention are given
        # explicit values rather than bare / "--no_*" flags.
        cmd = [
            sys.executable, "-m", "llama_cpp.server",
            "--model", model_path,
            "--host", "0.0.0.0",
            "--port", PORT,
            "--n_ctx", N_CTX,
            "--n_threads", N_THREADS,
            "--n_batch", N_BATCH,
            "--n_gpu_layers", "0",
            "--use_mmap", "true",
            "--flash_attn", "false",
            # NOTE(review): forces the ChatML prompt format; confirm this is
            # the right template for the Granite model being served.
            "--chat_format", "chatml",
        ]
        logger.info(f"🚀 Start serwera: {' '.join(cmd)}")

        # Run the server as a child process and block until it exits.
        process = subprocess.Popen(cmd)
        exit_code = process.wait()
    except Exception as e:
        logger.error(f"❌ Krytyczny błąd: {e}", exc_info=True)
        sys.exit(1)
    finally:
        # Reached on every path, including the SystemExit raised by the signal
        # handler: make sure the server does not outlive this script.
        if process is not None and process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                # The child ignored SIGTERM; force it down.
                process.kill()
                process.wait()

    # Propagate the server's exit status so a crashed server is reported as a
    # failure instead of a silent exit code 0.
    sys.exit(exit_code)