Spaces:
Runtime error
Runtime error
sdk: docker
colorTo: indigo
#!/usr/bin/env python3
"""
Ultra-light entrypoint for HF Spaces (Docker SDK).

Downloads the model file from the HF cache, then launches llama_cpp.server:
OpenAI-compatible API, no auth, resilient to interrupted downloads,
CPU/RAM-optimized defaults.
"""
import os
import sys
import signal
import logging

from huggingface_hub import hf_hub_download

logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
logger = logging.getLogger(__name__)

# Model selection; both repo and exact GGUF file are overridable via env vars.
MODEL_REPO = os.environ.get("MODEL_REPO", "unsloth/granite-4.1-3b-GGUF")
MODEL_FILE = os.environ.get("MODEL_FILE", "granite-4.1-3b-UD-IQ2_M.gguf")

# Server tuning knobs. Kept as strings: they are passed straight through
# as CLI arguments to llama_cpp.server.
PORT = os.environ.get("PORT", "7860")        # HF Spaces routes traffic to 7860
N_CTX = os.environ.get("N_CTX", "2048")      # context window size
N_THREADS = os.environ.get("N_THREADS", "2") # CPU threads (Spaces free tier is small)
N_BATCH = os.environ.get("N_BATCH", "512")   # prompt processing batch size
def graceful_shutdown(signum, frame):
    """Signal handler: log the shutdown request and exit with status 0.

    Registered for SIGTERM/SIGINT so Docker / HF Spaces restarts and stops
    are clean rather than hard kills.
    """
    logger.info("📡 Otrzymano sygnał zakończenia. Zamykanie...")
    sys.exit(0)


signal.signal(signal.SIGTERM, graceful_shutdown)
signal.signal(signal.SIGINT, graceful_shutdown)
if __name__ == "__main__":
    # Download the GGUF file (or verify it is already in the local HF cache).
    # NOTE: the deprecated resume_download / local_dir_use_symlinks arguments
    # were dropped — resuming is the default behavior, and the symlink flag is
    # ignored when no local_dir is given.
    logger.info(f"⬇️ Pobieranie/weryfikacja: {MODEL_REPO}/{MODEL_FILE}")
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
    )
    logger.info(f"✅ Model gotowy: {model_path}")

    # Startup command for llama_cpp.server (built-in OpenAI-compatible server).
    # The server's argparser is generated from pydantic settings, so boolean
    # options require an explicit value ("--use_mmap True"), and the flash
    # attention field is "--flash_attn", not "--no_flash_attn".
    cmd = [
        sys.executable, "-m", "llama_cpp.server",
        "--model", model_path,
        "--host", "0.0.0.0",
        "--port", PORT,
        "--n_ctx", N_CTX,
        "--n_threads", N_THREADS,
        "--n_batch", N_BATCH,
        "--n_gpu_layers", "0",   # CPU-only inference
        "--use_mmap", "True",    # mmap the model instead of loading into RAM
        "--flash_attn", "False", # no flash attention on CPU
    ]
    logger.info(f"🚀 Start serwera: {' '.join(cmd)}")
    # execvp replaces the Python process with the server → signals from
    # Docker/HF reach the server directly, with no intermediate parent.
    os.execvp(sys.executable, cmd)