oki0ki committed
Commit 8524a15 · verified · 1 Parent(s): cc61885

Update app.py

Files changed (1):
  1. app.py +34 -26
app.py CHANGED
@@ -1,13 +1,14 @@
 #!/usr/bin/env python3
 """
 Ultra-light entrypoint for HF Spaces (Docker SDK)
-✅ Downloads the model from the HF cache → launches llama_cpp.server
-✅ OpenAI format | ✅ No auth | ✅ Resilient to interruptions | ✅ CPU/RAM optimization
+✅ Downloads the model → launches llama_cpp.server
+✅ OpenAI format | ✅ No auth | ✅ Resilient to interruptions
 """
 import os
 import sys
 import signal
 import logging
+import subprocess
 from huggingface_hub import hf_hub_download
 
 logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
@@ -28,29 +29,36 @@ signal.signal(signal.SIGTERM, graceful_shutdown)
 signal.signal(signal.SIGINT, graceful_shutdown)
 
 if __name__ == "__main__":
-    logger.info(f"⬇️ Downloading/verifying: {MODEL_REPO}/{MODEL_FILE}")
-    model_path = hf_hub_download(
-        repo_id=MODEL_REPO,
-        filename=MODEL_FILE,
-        resume_download=True,
-        local_dir_use_symlinks=False
-    )
-    logger.info(f"✅ Model ready: {model_path}")
-
-    # Launch command for llama_cpp.server (its built-in OpenAI-compatible server)
-    cmd = [
-        sys.executable, "-m", "llama_cpp.server",
-        "--model", model_path,
-        "--host", "0.0.0.0",
-        "--port", PORT,
-        "--n_ctx", N_CTX,
-        "--n_threads", N_THREADS,
-        "--n_batch", N_BATCH,
-        "--n_gpu_layers", "0",
-        "--use_mmap",
-        "--no_flash_attn"
-    ]
-
-    logger.info(f"🚀 Starting server: {' '.join(cmd)}")
-    # execvp replaces the Python process with the server → correct Docker/HF signal handling
-    os.execvp(sys.executable, cmd)
+    try:
+        logger.info(f"⬇️ Downloading/verifying: {MODEL_REPO}/{MODEL_FILE}")
+        model_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename=MODEL_FILE,
+            resume_download=True
+        )
+        logger.info(f"✅ Model ready: {model_path}")
+
+        # Launch command for llama_cpp.server
+        cmd = [
+            sys.executable, "-m", "llama_cpp.server",
+            "--model", model_path,
+            "--host", "0.0.0.0",
+            "--port", PORT,
+            "--n_ctx", N_CTX,
+            "--n_threads", N_THREADS,
+            "--n_batch", N_BATCH,
+            "--n_gpu_layers", "0",
+            "--use_mmap",
+            "--no_flash_attn",
+            "--chat_format", "chatml"  # Default chat format for Granite/Llama
+        ]
+
+        logger.info(f"🚀 Starting server: {' '.join(cmd)}")
+
+        # Start the child process and wait for it to exit
+        process = subprocess.Popen(cmd)
+        process.wait()
+
+    except Exception as e:
+        logger.error(f"❌ Critical error: {e}", exc_info=True)
+        sys.exit(1)
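One thing to flag in the download call: in recent huggingface_hub releases, `resume_download` is a deprecated no-op (interrupted downloads always resume), and the dropped `local_dir_use_symlinks` kwarg is likewise deprecated. A minimal sketch of the equivalent call; the repo and file names are hypothetical stand-ins for the MODEL_REPO / MODEL_FILE values this diff does not show:

    from huggingface_hub import hf_hub_download

    # Same download without the deprecated kwargs (assuming a recent
    # huggingface_hub, where downloads always resume by default).
    model_path = hf_hub_download(
        repo_id="some-user/some-model-GGUF",  # hypothetical stand-in for MODEL_REPO
        filename="model.Q4_K_M.gguf",         # hypothetical stand-in for MODEL_FILE
    )
    print(model_path)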
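The bigger behavioural change is the switch from `os.execvp` to `subprocess.Popen` + `wait()`. As the removed comment notes, `execvp` replaced the Python process with the server, so Docker/HF signals went straight to llama_cpp.server; with `Popen`, the Python wrapper stays the parent, SIGTERM is delivered to it rather than to the child, and the existing `graceful_shutdown` handler would need to forward it. A minimal sketch of that forwarding, assuming a `cmd` list like the one built above:

    import signal
    import subprocess
    import sys

    # Hypothetical stand-in for the cmd list built in app.py.
    cmd = [sys.executable, "-m", "llama_cpp.server", "--model", "model.gguf"]

    process = subprocess.Popen(cmd)

    def forward_signal(signum, frame):
        # Relay SIGTERM/SIGINT to the server so it can shut down cleanly,
        # then exit with the conventional 128 + signum status.
        process.send_signal(signum)
        process.wait()
        sys.exit(128 + signum)

    signal.signal(signal.SIGTERM, forward_signal)
    signal.signal(signal.SIGINT, forward_signal)

    # Propagate the server's own exit code when it stops on its own.
    sys.exit(process.wait())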
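Since the header promises an OpenAI-format endpoint with no auth, a quick smoke test against the running server might look like the following; the port and model name are assumptions, since PORT's value is defined outside this diff:

    # Minimal smoke test for llama_cpp.server's OpenAI-compatible API.
    # The base_url port (8000) is an assumption; app.py takes it from PORT.
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-no-auth")

    resp = client.chat.completions.create(
        model="local-model",  # name is not significant for a single-model server (assumption)
        messages=[{"role": "user", "content": "Reply with one short sentence."}],
        max_tokens=32,
    )
    print(resp.choices[0].message.content)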