rydlrKE committed on
Commit
adbafd9
·
1 Parent(s): 6e8f47a

Default HF runtime to CPU and honor KIMODO_DEVICE

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. kimodo/demo/app.py +8 -1
app.py CHANGED
@@ -28,6 +28,8 @@ os.environ.setdefault("SERVER_PORT", str(NATIVE_PORT))
28
  os.environ.setdefault("HF_MODE", "1")
29
  # Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
30
  os.environ.setdefault("TEXT_ENCODER_MODE", "api")
 
 
31
 
32
  _state: dict[str, object] = {
33
  "ok": False,
 
28
  os.environ.setdefault("HF_MODE", "1")
29
  # Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
30
  os.environ.setdefault("TEXT_ENCODER_MODE", "api")
31
+ # Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
32
+ os.environ.setdefault("KIMODO_DEVICE", "cpu")
33
 
34
  _state: dict[str, object] = {
35
  "ok": False,
kimodo/demo/app.py CHANGED
@@ -54,7 +54,14 @@ from .state import ClientSession, ModelBundle
54
 
55
  class Demo:
56
  def __init__(self, default_model_name: str = DEFAULT_MODEL):
57
- self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
58
  print(f"Using device: {self.device}")
59
  self.models: dict[str, ModelBundle] = {}
60
  resolved = resolve_model_name(default_model_name, "Kimodo")
 
54
 
55
  class Demo:
56
  def __init__(self, default_model_name: str = DEFAULT_MODEL):
57
+ requested_device = (os.environ.get("KIMODO_DEVICE") or "").strip().lower()
58
+ if requested_device and requested_device != "auto":
59
+ self.device = requested_device
60
+ elif HF_MODE:
61
+ # ZeroGPU can report CUDA availability while blocking low-level CUDA init.
62
+ self.device = "cpu"
63
+ else:
64
+ self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
65
  print(f"Using device: {self.device}")
66
  self.models: dict[str, ModelBundle] = {}
67
  resolved = resolve_model_name(default_model_name, "Kimodo")