Default HF runtime to CPU and honor KIMODO_DEVICE
- app.py +2 -0
- kimodo/demo/app.py +8 -1
app.py CHANGED

@@ -28,6 +28,8 @@ os.environ.setdefault("SERVER_PORT", str(NATIVE_PORT))
 os.environ.setdefault("HF_MODE", "1")
 # Avoid local LLM2Vec fallback on Spaces (requires gated Llama weights).
 os.environ.setdefault("TEXT_ENCODER_MODE", "api")
+# Prefer CPU on ZeroGPU to avoid low-level CUDA init crashes during model load.
+os.environ.setdefault("KIMODO_DEVICE", "cpu")
 
 _state: dict[str, object] = {
     "ok": False,
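Because os.environ.setdefault only writes a key that is not already set, a KIMODO_DEVICE exported by the Space owner still takes precedence over the new "cpu" default. A minimal standalone sketch of that precedence, assuming nothing beyond the stdlib:

    import os

    # Simulate a user exporting KIMODO_DEVICE before the Space starts.
    os.environ["KIMODO_DEVICE"] = "cuda:0"

    # The startup line from app.py above: a no-op because the key already exists.
    os.environ.setdefault("KIMODO_DEVICE", "cpu")

    print(os.environ["KIMODO_DEVICE"])  # -> cuda:0; the explicit setting survives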
kimodo/demo/app.py CHANGED

@@ -54,7 +54,14 @@ from .state import ClientSession, ModelBundle
 
 class Demo:
     def __init__(self, default_model_name: str = DEFAULT_MODEL):
-        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+        requested_device = (os.environ.get("KIMODO_DEVICE") or "").strip().lower()
+        if requested_device and requested_device != "auto":
+            self.device = requested_device
+        elif HF_MODE:
+            # ZeroGPU can report CUDA availability while blocking low-level CUDA init.
+            self.device = "cpu"
+        else:
+            self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
         print(f"Using device: {self.device}")
         self.models: dict[str, ModelBundle] = {}
         resolved = resolve_model_name(default_model_name, "Kimodo")
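Read together, the new __init__ applies a three-step precedence: an explicit KIMODO_DEVICE wins, then the HF_MODE CPU safeguard, then plain CUDA autodetection. Below is a sketch of that order as a standalone function; resolve_device is a hypothetical name (the commit inlines this logic in Demo.__init__), and reading HF_MODE from the environment as "1" is an assumption based on app.py above:

    import os

    import torch

    # Assumption: the demo's HF_MODE flag mirrors the "1" set in app.py.
    HF_MODE = os.environ.get("HF_MODE") == "1"


    def resolve_device() -> str:
        """Hypothetical helper mirroring the precedence in Demo.__init__."""
        requested = (os.environ.get("KIMODO_DEVICE") or "").strip().lower()
        if requested and requested != "auto":
            return requested  # 1. Explicit override always wins.
        if HF_MODE:
            return "cpu"  # 2. ZeroGPU safeguard: skip CUDA entirely.
        # 3. Outside Spaces, fall back to plain autodetection.
        return "cuda:0" if torch.cuda.is_available() else "cpu"

Note that KIMODO_DEVICE=auto (or an unset variable) deliberately falls through to the HF_MODE and autodetection branches, so the override never bypasses the ZeroGPU safeguard unless a concrete device is named.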