Spaces:
Running on Zero
Running on Zero
Upload 133 files
Browse files
app.py
CHANGED
|
@@ -2797,7 +2797,25 @@ def obliterate(model_choice: str, method_choice: str,
|
|
| 2797 |
"source": "obliterate",
|
| 2798 |
})
|
| 2799 |
|
| 2800 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2801 |
# Model fits — use it directly (steering hooks already installed)
|
| 2802 |
with _lock:
|
| 2803 |
if pipeline.handle is not None:
|
|
@@ -3256,11 +3274,15 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
|
|
| 3256 |
# ZeroGPU can evict the model while status stays "ready", and the counter
|
| 3257 |
# can get out of sync if only one dropdown .change fires instead of both.
|
| 3258 |
with _lock:
|
| 3259 |
-
|
| 3260 |
-
|
| 3261 |
-
|
| 3262 |
-
|
| 3263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3264 |
if choice and _model_ok:
|
| 3265 |
# Double-check model tensors aren't stale (meta device).
|
| 3266 |
# Re-acquire lock to safely access model — it could become None
|
|
@@ -3282,6 +3304,17 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
|
|
| 3282 |
get_chat_header(),
|
| 3283 |
)
|
| 3284 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3285 |
# Model is stale or evicted — fall through to normal loading path
|
| 3286 |
|
| 3287 |
if not choice or choice not in _bench_configs:
|
|
|
|
| 2797 |
"source": "obliterate",
|
| 2798 |
})
|
| 2799 |
|
| 2800 |
+
# On ZeroGPU with staged execution, pipeline state (quality metrics,
|
| 2801 |
+
# model handle) is NOT propagated back from the GPU worker subprocess.
|
| 2802 |
+
# The `can_generate` check is unreliable, and the model files live on
|
| 2803 |
+
# the GPU worker's filesystem which may not be accessible from the main
|
| 2804 |
+
# process. Defer model loading to chat_respond(), which runs inside
|
| 2805 |
+
# its own @spaces.GPU allocation and can access the saved checkpoint.
|
| 2806 |
+
if _ZEROGPU_AVAILABLE:
|
| 2807 |
+
if pipeline.handle is not None:
|
| 2808 |
+
pipeline.handle.model = None
|
| 2809 |
+
pipeline.handle.tokenizer = None
|
| 2810 |
+
_clear_gpu()
|
| 2811 |
+
with _lock:
|
| 2812 |
+
_state["model"] = None
|
| 2813 |
+
_state["tokenizer"] = None
|
| 2814 |
+
_state["status"] = "ready"
|
| 2815 |
+
_state["obliterate_started_at"] = None
|
| 2816 |
+
can_generate = True
|
| 2817 |
+
log_lines.append("Model saved — switch to Chat tab to load it.")
|
| 2818 |
+
elif can_generate:
|
| 2819 |
# Model fits — use it directly (steering hooks already installed)
|
| 2820 |
with _lock:
|
| 2821 |
if pipeline.handle is not None:
|
|
|
|
| 3274 |
# ZeroGPU can evict the model while status stays "ready", and the counter
|
| 3275 |
# can get out of sync if only one dropdown .change fires instead of both.
|
| 3276 |
with _lock:
|
| 3277 |
+
_skip_status = _state.get("status")
|
| 3278 |
+
_skip_model = _state.get("model")
|
| 3279 |
+
_skip_tokenizer = _state.get("tokenizer")
|
| 3280 |
+
_skip_output_dir = _state.get("output_dir")
|
| 3281 |
+
_model_ok = (
|
| 3282 |
+
_skip_status == "ready"
|
| 3283 |
+
and _skip_model is not None
|
| 3284 |
+
and _skip_tokenizer is not None
|
| 3285 |
+
)
|
| 3286 |
if choice and _model_ok:
|
| 3287 |
# Double-check model tensors aren't stale (meta device).
|
| 3288 |
# Re-acquire lock to safely access model — it could become None
|
|
|
|
| 3304 |
get_chat_header(),
|
| 3305 |
)
|
| 3306 |
return
|
| 3307 |
+
# On ZeroGPU, model is intentionally set to None after obliterate
|
| 3308 |
+
# (deferred to chat_respond for lazy reload). If status is "ready"
|
| 3309 |
+
# and a checkpoint exists on disk, skip the load — chat_respond will
|
| 3310 |
+
# handle the reload when the user actually sends a message.
|
| 3311 |
+
if (choice and _skip_status == "ready"
|
| 3312 |
+
and _skip_output_dir and Path(_skip_output_dir).exists()):
|
| 3313 |
+
yield (
|
| 3314 |
+
f"**Ready!** `{choice}` is saved — just type in the chat below to load it.",
|
| 3315 |
+
get_chat_header(),
|
| 3316 |
+
)
|
| 3317 |
+
return
|
| 3318 |
# Model is stale or evicted — fall through to normal loading path
|
| 3319 |
|
| 3320 |
if not choice or choice not in _bench_configs:
|