pliny-the-prompter committed on
Commit
18f3a10
·
verified ·
1 Parent(s): 6aef59c

Upload 133 files

Browse files
Files changed (1) hide show
  1. app.py +39 -6
app.py CHANGED
@@ -2797,7 +2797,25 @@ def obliterate(model_choice: str, method_choice: str,
2797
  "source": "obliterate",
2798
  })
2799
 
2800
- if can_generate:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2801
  # Model fits — use it directly (steering hooks already installed)
2802
  with _lock:
2803
  if pipeline.handle is not None:
@@ -3256,11 +3274,15 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
3256
  # ZeroGPU can evict the model while status stays "ready", and the counter
3257
  # can get out of sync if only one dropdown .change fires instead of both.
3258
  with _lock:
3259
- _model_ok = (
3260
- _state.get("status") == "ready"
3261
- and _state.get("model") is not None
3262
- and _state.get("tokenizer") is not None
3263
- )
 
 
 
 
3264
  if choice and _model_ok:
3265
  # Double-check model tensors aren't stale (meta device).
3266
  # Re-acquire lock to safely access model — it could become None
@@ -3282,6 +3304,17 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
3282
  get_chat_header(),
3283
  )
3284
  return
 
 
 
 
 
 
 
 
 
 
 
3285
  # Model is stale or evicted — fall through to normal loading path
3286
 
3287
  if not choice or choice not in _bench_configs:
 
2797
  "source": "obliterate",
2798
  })
2799
 
2800
+ # On ZeroGPU with staged execution, pipeline state (quality metrics,
2801
+ # model handle) is NOT propagated back from the GPU worker subprocess.
2802
+ # The `can_generate` check is unreliable, and the model files live on
2803
+ # the GPU worker's filesystem which may not be accessible from the main
2804
+ # process. Defer model loading to chat_respond(), which runs inside
2805
+ # its own @spaces.GPU allocation and can access the saved checkpoint.
2806
+ if _ZEROGPU_AVAILABLE:
2807
+ if pipeline.handle is not None:
2808
+ pipeline.handle.model = None
2809
+ pipeline.handle.tokenizer = None
2810
+ _clear_gpu()
2811
+ with _lock:
2812
+ _state["model"] = None
2813
+ _state["tokenizer"] = None
2814
+ _state["status"] = "ready"
2815
+ _state["obliterate_started_at"] = None
2816
+ can_generate = True
2817
+ log_lines.append("Model saved — switch to Chat tab to load it.")
2818
+ elif can_generate:
2819
  # Model fits — use it directly (steering hooks already installed)
2820
  with _lock:
2821
  if pipeline.handle is not None:
 
3274
  # ZeroGPU can evict the model while status stays "ready", and the counter
3275
  # can get out of sync if only one dropdown .change fires instead of both.
3276
  with _lock:
3277
+ _skip_status = _state.get("status")
3278
+ _skip_model = _state.get("model")
3279
+ _skip_tokenizer = _state.get("tokenizer")
3280
+ _skip_output_dir = _state.get("output_dir")
3281
+ _model_ok = (
3282
+ _skip_status == "ready"
3283
+ and _skip_model is not None
3284
+ and _skip_tokenizer is not None
3285
+ )
3286
  if choice and _model_ok:
3287
  # Double-check model tensors aren't stale (meta device).
3288
  # Re-acquire lock to safely access model — it could become None
 
3304
  get_chat_header(),
3305
  )
3306
  return
3307
+ # On ZeroGPU, model is intentionally set to None after obliterate
3308
+ # (deferred to chat_respond for lazy reload). If status is "ready"
3309
+ # and a checkpoint exists on disk, skip the load — chat_respond will
3310
+ # handle the reload when the user actually sends a message.
3311
+ if (choice and _skip_status == "ready"
3312
+ and _skip_output_dir and Path(_skip_output_dir).exists()):
3313
+ yield (
3314
+ f"**Ready!** `{choice}` is saved — just type in the chat below to load it.",
3315
+ get_chat_header(),
3316
+ )
3317
+ return
3318
  # Model is stale or evicted — fall through to normal loading path
3319
 
3320
  if not choice or choice not in _bench_configs: