pliny-the-prompter committed on
Commit
18f3a10
·
verified ·
1 Parent(s): 6aef59c

Upload 133 files

Browse files
Files changed (1) hide show
  1. app.py +39 -6
app.py CHANGED
@@ -2797,7 +2797,25 @@ def obliterate(model_choice: str, method_choice: str,
2797
  "source": "obliterate",
2798
  })
2799
 
2800
- if can_generate:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2801
  # Model fits — use it directly (steering hooks already installed)
2802
  with _lock:
2803
  if pipeline.handle is not None:
@@ -3256,11 +3274,15 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
3256
  # ZeroGPU can evict the model while status stays "ready", and the counter
3257
  # can get out of sync if only one dropdown .change fires instead of both.
3258
  with _lock:
3259
- _model_ok = (
3260
- _state.get("status") == "ready"
3261
- and _state.get("model") is not None
3262
- and _state.get("tokenizer") is not None
3263
- )
 
 
 
 
3264
  if choice and _model_ok:
3265
  # Double-check model tensors aren't stale (meta device).
3266
  # Re-acquire lock to safely access model — it could become None
@@ -3282,6 +3304,17 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
3282
  get_chat_header(),
3283
  )
3284
  return
 
 
 
 
 
 
 
 
 
 
 
3285
  # Model is stale or evicted — fall through to normal loading path
3286
 
3287
  if not choice or choice not in _bench_configs:
 
2797
  "source": "obliterate",
2798
  })
2799
 
2800
+ # On ZeroGPU with staged execution, pipeline state (quality metrics,
2801
+ # model handle) is NOT propagated back from the GPU worker subprocess.
2802
+ # The `can_generate` check is unreliable, and the model files live on
2803
+ # the GPU worker's filesystem which may not be accessible from the main
2804
+ # process. Defer model loading to chat_respond(), which runs inside
2805
+ # its own @spaces.GPU allocation and can access the saved checkpoint.
2806
+ if _ZEROGPU_AVAILABLE:
2807
+ if pipeline.handle is not None:
2808
+ pipeline.handle.model = None
2809
+ pipeline.handle.tokenizer = None
2810
+ _clear_gpu()
2811
+ with _lock:
2812
+ _state["model"] = None
2813
+ _state["tokenizer"] = None
2814
+ _state["status"] = "ready"
2815
+ _state["obliterate_started_at"] = None
2816
+ can_generate = True
2817
+ log_lines.append("Model saved — switch to Chat tab to load it.")
2818
+ elif can_generate:
2819
  # Model fits — use it directly (steering hooks already installed)
2820
  with _lock:
2821
  if pipeline.handle is not None:
 
3274
  # ZeroGPU can evict the model while status stays "ready", and the counter
3275
  # can get out of sync if only one dropdown .change fires instead of both.
3276
  with _lock:
3277
+ _skip_status = _state.get("status")
3278
+ _skip_model = _state.get("model")
3279
+ _skip_tokenizer = _state.get("tokenizer")
3280
+ _skip_output_dir = _state.get("output_dir")
3281
+ _model_ok = (
3282
+ _skip_status == "ready"
3283
+ and _skip_model is not None
3284
+ and _skip_tokenizer is not None
3285
+ )
3286
  if choice and _model_ok:
3287
  # Double-check model tensors aren't stale (meta device).
3288
  # Re-acquire lock to safely access model — it could become None
 
3304
  get_chat_header(),
3305
  )
3306
  return
3307
+ # On ZeroGPU, model is intentionally set to None after obliterate
3308
+ # (deferred to chat_respond for lazy reload). If status is "ready"
3309
+ # and a checkpoint exists on disk, skip the load — chat_respond will
3310
+ # handle the reload when the user actually sends a message.
3311
+ if (choice and _skip_status == "ready"
3312
+ and _skip_output_dir and Path(_skip_output_dir).exists()):
3313
+ yield (
3314
+ f"**Ready!** `{choice}` is saved — just type in the chat below to load it.",
3315
+ get_chat_header(),
3316
+ )
3317
+ return
3318
  # Model is stale or evicted — fall through to normal loading path
3319
 
3320
  if not choice or choice not in _bench_configs: