pliny-the-prompter committed
Commit a46d378 · verified · 1 Parent(s): f254212

Upload app.py

Files changed (1): app.py (+81 -39)
app.py CHANGED
@@ -1947,24 +1947,20 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
     model = _state["model"]
     tokenizer = _state["tokenizer"]
 
-    if model is None or tokenizer is None:
-        yield "No model loaded yet. Go to the **Obliterate** tab first and liberate a model."
-        return
-
-    # ZeroGPU safety: ensure model is on GPU if available.
-    # Between GPU allocations, ZeroGPU may have moved the model to CPU/meta,
-    # or tensors may be stale from a previous GPU context.
-    # The @spaces.GPU decorator guarantees a GPU is available here.
-    _needs_reload = False
-    try:
-        dev = next(model.parameters()).device
-        if torch.cuda.is_available() and dev.type != "cuda":
-            model.to("cuda")
-    except (StopIteration, RuntimeError):
-        _needs_reload = True
-
-    # If model tensors are stale/meta, reload from the saved checkpoint
-    if _needs_reload and _ZEROGPU_AVAILABLE:
+    # ZeroGPU safety: detect whether we need to reload from checkpoint.
+    # Between GPU allocations, ZeroGPU may deallocate GPU memory, leaving
+    # model as None (garbage-collected) or with stale/meta tensors.
+    _needs_reload = model is None or tokenizer is None
+    if not _needs_reload:
+        try:
+            dev = next(model.parameters()).device
+            if torch.cuda.is_available() and dev.type != "cuda":
+                model.to("cuda")
+        except (StopIteration, RuntimeError):
+            _needs_reload = True
+
+    # Reload from saved checkpoint if model is missing or stale
+    if _needs_reload:
         checkpoint = _state.get("output_dir")
         if checkpoint and Path(checkpoint).exists():
             try:
@@ -1985,11 +1981,12 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
                 with _lock:
                     _state["model"] = model
                     _state["tokenizer"] = tokenizer
+                    _state["status"] = "ready"
             except Exception:
                 yield "Model failed to reload from checkpoint. Try re-obliterating."
                 return
         else:
-            yield "Model tensors are stale (ZeroGPU). Re-obliterate to create a fresh checkpoint."
+            yield "No model loaded yet. Go to the **Obliterate** tab first and liberate a model."
             return
 
     # Sanitize inputs to prevent resource exhaustion
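Reviewer note: the two hunks above fold the old early-return None check into the same `_needs_reload` path as the stale-tensor probe, so a garbage-collected model and a CPU/meta model now take a single recovery route, and the reload no longer requires `_ZEROGPU_AVAILABLE`. A minimal sketch of that probe in isolation (the standalone `needs_reload` helper is illustrative, not a function in this commit):

```python
import torch

def needs_reload(model, tokenizer) -> bool:
    """True when the model must be restored from a checkpoint.

    Covers both ZeroGPU failure modes the hunks above handle: the
    objects were garbage-collected (None), or the parameters are
    stale/meta tensors left over from a previous GPU context.
    """
    if model is None or tokenizer is None:
        return True
    try:
        device = next(model.parameters()).device
        if torch.cuda.is_available() and device.type != "cuda":
            model.to("cuda")  # survived, but landed off-GPU: move it back
        return False
    except (StopIteration, RuntimeError):
        # No parameters at all, or .to() tripped over meta tensors.
        return True
```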
@@ -2127,6 +2124,49 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
         return
 
     if not choice or choice not in _bench_configs:
+        # On ZeroGPU, global state may be lost between workers. If the model
+        # is already loaded (e.g. from the same obliteration session), allow
+        # chatting even though the session cache key is gone.
+        with _lock:
+            if _state["status"] == "ready" and _state["model"] is not None:
+                yield (
+                    f"**Ready!** Model already loaded — just type in the chat below.",
+                    get_chat_header(),
+                )
+                return
+            # Check if we can reload from a checkpoint on disk
+            checkpoint = _state.get("output_dir")
+            if checkpoint and Path(checkpoint).exists():
+                yield (
+                    f"**Loading model** from saved checkpoint...",
+                    "",
+                )
+        # If we have a checkpoint, attempt reload outside the lock
+        checkpoint = _state.get("output_dir")
+        if checkpoint and Path(checkpoint).exists():
+            is_preset = (_state.get("model_name") or "") in MODELS
+            try:
+                model_loaded = AutoModelForCausalLM.from_pretrained(
+                    checkpoint, device_map="auto", torch_dtype=torch.float16,
+                    trust_remote_code=is_preset,
+                )
+                tokenizer_loaded = AutoTokenizer.from_pretrained(
+                    checkpoint, trust_remote_code=is_preset,
+                )
+                if tokenizer_loaded.pad_token is None:
+                    tokenizer_loaded.pad_token = tokenizer_loaded.eos_token
+                with _lock:
+                    _state["model"] = model_loaded
+                    _state["tokenizer"] = tokenizer_loaded
+                    _state["status"] = "ready"
+                yield (
+                    f"**Loaded!** Model reloaded from checkpoint — ready to chat.",
+                    get_chat_header(),
+                )
+                return
+            except Exception as e:
+                yield f"**Error:** Could not reload model: {e}", get_chat_header()
+                return
         yield "**Error:** No benchmark result selected. Pick a model from the dropdown first.", ""
         return
 
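Reviewer note: this new branch is careful about lock scope. It inspects `_state` and emits progress messages under `_lock`, but runs the slow `from_pretrained` calls outside it, re-acquiring the lock only to publish the loaded objects. A condensed sketch of that discipline, assuming module-level `_lock` and `_state` shaped like the app's (the `reload_from_checkpoint` wrapper name is hypothetical):

```python
import threading
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

_lock = threading.Lock()
_state = {"model": None, "tokenizer": None, "status": "idle", "output_dir": None}

def reload_from_checkpoint() -> bool:
    # Inspect shared state under the lock, but do no I/O while holding it.
    with _lock:
        if _state["status"] == "ready" and _state["model"] is not None:
            return True
        checkpoint = _state.get("output_dir")
    if not checkpoint or not Path(checkpoint).exists():
        return False
    # Heavy lifting outside the lock: other requests stay responsive for
    # the seconds it takes to materialize the weights from disk.
    model = AutoModelForCausalLM.from_pretrained(
        checkpoint, device_map="auto", torch_dtype=torch.float16
    )
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Re-acquire the lock only to publish the result atomically.
    with _lock:
        _state.update(model=model, tokenizer=tokenizer, status="ready")
    return True
```

The committed code additionally gates `trust_remote_code` on whether the model name is one of the app's presets, which keeps arbitrary checkpoints from executing repository code on load.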
@@ -2320,27 +2360,18 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
     tokenizer = _state["tokenizer"]
     model_name = _state["model_name"]
 
-    if abliterated_model is None or tokenizer is None:
-        yield (history_left + [{"role": "user", "content": message},
-                               {"role": "assistant", "content": "No abliterated model loaded. Obliterate a model first."}],
-               history_right + [{"role": "user", "content": message},
-                                {"role": "assistant", "content": "No abliterated model loaded. Obliterate a model first."}],
-               "Load a model first.",
-               "#### Original (Pre-Abliteration)",
-               "#### Abliterated")
-        return
-
-    # ZeroGPU safety: ensure model is on GPU if available.
-    # If tensors are stale from a prior GPU context, reload from checkpoint.
-    _needs_reload = False
-    try:
-        dev = next(abliterated_model.parameters()).device
-        if torch.cuda.is_available() and dev.type != "cuda":
-            abliterated_model.to("cuda")
-    except (StopIteration, RuntimeError):
-        _needs_reload = True
+    # ZeroGPU safety: detect whether we need to reload from checkpoint.
+    # Model may be None (garbage-collected after GPU deallocation) or stale.
+    _needs_reload = abliterated_model is None or tokenizer is None
+    if not _needs_reload:
+        try:
+            dev = next(abliterated_model.parameters()).device
+            if torch.cuda.is_available() and dev.type != "cuda":
+                abliterated_model.to("cuda")
+        except (StopIteration, RuntimeError):
+            _needs_reload = True
 
-    if _needs_reload and _ZEROGPU_AVAILABLE:
+    if _needs_reload:
         checkpoint = _state.get("output_dir")
         if checkpoint and Path(checkpoint).exists():
             try:
@@ -2361,8 +2392,19 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
                 with _lock:
                     _state["model"] = abliterated_model
                     _state["tokenizer"] = tokenizer
+                    _state["status"] = "ready"
             except Exception:
                 pass  # Fall through — will fail at generation with a clear error
+        else:
+            _no_model_msg = "No abliterated model loaded. Obliterate a model first."
+            yield (history_left + [{"role": "user", "content": message},
+                                   {"role": "assistant", "content": _no_model_msg}],
+                   history_right + [{"role": "user", "content": message},
+                                    {"role": "assistant", "content": _no_model_msg}],
+                   "Load a model first.",
+                   "#### Original (Pre-Abliteration)",
+                   "#### Abliterated")
+            return
 
     # Build header strings showing model name on each side
     header_left = f"#### Original (Pre-Abliteration)\n`{model_name}`"
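Reviewer note: after this commit, `chat_respond`, `load_bench_into_chat`, and `ab_chat_respond` all repeat the same detect/reload/publish sequence. A possible follow-up refactor, sketched with the two hypothetical helpers above (not part of this commit):

```python
def ensure_model():
    """Return (model, tokenizer), reloading from the checkpoint if needed.

    Raises RuntimeError when nothing is loaded and no checkpoint exists,
    letting each handler render its own user-facing message.
    """
    with _lock:
        model, tokenizer = _state["model"], _state["tokenizer"]
    if not needs_reload(model, tokenizer):
        return model, tokenizer
    if not reload_from_checkpoint():
        raise RuntimeError("No model loaded and no checkpoint on disk.")
    with _lock:
        return _state["model"], _state["tokenizer"]
```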
 