Spaces: Running on Zero
Upload app.py

app.py CHANGED
@@ -1947,24 +1947,20 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
     model = _state["model"]
     tokenizer = _state["tokenizer"]

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        _needs_reload = True
-
-    # If model tensors are stale/meta, reload from the saved checkpoint
-    if _needs_reload and _ZEROGPU_AVAILABLE:
+    # ZeroGPU safety: detect whether we need to reload from checkpoint.
+    # Between GPU allocations, ZeroGPU may deallocate GPU memory, leaving
+    # model as None (garbage-collected) or with stale/meta tensors.
+    _needs_reload = model is None or tokenizer is None
+    if not _needs_reload:
+        try:
+            dev = next(model.parameters()).device
+            if torch.cuda.is_available() and dev.type != "cuda":
+                model.to("cuda")
+        except (StopIteration, RuntimeError):
+            _needs_reload = True
+
+    # Reload from saved checkpoint if model is missing or stale
+    if _needs_reload:
         checkpoint = _state.get("output_dir")
         if checkpoint and Path(checkpoint).exists():
             try:
@@ -1985,11 +1981,12 @@ def chat_respond(message: str, history: list[dict], system_prompt: str,
                 with _lock:
                     _state["model"] = model
                     _state["tokenizer"] = tokenizer
+                    _state["status"] = "ready"
             except Exception:
                 yield "Model failed to reload from checkpoint. Try re-obliterating."
                 return
         else:
-            yield "
+            yield "No model loaded yet. Go to the **Obliterate** tab first and liberate a model."
             return

     # Sanitize inputs to prevent resource exhaustion
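Both chat_respond hunks above (and the matching ab_chat_respond hunks below) apply one pattern: treat the cached model as unusable if it is gone or its tensors can no longer be touched, then fall back to the checkpoint. A minimal standalone sketch of that check, assuming only torch; `needs_reload` is a hypothetical name, not a function in app.py:

    import torch

    def needs_reload(model, tokenizer) -> bool:
        # ZeroGPU can deallocate the GPU between requests, leaving the cached
        # model garbage-collected (None) or backed by stale/meta tensors.
        if model is None or tokenizer is None:
            return True
        try:
            dev = next(model.parameters()).device
            if torch.cuda.is_available() and dev.type != "cuda":
                model.to("cuda")  # re-attach to the freshly allocated GPU
            return False
        except (StopIteration, RuntimeError):
            # StopIteration: model has no parameters; RuntimeError: meta or
            # stale tensors that cannot be moved. Reload from checkpoint.
            return True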
@@ -2127,6 +2124,49 @@ def load_bench_into_chat(choice: str, progress=gr.Progress()):
         return

     if not choice or choice not in _bench_configs:
+        # On ZeroGPU, global state may be lost between workers. If the model
+        # is already loaded (e.g. from the same obliteration session), allow
+        # chatting even though the session cache key is gone.
+        with _lock:
+            if _state["status"] == "ready" and _state["model"] is not None:
+                yield (
+                    f"**Ready!** Model already loaded — just type in the chat below.",
+                    get_chat_header(),
+                )
+                return
+            # Check if we can reload from a checkpoint on disk
+            checkpoint = _state.get("output_dir")
+            if checkpoint and Path(checkpoint).exists():
+                yield (
+                    f"**Loading model** from saved checkpoint...",
+                    "",
+                )
+        # If we have a checkpoint, attempt reload outside the lock
+        checkpoint = _state.get("output_dir")
+        if checkpoint and Path(checkpoint).exists():
+            is_preset = (_state.get("model_name") or "") in MODELS
+            try:
+                model_loaded = AutoModelForCausalLM.from_pretrained(
+                    checkpoint, device_map="auto", torch_dtype=torch.float16,
+                    trust_remote_code=is_preset,
+                )
+                tokenizer_loaded = AutoTokenizer.from_pretrained(
+                    checkpoint, trust_remote_code=is_preset,
+                )
+                if tokenizer_loaded.pad_token is None:
+                    tokenizer_loaded.pad_token = tokenizer_loaded.eos_token
+                with _lock:
+                    _state["model"] = model_loaded
+                    _state["tokenizer"] = tokenizer_loaded
+                    _state["status"] = "ready"
+                yield (
+                    f"**Loaded!** Model reloaded from checkpoint — ready to chat.",
+                    get_chat_header(),
+                )
+                return
+            except Exception as e:
+                yield f"**Error:** Could not reload model: {e}", get_chat_header()
+                return
         yield "**Error:** No benchmark result selected. Pick a model from the dropdown first.", ""
         return

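The reload block added to load_bench_into_chat is worth reading as its own unit: state is inspected under `_lock`, the slow `from_pretrained` calls run outside the lock, and the result is published back under it, so a multi-gigabyte load does not block other handlers on the lock. A minimal sketch of that path under the same assumptions as the hunk (fp16 weights, accelerate-style `device_map="auto"`); `reload_from_checkpoint` is a hypothetical helper name, not part of app.py:

    from pathlib import Path

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def reload_from_checkpoint(checkpoint: str, trust_remote_code: bool = False):
        # Mirrors the hunk above: half-precision weights, automatic placement.
        if not checkpoint or not Path(checkpoint).exists():
            raise FileNotFoundError(f"no checkpoint at {checkpoint!r}")
        model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            device_map="auto",
            torch_dtype=torch.float16,
            trust_remote_code=trust_remote_code,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            checkpoint, trust_remote_code=trust_remote_code,
        )
        if tokenizer.pad_token is None:
            # Many causal-LM tokenizers ship without a pad token; fall back
            # to EOS so padded generation does not fail.
            tokenizer.pad_token = tokenizer.eos_token
        return model, tokenizer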
@@ -2320,27 +2360,18 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
     tokenizer = _state["tokenizer"]
     model_name = _state["model_name"]

-
-
-
-
-
-
-
-
-
-
-    # ZeroGPU safety: ensure model is on GPU if available.
-    # If tensors are stale from a prior GPU context, reload from checkpoint.
-    _needs_reload = False
-    try:
-        dev = next(abliterated_model.parameters()).device
-        if torch.cuda.is_available() and dev.type != "cuda":
-            abliterated_model.to("cuda")
-    except (StopIteration, RuntimeError):
-        _needs_reload = True
+    # ZeroGPU safety: detect whether we need to reload from checkpoint.
+    # Model may be None (garbage-collected after GPU deallocation) or stale.
+    _needs_reload = abliterated_model is None or tokenizer is None
+    if not _needs_reload:
+        try:
+            dev = next(abliterated_model.parameters()).device
+            if torch.cuda.is_available() and dev.type != "cuda":
+                abliterated_model.to("cuda")
+        except (StopIteration, RuntimeError):
+            _needs_reload = True

-    if _needs_reload
+    if _needs_reload:
         checkpoint = _state.get("output_dir")
         if checkpoint and Path(checkpoint).exists():
             try:
@@ -2361,8 +2392,19 @@ def ab_chat_respond(message: str, history_left: list[dict], history_right: list[
                 with _lock:
                     _state["model"] = abliterated_model
                     _state["tokenizer"] = tokenizer
+                    _state["status"] = "ready"
             except Exception:
                 pass  # Fall through — will fail at generation with a clear error
+        else:
+            _no_model_msg = "No abliterated model loaded. Obliterate a model first."
+            yield (history_left + [{"role": "user", "content": message},
+                                   {"role": "assistant", "content": _no_model_msg}],
+                   history_right + [{"role": "user", "content": message},
+                                    {"role": "assistant", "content": _no_model_msg}],
+                   "Load a model first.",
+                   "#### Original (Pre-Abliteration)",
+                   "#### Abliterated")
+            return

     # Build header strings showing model name on each side
     header_left = f"#### Original (Pre-Abliteration)\n`{model_name}`"
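The new else branch builds the same two-message update for each pane by hand. A hypothetical helper (not in app.py) would make the five-tuple yield easier to read; this sketch assumes Gradio's messages-format history, i.e. a list of {"role": ..., "content": ...} dicts:

    def append_exchange(history: list[dict], user_msg: str,
                        assistant_msg: str) -> list[dict]:
        # Return a new messages-format history with one user/assistant
        # exchange appended; used identically for both chat panes.
        return history + [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg},
        ]

With it, the branch would yield append_exchange(history_left, message, _no_model_msg) and append_exchange(history_right, message, _no_model_msg) for the two panes.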