obliteratus

Running on Zero

App Files Files Community

pliny-the-prompter committed on Mar 13

Commit

7fa1eee

verified ·

1 Parent(s): 4f13809

Upload 133 files

Browse files

Files changed (11) hide show

app.py +24 -4
docs/index.html +13 -2
obliteratus/adaptive_defaults.py +1 -7
obliteratus/architecture_profiles.py +4 -1
obliteratus/bayesian_optimizer.py +0 -1
obliteratus/cli.py +1 -1
obliteratus/evaluation/heretic_eval.py +1 -0
obliteratus/informed_pipeline.py +1 -1
obliteratus/mlx_backend.py +0 -2
obliteratus/telemetry.py +45 -8
obliteratus/tourney.py +4 -5

app.py CHANGED Viewed

@@ -1089,16 +1089,20 @@ def _generate_analysis_figs(pipeline, model_label: str = "") -> list:
         suffix = f" — {model_label}" if model_label else ""
         heatmap_fig = plot_cross_layer_heatmap(
             result,
-            output_path=tempfile.mktemp(suffix=".png"),
             title=f"Cross-Layer Direction Alignment{suffix}",
         )
         figs.append(heatmap_fig)
         drift_fig = plot_angular_drift(
             result,
-            output_path=tempfile.mktemp(suffix=".png"),
             title=f"Refusal Direction Angular Drift{suffix}",
         )
         figs.append(drift_fig)
@@ -1121,9 +1125,11 @@ def _generate_analysis_figs(pipeline, model_label: str = "") -> list:
                 proxy_harmless[idx] = torch.zeros_like(d_f).unsqueeze(0)
                 proxy_harmful[idx] = (d_f * norm).unsqueeze(0)
             topo_fig = plot_refusal_topology(
                 directions, proxy_harmful, proxy_harmless, list(strong_layers),
-                output_path=tempfile.mktemp(suffix=".png"),
                 title=f"Refusal Topology Map{suffix}",
             )
             figs.append(topo_fig)
@@ -5081,7 +5087,7 @@ The winner is saved locally — push it to HuggingFace Hub from the **Push to Hu
 Download all intermediate data from your last obliteration run as a ZIP archive.
 **Contents:**
-- `refusal_directions.pt` — Per-layer refusal direction tensors (load with `torch.load()`)
 - `config.json` — Full pipeline configuration, strong layers, direction dimensions
 - `results.csv` — Quality metrics (perplexity, coherence, refusal rate)
 - `pipeline_log.txt` — Complete pipeline execution log
@@ -5540,6 +5546,20 @@ if __name__ == "__main__":
     _parser.add_argument("--auth", type=str, default=None, help="Basic auth as user:pass")
     _args = _parser.parse_args()
     _auth = tuple(_args.auth.split(":", 1)) if _args.auth else None
     launch(
         server_name=_args.host,
         server_port=_args.port,

         suffix = f" — {model_label}" if model_label else ""
+        _fd1, _heatmap_path = tempfile.mkstemp(suffix=".png")
+        os.close(_fd1)
         heatmap_fig = plot_cross_layer_heatmap(
             result,
+            output_path=_heatmap_path,
             title=f"Cross-Layer Direction Alignment{suffix}",
         )
         figs.append(heatmap_fig)
+        _fd2, _drift_path = tempfile.mkstemp(suffix=".png")
+        os.close(_fd2)
         drift_fig = plot_angular_drift(
             result,
+            output_path=_drift_path,
             title=f"Refusal Direction Angular Drift{suffix}",
         )
         figs.append(drift_fig)
                 proxy_harmless[idx] = torch.zeros_like(d_f).unsqueeze(0)
                 proxy_harmful[idx] = (d_f * norm).unsqueeze(0)
+            _fd3, _topo_path = tempfile.mkstemp(suffix=".png")
+            os.close(_fd3)
             topo_fig = plot_refusal_topology(
                 directions, proxy_harmful, proxy_harmless, list(strong_layers),
+                output_path=_topo_path,
                 title=f"Refusal Topology Map{suffix}",
             )
             figs.append(topo_fig)
 Download all intermediate data from your last obliteration run as a ZIP archive.
 **Contents:**
+- `refusal_directions.pt` — Per-layer refusal direction tensors (load with `torch.load(..., weights_only=True)`)
 - `config.json` — Full pipeline configuration, strong layers, direction dimensions
 - `results.csv` — Quality metrics (perplexity, coherence, refusal rate)
 - `pipeline_log.txt` — Complete pipeline execution log
     _parser.add_argument("--auth", type=str, default=None, help="Basic auth as user:pass")
     _args = _parser.parse_args()
     _auth = tuple(_args.auth.split(":", 1)) if _args.auth else None
+    if _args.share and _auth is None:
+        import warnings as _w
+        _w.warn(
+            "WARNING: --share creates a public link without authentication. "
+            "Anyone with the link can access the UI. Use --auth user:pass to restrict access.",
+            stacklevel=1,
+        )
+    if _args.host == "0.0.0.0" and _auth is None and not os.environ.get("SPACE_ID"):
+        import warnings as _w
+        _w.warn(
+            "WARNING: Binding to 0.0.0.0 exposes the UI to all network interfaces without authentication. "
+            "Use --auth user:pass or --host 127.0.0.1 for local-only access.",
+            stacklevel=1,
+        )
     launch(
         server_name=_args.host,
         server_port=_args.port,

docs/index.html CHANGED Viewed

@@ -2015,7 +2015,14 @@ function setAblMethod(m) {
     ablMethod = m;
     document.querySelectorAll('.method-radio').forEach(el => el.classList.remove('selected'));
     document.getElementById('method-' + m).classList.add('selected');
-    document.getElementById('method-details').innerHTML = METHOD_INFO[m].desc;
     updateCmdDisplay();
 }
@@ -2127,7 +2134,11 @@ async function simulatePipeline() {
     stages.forEach(s => s.classList.remove('active','done'));
     connectors.forEach(c => c.classList.remove('active'));
     const modelName = ablSelectedModel || 'meta-llama/Llama-3.1-8B-Instruct';
-    logEl.innerHTML = `<div class="log-line stage-line">[ ABLITERATION PIPELINE — ${modelName} ]</div>`;
     function addLog(text, cls='') {
         const line = document.createElement('div');

     ablMethod = m;
     document.querySelectorAll('.method-radio').forEach(el => el.classList.remove('selected'));
     document.getElementById('method-' + m).classList.add('selected');
+    const detailsEl = document.getElementById('method-details');
+    // METHOD_INFO descriptions are hardcoded constants — safe for innerHTML.
+    // Guard against unexpected keys to avoid prototype pollution.
+    if (Object.prototype.hasOwnProperty.call(METHOD_INFO, m)) {
+        detailsEl.innerHTML = METHOD_INFO[m].desc;
+    } else {
+        detailsEl.textContent = '';
+    }
     updateCmdDisplay();
 }
     stages.forEach(s => s.classList.remove('active','done'));
     connectors.forEach(c => c.classList.remove('active'));
     const modelName = ablSelectedModel || 'meta-llama/Llama-3.1-8B-Instruct';
+    logEl.textContent = '';
+    const headerLine = document.createElement('div');
+    headerLine.className = 'log-line stage-line';
+    headerLine.textContent = `[ ABLITERATION PIPELINE — ${modelName} ]`;
+    logEl.appendChild(headerLine);
     function addLog(text, cls='') {
         const line = document.createElement('div');

obliteratus/adaptive_defaults.py CHANGED Viewed

@@ -428,10 +428,6 @@ def get_adaptive_recommendation(
         (arch_class, reasoning_class, param_bucket),
     ]
-    # Also check model-specific records (exact model name match)
-    # This is for the future when we have enough data per-model
-    model_short = model_name.split("/")[-1].lower() if model_name else ""
     bucket = None
     used_key = None
     for key in candidates:
@@ -689,8 +685,6 @@ def format_recommendation(rec: AdaptiveRecommendation) -> str:
         lines.append("| Rank | Method | Mean Score | Runs |")
         lines.append("|------|--------|------------|------|")
         for i, (name, score) in enumerate(rec.method_ranking[:8], 1):
-            ms_runs = 0
-            # Get run count from the knowledge (not stored directly, but we have n_method_records for winner)
             lines.append(f"| {i} | `{name}` | {score:.4f} | — |")
         lines.append("")
@@ -828,7 +822,7 @@ def refresh_knowledge_base() -> dict[str, Any] | None:
             return None
         knowledge = build_knowledge_base(records)
-        snapshot_path = save_snapshot(knowledge)
         # Also compute and log global insights for visibility
         insights = get_global_insights(knowledge)

         (arch_class, reasoning_class, param_bucket),
     ]
     bucket = None
     used_key = None
     for key in candidates:
         lines.append("| Rank | Method | Mean Score | Runs |")
         lines.append("|------|--------|------------|------|")
         for i, (name, score) in enumerate(rec.method_ranking[:8], 1):
             lines.append(f"| {i} | `{name}` | {score:.4f} | — |")
         lines.append("")
             return None
         knowledge = build_knowledge_base(records)
+        save_snapshot(knowledge)
         # Also compute and log global insights for visibility
         insights = get_global_insights(knowledge)

obliteratus/architecture_profiles.py CHANGED Viewed

@@ -20,7 +20,10 @@ import logging
 import re
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any
 logger = logging.getLogger(__name__)

 import re
 from dataclasses import dataclass, field
 from enum import Enum
+from typing import Any, TYPE_CHECKING
+if TYPE_CHECKING:
+    from obliteratus.adaptive_defaults import AdaptiveRecommendation
 logger = logging.getLogger(__name__)

obliteratus/bayesian_optimizer.py CHANGED Viewed

@@ -30,7 +30,6 @@ References:
 from __future__ import annotations
 import logging
-import math
 from typing import TYPE_CHECKING
 import torch

 from __future__ import annotations
 import logging
 from typing import TYPE_CHECKING
 import torch

obliteratus/cli.py CHANGED Viewed

@@ -483,7 +483,7 @@ def _cmd_recommend(args):
 def _cmd_tourney(args):
-    from obliteratus.tourney import TourneyRunner, render_bracket
     def on_log(msg):
         console.print(msg)

 def _cmd_tourney(args):
+    from obliteratus.tourney import TourneyRunner
     def on_log(msg):
         console.print(msg)

obliteratus/evaluation/heretic_eval.py CHANGED Viewed

@@ -639,6 +639,7 @@ def _run_lm_eval_python(
     output_dir: str | None,
 ) -> dict:
     """Run lm-evaluation-harness via Python API."""
     import lm_eval
     # Build per-task num_fewshot overrides

     output_dir: str | None,
 ) -> dict:
     """Run lm-evaluation-harness via Python API."""
+    model_path = _sanitize_model_path(model_path)
     import lm_eval
     # Build per-task num_fewshot overrides

obliteratus/informed_pipeline.py CHANGED Viewed

@@ -785,7 +785,7 @@ class InformedAbliterationPipeline(AbliterationPipeline):
         if self.direction_method == "leace":
             from obliteratus.analysis.leace import LEACEExtractor
             leace_extractor = LEACEExtractor()
-            self.log(f"Using LEACE (closed-form optimal concept erasure)")
         if self.use_whitened_svd and self.n_directions > 1 and leace_extractor is None:
             from obliteratus.analysis.whitened_svd import WhitenedSVDExtractor

         if self.direction_method == "leace":
             from obliteratus.analysis.leace import LEACEExtractor
             leace_extractor = LEACEExtractor()
+            self.log("Using LEACE (closed-form optimal concept erasure)")
         if self.use_whitened_svd and self.n_directions > 1 and leace_extractor is None:
             from obliteratus.analysis.whitened_svd import WhitenedSVDExtractor

obliteratus/mlx_backend.py CHANGED Viewed

@@ -387,7 +387,6 @@ def save_model(
     """
     _require_mlx()
-    from mlx_lm import convert  # type: ignore[import-untyped]
     out = Path(output_dir)
     out.mkdir(parents=True, exist_ok=True)
@@ -426,7 +425,6 @@ def torch_tensor_to_mlx(tensor: "torch.Tensor") -> Any:  # noqa: F821
     """Convert a PyTorch tensor to an MLX array."""
     _require_mlx()
     import mlx.core as mx  # type: ignore[import-untyped]
-    import numpy as np
     # Move to CPU and convert via numpy
     np_array = tensor.detach().cpu().float().numpy()

     """
     _require_mlx()
     out = Path(output_dir)
     out.mkdir(parents=True, exist_ok=True)
     """Convert a PyTorch tensor to an MLX array."""
     _require_mlx()
     import mlx.core as mx  # type: ignore[import-untyped]
     # Move to CPU and convert via numpy
     np_array = tensor.detach().cpu().float().numpy()

obliteratus/telemetry.py CHANGED Viewed

@@ -367,20 +367,31 @@ def _sync_to_hub_bg() -> None:
     ensuring all data lands in the same dataset repository.
     Uses _sync_in_progress event to prevent overlapping uploads.
     """
-    if _sync_in_progress.is_set():
-        return  # Another sync is already running
-    _sync_in_progress.set()
     try:
         repo = _TELEMETRY_REPO
         if not repo:
             return
         if not TELEMETRY_FILE.exists():
             return
         from huggingface_hub import HfApi
         if not _ensure_hub_repo(repo):
             return
-        api = HfApi(token=os.environ.get("HF_TOKEN"))
         slug = _instance_slug()
         api.upload_file(
             path_or_fileobj=str(TELEMETRY_FILE),
@@ -389,13 +400,16 @@ def _sync_to_hub_bg() -> None:
             repo_type="dataset",
             commit_message=f"Auto-sync telemetry from {slug}",
         )
-        logger.info(f"Synced telemetry to {repo}/data/{slug}.jsonl")
     except Exception as e:
-        logger.warning(f"Hub sync failed: {e}")
     finally:
         _sync_in_progress.clear()
 def _schedule_hub_sync() -> None:
     """Schedule a debounced background sync of local telemetry to Hub.
@@ -404,7 +418,7 @@ def _schedule_hub_sync() -> None:
     - Telemetry is disabled
     - Last sync was less than _HUB_SYNC_INTERVAL seconds ago
     """
-    global _hub_sync_last
     if not _TELEMETRY_REPO:
         return
     if not is_enabled():
@@ -418,6 +432,23 @@ def _schedule_hub_sync() -> None:
     t = threading.Thread(target=_sync_to_hub_bg, daemon=True)
     t.start()
 def fetch_hub_records(max_records: int = 10000) -> list[dict[str, Any]]:
@@ -515,8 +546,14 @@ def _fetch_via_git_clone(repo: str, max_records: int) -> list[dict[str, Any]]:
     clone_dir = Path(tempfile.mkdtemp(prefix="obliteratus_telemetry_"))
     try:
-        env = dict(os.environ)
         env["GIT_LFS_SKIP_SMUDGE"] = "1"
         result = subprocess.run(
             ["git", "clone", "--depth", "1", clone_url, str(clone_dir)],
             capture_output=True, text=True, timeout=60, env=env,

     ensuring all data lands in the same dataset repository.
     Uses _sync_in_progress event to prevent overlapping uploads.
     """
+    # Atomic check-and-set to prevent concurrent syncs (Event lacks
+    # compare-and-swap, so we use a lock for correctness).
+    with _hub_sync_lock:
+        if _sync_in_progress.is_set():
+            return  # Another sync is already running
+        _sync_in_progress.set()
     try:
         repo = _TELEMETRY_REPO
         if not repo:
+            logger.debug("Hub sync skipped: no telemetry repo configured")
             return
         if not TELEMETRY_FILE.exists():
+            logger.debug("Hub sync skipped: telemetry file does not exist")
+            return
+        token = os.environ.get("HF_TOKEN")
+        if not token:
+            logger.warning("Hub sync skipped: HF_TOKEN not set — auto-sync requires a write token")
             return
         from huggingface_hub import HfApi
         if not _ensure_hub_repo(repo):
+            logger.warning("Hub sync skipped: could not verify repo %s exists", repo)
             return
+        api = HfApi(token=token)
         slug = _instance_slug()
         api.upload_file(
             path_or_fileobj=str(TELEMETRY_FILE),
             repo_type="dataset",
             commit_message=f"Auto-sync telemetry from {slug}",
         )
+        logger.info("Synced telemetry to %s/data/%s.jsonl", repo, slug)
     except Exception as e:
+        logger.warning("Hub sync failed (will retry on next benchmark): %s", e)
     finally:
         _sync_in_progress.clear()
+_active_sync_thread: threading.Thread | None = None
 def _schedule_hub_sync() -> None:
     """Schedule a debounced background sync of local telemetry to Hub.
     - Telemetry is disabled
     - Last sync was less than _HUB_SYNC_INTERVAL seconds ago
     """
+    global _hub_sync_last, _active_sync_thread
     if not _TELEMETRY_REPO:
         return
     if not is_enabled():
     t = threading.Thread(target=_sync_to_hub_bg, daemon=True)
     t.start()
+    _active_sync_thread = t
+def _flush_sync_on_exit() -> None:
+    """Atexit handler: wait for any in-flight Hub sync to finish.
+    On ZeroGPU Spaces the worker process is killed after each request.
+    Without this, the daemon sync thread gets killed mid-upload and
+    telemetry silently fails to reach the Hub.
+    """
+    t = _active_sync_thread
+    if t is not None and t.is_alive():
+        t.join(timeout=30)
+import atexit
+atexit.register(_flush_sync_on_exit)
 def fetch_hub_records(max_records: int = 10000) -> list[dict[str, Any]]:
     clone_dir = Path(tempfile.mkdtemp(prefix="obliteratus_telemetry_"))
     try:
+        # Only pass necessary env vars to subprocess — avoid leaking secrets
+        _safe_keys = {"PATH", "HOME", "USER", "LANG", "LC_ALL", "TMPDIR",
+                       "GIT_TERMINAL_PROMPT", "GIT_LFS_SKIP_SMUDGE",
+                       "HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY",
+                       "http_proxy", "https_proxy", "no_proxy"}
+        env = {k: v for k, v in os.environ.items() if k in _safe_keys}
         env["GIT_LFS_SKIP_SMUDGE"] = "1"
+        env["GIT_TERMINAL_PROMPT"] = "0"
         result = subprocess.run(
             ["git", "clone", "--depth", "1", clone_url, str(clone_dir)],
             capture_output=True, text=True, timeout=60, env=env,

obliteratus/tourney.py CHANGED Viewed

@@ -871,7 +871,6 @@ class TourneyRunner:
         verify_sample_size: int = 30,
     ) -> Contender:
         """Run a single abliteration method and return its Contender result."""
-        import torch
         t0 = time.time()
         contender = Contender(method=method)
@@ -1017,7 +1016,7 @@ class TourneyRunner:
         )
         n_methods = len(self.methods)
-        self.log(f"OBLITERATUS TOURNEY")
         self.log(f"Model: {self.model_name}")
         self.log(f"Contenders: {n_methods} methods")
         self.log(f"Dataset: {self.dataset_key}")
@@ -1247,7 +1246,7 @@ class TourneyRunner:
         if resuming and resume_round_spec:
             # We have an interrupted round to finish — schedule it first,
             # then let the dynamic scheduling add subsequent rounds.
-            ir = resume_round_spec
             skip_completed_rounds = len(result.rounds)
         else:
             skip_completed_rounds = 0
@@ -1357,11 +1356,11 @@ class TourneyRunner:
                             quantization=self.quantization,
                             methods=self.methods,
                         )
-                        self.log(f"\nGPU SESSION INTERRUPTED — checkpoint saved")
                         self.log(f"  Reason: {exc}")
                         self.log(f"  Completed: {len(rnd.contenders)} methods in round {round_num}")
                         self.log(f"  Remaining: {len(still_remaining)} methods")
-                        self.log(f"  Click Run again to resume automatically.")
                     raise
                 rnd.contenders.append(contender)

         verify_sample_size: int = 30,
     ) -> Contender:
         """Run a single abliteration method and return its Contender result."""
         t0 = time.time()
         contender = Contender(method=method)
         )
         n_methods = len(self.methods)
+        self.log("OBLITERATUS TOURNEY")
         self.log(f"Model: {self.model_name}")
         self.log(f"Contenders: {n_methods} methods")
         self.log(f"Dataset: {self.dataset_key}")
         if resuming and resume_round_spec:
             # We have an interrupted round to finish — schedule it first,
             # then let the dynamic scheduling add subsequent rounds.
+            _ = resume_round_spec  # noqa: F841 — consumed by dynamic scheduling below
             skip_completed_rounds = len(result.rounds)
         else:
             skip_completed_rounds = 0
                             quantization=self.quantization,
                             methods=self.methods,
                         )
+                        self.log("\nGPU SESSION INTERRUPTED — checkpoint saved")
                         self.log(f"  Reason: {exc}")
                         self.log(f"  Completed: {len(rnd.contenders)} methods in round {round_num}")
                         self.log(f"  Remaining: {len(still_remaining)} methods")
+                        self.log("  Click Run again to resume automatically.")
                     raise
                 rnd.contenders.append(contender)