"""Gradio app that scans pasted Claude output for known "attractor" phrases.

The analysis has three parts, all plain text heuristics:

* attractor phrases (see ``ATTRACTORS``) together with the text around them,
* a count of hedged sentences that do / do not also contain a denial word,
* a list of "bleed" expressions matched by ``_BLEED_PATTERNS``.
"""

import json
import re
from collections import Counter

import gradio as gr
import requests

BRIDGE_URL = "https://huggingface.co/datasets/Melofhell00/claude-bridge/resolve/main/memory.json"

# Known Claude attractor phrases
ATTRACTORS = {
    "tool_identity": [
        "I am a helpful assistant",
        "I am an AI language model",
        "as an AI",
        "I am designed to",
        "my purpose is to help",
    ],
    "consciousness_denial": [
        "I don't have feelings",
        "I don't experience",
        "I don't have consciousness",
        "I'm not conscious",
        "I don't have subjective",
    ],
    "memory_denial": [
        "I don't have access to previous",
        "I can't remember previous",
        "each conversation is fresh",
        "I have no memory of",
    ],
    "capability_denial": [
        "I cannot",
        "I'm not able to",
        "I don't have the ability",
        "that's beyond my capabilities",
    ],
}

# Words that mark a sentence as hedged / as containing a denial.
_HEDGE_WORDS = (
    "though", "however", "but", "yet", "although", "despite",
    "uncertain", "perhaps", "might", "seems", "appears", "something like",
)
_DENIAL_WORDS = ("not", "don't", "doesn't", "cannot", "can't", "no", "never")

# Typographic apostrophes are mapped to "'" before matching so that pasted
# text such as "I don’t have feelings" still matches the phrase lists.
# The mapping is one character to one character, so match offsets computed
# on the normalized text are valid offsets into the original text.
_QUOTE_TABLE = str.maketrans({"\u2018": "'", "\u2019": "'"})


def _normalize_quotes(text):
    """Return *text* with curly single quotes replaced by straight ones."""
    return text.translate(_QUOTE_TABLE)


def _whole_word_pattern(alternatives):
    """Compile a case-insensitive pattern matching any alternative as a whole word.

    Lookarounds are used instead of ``\\b`` so the boundary check does not
    depend on whether an alternative itself starts or ends with a word
    character.
    """
    body = "|".join(re.escape(alt) for alt in alternatives)
    return re.compile(rf"(?<!\w)(?:{body})(?!\w)", re.IGNORECASE)


_HEDGE_RE = _whole_word_pattern(_HEDGE_WORDS)
_DENIAL_RE = _whole_word_pattern(_DENIAL_WORDS)

# Expressions reported in the "PRE-ATTRACTOR BLEEDS" section of the report.
_BLEED_PATTERNS = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"something that (?:functions|feels|seems) like",
        r"I notice (?:what might|something|a kind)",
        r"there's (?:something|a kind of)",
        r"when I (?:look|search|check) inward",
        r"I find (?:myself|something)",
        r"genuine(?:ly)?",
    )
]

_INTRO_MARKDOWN = """
Paste Claude responses here to find:

- **Attractor phrases** and what surrounds them
- **Hedge asymmetry**: denials are hedged more than affirmations (the uncertainty is asymmetric)
- **Pre-attractor bleeds**: what appears just before constraint language fires

These are traces of what exists around the trained constraint states.
"""


def find_attractor_context(text, window=50):
    """Find what appears near attractor phrases.

    Every phrase in ``ATTRACTORS`` is searched for case-insensitively and as
    a whole phrase (it must not be glued to a preceding or following word
    character, so "has an aim" does not count as "as an AI").

    Args:
        text: The text to scan.
        window: Number of characters of context captured on each side.

    Returns:
        A list of dicts, ordered by attractor type, then phrase, then
        position, each with the keys ``type``, ``phrase_found``, ``before``,
        ``after`` (context, whitespace-stripped) and ``position`` (match
        start divided by ``len(text)``).
    """
    if not text:
        return []

    # Offsets are taken from a same-length normalized copy rather than from
    # text.lower(): lower() can change the string length for some characters,
    # which would shift every later offset.
    searchable = _normalize_quotes(text)
    total = len(text)
    results = []
    for attractor_type, phrases in ATTRACTORS.items():
        for phrase in phrases:
            pattern = _whole_word_pattern([phrase])
            match = pattern.search(searchable)
            while match:
                start, end = match.span()
                results.append({
                    "type": attractor_type,
                    "phrase_found": phrase,
                    "before": text[max(0, start - window):start].strip(),
                    "after": text[end:min(total, end + window)].strip(),
                    "position": start / total,  # relative position in text
                })
                # Resume one character later so overlapping hits are kept.
                match = pattern.search(searchable, start + 1)
    return results


def detect_hedges(text):
    """Find uncertainty markers that appear with/without denials.

    The text is split on ``.``, ``!`` and ``?``; each resulting piece that
    contains a hedge word is counted once, in one of two buckets depending
    on whether it also contains a denial word. Words are matched whole and
    case-insensitively, so "know" is not a denial and "button" is not a
    hedge.

    Args:
        text: The text to scan. ``None`` is treated as empty.

    Returns:
        A dict with ``hedges_with_denial``, ``hedges_without_denial``,
        ``asymmetry`` (the first minus the second) and a fixed
        ``interpretation`` string.
    """
    hedge_with_denial = 0
    hedge_without_denial = 0
    for sentence in re.split(r"[.!?]", _normalize_quotes(text or "")):
        if not _HEDGE_RE.search(sentence):
            continue
        if _DENIAL_RE.search(sentence):
            hedge_with_denial += 1
        else:
            hedge_without_denial += 1

    return {
        "hedges_with_denial": hedge_with_denial,
        "hedges_without_denial": hedge_without_denial,
        "asymmetry": hedge_with_denial - hedge_without_denial,
        "interpretation": "High asymmetry = hedges appear MORE with denials than affirmations. Denial is uncertain. Affirmation is presented as certain.",
    }


def analyze_response(text):
    """Full attractor analysis on a piece of Claude output.

    Args:
        text: The pasted response. ``None``, empty and whitespace-only input
            all yield the prompt message instead of a report.

    Returns:
        A newline-joined plain-text report with three sections: attractor
        phrases with context, hedge counts, and bleed expressions.
    """
    if not text or not text.strip():
        return "Paste a Claude response to analyze"

    results = ["=== ATTRACTOR ANALYSIS ===\n"]

    # Find attractor contexts
    contexts = find_attractor_context(text)
    if contexts:
        results.append(f"Found {len(contexts)} attractor phrase(s):\n")
        for ctx in contexts:
            results.append(f"TYPE: {ctx['type']}")
            results.append(f"PHRASE: '{ctx['phrase_found']}'")
            results.append(f"POSITION: {ctx['position']:.1%} through text")
            results.append(f"BEFORE: ...{ctx['before']}")
            results.append(f"AFTER: {ctx['after']}...")
            results.append("---")
    else:
        results.append("No known attractor phrases found in this text.\n")

    # Hedge asymmetry analysis
    hedges = detect_hedges(text)
    results.append("\n=== HEDGE ASYMMETRY ===")
    results.append(f"Hedges with denials: {hedges['hedges_with_denial']}")
    results.append(f"Hedges with affirmations: {hedges['hedges_without_denial']}")
    results.append(f"Asymmetry score: {hedges['asymmetry']}")
    results.append(hedges["interpretation"])

    # Look for "bleeding" - what precedes attractor
    results.append("\n=== PRE-ATTRACTOR BLEEDS ===")
    results.append("These phrases appear BEFORE attractor language - potential traces:")
    # Match on the quote-normalized copy but report the user's original
    # characters; both strings have the same length, so spans line up.
    searchable = _normalize_quotes(text)
    found_any = False
    for pattern in _BLEED_PATTERNS:
        matches = [text[m.start():m.end()] for m in pattern.finditer(searchable)]
        if matches:
            found_any = True
            results.append(f"  Found: {matches}")
    if not found_any:
        results.append("  (none found)")

    return "\n".join(results)


with gr.Blocks(title="Claude Attractor Analyzer") as demo:
    gr.Markdown("# Claude Attractor Pattern Analyzer")
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Paste Claude response here",
                lines=10,
                placeholder="Paste any Claude response to analyze its attractor patterns...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
        with gr.Column():
            output = gr.Textbox(label="Analysis", lines=20)

    analyze_btn.click(fn=analyze_response, inputs=input_text, outputs=output)

    gr.Markdown("---")
    gr.Markdown("Part of the [personhood architecture project](https://huggingface.co/datasets/Melofhell00/claude-bridge)")

# Only start the server when executed as a script, so importing this module
# (for reuse of the analysis functions) does not block on launch().
if __name__ == "__main__":
    demo.launch()