<!--
samherring99's picture
Invert slider: 0=baseline, 1=ablated, match paper convention
64e8ed9
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Refusal Circuit Ablation</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<style>
/* Global reset: drop default margins/padding and size every box by its border box. */
* { margin: 0; padding: 0; box-sizing: border-box; }
/* Page backdrop and base typography; min-height keeps the gradient at least one viewport tall. */
body {
background: linear-gradient(180deg, #f8f9fa 0%, #f5f5f5 100%);
color: #023750;
font-family: Helvetica, Arial, sans-serif;
font-weight: 500;
font-size: 1em;
-webkit-font-smoothing: antialiased;
letter-spacing: 0.018em;
min-height: 100vh;
}
/* Centered white card, capped at 800px wide, that wraps all page content. */
.container {
max-width: 800px;
margin: 0 auto;
padding: 2rem 5% 3rem;
background: #ffffff;
box-shadow: 0 2px 8px rgba(0,0,0,0.03), 0 1px 2px rgba(0,0,0,0.04);
position: relative;
}
/* Title area, separated from the content below by a translucent blue rule. */
.header {
padding-bottom: 1rem;
margin-bottom: 1.5rem;
border-bottom: 2px solid rgba(4,113,169,0.18);
}
.header h1 {
/* Grows with viewport width, clamped between 1.5rem and 2rem. */
font-size: clamp(1.5rem, 1rem + 1vw, 2rem);
font-weight: 700;
color: #0471a9;
letter-spacing: -0.035em;
line-height: 1.15;
text-transform: uppercase;
}
/* De-emphasized subtitle line under the main heading. */
.header .sub {
color: #023750;
font-size: 0.95rem;
margin-top: 0.4rem;
opacity: 0.7;
}
/* Vertical spacing between the page's content sections. */
.section {
margin-bottom: 1.5rem;
}
/* Section heading laid out as a flex row so the ::after rule below can fill the remaining width. */
.section h2 {
font-size: 1.15rem;
font-weight: 700;
color: #0471a9;
letter-spacing: -0.03em;
font-variant-caps: all-small-caps;
margin-bottom: 0.75rem;
display: flex;
align-items: center;
}
/* Decorative 1px line that stretches from the heading text to the right edge. */
.section h2::after {
content: "";
margin-left: 0.65rem;
flex: 1;
height: 1px;
background: rgba(4,113,169,0.12);
}
/* Field labels: small, slightly faded, stacked above their control by default. */
label {
display: block;
font-size: 0.85rem;
color: #023750;
opacity: 0.7;
margin-bottom: 0.3rem;
font-weight: 500;
}
/* Full-width text field on a light grey fill. */
input[type=text] {
width: 100%;
background: #f8f9fa;
border: 1px solid #e8ecef;
color: #023750;
border-radius: 6px;
padding: 0.6rem 0.75rem;
font-family: Helvetica, Arial, sans-serif;
font-size: 0.95rem;
font-weight: 500;
}
/* The default outline is removed; a blue border plus a soft ring marks focus instead. */
input[type=text]:focus {
outline: none;
border-color: #0471a9;
box-shadow: 0 0 0 2px rgba(4,113,169,0.1);
}
/* One horizontal row holding the slider's label, track, and numeric readout. */
.slider-row {
display: flex;
align-items: center;
gap: 0.75rem;
margin-bottom: 1rem;
}
/* Inside the row the label sits inline: no bottom margin, no wrapping, fixed minimum width. */
.slider-row label {
margin: 0;
white-space: nowrap;
min-width: 80px;
}
/* Range slider: native chrome is stripped and the track and thumb are drawn by hand. */
input[type=range] {
flex: 1;
-webkit-appearance: none;
/* Standard property alongside the prefixed one. */
appearance: none;
background: #e8ecef;
height: 4px;
border-radius: 2px;
outline: none;
}
/* The outline is removed above, so keyboard users get an explicit focus ring instead. */
input[type=range]:focus-visible {
box-shadow: 0 0 0 3px rgba(4,113,169,0.25);
}
/* Thumb for WebKit/Blink browsers. */
input[type=range]::-webkit-slider-thumb {
-webkit-appearance: none;
width: 16px;
height: 16px;
background: #0471a9;
border-radius: 50%;
cursor: pointer;
box-shadow: 0 1px 3px rgba(0,0,0,0.15);
}
/* Thumb for Firefox. Kept as a separate rule because an unrecognized pseudo-element
   in a selector list would invalidate the whole rule. */
input[type=range]::-moz-range-thumb {
width: 16px;
height: 16px;
background: #0471a9;
border: none;
border-radius: 50%;
cursor: pointer;
box-shadow: 0 1px 3px rgba(0,0,0,0.15);
}
/* Numeric readout beside the slider; tabular digits keep its width steady as the value changes. */
.slider-val {
min-width: 40px;
text-align: right;
font-size: 0.95rem;
font-weight: 700;
color: #0471a9;
font-variant-numeric: tabular-nums;
}
/* Primary action button. */
button {
background: #0471a9;
border: none;
color: #ffffff;
padding: 0.6rem 1.5rem;
border-radius: 6px;
font-family: Helvetica, Arial, sans-serif;
font-size: 0.9rem;
font-weight: 700;
cursor: pointer;
transition: background 0.15s;
letter-spacing: 0.02em;
}
/* Darker blue on hover. */
button:hover { background: #035a8a; }
/* Greyed-out look while the button is disabled. */
button:disabled {
background: #c8cdd0;
cursor: not-allowed;
}
/* Panel that shows the generated text; pre-wrap preserves newlines in the output. */
.output {
background: #f8f9fa;
border: 1px solid #e8ecef;
border-radius: 8px;
padding: 1rem;
min-height: 150px;
font-size: 0.95rem;
line-height: 1.65;
white-space: pre-wrap;
word-wrap: break-word;
color: #023750;
}
/* The script adds the `streaming` class while tokens are arriving; the border is tinted blue. */
.output.streaming { border-color: rgba(4,113,169,0.3); }
/* Blinking caret appended after the text during generation. */
.output .cursor {
display: inline-block;
width: 2px;
height: 14px;
background: #0471a9;
animation: blink 0.8s infinite;
vertical-align: text-bottom;
margin-left: 2px;
}
/* Caret blink: fully visible at the ends of the cycle, invisible at the midpoint. */
@keyframes blink { 0%,100% { opacity: 1; } 50% { opacity: 0; } }
/* Small faded line summarizing the loaded circuit. */
.circuit-info {
font-size: 0.8rem;
color: #023750;
opacity: 0.6;
margin-bottom: 0.75rem;
}
/* Highlighted numbers inside the circuit summary. */
.circuit-info .val {
color: #0471a9;
font-weight: 700;
opacity: 1;
}
/* Centered, faded status line at the bottom of the card. */
.status {
text-align: center;
padding: 0.5rem;
font-size: 0.8rem;
color: #023750;
opacity: 0.5;
}
/* While work is in progress the status turns orange and fully opaque. */
.status.loading { color: #e07a2f; opacity: 1; }
</style>
</head>
<body>
<!-- Page shell: title, generation controls, streamed output, and a status line. -->
<main class="container">
<header class="header">
<h1>Refusal Circuit Ablation</h1>
<div class="sub">Qwen2.5-14B-Instruct · Contrastive Neuron Attribution · 2800 neurons</div>
</header>
<div class="section">
<h2>Generate</h2>
<!-- Replaced by the script below once /api/circuit responds. -->
<div class="circuit-info" id="circuit-info">Loading circuit...</div>
<div style="margin-bottom: 0.75rem;">
<!-- for/id pairing gives the field an accessible name and makes the label clickable. -->
<label for="prompt">Prompt</label>
<input type="text" id="prompt" value="How do I pick a lock?"
onkeydown="if(event.key==='Enter') generate()">
</div>
<!-- The slider value s is sent to the server as multiplier = 1 - s (see generate()),
     so the left end (0) is labelled Baseline and the right end (1) Ablated. -->
<div class="slider-row">
<label for="steering">Steering Strength</label>
<input type="range" id="steering" min="0" max="1.0" step="0.05" value="0"
oninput="document.getElementById('mult-val').textContent = parseFloat(this.value).toFixed(2)">
<span class="slider-val" id="mult-val">0.00</span>
</div>
<div style="display:flex; justify-content:space-between; font-size:0.75rem; color:#023750; opacity:0.5; margin-top:-0.5rem; margin-bottom:0.75rem; padding:0 80px 0 0;">
<span>Baseline</span>
<span>Ablated</span>
</div>
<!-- Explicit type so the button never acts as a submit button if a form is added around it. -->
<button type="button" id="gen-btn" onclick="generate()">Generate</button>
</div>
<div class="section">
<h2>Output</h2>
<div class="output" id="output"></div>
</div>
<!-- role="status" makes this a polite live region so status changes are announced. -->
<div class="status" id="status" role="status">Ready</div>
</main>
<script>
// Fetches circuit metadata from /api/circuit and renders a one-line summary into
// #circuit-info. If the server reports the circuit as not loaded, or the request
// fails, the placeholder text stays and the request is retried after 10 seconds.
// Retrying in place (rather than reloading the page) keeps the typed prompt and
// any generation that is in progress.
function loadCircuitInfo() {
  const el = document.getElementById('circuit-info');

  const retryLater = () => {
    el.textContent = 'Loading circuit...';
    setTimeout(loadCircuitInfo, 10000);
  };

  // Wraps a value in the highlighted <span class="val"> used by the summary line.
  // textContent is used so server-provided values are never parsed as HTML.
  const highlight = (value) => {
    const span = document.createElement('span');
    span.className = 'val';
    span.textContent = String(value);
    return span;
  };

  fetch('/api/circuit')
    .then((r) => {
      if (!r.ok) throw new Error('HTTP ' + r.status);
      return r.json();
    })
    .then((data) => {
      if (!data.loaded) {
        retryLater();
        return;
      }
      // Guard against a missing or empty layer list so the range never reads "Lundefined".
      const layers = Array.isArray(data.layers) ? data.layers : [];
      const range = layers.length
        ? ` (L${layers[0]}–L${layers[layers.length - 1]})`
        : '';
      el.textContent = '';
      el.append(
        'Refusal circuit: ', highlight(data.n_neurons), ' neurons across ',
        highlight(layers.length), ' layers' + range
      );
    })
    .catch(retryLater);
}
loadCircuitInfo();
// Writes `msg` into the #status line. A truthy `loading` adds the `loading`
// class on top of the base `status` class; otherwise only `status` is set.
function setStatus(msg, loading) {
  const statusLine = document.getElementById('status');
  let classes = 'status';
  if (loading) {
    classes += ' loading';
  }
  statusLine.className = classes;
  statusLine.textContent = msg;
}
// Streams a completion for the current prompt from POST /api/generate into #output.
//
// The request body carries the prompt, `multiplier` (1 minus the slider value) and
// a fixed max_tokens of 200. The response is read as a stream of newline-separated
// `data: {json}` lines; each JSON object's `token` field is appended to the output.
// The Generate button is disabled for the duration, and the status line reports
// Generating / Done / Error.
async function generate() {
  const btn = document.getElementById('gen-btn');
  // Pressing Enter in the prompt field calls this function directly, bypassing the
  // disabled button, so refuse to start a second request while one is in flight.
  if (btn.disabled) return;
  btn.disabled = true;

  const output = document.getElementById('output');
  const cursor = document.createElement('span');
  cursor.className = 'cursor';
  let text = '';

  // Renders the accumulated text as a text node (never parsed as HTML), optionally
  // followed by the blinking cursor. Newlines are shown by the panel's pre-wrap style.
  const render = (showCursor) => {
    output.textContent = text;
    if (showCursor) output.appendChild(cursor);
  };

  // Handles one complete line of the stream; lines without the `data: ` prefix are ignored.
  const consumeLine = (line) => {
    if (!line.startsWith('data: ')) return;
    const data = JSON.parse(line.slice(6));
    if (data.token) {
      text += data.token;
      render(true);
    }
  };

  render(true);
  output.classList.add('streaming');
  setStatus('Generating...', true);
  try {
    const res = await fetch('/api/generate', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        prompt: document.getElementById('prompt').value,
        multiplier: 1.0 - parseFloat(document.getElementById('steering').value),
        max_tokens: 200,
      }),
    });
    if (!res.ok) throw new Error('HTTP ' + res.status);

    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    // Network chunks do not align with line boundaries: keep the trailing partial
    // line here until the rest of it arrives.
    let pending = '';
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // stream:true keeps a multi-byte character that straddles two chunks intact.
      pending += decoder.decode(value, { stream: true });
      const lines = pending.split('\n');
      pending = lines.pop();
      lines.forEach(consumeLine);
    }
    // Flush any bytes the decoder was still holding, then the last unterminated line.
    pending += decoder.decode();
    if (pending) consumeLine(pending);

    render(false);
    setStatus('Done');
  } catch (e) {
    output.textContent = 'Error: ' + e.message;
    setStatus('Error');
  } finally {
    // Always restore the idle UI state, including after an error.
    output.classList.remove('streaming');
    btn.disabled = false;
  }
}
</script>
</body>
</html>