lsn-analysis / activation_qwen_all.py
tvkain's picture
Upload folder using huggingface_hub
fed1832 verified
#!/usr/bin/env python3
"""
Simple per-neuron activation tracker for LLaMA-2 MLP layers.
Runs on a fixed set of models and input IDs.
"""
import gc
import os
from types import MethodType

import torch
from vllm import LLM, SamplingParams # Keep your original import since hook logic depends on vLLM
# ---------------------- Config ----------------------
# Root directory that holds the fine-tuned checkpoints referenced below.
BASE_PATH = "/home/khanh/sla/sla_cpt"

# One entry per run. Keys:
#   name     - label used in the output file name
#   model    - checkpoint path handed to vLLM
#   ids_path - file with the token IDs, loaded with torch.load
#   lang     - language tag used in the output file name
RUN_CONFIGS = [
    {
        'name': 'q2.5-zh', # custom name for output file
        'model': f'{BASE_PATH}/llama2_7b_full_basque_corpus_grad_clip_1/checkpoint-10200', # replace with LLaMA2 HF model if needed
        'ids_path': './ids/l2-7b/id.eu.train.l2-7b',
        'lang': 'eu',
    },
    {
        'name': 'q2.5-ga',
        'model': f'{BASE_PATH}/llama2_13b_full_irish_corpus_grad_clip_1/checkpoint-4280',
        # Was '.ids/...' (a hidden directory); aligned with the './ids/' layout
        # used by the other entry.
        'ids_path': './ids/l2-13b/id.ga.train.l2-13b',
        # NOTE(review): the ID file is tagged 'ga' but lang is 'en'; the output
        # file name is built from this value - confirm which one is intended.
        'lang': 'en',
    },
]

# Directory that receives the saved activation counts; created on import.
SAVE_FOLDER = "new_activations"
os.makedirs(SAVE_FOLDER, exist_ok=True)
# ---------------------- Original Hook Function ----------------------
def make_qwen_hook(idx):
    """Build a replacement MLP ``forward`` that counts positive gate activations.

    The returned function is meant to be bound to an MLP module (it takes
    ``self``). On every call it adds, for each intermediate unit, the number
    of token positions whose SiLU gate output is > 0 to row ``idx`` of the
    module-level ``over_zero`` tensor, then returns the regular MLP output.

    Args:
        idx: Row of ``over_zero`` to accumulate into (the layer index).

    Returns:
        A function ``qwen_forward(self, x)`` suitable for ``MethodType``.
    """

    def qwen_forward(self, x):
        """Run the gated MLP and record which gate units fired.

        x: (..., d) -- typically (s, d)
        s: number of tokens
        d: number of features for each token
        """
        # Fused projection: gate and up halves are concatenated on the last
        # axis, (..., 2h). The second return value is not used.
        gate_up, _ = self.gate_up_proj(x)

        # Split the concatenated tensor back into its two (..., h) halves.
        half = gate_up.size(-1) // 2
        gate = gate_up[..., :half]
        up = gate_up[..., half:]

        # (..., h)
        gate_activation = torch.nn.functional.silu(gate)

        # Count activations > 0 per unit. Collapsing every leading dimension
        # first keeps this correct for both (s, h) and batched (b, s, h)
        # inputs; for 2-D input it is the same as summing over dim 0.
        fired = (gate_activation > 0).reshape(-1, gate_activation.size(-1))
        over_zero[idx, :] += fired.sum(dim=0)  # (h)

        # Complete the forward pass: gate * up -> down projection.
        x, _ = self.down_proj(gate_activation * up)
        return x

    return qwen_forward
# ---------------------- Run All Configs ----------------------
# Upper bound on how many token IDs are consumed from each ID file.
MAX_TOKENS = 99999744

for config in RUN_CONFIGS:
    model_name = config['model']
    lang = config['lang']
    ids_path = config['ids_path']
    save_name = config.get('name', model_name) # use 'name' key if present, otherwise fallback to model_name

    print(f"\n=== Processing model: {model_name}, lang: {lang} ===")

    # Load model. Eager mode so the patched Python forward is what runs.
    model = LLM(
        model=model_name,
        tensor_parallel_size=1,
        enforce_eager=True,
        trust_remote_code=True,
    )
    model_config = model.llm_engine.model_config
    max_length = model_config.max_model_len
    num_layers = model_config.hf_config.num_hidden_layers
    intermediate_size = model_config.hf_config.intermediate_size
    print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

    # Setup activation tracker. This is a module-level name on purpose: the
    # hooks built by make_qwen_hook look up `over_zero` when they are called.
    over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32, device='cuda')

    # Hook MLP layers: replace each layer's MLP forward with the counting one.
    layers = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers
    mlp = None
    for i in range(num_layers):
        mlp = layers[i].mlp
        mlp.forward = MethodType(make_qwen_hook(i), mlp)

    # Load input IDs
    print("Loading IDs...")
    ids = torch.load(ids_path)
    print(f"ID shape: {ids.shape}")

    # Keep only as many tokens as fill whole sequences of max_length.
    num_tokens = min(ids.size(0), MAX_TOKENS) // max_length * max_length
    input_ids = ids[:num_tokens].reshape(-1, max_length)
    print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

    # Run inference; only the side effect on over_zero matters, so a single
    # generated token per prompt is requested and the outputs are discarded.
    print("Running inference...")
    _ = model.generate(
        prompt_token_ids=input_ids.tolist(),
        sampling_params=SamplingParams(max_tokens=1),
    )

    # Save results using the 'name' key
    output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{save_name}.pt')
    torch.save({
        'n': num_tokens,
        'over_zero': over_zero.cpu(),
        'num_layers': num_layers,
        'intermediate_size': intermediate_size,
    }, output_path)
    print(f"Saved activation counts to {output_path}")
    print(f"Processed {num_tokens} tokens total")

    # Best-effort release of this model before the next one is loaded in the
    # same process. Each patched forward is a bound method stored on its own
    # module (a reference cycle), so an explicit collection is needed before
    # the CUDA cache can actually be returned.
    model = model_config = layers = mlp = None
    gc.collect()
    torch.cuda.empty_cache()

print("Activation analysis complete!")