lsn-analysis / activation_llama_all.py

Upload folder using huggingface_hub

fed1832 verified 7 months ago

5.01 kB

	#!/usr/bin/env python3
	"""
	Per-neuron activation tracker for LLaMA-2 and Qwen MLP layers.
	Runs on a fixed set of models and input IDs.
	"""

	import gc
	import os
	from types import MethodType

	import torch
	from vllm import LLM, SamplingParams # Keep original import since hook logic depends on vLLM

	# ---------------------- Config ----------------------
	# Root directory that holds the continued-pretraining checkpoints.
	BASE_PATH = "/home/khanh/sla/sla_cpt"

	# One dict per run. Keys as consumed by the run loop in this file:
	#   name      - label used in the output file name
	#   model     - checkpoint path handed to vLLM
	#   ids_path  - file of token IDs read with torch.load
	#   lang      - language tag used in the output file name
	#   type      - which MLP hook to install ('llama' or 'qwen')
	RUN_CONFIGS = [
	    {
	        'name': 'l2-7b-eu',
	        'model': f'{BASE_PATH}/llama2_7b_full_basque_corpus_grad_clip_1/checkpoint-10200',
	        'ids_path': './ids/l2-7b/id.eu.train.l2-7b',
	        'lang': 'eu',
	        'type': 'llama',
	    },
	    {
	        'name': 'l2-13b-ga',
	        'model': f'{BASE_PATH}/llama2_13b_full_irish_corpus_grad_clip_1/checkpoint-4280',
	        # Fixed: was '.ids/...' (missing slash), unlike every other entry.
	        'ids_path': './ids/l2-13b/id.ga.train.l2-13b',
	        # NOTE(review): 'en' disagrees with the 'ga' name and ID file, so the
	        # output is written as activation.en.* - confirm this is intended.
	        'lang': 'en',
	        'type': 'llama',
	    },
	    {
	        'name': 'q2.5-zh',
	        'model': f'{BASE_PATH}/qwen2.5-0.5_full_chinese_corpus_grad_clip_1/checkpoint-7800',
	        'ids_path': './ids/qwen2.5-0.5/id.zh.train.qwen2.5-0.5',
	        'lang': 'zh',
	        'type': 'qwen',
	    },
	    {
	        # NOTE(review): name/lang say 'ga' but the checkpoint and ID file are
	        # both English - confirm which side is the intended one.
	        'name': 'q2.5-ga',
	        'model': f'{BASE_PATH}/qwen2.5-0.5_full_english_corpus_grad_clip_1/checkpoint-3231',
	        'ids_path': './ids/qwen2.5-0.5/id.en.train.qwen2.5-0.5',
	        'lang': 'ga',
	        'type': 'qwen',
	    },
	    {
	        # NOTE(review): mixed en+ga IDs are saved under lang 'ga' - confirm.
	        'name': 'q2.5-en+ga',
	        'model': f'{BASE_PATH}/qwen2.5-0.5_full_english_corpus_grad_clip_1/checkpoint-3231',
	        'ids_path': './ids/qwen2.5-0.5/id.en+ga.train.qwen2.5-0.5',
	        'lang': 'ga',
	        'type': 'qwen',
	    },
	]

	# Directory that receives the saved activation-count files. It is created at
	# import time (no error if it already exists) so later saves can write into it.
	SAVE_FOLDER = "new_activations"
	os.makedirs(SAVE_FOLDER, exist_ok=True)

	# ---------------------- Hook Functions ----------------------
	def make_llama_hook(idx):
	    """Build a replacement MLP ``forward`` that counts positive gate activations.

	    Args:
	        idx: Row of the module-level ``over_zero`` counter that the returned
	            forward updates (the caller passes the layer index).

	    Returns:
	        A function ``llama_forward(self, x)`` meant to be bound as a method on
	        an MLP module exposing ``gate_up_proj`` and ``down_proj``, each of
	        which returns a ``(tensor, extra)`` pair.
	    """
	    def llama_forward(self, x):
	        gate_up, _ = self.gate_up_proj(x)  # l, 2i (gate half, then up half)
	        half = gate_up.size(-1) // 2
	        # Functional SiLU avoids building a new nn.SiLU module on every call,
	        # and the projection output is no longer overwritten in place.
	        gate = torch.nn.functional.silu(gate_up[..., :half])  # l, i
	        up = gate_up[..., half:]  # l, i
	        # `over_zero` is looked up in module globals at call time; per neuron,
	        # add the number of rows whose activated gate value is positive.
	        over_zero[idx, :] += (gate > 0).sum(dim=0)
	        x, _ = self.down_proj(gate * up)
	        return x
	    return llama_forward

	def make_qwen_hook(idx):
	    """Create an MLP ``forward`` override that tallies positive gate activations.

	    Args:
	        idx: Row of the module-level ``over_zero`` counter to accumulate into
	            (the caller passes the layer index).

	    Returns:
	        A function ``qwen_forward(self, x)`` to be bound as a method on an MLP
	        module that provides ``gate_up_proj`` and ``down_proj``, each
	        returning a ``(tensor, extra)`` pair.
	    """
	    def qwen_forward(self, x):
	        fused, _ = self.gate_up_proj(x)  # (s, 2h)
	        half = fused.size(-1) // 2
	        # First half of the last dim is the gate input, second half the up input.
	        gate, up = fused[..., :half], fused[..., half:]  # each (s, h)
	        activated = torch.nn.functional.silu(gate)
	        # Count, per neuron, how many rows produced a positive activation.
	        positive_rows = (activated > 0).sum(dim=0)
	        over_zero[idx, :] += positive_rows
	        projected, _ = self.down_proj(activated * up)
	        return projected
	    return qwen_forward

	# ---------------------- Run All Configs ----------------------
	# Hook factory per supported architecture. Looking the type up here lets a bad
	# config fail before the (expensive) model load instead of after it.
	HOOK_FACTORIES = {'llama': make_llama_hook, 'qwen': make_qwen_hook}

	# Upper bound on the number of token IDs used per config
	# (99999744 == 24414 * 4096).
	MAX_TOKENS = 99999744

	for config in RUN_CONFIGS:
	    model_name = config['model']
	    lang = config['lang']
	    ids_path = config['ids_path']
	    save_name = config.get('name', model_name)
	    model_type = config.get('type', 'llama') # default to 'llama'

	    print(f"\n=== Processing model: {model_name}, lang: {lang}, type: {model_type} ===")

	    if model_type not in HOOK_FACTORIES:
	        raise ValueError(f"Unknown model type: {model_type}")
	    make_hook = HOOK_FACTORIES[model_type]

	    # Load model
	    model = LLM(
	        model=model_name,
	        tensor_parallel_size=1,
	        enforce_eager=True,
	        trust_remote_code=True
	    )

	    model_config = model.llm_engine.model_config
	    max_length = model_config.max_model_len
	    num_layers = model_config.hf_config.num_hidden_layers
	    intermediate_size = model_config.hf_config.intermediate_size

	    print(f"Layers: {num_layers}, Intermediate size: {intermediate_size}, Max length: {max_length}")

	    # Setup activation tracker. This must stay a module-level name: the hook
	    # functions read `over_zero` from module globals when they run.
	    over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32).to('cuda')

	    # Hook MLP layers: replace each layer's MLP forward with the counting version.
	    layers = model.llm_engine.model_executor.driver_worker.model_runner.model.model.layers
	    for layer_idx in range(num_layers):
	        mlp = layers[layer_idx].mlp
	        mlp.forward = MethodType(make_hook(layer_idx), mlp)

	    # Load input IDs
	    # NOTE: torch.load unpickles the file; only point ids_path at trusted files.
	    print("Loading IDs...")
	    ids = torch.load(ids_path)
	    print(f"ID shape: {ids.shape}")

	    # Cap the token count, then round down to a whole number of
	    # max_length-sized sequences so the reshape below is exact.
	    num_tokens = min(ids.size(0), MAX_TOKENS) // max_length * max_length
	    input_ids = ids[:num_tokens].reshape(-1, max_length)
	    print(f"Processing {input_ids.size(0)} sequences of length {max_length}")

	    # Run inference. The generated token is discarded; the pass is run only
	    # so the hooked MLP forwards update `over_zero`.
	    print("Running inference...")
	    _ = model.generate(
	        prompt_token_ids=input_ids.tolist(),
	        sampling_params=SamplingParams(max_tokens=1)
	    )

	    # Save results
	    output_path = os.path.join(SAVE_FOLDER, f'activation.{lang}.train.{save_name}.pt')
	    torch.save({
	        'n': num_tokens,
	        'over_zero': over_zero.cpu(),
	        'num_layers': num_layers,
	        'intermediate_size': intermediate_size
	    }, output_path)

	    print(f"Saved activation counts to {output_path}")
	    print(f"Processed {num_tokens} tokens total")

	    # Best-effort cleanup before the next config loads another model. The
	    # bound hooks form reference cycles (mlp -> forward -> mlp), hence the
	    # explicit gc pass. NOTE(review): whether vLLM returns all of its GPU
	    # memory this way depends on the installed version - confirm.
	    model = layers = mlp = None
	    ids = input_ids = None
	    gc.collect()
	    torch.cuda.empty_cache()

	print("Activation analysis complete!")