llm-sort / run_high_intensity.py

Upload folder using huggingface_hub

c7f1373 verified 25 days ago

4.49 kB

	"""Quick run: intensity up to 30 for the 1M checkpoint, layers 0 and 1, ub=60."""
	import os
	import sys
	import numpy as np
	import torch
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt

	sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'grid-run'))
	from model_analysis import GPT, GPTConfig, GPTIntervention

	# Directory containing this script; the checkpoint and plot paths below are
	# resolved relative to it.
	_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))

	# Checkpoint file read by load_model().
	CKPT = os.path.join(
	    _SCRIPT_DIR,
	    'sortgpt_k16_methfixed_mlp1_L2_N256_E64_pos0_fln1_wd0p0_lr0p03_dseed1337_iseed1337__final.pt',
	)
	# Directory that main() writes the PNG plots into.
	PLOT_DIR = os.path.join(
	    _SCRIPT_DIR,
	    'outputs',
	    'plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000',
	)

	# Values passed as `unsorted_intensity_inc`; one data point per entry.
	INTENSITIES = [1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 14.0, 18.0, 22.0, 26.0, 30.0]
	# Passed as both `unsorted_lb` and `unsorted_ub` of the intervention.
	UB = 60
	# Number of successful trials to collect per intensity before stopping.
	MIN_VALID = 200
	# Value written to CUDA_VISIBLE_DEVICES by main().
	GPU = 0


	def remap_state_dict(sd):
	    """Return a new dict with per-layer ``attn.``/``mlp.`` key segments renamed.

	    Every occurrence of ``transformer.h.<i>.attn.`` in a key becomes
	    ``transformer.h.<i>.c_attn.`` and every ``transformer.h.<i>.mlp.`` becomes
	    ``transformer.h.<i>.c_fc.``, for any non-negative layer index ``<i>``.
	    Keys without such a segment are copied unchanged; values are never touched.

	    Args:
	        sd: Mapping from parameter name (str) to value.

	    Returns:
	        A new dict with the renamed keys; ``sd`` itself is not modified.
	    """
	    # Local import keeps this function self-contained without touching the
	    # file-level import block.
	    import re

	    renames = {'attn': 'c_attn', 'mlp': 'c_fc'}
	    # Match any layer index rather than a fixed range: keys that are left
	    # unrenamed would later be ignored by load_state_dict(strict=False)
	    # without any error.
	    pattern = re.compile(r'(transformer\.h\.[0-9]+\.)(attn|mlp)\.')

	    def _rename(match):
	        return f'{match.group(1)}{renames[match.group(2)]}.'

	    return {pattern.sub(_rename, key): value for key, value in sd.items()}


	def load_model(device):
	    """Build a GPT from the checkpoint at ``CKPT`` and move it to ``device``.

	    Args:
	        device: Target passed to ``model.to``.

	    Returns:
	        ``(model, config)``: the model in eval mode and the ``GPTConfig`` it
	        was built from.
	    """
	    checkpoint = torch.load(CKPT, map_location='cpu')
	    saved_cfg = checkpoint['model_config']
	    config = GPTConfig(
	        block_size=saved_cfg['block_size'],
	        # The model is built with one token fewer than the checkpoint records.
	        vocab_size=saved_cfg['vocab_size'] - 1,
	        with_layer_norm=saved_cfg.get('use_final_LN', True),
	    )
	    model = GPT(config)
	    state = remap_state_dict(checkpoint['model_state_dict'])

	    # Truncate the positional-embedding table when the checkpoint holds more
	    # rows than 4 * block_size + 1.
	    wpe_key = 'transformer.wpe.weight'
	    max_rows = config.block_size * 4 + 1
	    if wpe_key in state and state[wpe_key].shape[0] > max_rows:
	        state[wpe_key] = state[wpe_key][:max_rows]

	    # Drop '<...>.c_attn.bias' entries that are not '<...>.c_attn.c_attn.bias'.
	    # NOTE(review): presumably these are attention-mask buffers rather than
	    # learned biases; confirm against the checkpoint.
	    dropped = [
	        name for name in state
	        if name.endswith('.c_attn.bias') and 'c_attn.c_attn' not in name
	    ]
	    for name in dropped:
	        del state[name]
	    state.pop('lm_head.weight', None)

	    # strict=False: keys that are missing or unexpected are tolerated.
	    model.load_state_dict(state, strict=False)
	    model.to(device)
	    model.eval()
	    return model, config


	def get_batch(vs, bs, device):
	    """Sample one sequence: ``bs`` random tokens, a separator, then the same tokens sorted.

	    Args:
	        vs: Tokens are drawn without replacement from ``range(vs)``; ``vs``
	            itself is used as the separator value.
	        bs: Number of tokens taken from the random permutation.
	        device: Target passed to ``Tensor.to``.

	    Returns:
	        Tensor with a leading batch dimension of 1 holding
	        ``[unsorted..., vs, sorted...]``.
	    """
	    unsorted = torch.randperm(vs)[:bs]
	    separator = torch.tensor([vs])
	    ordered = torch.sort(unsorted).values
	    sequence = torch.cat([unsorted, separator, ordered], dim=0)
	    return sequence.unsqueeze(0).to(device)


	def _collect_trials(model, layer, location, intens, vs, bs, device):
	    """Run intervention trials for one (layer, intensity) pair.

	    Keeps sampling until ``MIN_VALID`` trials have completed or 3000 rounds
	    have been spent, whichever comes first.

	    Returns:
	        ``(attempts, skipped, last_error)``: the per-trial outcomes
	        (``g == n`` from ``check_if_still_works``), the number of rounds that
	        raised, and the ``repr`` of the most recent exception (or ``None``).
	    """
	    attempts, rounds, skipped, last_error = [], 0, 0, None
	    while len(attempts) < MIN_VALID and rounds < 3000:
	        rounds += 1
	        idx = get_batch(vs, bs, device)
	        try:
	            im = GPTIntervention(model, idx)
	            im.intervent_attention(
	                attention_layer_num=layer, location=location,
	                unsorted_lb=UB, unsorted_ub=UB,
	                unsorted_lb_num=0, unsorted_ub_num=1,
	                unsorted_intensity_inc=intens,
	                sorted_lb=0, sorted_num=0, sorted_intensity_inc=0.0)
	            g, n = im.check_if_still_works()
	            attempts.append(g == n)
	            im.revert_attention(layer)
	        except Exception as err:
	            # Failed rounds are skipped on purpose, but only for ordinary
	            # errors: a bare `except:` would also swallow KeyboardInterrupt
	            # and make this long loop impossible to stop with Ctrl-C.
	            # NOTE(review): if check_if_still_works() raises, revert_attention()
	            # is not reached; confirm the intervention cannot leak into later
	            # rounds.
	            skipped += 1
	            last_error = repr(err)
	    return attempts, skipped, last_error


	def _save_plot(layer, rates):
	    """Plot success rate against ``INTENSITIES`` for ``layer`` and return the PNG path."""
	    intensities = np.array(INTENSITIES)
	    rates = np.array(rates)

	    plt.figure(figsize=(5.5, 3.8))
	    plt.plot(intensities, rates, marker='o', linewidth=1.5, markersize=5, color='#e6850e')
	    plt.xlabel('Intervention Intensity', fontsize=10)
	    plt.ylabel('Success Probability', fontsize=10)
	    plt.title(f'Robustness to Attention Intervention (Layer {layer}) [ub={UB}]\n'
	              f'V=256 B=16 lr=0.03 iters=1000000 dseed=1337 iseed=1337',
	              fontsize=10, fontweight='bold')
	    plt.grid(True, alpha=0.3)
	    plt.xticks(intensities[::2], fontsize=9)
	    plt.yticks(fontsize=9)
	    plt.ylim(0, 1.05)
	    plt.tight_layout()
	    # Derive the file name from UB so it cannot drift from the plot title.
	    out = os.path.join(PLOT_DIR, f'intensity_layer{layer}_ub{UB}_high.png')
	    plt.savefig(out, dpi=300, bbox_inches='tight')
	    plt.close()
	    return out


	def main():
	    """Sweep ``INTENSITIES`` on layers 0 and 1 and save one success-rate plot per layer."""
	    os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU)
	    device = 'cuda'
	    print("Loading model...", flush=True)
	    model, config = load_model(device)
	    bs = config.block_size
	    vs = config.vocab_size
	    # get_batch() puts the separator at index bs, so bs + 5 lies in the
	    # sorted half of the sequence.
	    location = bs + 5

	    # Create the output directory before the sweep so a missing directory
	    # fails immediately rather than after all trials have run.
	    os.makedirs(PLOT_DIR, exist_ok=True)

	    for layer in [0, 1]:
	        print(f"\nLayer {layer}:", flush=True)
	        rates = []
	        for intens in INTENSITIES:
	            attempts, skipped, last_error = _collect_trials(
	                model, layer, location, intens, vs, bs, device)
	            rate = sum(attempts) / len(attempts) if attempts else 0.0
	            rates.append(rate)
	            print(f" intensity={intens:5.1f}: success={rate:.4f} (n={len(attempts)})", flush=True)
	            if skipped:
	                print(f" skipped {skipped} rounds (last error: {last_error})", flush=True)

	        out = _save_plot(layer, rates)
	        print(f" Saved: {out}", flush=True)

	    print("\nDone!", flush=True)


	# Run the sweep only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()