Add files using upload-large-folder tool

7cb2f27 verified 4 months ago

8.31 kB

	#!/usr/bin/env python3
	"""
	Graft INTELLECT-3 language model weights into GLM-4.6V vision-language model.

	This script:
	1. Loads both models into CPU memory
	2. Copies model.layers.* from INTELLECT-3 to model.language_model.layers.* in GLM-4.6V
	3. Copies model.norm.weight from INTELLECT-3 to model.language_model.norm.weight in GLM-4.6V
	4. Saves the resulting model to a new directory

	Does NOT touch:
	- model.language_model.embed_tokens (needed for vision token compatibility)
	- lm_head (kept aligned with embed_tokens)
	- model.visual.* (vision encoder preserved)
	"""

	import os
	import argparse
	import json
	import shutil
	from pathlib import Path
	from safetensors import safe_open
	from safetensors.torch import save_file
	import torch
	from tqdm import tqdm


	def get_safetensor_files(model_dir: Path) -> list[Path]:
	"""Get all safetensor files in a model directory."""
	files = sorted(model_dir.glob("*.safetensors"))
	if not files:
	raise FileNotFoundError(f"No safetensor files found in {model_dir}")
	return files


	def load_state_dict_from_safetensors(model_dir: Path) -> dict[str, torch.Tensor]:
	"""Load all tensors from safetensor files into a state dict."""
	state_dict = {}
	files = get_safetensor_files(model_dir)

	for f in tqdm(files, desc=f"Loading {model_dir.name}"):
	with safe_open(f, framework="pt", device="cpu") as st:
	for key in st.keys():
	state_dict[key] = st.get_tensor(key)

	return state_dict


	def graft_weights(
	intellect3_state: dict[str, torch.Tensor],
	glm_state: dict[str, torch.Tensor]
	) -> dict[str, torch.Tensor]:
	"""
	Graft INTELLECT-3 weights into GLM-4.6V state dict.

	Mapping:
	- model.layers.* -> model.language_model.layers.*
	- model.norm.weight -> model.language_model.norm.weight
	"""
	grafted_state = dict(glm_state) # shallow copy

	grafted_count = 0
	skipped_keys = []

	for intellect_key, tensor in tqdm(intellect3_state.items(), desc="Grafting weights"):
	# Skip embed_tokens and lm_head from INTELLECT-3
	if "embed_tokens" in intellect_key or "lm_head" in intellect_key:
	skipped_keys.append(intellect_key)
	continue

	# Map model.layers.* -> model.language_model.layers.*
	if intellect_key.startswith("model.layers."):
	glm_key = intellect_key.replace("model.layers.", "model.language_model.layers.")
	# Map model.norm.weight -> model.language_model.norm.weight
	elif intellect_key == "model.norm.weight":
	glm_key = "model.language_model.norm.weight"
	else:
	skipped_keys.append(intellect_key)
	continue

	# Verify the key exists in GLM and shapes match
	if glm_key not in grafted_state:
	print(f"WARNING: {glm_key} not found in GLM-4.6V state dict!")
	continue

	if grafted_state[glm_key].shape != tensor.shape:
	print(f"WARNING: Shape mismatch for {glm_key}:")
	print(f" INTELLECT-3: {tensor.shape}")
	print(f" GLM-4.6V: {grafted_state[glm_key].shape}")
	continue

	grafted_state[glm_key] = tensor
	grafted_count += 1

	print(f"\nGrafted {grafted_count} tensors from INTELLECT-3")
	print(f"Skipped {len(skipped_keys)} tensors: {skipped_keys[:5]}{'...' if len(skipped_keys) > 5 else ''}")

	return grafted_state


	def save_state_dict_to_safetensors(
	state_dict: dict[str, torch.Tensor],
	output_dir: Path,
	max_shard_size: int = 5 * 1024 ** 3 # 5GB default
	):
	"""Save state dict to sharded safetensor files."""
	output_dir.mkdir(parents=True, exist_ok=True)

	# Calculate total size and plan shards
	tensors_by_size = [(k, v, v.numel() * v.element_size()) for k, v in state_dict.items()]
	total_size = sum(size for _, _, size in tensors_by_size)

	print(f"\nTotal model size: {total_size / 1024**3:.2f} GB")

	# Create shards
	shards = []
	current_shard = {}
	current_size = 0

	for key, tensor, size in tensors_by_size:
	if current_size + size > max_shard_size and current_shard:
	shards.append(current_shard)
	current_shard = {}
	current_size = 0

	current_shard[key] = tensor
	current_size += size

	if current_shard:
	shards.append(current_shard)

	print(f"Saving to {len(shards)} shard(s)...")

	# Save shards and build index
	weight_map = {}

	for i, shard in enumerate(tqdm(shards, desc="Saving shards")):
	if len(shards) == 1:
	filename = "model.safetensors"
	else:
	filename = f"model-{i+1:05d}-of-{len(shards):05d}.safetensors"

	filepath = output_dir / filename
	save_file(shard, filepath)

	for key in shard.keys():
	weight_map[key] = filename

	# Save index if sharded
	if len(shards) > 1:
	index = {
	"metadata": {"total_size": total_size},
	"weight_map": weight_map
	}
	with open(output_dir / "model.safetensors.index.json", "w") as f:
	json.dump(index, f, indent=2)

	return weight_map


	def copy_config_files(src_dir: Path, dst_dir: Path):
	"""Copy config files from source to destination."""
	config_files = [
	"config.json",
	"tokenizer.json",
	"tokenizer_config.json",
	"special_tokens_map.json",
	"generation_config.json",
	"preprocessor_config.json",
	"chat_template.json",
	]

	for filename in config_files:
	src_file = src_dir / filename
	if src_file.exists():
	shutil.copy2(src_file, dst_dir / filename)
	print(f"Copied {filename}")


	def main():
	parser = argparse.ArgumentParser(
	description="Graft INTELLECT-3 weights into GLM-4.6V"
	)
	parser.add_argument(
	"--intellect3",
	type=Path,
	default=Path.home() / "models" / "INTELLECT-3",
	help="Path to INTELLECT-3 model directory"
	)
	parser.add_argument(
	"--glm",
	type=Path,
	default=Path.home() / "models" / "GLM-4.6V",
	help="Path to GLM-4.6V model directory"
	)
	parser.add_argument(
	"--output",
	type=Path,
	default=Path.home() / "models" / "INTELLECT-3-V",
	help="Path to output directory"
	)
	parser.add_argument(
	"--shard-size",
	type=int,
	default=5,
	help="Maximum shard size in GB (default: 5)"
	)

	args = parser.parse_args()

	print("=" * 60)
	print("INTELLECT-3 -> GLM-4.6V Weight Grafting")
	print("=" * 60)
	print(f"INTELLECT-3 source: {args.intellect3}")
	print(f"GLM-4.6V source: {args.glm}")
	print(f"Output directory: {args.output}")
	print("=" * 60)

	# Verify source directories exist
	if not args.intellect3.exists():
	raise FileNotFoundError(f"INTELLECT-3 directory not found: {args.intellect3}")
	if not args.glm.exists():
	raise FileNotFoundError(f"GLM-4.6V directory not found: {args.glm}")

	# Load both models
	print("\nStep 1: Loading models into CPU memory...")
	intellect3_state = load_state_dict_from_safetensors(args.intellect3)
	glm_state = load_state_dict_from_safetensors(args.glm)

	print(f"\nINTELLECT-3 tensors: {len(intellect3_state)}")
	print(f"GLM-4.6V tensors: {len(glm_state)}")

	# Graft weights
	print("\nStep 2: Grafting INTELLECT-3 weights into GLM-4.6V...")
	grafted_state = graft_weights(intellect3_state, glm_state)

	# Free memory from source models
	del intellect3_state
	del glm_state

	# Save grafted model
	print("\nStep 3: Saving grafted model...")
	save_state_dict_to_safetensors(
	grafted_state,
	args.output,
	max_shard_size=args.shard_size * 1024 ** 3
	)

	# Copy config files from GLM-4.6V (since we're keeping its architecture)
	print("\nStep 4: Copying config files from GLM-4.6V...")
	copy_config_files(args.glm, args.output)

	print("\n" + "=" * 60)
	print("Done! Grafted model saved to:", args.output)
	print("=" * 60)


	if __name__ == "__main__":
	main()