"""
Graft INTELLECT-3 language model weights into GLM-4.6V vision-language model.

This script:
1. Loads both models into CPU memory
2. Copies model.layers.* from INTELLECT-3 to model.language_model.layers.* in GLM-4.6V
3. Copies model.norm.weight from INTELLECT-3 to model.language_model.norm.weight in GLM-4.6V
4. Saves the resulting model to a new directory

Does NOT touch:
- model.language_model.embed_tokens (needed for vision token compatibility)
- lm_head (kept aligned with embed_tokens)
- model.visual.* (vision encoder preserved)
"""
|
|
| import os |
| import argparse |
| import json |
| import shutil |
| from pathlib import Path |
| from safetensors import safe_open |
| from safetensors.torch import save_file |
| import torch |
| from tqdm import tqdm |
|
|
|
|
def get_safetensor_files(model_dir: Path) -> list[Path]:
    """Return the sorted list of ``*.safetensors`` shard paths in ``model_dir``.

    Raises:
        FileNotFoundError: if the directory holds no safetensor files.
    """
    shard_paths = list(model_dir.glob("*.safetensors"))
    if not shard_paths:
        raise FileNotFoundError(f"No safetensor files found in {model_dir}")
    shard_paths.sort()
    return shard_paths
|
|
|
|
def load_state_dict_from_safetensors(model_dir: Path) -> dict[str, torch.Tensor]:
    """Read every safetensor shard in ``model_dir`` into one flat state dict.

    All tensors are materialized on CPU. If a key appears in several shards
    (it should not), the last shard read wins.
    """
    tensors: dict[str, torch.Tensor] = {}

    for shard_path in tqdm(get_safetensor_files(model_dir), desc=f"Loading {model_dir.name}"):
        with safe_open(shard_path, framework="pt", device="cpu") as reader:
            for name in reader.keys():
                tensors[name] = reader.get_tensor(name)

    return tensors
|
|
|
|
def graft_weights(
    intellect3_state: dict[str, torch.Tensor],
    glm_state: dict[str, torch.Tensor]
) -> dict[str, torch.Tensor]:
    """
    Return a copy of the GLM-4.6V state dict with INTELLECT-3's
    language-model weights swapped in.

    Key mapping:
        model.layers.*      -> model.language_model.layers.*
        model.norm.weight   -> model.language_model.norm.weight

    embed_tokens / lm_head tensors are deliberately skipped so the vision
    token embedding space of GLM-4.6V stays intact; any key missing from
    the GLM dict or with a mismatched shape is warned about and skipped.
    """
    merged = dict(glm_state)

    copied = 0
    untouched = []

    for src_key, weight in tqdm(intellect3_state.items(), desc="Grafting weights"):
        # Never overwrite the embedding table or the output head.
        if "embed_tokens" in src_key or "lm_head" in src_key:
            untouched.append(src_key)
            continue

        # Translate INTELLECT-3 key names to their GLM-4.6V locations.
        if src_key.startswith("model.layers."):
            dst_key = src_key.replace("model.layers.", "model.language_model.layers.")
        elif src_key == "model.norm.weight":
            dst_key = "model.language_model.norm.weight"
        else:
            untouched.append(src_key)
            continue

        if dst_key not in merged:
            print(f"WARNING: {dst_key} not found in GLM-4.6V state dict!")
            continue

        if merged[dst_key].shape != weight.shape:
            print(f"WARNING: Shape mismatch for {dst_key}:")
            print(f" INTELLECT-3: {weight.shape}")
            print(f" GLM-4.6V: {merged[dst_key].shape}")
            continue

        merged[dst_key] = weight
        copied += 1

    print(f"\nGrafted {copied} tensors from INTELLECT-3")
    print(f"Skipped {len(untouched)} tensors: {untouched[:5]}{'...' if len(untouched) > 5 else ''}")

    return merged
|
|
|
|
def save_state_dict_to_safetensors(
    state_dict: dict[str, torch.Tensor],
    output_dir: Path,
    max_shard_size: int = 5 * 1024 ** 3
):
    """Write ``state_dict`` to ``output_dir`` as sharded safetensor files.

    Tensors are packed greedily, in dict insertion order, into shards of at
    most ``max_shard_size`` bytes. A HuggingFace-style
    ``model.safetensors.index.json`` is emitted only when more than one
    shard results; a single shard is named ``model.safetensors``.

    Returns:
        Mapping from tensor name to the shard filename that contains it.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # (name, tensor, byte size) triples, preserving dict order.
    sized = [(name, t, t.numel() * t.element_size()) for name, t in state_dict.items()]
    total_size = sum(nbytes for _, _, nbytes in sized)

    print(f"\nTotal model size: {total_size / 1024**3:.2f} GB")

    # Greedy first-fit packing: start a new shard when the next tensor
    # would push the current one past the size limit.
    shards: list[dict[str, torch.Tensor]] = []
    bucket: dict[str, torch.Tensor] = {}
    bucket_bytes = 0

    for name, tensor, nbytes in sized:
        if bucket and bucket_bytes + nbytes > max_shard_size:
            shards.append(bucket)
            bucket, bucket_bytes = {}, 0
        bucket[name] = tensor
        bucket_bytes += nbytes

    if bucket:
        shards.append(bucket)

    print(f"Saving to {len(shards)} shard(s)...")

    weight_map: dict[str, str] = {}

    for i, shard in enumerate(tqdm(shards, desc="Saving shards")):
        if len(shards) == 1:
            filename = "model.safetensors"
        else:
            filename = f"model-{i+1:05d}-of-{len(shards):05d}.safetensors"

        save_file(shard, output_dir / filename)

        for name in shard:
            weight_map[name] = filename

    # A single-file checkpoint needs no index.
    if len(shards) > 1:
        index = {
            "metadata": {"total_size": total_size},
            "weight_map": weight_map
        }
        with open(output_dir / "model.safetensors.index.json", "w") as f:
            json.dump(index, f, indent=2)

    return weight_map
|
|
|
|
def copy_config_files(src_dir: Path, dst_dir: Path):
    """Copy the standard HF config/tokenizer JSON files from ``src_dir`` to ``dst_dir``.

    Files missing from ``src_dir`` are skipped silently (not every checkpoint
    ships all of them); each file actually copied is reported on stdout.
    Timestamps/metadata are preserved via ``shutil.copy2``.
    """
    config_files = [
        "config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "special_tokens_map.json",
        "generation_config.json",
        "preprocessor_config.json",
        "chat_template.json",
    ]

    for filename in config_files:
        src_file = src_dir / filename
        if src_file.exists():
            shutil.copy2(src_file, dst_dir / filename)
            # Fix: the message previously never interpolated the filename
            # (it printed the literal "Copied (unknown)").
            print(f"Copied {filename}")
|
|
|
|
def main():
    """CLI entry point: load both checkpoints, graft, save, copy configs.

    Pipeline:
      1. Load INTELLECT-3 and GLM-4.6V safetensors fully into CPU RAM.
      2. Graft the INTELLECT-3 transformer layers + final norm into the
         GLM-4.6V language model (embeddings/head/vision untouched).
      3. Save the merged state dict as sharded safetensors to --output.
      4. Copy GLM-4.6V's config/tokenizer files so the output directory
         is directly loadable.
    """
    parser = argparse.ArgumentParser(
        description="Graft INTELLECT-3 weights into GLM-4.6V"
    )
    parser.add_argument(
        "--intellect3",
        type=Path,
        default=Path.home() / "models" / "INTELLECT-3",
        help="Path to INTELLECT-3 model directory"
    )
    parser.add_argument(
        "--glm",
        type=Path,
        default=Path.home() / "models" / "GLM-4.6V",
        help="Path to GLM-4.6V model directory"
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path.home() / "models" / "INTELLECT-3-V",
        help="Path to output directory"
    )
    parser.add_argument(
        "--shard-size",
        type=int,
        default=5,
        help="Maximum shard size in GB (default: 5)"
    )

    args = parser.parse_args()

    print("=" * 60)
    print("INTELLECT-3 -> GLM-4.6V Weight Grafting")
    print("=" * 60)
    print(f"INTELLECT-3 source: {args.intellect3}")
    print(f"GLM-4.6V source: {args.glm}")
    print(f"Output directory: {args.output}")
    print("=" * 60)

    # Fail fast on bad paths before committing to huge CPU-memory loads.
    if not args.intellect3.exists():
        raise FileNotFoundError(f"INTELLECT-3 directory not found: {args.intellect3}")
    if not args.glm.exists():
        raise FileNotFoundError(f"GLM-4.6V directory not found: {args.glm}")

    print("\nStep 1: Loading models into CPU memory...")
    intellect3_state = load_state_dict_from_safetensors(args.intellect3)
    glm_state = load_state_dict_from_safetensors(args.glm)

    print(f"\nINTELLECT-3 tensors: {len(intellect3_state)}")
    print(f"GLM-4.6V tensors: {len(glm_state)}")

    print("\nStep 2: Grafting INTELLECT-3 weights into GLM-4.6V...")
    grafted_state = graft_weights(intellect3_state, glm_state)

    # Drop the source dicts before saving; grafted_state keeps references
    # to every tensor it needs, so only the skipped (embed/head) INTELLECT-3
    # tensors are actually freed here.
    del intellect3_state
    del glm_state

    print("\nStep 3: Saving grafted model...")
    save_state_dict_to_safetensors(
        grafted_state,
        args.output,
        # --shard-size is given in GB; convert to bytes.
        max_shard_size=args.shard_size * 1024 ** 3
    )

    print("\nStep 4: Copying config files from GLM-4.6V...")
    copy_config_files(args.glm, args.output)

    print("\n" + "=" * 60)
    print("Done! Grafted model saved to:", args.output)
    print("=" * 60)
|
|
|
|
# Script entry point: run the grafting pipeline only when executed directly.
if __name__ == "__main__":
    main()
|
|