| --- |
| library_name: vllm-omni |
| pipeline_tag: text-to-image |
| inference: true |
| base_model: |
| - ByteDance-Seed/BAGEL-7B-MoT |
| --- |
| |
| This tiny model is for debugging. It is randomly initialized with the config adapted from [ByteDance-Seed/BAGEL-7B-MoT](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT). |
|
|
| File sizes: |
| - ~335MB ae.safetensors (VAE, full architecture — hardcoded in vllm-omni) |
| - ~41MB ema.safetensors (LLM + ViT + connectors, 1 layer each) |
|
|
| ## Acknowledgements |
|
|
| 🎉 Special thanks to [@zhengyuansu](https://huggingface.co/zhengyuansu) for contributing this model! |
|
|
|
|
| ### Example usage: |
|
|
| ```python |
from vllm_omni.entrypoints.omni import Omni

# Build the multi-stage Omni pipeline around the tiny debug checkpoint,
# swapping in a custom BAGEL pipeline class via custom_pipeline_args.
omni = Omni(
    # model="zhengyuansu/bagel-tiny-random",
    model="tiny-random/bagel",
    stage_configs_path="path/to/bagel_sharedmemory_2gpu_ci.yaml",
    custom_pipeline_args={
        "pipeline_class": "examples.flowgrpo_trainer.vllm_omni.pipeline_bagel.BagelPipelineWithLogProb"
    },
)

# Tune the per-stage sampling parameters before generating.
# NOTE(review): index 1 presumably addresses the image-generation stage of the
# YAML stage config — confirm against bagel_sharedmemory_2gpu_ci.yaml.
params_list = omni.default_sampling_params_list
params_list[1].num_inference_steps = 10
params_list[1].extra_args = {"cfg_text_scale": 4.0, "cfg_img_scale": 1.5}

# generate() is consumed into a list; each prompt requests an image modality.
outputs = list(omni.generate(
    prompts=[{"prompt": "a cute cat", "modalities": ["image"]}],
    sampling_params_list=params_list,
))
| ``` |
|
|
| ### Code used to create this repo: |
|
|
| ```python |
| """Create a tiny-random BAGEL model for CI testing. |
| |
| Reads real BAGEL-7B-MoT checkpoint weight names, creates matching tiny random |
| tensors with scaled-down dimensions. VAE architecture is hardcoded in vllm-omni |
| and cannot be shrunk, so VAE weights are kept at full size. |
| |
| Usage: |
| python scripts/create_tiny_bagel.py --source ByteDance-Seed/BAGEL-7B-MoT |
| """ |
| |
| import argparse |
| import json |
| import os |
| import re |
| import shutil |
| |
| import torch |
| from safetensors import safe_open |
| from safetensors.torch import save_file |
| |
# LLM/ViT dimension shrinkage: original checkpoint dim -> tiny dim.
# Any dimension not listed here is kept at its original size.
EMA_DIM_MAP = {
    3584: 64,  # LLM hidden_size
    18944: 128,  # LLM intermediate_size
    1152: 64,  # ViT hidden_size
    4304: 128,  # ViT intermediate_size
    128: 32,  # head_dim
    512: 64,  # kv_proj dim
}

# VAE: keep original dims (architecture is hardcoded in vllm-omni),
# so the map is intentionally empty — no dimension is remapped.
VAE_DIM_MAP = {}

# Keep a single transformer layer per component in the tiny checkpoint.
MAX_LLM_LAYERS = 1
MAX_VIT_LAYERS = 1
| |
| |
def shrink_dims(shape, dim_map):
    """Return *shape* as a list with each dim remapped through *dim_map*.

    Dimensions absent from *dim_map* pass through unchanged.
    """
    return [dim_map.get(dim, dim) for dim in shape]
| |
| |
def create_tiny_configs(source_dir, output_dir):
    """Write shrunken config files for the tiny checkpoint.

    Loads ``config.json`` from *source_dir*, scales the LLM and ViT
    sub-configs down to tiny dimensions, and writes three files into
    *output_dir*: the combined ``config.json`` plus standalone
    ``llm_config.json`` and ``vit_config.json``.

    Returns:
        The mutated combined config dict.
    """
    with open(os.path.join(source_dir, "config.json")) as fh:
        config = json.load(fh)

    # Shrink the LLM sub-config in place (existing keys keep their order).
    config["llm_config"].update({
        "hidden_size": 64,
        "num_hidden_layers": MAX_LLM_LAYERS,
        "num_attention_heads": 2,
        "num_key_value_heads": 2,
        "intermediate_size": 128,
        "max_position_embeddings": 4096,
        "max_window_layers": MAX_LLM_LAYERS,
    })

    # Shrink the ViT sub-config the same way.
    config["vit_config"].update({
        "hidden_size": 64,
        "num_hidden_layers": MAX_VIT_LAYERS,
        "num_attention_heads": 2,
        "intermediate_size": 128,
    })

    def _dump(fname, payload):
        # All configs use 4-space JSON indents, matching the upstream repo.
        with open(os.path.join(output_dir, fname), "w") as fh:
            json.dump(payload, fh, indent=4)

    _dump("config.json", config)

    # The standalone LLM config carries two keys the combined config omits.
    llm_standalone = dict(config["llm_config"])
    llm_standalone["qk_norm"] = True
    llm_standalone["tie_word_embeddings"] = False
    _dump("llm_config.json", llm_standalone)

    _dump("vit_config.json", dict(config["vit_config"]))

    return config
| |
| |
def create_tiny_weights(source_path, dim_map, max_layers, seed=42):
    """Create tiny random tensors matching the names in a safetensors file.

    Reads tensor names/shapes from *source_path*, shrinks each dimension via
    *dim_map*, and drops transformer layers beyond the per-component limits
    in *max_layers*.

    Args:
        source_path: Path to the source ``.safetensors`` checkpoint.
        dim_map: Mapping of original dim size -> shrunken dim size.
        max_layers: Mapping of name-substring pattern (e.g. "language_model")
            -> number of layers to keep for names matching that pattern.
        seed: Torch RNG seed so output is reproducible.

    Returns:
        Dict of tensor name -> random tensor (float32 for VAE files whose
        basename contains "ae", bfloat16 otherwise).
    """
    gen = torch.Generator().manual_seed(seed)
    weights = {}
    # VAE weights stay float32; everything else uses bfloat16.
    is_vae = "ae" in os.path.basename(source_path).lower()
    dtype = torch.float32 if is_vae else torch.bfloat16

    with safe_open(source_path, framework="pt") as f:
        for name in f.keys():
            # Skip layers past the configured limit for the matching
            # component, e.g. keep only layer 0 of "language_model.…layers.N…".
            # (Replaces a dead for/else loop plus a duplicated, convoluted
            # condition that implemented the same skip.)
            m = re.search(r"\.layers\.(\d+)\.", name)
            if m:
                idx = int(m.group(1))
                pattern = next((p for p in max_layers if p in name), None)
                if pattern is not None and idx >= max_layers[pattern]:
                    continue

            real_shape = list(f.get_tensor(name).shape)
            tiny_shape = shrink_dims(real_shape, dim_map)

            # Norm scales start at 1.0; all other weights get small noise.
            if "norm" in name and len(tiny_shape) == 1:
                weights[name] = torch.ones(tiny_shape, dtype=dtype)
            else:
                weights[name] = torch.randn(tiny_shape, generator=gen, dtype=dtype) * 0.02

    return weights
| |
| |
def main():
    """Assemble the tiny-random BAGEL repo from a real checkpoint."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", default="ByteDance-Seed/BAGEL-7B-MoT")
    parser.add_argument("--output", default=os.path.expanduser("~/models/tiny-random/BAGEL-7B-MoT"))
    args = parser.parse_args()

    # Resolve --source to a local directory; download from the Hub when the
    # path does not already hold a config.json.
    source_dir = args.source
    if not os.path.exists(os.path.join(source_dir, "config.json")):
        from huggingface_hub import snapshot_download
        source_dir = snapshot_download(source_dir)

    output_dir = args.output
    os.makedirs(output_dir, exist_ok=True)

    create_tiny_configs(source_dir, output_dir)

    # Tokenizer / preprocessor files are copied through unchanged when present.
    aux_files = (
        "generation_config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
    )
    for fname in aux_files:
        src = os.path.join(source_dir, fname)
        if os.path.exists(src):
            shutil.copy2(src, os.path.join(output_dir, fname))

    # LLM + ViT + connectors: shrunken dims, one layer per component.
    ema = create_tiny_weights(
        os.path.join(source_dir, "ema.safetensors"),
        dim_map=EMA_DIM_MAP,
        max_layers={"language_model": MAX_LLM_LAYERS, "vit_model": MAX_VIT_LAYERS},
        seed=42,
    )
    save_file(ema, os.path.join(output_dir, "ema.safetensors"))

    # VAE: full-size dims (architecture hardcoded downstream), all layers kept.
    vae = create_tiny_weights(
        os.path.join(source_dir, "ae.safetensors"),
        dim_map=VAE_DIM_MAP,
        max_layers={},
        seed=43,
    )
    save_file(vae, os.path.join(output_dir, "ae.safetensors"))

    # Index file: map every tensor to its shard and record the total byte size.
    weight_map = {name: "ema.safetensors" for name in ema}
    weight_map.update({name: "ae.safetensors" for name in vae})
    total_size = sum(
        t.numel() * t.element_size()
        for t in list(ema.values()) + list(vae.values())
    )
    with open(os.path.join(output_dir, "model.safetensors.index.json"), "w") as fh:
        json.dump({"metadata": {"total_size": total_size}, "weight_map": weight_map}, fh, indent=4)


if __name__ == "__main__":
    main()
| ``` |
|
|