---
library_name: vllm-omni
pipeline_tag: text-to-image
inference: true
base_model:
- ByteDance-Seed/BAGEL-7B-MoT
---

This tiny model is for debugging. It is randomly initialized with the config adapted from [ByteDance-Seed/BAGEL-7B-MoT](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT).

File sizes:

- ~335MB `ae.safetensors` (VAE, full architecture — hardcoded in vllm-omni)
- ~41MB `ema.safetensors` (LLM + ViT + connectors, 1 layer each)

## Acknowledgements

🎉 Special thanks to [@zhengyuansu](https://huggingface.co/zhengyuansu) for contributing this model!

### Example usage:

```python
from vllm_omni.entrypoints.omni import Omni

omni = Omni(
    # model="zhengyuansu/bagel-tiny-random",
    model="tiny-random/bagel",
    stage_configs_path="path/to/bagel_sharedmemory_2gpu_ci.yaml",
    custom_pipeline_args={
        "pipeline_class": "examples.flowgrpo_trainer.vllm_omni.pipeline_bagel.BagelPipelineWithLogProb"
    },
)
params_list = omni.default_sampling_params_list
params_list[1].num_inference_steps = 10
params_list[1].extra_args = {"cfg_text_scale": 4.0, "cfg_img_scale": 1.5}
outputs = list(omni.generate(
    prompts=[{"prompt": "a cute cat", "modalities": ["image"]}],
    sampling_params_list=params_list,
))
```

### Codes to create this repo:

```python
"""Create a tiny-random BAGEL model for CI testing.

Reads real BAGEL-7B-MoT checkpoint weight names, creates matching tiny random
tensors with scaled-down dimensions. VAE architecture is hardcoded in vllm-omni
and cannot be shrunk, so VAE weights are kept at full size.
Usage: python scripts/create_tiny_bagel.py --source ByteDance-Seed/BAGEL-7B-MoT """ import argparse import json import os import re import shutil import torch from safetensors import safe_open from safetensors.torch import save_file # LLM/ViT dimension shrinkage EMA_DIM_MAP = { 3584: 64, # LLM hidden_size 18944: 128, # LLM intermediate_size 1152: 64, # ViT hidden_size 4304: 128, # ViT intermediate_size 128: 32, # head_dim 512: 64, # kv_proj dim } # VAE: keep original dims (architecture is hardcoded in vllm-omni) VAE_DIM_MAP = {} MAX_LLM_LAYERS = 1 MAX_VIT_LAYERS = 1 def shrink_dims(shape, dim_map): return [dim_map.get(d, d) for d in shape] def create_tiny_configs(source_dir, output_dir): with open(os.path.join(source_dir, "config.json")) as f: config = json.load(f) llm = config["llm_config"] llm["hidden_size"] = 64 llm["num_hidden_layers"] = MAX_LLM_LAYERS llm["num_attention_heads"] = 2 llm["num_key_value_heads"] = 2 llm["intermediate_size"] = 128 llm["max_position_embeddings"] = 4096 llm["max_window_layers"] = MAX_LLM_LAYERS vit = config["vit_config"] vit["hidden_size"] = 64 vit["num_hidden_layers"] = MAX_VIT_LAYERS vit["num_attention_heads"] = 2 vit["intermediate_size"] = 128 with open(os.path.join(output_dir, "config.json"), "w") as f: json.dump(config, f, indent=4) llm_standalone = dict(llm) llm_standalone["qk_norm"] = True llm_standalone["tie_word_embeddings"] = False with open(os.path.join(output_dir, "llm_config.json"), "w") as f: json.dump(llm_standalone, f, indent=4) with open(os.path.join(output_dir, "vit_config.json"), "w") as f: json.dump(dict(vit), f, indent=4) return config def create_tiny_weights(source_path, dim_map, max_layers, seed=42): gen = torch.Generator().manual_seed(seed) weights = {} is_vae = "ae" in os.path.basename(source_path).lower() dtype = torch.float32 if is_vae else torch.bfloat16 with safe_open(source_path, framework="pt") as f: for name in f.keys(): m = re.search(r"\.layers\.(\d+)\.", name) if m: idx = int(m.group(1)) for pattern, 
limit in max_layers.items(): if pattern in name and idx >= limit: break else: pass if m and any(p in name for p in max_layers) and idx >= max_layers.get( next((p for p in max_layers if p in name), ""), 999 ): continue real_shape = list(f.get_tensor(name).shape) tiny_shape = shrink_dims(real_shape, dim_map) if "norm" in name and len(tiny_shape) == 1: weights[name] = torch.ones(tiny_shape, dtype=dtype) else: weights[name] = torch.randn(tiny_shape, generator=gen, dtype=dtype) * 0.02 return weights def main(): parser = argparse.ArgumentParser() parser.add_argument("--source", default="ByteDance-Seed/BAGEL-7B-MoT") parser.add_argument("--output", default=os.path.expanduser("~/models/tiny-random/BAGEL-7B-MoT")) args = parser.parse_args() source_dir = args.source if not os.path.exists(os.path.join(source_dir, "config.json")): from huggingface_hub import snapshot_download source_dir = snapshot_download(source_dir) output_dir = args.output os.makedirs(output_dir, exist_ok=True) create_tiny_configs(source_dir, output_dir) for fname in ["generation_config.json", "preprocessor_config.json", "tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt"]: src = os.path.join(source_dir, fname) if os.path.exists(src): shutil.copy2(src, os.path.join(output_dir, fname)) ema = create_tiny_weights( os.path.join(source_dir, "ema.safetensors"), dim_map=EMA_DIM_MAP, max_layers={"language_model": MAX_LLM_LAYERS, "vit_model": MAX_VIT_LAYERS}, seed=42, ) save_file(ema, os.path.join(output_dir, "ema.safetensors")) vae = create_tiny_weights( os.path.join(source_dir, "ae.safetensors"), dim_map=VAE_DIM_MAP, max_layers={}, seed=43, ) save_file(vae, os.path.join(output_dir, "ae.safetensors")) weight_map = {k: "ema.safetensors" for k in ema} weight_map.update({k: "ae.safetensors" for k in vae}) total_size = sum(t.numel() * t.element_size() for t in ema.values()) total_size += sum(t.numel() * t.element_size() for t in vae.values()) with open(os.path.join(output_dir, 
"model.safetensors.index.json"), "w") as f: json.dump({"metadata": {"total_size": total_size}, "weight_map": weight_map}, f, indent=4) if __name__ == "__main__": main() ```