File size: 6,454 Bytes

---
library_name: vllm-omni
pipeline_tag: text-to-image
inference: true
base_model:
- ByteDance-Seed/BAGEL-7B-MoT
---

This tiny model is for debugging. It is randomly initialized with the config adapted from [ByteDance-Seed/BAGEL-7B-MoT](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT).

File sizes:
- ~335MB ae.safetensors (VAE, full architecture — hardcoded in vllm-omni)
- ~41MB  ema.safetensors (LLM + ViT + connectors, 1 layer each)

## Acknowledgements

🎉 Special thanks to [@zhengyuansu](https://huggingface.co/zhengyuansu) for contributing this model!


### Example usage:

```python
from vllm_omni.entrypoints.omni import Omni

# Build the engine from this repo's tiny weights, a stage config file and a
# custom pipeline class.
omni = Omni(
    # model="zhengyuansu/bagel-tiny-random",
    model="tiny-random/bagel",
    # Placeholder path: point this at your actual stage config YAML.
    stage_configs_path="path/to/bagel_sharedmemory_2gpu_ci.yaml",
    custom_pipeline_args={
        "pipeline_class": "examples.flowgrpo_trainer.vllm_omni.pipeline_bagel.BagelPipelineWithLogProb"
    },
)

# Start from the engine's default sampling params and override entry 1.
# NOTE(review): index 1 is presumably the image-generation stage — confirm
# against the stage config in use.
params_list = omni.default_sampling_params_list
params_list[1].num_inference_steps = 10
params_list[1].extra_args = {"cfg_text_scale": 4.0, "cfg_img_scale": 1.5}

# generate() is wrapped in list() so all outputs are collected before use.
outputs = list(omni.generate(
    prompts=[{"prompt": "a cute cat", "modalities": ["image"]}],
    sampling_params_list=params_list,
))
```

### Code to create this repo:

```python
"""Create a tiny-random BAGEL model for CI testing.

Reads real BAGEL-7B-MoT checkpoint weight names, creates matching tiny random
tensors with scaled-down dimensions. VAE architecture is hardcoded in vllm-omni
and cannot be shrunk, so VAE weights are kept at full size.

Usage:
    python scripts/create_tiny_bagel.py --source ByteDance-Seed/BAGEL-7B-MoT
"""

import argparse
import json
import os
import re
import shutil

import torch
from safetensors import safe_open
from safetensors.torch import save_file

# LLM/ViT dimension shrinkage: {size in the real checkpoint: size in the tiny model}.
# shrink_dims() matches purely by value, so every tensor dimension equal to a
# key is replaced regardless of which axis or module it belongs to. The lookup
# is applied once per dimension, so 18944 -> 128 is NOT shrunk again by the
# 128 -> 32 entry.
EMA_DIM_MAP = {
    3584: 64,     # LLM hidden_size
    18944: 128,   # LLM intermediate_size
    1152: 64,     # ViT hidden_size
    4304: 128,    # ViT intermediate_size
    128: 32,      # head_dim
    512: 64,      # kv_proj dim
}

# VAE: keep original dims (architecture is hardcoded in vllm-omni)
# An empty map makes shrink_dims() return every shape unchanged.
VAE_DIM_MAP = {}

# Number of transformer layers kept per sub-model. create_tiny_configs() writes
# these counts into the configs, and main() passes them to create_tiny_weights()
# so tensors belonging to higher layer indices are dropped.
MAX_LLM_LAYERS = 1
MAX_VIT_LAYERS = 1


def shrink_dims(shape, dim_map):
    """Return ``shape`` as a list with mapped sizes replaced by their tiny values.

    Sizes are matched by value, not by axis position: each entry of ``shape``
    that appears as a key in ``dim_map`` is swapped for the mapped value, and
    every other entry is passed through unchanged. The lookup is applied once
    per entry, so a replacement value is never mapped a second time.
    """
    shrunk = []
    for size in shape:
        shrunk.append(dim_map.get(size, size))
    return shrunk


def create_tiny_configs(source_dir, output_dir):
    """Write scaled-down copies of the source model's config files.

    Loads ``config.json`` from ``source_dir``, overwrites the size-related
    fields of its ``llm_config`` and ``vit_config`` sections in place, and
    writes three files into ``output_dir``:

    * ``config.json``     - the full config with the tiny LLM/ViT sections
    * ``llm_config.json`` - the LLM section plus ``qk_norm`` and
      ``tie_word_embeddings``
    * ``vit_config.json`` - the ViT section

    Returns the modified full config dict.
    """

    def write_json(filename, payload):
        with open(os.path.join(output_dir, filename), "w") as out:
            json.dump(payload, out, indent=4)

    with open(os.path.join(source_dir, "config.json")) as src:
        config = json.load(src)

    # Both sections are mutated in place, so ``config`` picks up the changes.
    llm = config["llm_config"]
    llm.update(
        {
            "hidden_size": 64,
            "num_hidden_layers": MAX_LLM_LAYERS,
            "num_attention_heads": 2,
            "num_key_value_heads": 2,
            "intermediate_size": 128,
            "max_position_embeddings": 4096,
            "max_window_layers": MAX_LLM_LAYERS,
        }
    )

    vit = config["vit_config"]
    vit.update(
        {
            "hidden_size": 64,
            "num_hidden_layers": MAX_VIT_LAYERS,
            "num_attention_heads": 2,
            "intermediate_size": 128,
        }
    )

    write_json("config.json", config)
    # The standalone LLM config is a copy, so the extra keys do not leak
    # into the returned ``config``.
    write_json(
        "llm_config.json",
        {**llm, "qk_norm": True, "tie_word_embeddings": False},
    )
    write_json("vit_config.json", dict(vit))

    return config


def create_tiny_weights(source_path, dim_map, max_layers, seed=42):
    """Build random stand-in tensors mirroring a safetensors checkpoint.

    Only tensor names and shapes are read from the source file; the tensor
    data itself is never loaded.

    Args:
        source_path: Path to the reference ``.safetensors`` file.
        dim_map: ``{original_size: tiny_size}`` mapping applied to every
            dimension of every tensor via ``shrink_dims``.
        max_layers: ``{name_substring: layer_limit}``. A tensor whose name
            contains ``.layers.<idx>.`` and one of the substrings is skipped
            when ``idx >= layer_limit`` (the first matching substring
            decides). Pass ``{}`` to keep every layer.
        seed: Seed for the ``torch.Generator`` that drives ``torch.randn``.

    Returns:
        Dict mapping tensor name to a new tensor. Tensors are float32 when
        the file's basename contains ``"ae"`` and bfloat16 otherwise. Every
        1-D tensor with ``"norm"`` in its name is all ones; all other tensors
        are standard-normal samples scaled by 0.02.
    """
    gen = torch.Generator().manual_seed(seed)
    layer_re = re.compile(r"\.layers\.(\d+)\.")

    # NOTE: this is a plain substring test on the lower-cased basename, so any
    # file name containing "ae" is treated as the VAE checkpoint.
    is_vae = "ae" in os.path.basename(source_path).lower()
    dtype = torch.float32 if is_vae else torch.bfloat16

    weights = {}
    with safe_open(source_path, framework="pt") as f:
        for name in f.keys():
            layer = layer_re.search(name)
            if layer:
                # First pattern contained in the name supplies the cap; names
                # matching no pattern are never dropped.
                limit = next(
                    (cap for pattern, cap in max_layers.items() if pattern in name),
                    None,
                )
                if limit is not None and int(layer.group(1)) >= limit:
                    continue

            # get_slice() exposes the shape from the file header, avoiding a
            # full read of each (potentially multi-GB) source tensor.
            real_shape = list(f.get_slice(name).get_shape())
            tiny_shape = shrink_dims(real_shape, dim_map)

            if "norm" in name and len(tiny_shape) == 1:
                weights[name] = torch.ones(tiny_shape, dtype=dtype)
            else:
                weights[name] = torch.randn(tiny_shape, generator=gen, dtype=dtype) * 0.02

    return weights


def main():
    """Command-line entry point: generate the tiny model directory.

    ``--source`` is used as a local directory when it contains a
    ``config.json``; otherwise it is handed to
    ``huggingface_hub.snapshot_download``. ``--output`` is the directory that
    receives the tiny configs, the copied tokenizer/preprocessor files, the
    two weight files and ``model.safetensors.index.json``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--source", default="ByteDance-Seed/BAGEL-7B-MoT")
    cli.add_argument("--output", default=os.path.expanduser("~/models/tiny-random/BAGEL-7B-MoT"))
    opts = cli.parse_args()

    src_root = opts.source
    has_local_config = os.path.exists(os.path.join(src_root, "config.json"))
    if not has_local_config:
        from huggingface_hub import snapshot_download
        src_root = snapshot_download(src_root)

    dst_root = opts.output
    os.makedirs(dst_root, exist_ok=True)

    create_tiny_configs(src_root, dst_root)

    # Auxiliary files are copied verbatim; ones missing from the source are skipped.
    passthrough_files = (
        "generation_config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
    )
    for fname in passthrough_files:
        origin = os.path.join(src_root, fname)
        if not os.path.exists(origin):
            continue
        shutil.copy2(origin, os.path.join(dst_root, fname))

    # (file name, dimension map, per-submodel layer caps, RNG seed)
    shard_specs = (
        (
            "ema.safetensors",
            EMA_DIM_MAP,
            {"language_model": MAX_LLM_LAYERS, "vit_model": MAX_VIT_LAYERS},
            42,
        ),
        ("ae.safetensors", VAE_DIM_MAP, {}, 43),
    )

    weight_map = {}
    total_size = 0
    for shard_name, dims, layer_caps, shard_seed in shard_specs:
        tensors = create_tiny_weights(
            os.path.join(src_root, shard_name),
            dim_map=dims,
            max_layers=layer_caps,
            seed=shard_seed,
        )
        save_file(tensors, os.path.join(dst_root, shard_name))
        for tensor_name, tensor in tensors.items():
            # A name present in both shards ends up attributed to the later one.
            weight_map[tensor_name] = shard_name
            total_size += tensor.numel() * tensor.element_size()

    index_path = os.path.join(dst_root, "model.safetensors.index.json")
    with open(index_path, "w") as f:
        json.dump({"metadata": {"total_size": total_size}, "weight_map": weight_map}, f, indent=4)


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
```