library_name: vllm-omni
pipeline_tag: text-to-image
inference: true
base_model:
- ByteDance-Seed/BAGEL-7B-MoT
---
This tiny model is for debugging. It is randomly initialized with the config adapted from [ByteDance-Seed/BAGEL-7B-MoT](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT).
File sizes:
- ~335MB ae.safetensors (VAE, full architecture — hardcoded in vllm-omni)
- ~41MB ema.safetensors (LLM + ViT + connectors, 1 layer each)
## Acknowledgements
🎉 Special thanks to [@zhengyuansu](https://huggingface.co/zhengyuansu) for contributing this model!
### Example usage:
```python
from vllm_omni.entrypoints.omni import Omni

# Build the multi-stage BAGEL pipeline from this tiny checkpoint.
omni = Omni(
    # model="zhengyuansu/bagel-tiny-random",
    model="tiny-random/bagel",
    stage_configs_path="path/to/bagel_sharedmemory_2gpu_ci.yaml",
    custom_pipeline_args={
        "pipeline_class": "examples.flowgrpo_trainer.vllm_omni.pipeline_bagel.BagelPipelineWithLogProb"
    },
)

# Tweak sampling params for stage index 1 (presumably the image-generation
# stage — confirm against the stage config): few steps, CFG scales set.
params_list = omni.default_sampling_params_list
params_list[1].num_inference_steps = 10
params_list[1].extra_args = {"cfg_text_scale": 4.0, "cfg_img_scale": 1.5}

# generate() yields results lazily; materialize them into a list.
outputs = list(omni.generate(
    prompts=[{"prompt": "a cute cat", "modalities": ["image"]}],
    sampling_params_list=params_list,
))
```
### Codes to create this repo:
```python
"""Create a tiny-random BAGEL model for CI testing.
Reads real BAGEL-7B-MoT checkpoint weight names, creates matching tiny random
tensors with scaled-down dimensions. VAE architecture is hardcoded in vllm-omni
and cannot be shrunk, so VAE weights are kept at full size.
Usage:
python scripts/create_tiny_bagel.py --source ByteDance-Seed/BAGEL-7B-MoT
"""
import argparse
import json
import os
import re
import shutil
import torch
from safetensors import safe_open
from safetensors.torch import save_file
# LLM/ViT dimension shrinkage: maps a real BAGEL-7B-MoT tensor dimension to
# its tiny-random replacement. Dimensions not listed are kept unchanged.
EMA_DIM_MAP = {
    3584: 64,    # LLM hidden_size
    18944: 128,  # LLM intermediate_size
    1152: 64,    # ViT hidden_size
    4304: 128,   # ViT intermediate_size
    128: 32,     # head_dim
    512: 64,     # kv_proj dim
}
# VAE: keep original dims (architecture is hardcoded in vllm-omni).
VAE_DIM_MAP = {}
# Keep only the first transformer layer of the LLM and of the ViT.
MAX_LLM_LAYERS = 1
MAX_VIT_LAYERS = 1
def shrink_dims(shape, dim_map):
    """Return *shape* with each dimension replaced by its entry in *dim_map*.

    Dimensions without a mapping are kept at their original size.
    """
    tiny = []
    for dim in shape:
        tiny.append(dim_map.get(dim, dim))
    return tiny
def create_tiny_configs(source_dir, output_dir):
    """Shrink the source BAGEL config and write the tiny config JSON files.

    Reads ``config.json`` from *source_dir*, overwrites the LLM/ViT sizes
    with tiny values, and writes ``config.json``, ``llm_config.json`` and
    ``vit_config.json`` into *output_dir*. Returns the shrunken config dict.
    """
    with open(os.path.join(source_dir, "config.json")) as f:
        config = json.load(f)

    llm_cfg = config["llm_config"]
    llm_cfg.update(
        hidden_size=64,
        num_hidden_layers=MAX_LLM_LAYERS,
        num_attention_heads=2,
        num_key_value_heads=2,
        intermediate_size=128,
        max_position_embeddings=4096,
        max_window_layers=MAX_LLM_LAYERS,
    )

    vit_cfg = config["vit_config"]
    vit_cfg.update(
        hidden_size=64,
        num_hidden_layers=MAX_VIT_LAYERS,
        num_attention_heads=2,
        intermediate_size=128,
    )

    def _dump(obj, fname):
        # Write one JSON file into output_dir with 4-space indentation.
        with open(os.path.join(output_dir, fname), "w") as fh:
            json.dump(obj, fh, indent=4)

    _dump(config, "config.json")

    # Standalone LLM config with two extra flags set explicitly.
    llm_standalone = dict(llm_cfg)
    llm_standalone["qk_norm"] = True
    llm_standalone["tie_word_embeddings"] = False
    _dump(llm_standalone, "llm_config.json")

    _dump(dict(vit_cfg), "vit_config.json")
    return config
def create_tiny_weights(source_path, dim_map, max_layers, seed=42):
    """Build deterministic random tensors matching the names in *source_path*.

    Args:
        source_path: Path to a real ``.safetensors`` checkpoint shard.
        dim_map: Mapping of real dimension size -> shrunken size; sizes not
            present are kept unchanged (see ``shrink_dims``).
        max_layers: Mapping of name substring (e.g. ``"language_model"``) ->
            number of transformer layers to keep; tensors whose layer index
            is at or beyond the limit for the first matching substring are
            dropped.
        seed: Seed for the torch random generator (deterministic output).

    Returns:
        dict mapping tensor name -> tiny tensor. 1-D "norm" weights are
        all-ones; everything else is N(0, 0.02^2). Shards whose filename
        contains "ae" (the VAE) are float32, others bfloat16.
    """
    gen = torch.Generator().manual_seed(seed)
    weights = {}
    is_vae = "ae" in os.path.basename(source_path).lower()
    dtype = torch.float32 if is_vae else torch.bfloat16
    with safe_open(source_path, framework="pt") as f:
        for name in f.keys():
            # Skip layers beyond the configured limit. The original code had
            # a dead for/else block here and a duplicated condition; this is
            # the same skip rule, stated once: take the limit of the first
            # matching substring pattern, drop the tensor if idx >= limit.
            m = re.search(r"\.layers\.(\d+)\.", name)
            if m:
                idx = int(m.group(1))
                limit = next(
                    (lim for pat, lim in max_layers.items() if pat in name),
                    None,
                )
                if limit is not None and idx >= limit:
                    continue
            real_shape = list(f.get_tensor(name).shape)
            tiny_shape = shrink_dims(real_shape, dim_map)
            if "norm" in name and len(tiny_shape) == 1:
                # Norm scales start at 1.0 so activations are not zeroed out.
                weights[name] = torch.ones(tiny_shape, dtype=dtype)
            else:
                weights[name] = torch.randn(tiny_shape, generator=gen, dtype=dtype) * 0.02
    return weights
def main():
    """CLI entry point: build the tiny-random BAGEL repo from a real checkpoint."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", default="ByteDance-Seed/BAGEL-7B-MoT")
    parser.add_argument("--output", default=os.path.expanduser("~/models/tiny-random/BAGEL-7B-MoT"))
    args = parser.parse_args()

    # Resolve --source to a local snapshot when it is a hub repo id rather
    # than an existing local directory.
    source_dir = args.source
    if not os.path.exists(os.path.join(source_dir, "config.json")):
        from huggingface_hub import snapshot_download
        source_dir = snapshot_download(source_dir)

    output_dir = args.output
    os.makedirs(output_dir, exist_ok=True)

    create_tiny_configs(source_dir, output_dir)

    # Copy tokenizer/preprocessor assets verbatim when present.
    aux_files = [
        "generation_config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
    ]
    for fname in aux_files:
        src = os.path.join(source_dir, fname)
        if os.path.exists(src):
            shutil.copy2(src, os.path.join(output_dir, fname))

    # Tiny LLM/ViT weights: shrink dimensions, keep only the first layer(s).
    ema = create_tiny_weights(
        os.path.join(source_dir, "ema.safetensors"),
        dim_map=EMA_DIM_MAP,
        max_layers={"language_model": MAX_LLM_LAYERS, "vit_model": MAX_VIT_LAYERS},
        seed=42,
    )
    save_file(ema, os.path.join(output_dir, "ema.safetensors"))

    # VAE weights: full-size shapes (architecture is hardcoded in vllm-omni).
    vae = create_tiny_weights(
        os.path.join(source_dir, "ae.safetensors"),
        dim_map=VAE_DIM_MAP,
        max_layers={},
        seed=43,
    )
    save_file(vae, os.path.join(output_dir, "ae.safetensors"))

    # Index file mapping every tensor name to its shard, plus the total bytes.
    weight_map = {name: "ema.safetensors" for name in ema}
    weight_map.update({name: "ae.safetensors" for name in vae})
    total_size = sum(t.numel() * t.element_size() for t in ema.values())
    total_size += sum(t.numel() * t.element_size() for t in vae.values())
    with open(os.path.join(output_dir, "model.safetensors.index.json"), "w") as f:
        json.dump({"metadata": {"total_size": total_size}, "weight_map": weight_map}, f, indent=4)


if __name__ == "__main__":
    main()
```