| --- |
| library_name: vllm-omni |
| pipeline_tag: text-to-image |
| inference: true |
| base_model: |
| - ByteDance-Seed/BAGEL-7B-MoT |
| --- |
| |
| This tiny model is for debugging. It is randomly initialized with the config adapted from [ByteDance-Seed/BAGEL-7B-MoT](https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT). |
|
|
| File sizes: |
| - ~335MB ae.safetensors (VAE, full architecture — hardcoded in vllm-omni) |
| - ~41MB ema.safetensors (LLM + ViT + connectors, 1 layer each) |
|
|
| ## Acknowledgements |
|
|
| 🎉 Special thanks to [@zhengyuansu](https://huggingface.co/zhengyuansu) for contributing this model! |
|
|
|
|
| ### Example usage: |
|
|
| ```python |
from vllm_omni.entrypoints.omni import Omni

# Build the multi-stage Omni pipeline around the tiny debug checkpoint,
# swapping in a custom BAGEL pipeline class via custom_pipeline_args.
omni = Omni(
    # model="zhengyuansu/bagel-tiny-random",
    model="tiny-random/bagel",
    stage_configs_path="path/to/bagel_sharedmemory_2gpu_ci.yaml",
    custom_pipeline_args={
        "pipeline_class": "examples.flowgrpo_trainer.vllm_omni.pipeline_bagel.BagelPipelineWithLogProb"
    },
)

# Tune the per-stage sampling parameters before generating.
# NOTE(review): index 1 presumably addresses the image-generation stage of the
# YAML stage config — confirm against bagel_sharedmemory_2gpu_ci.yaml.
params_list = omni.default_sampling_params_list
params_list[1].num_inference_steps = 10
params_list[1].extra_args = {"cfg_text_scale": 4.0, "cfg_img_scale": 1.5}

# generate() is consumed into a list; each prompt requests an image modality.
outputs = list(omni.generate(
    prompts=[{"prompt": "a cute cat", "modalities": ["image"]}],
    sampling_params_list=params_list,
))
| ``` |
|
|
| ### Code used to create this repo: |
|
|
| ```python |
| """Create a tiny-random BAGEL model for CI testing. |
| |
| Reads real BAGEL-7B-MoT checkpoint weight names, creates matching tiny random |
| tensors with scaled-down dimensions. VAE architecture is hardcoded in vllm-omni |
| and cannot be shrunk, so VAE weights are kept at full size. |
| |
| Usage: |
| python scripts/create_tiny_bagel.py --source ByteDance-Seed/BAGEL-7B-MoT |
| """ |
| |
| import argparse |
| import json |
| import os |
| import re |
| import shutil |
| |
| import torch |
| from safetensors import safe_open |
| from safetensors.torch import save_file |
| |
# LLM/ViT dimension shrinkage: original checkpoint dim -> tiny dim.
# Any dimension not listed here is kept at its original size.
EMA_DIM_MAP = {
    3584: 64,  # LLM hidden_size
    18944: 128,  # LLM intermediate_size
    1152: 64,  # ViT hidden_size
    4304: 128,  # ViT intermediate_size
    128: 32,  # head_dim
    512: 64,  # kv_proj dim
}

# VAE: keep original dims (architecture is hardcoded in vllm-omni),
# so the map is intentionally empty — no dimension is remapped.
VAE_DIM_MAP = {}

# Keep a single transformer layer per component in the tiny checkpoint.
MAX_LLM_LAYERS = 1
MAX_VIT_LAYERS = 1
| |
| |
def shrink_dims(shape, dim_map):
    """Return *shape* as a list with each dim remapped through *dim_map*.

    Dimensions absent from *dim_map* pass through unchanged.
    """
    return [dim_map.get(dim, dim) for dim in shape]
| |
| |
def create_tiny_configs(source_dir, output_dir):
    """Write shrunken config files for the tiny checkpoint.

    Loads ``config.json`` from *source_dir*, scales the LLM and ViT
    sub-configs down to tiny dimensions, and writes three files into
    *output_dir*: the combined ``config.json`` plus standalone
    ``llm_config.json`` and ``vit_config.json``.

    Returns:
        The mutated combined config dict.
    """
    with open(os.path.join(source_dir, "config.json")) as fh:
        config = json.load(fh)

    # Shrink the LLM sub-config in place (existing keys keep their order).
    config["llm_config"].update({
        "hidden_size": 64,
        "num_hidden_layers": MAX_LLM_LAYERS,
        "num_attention_heads": 2,
        "num_key_value_heads": 2,
        "intermediate_size": 128,
        "max_position_embeddings": 4096,
        "max_window_layers": MAX_LLM_LAYERS,
    })

    # Shrink the ViT sub-config the same way.
    config["vit_config"].update({
        "hidden_size": 64,
        "num_hidden_layers": MAX_VIT_LAYERS,
        "num_attention_heads": 2,
        "intermediate_size": 128,
    })

    def _dump(fname, payload):
        # All configs use 4-space JSON indents, matching the upstream repo.
        with open(os.path.join(output_dir, fname), "w") as fh:
            json.dump(payload, fh, indent=4)

    _dump("config.json", config)

    # The standalone LLM config carries two keys the combined config omits.
    llm_standalone = dict(config["llm_config"])
    llm_standalone["qk_norm"] = True
    llm_standalone["tie_word_embeddings"] = False
    _dump("llm_config.json", llm_standalone)

    _dump("vit_config.json", dict(config["vit_config"]))

    return config
| |
| |
def create_tiny_weights(source_path, dim_map, max_layers, seed=42):
    """Create tiny random tensors matching the names in a safetensors file.

    Reads tensor names/shapes from *source_path*, shrinks each dimension via
    *dim_map*, and drops transformer layers beyond the per-component limits
    in *max_layers*.

    Args:
        source_path: Path to the source ``.safetensors`` checkpoint.
        dim_map: Mapping of original dim size -> shrunken dim size.
        max_layers: Mapping of name-substring pattern (e.g. "language_model")
            -> number of layers to keep for names matching that pattern.
        seed: Torch RNG seed so output is reproducible.

    Returns:
        Dict of tensor name -> random tensor (float32 for VAE files whose
        basename contains "ae", bfloat16 otherwise).
    """
    gen = torch.Generator().manual_seed(seed)
    weights = {}
    # VAE weights stay float32; everything else uses bfloat16.
    is_vae = "ae" in os.path.basename(source_path).lower()
    dtype = torch.float32 if is_vae else torch.bfloat16

    with safe_open(source_path, framework="pt") as f:
        for name in f.keys():
            # Skip layers past the configured limit for the matching
            # component, e.g. keep only layer 0 of "language_model.…layers.N…".
            # (Replaces a dead for/else loop plus a duplicated, convoluted
            # condition that implemented the same skip.)
            m = re.search(r"\.layers\.(\d+)\.", name)
            if m:
                idx = int(m.group(1))
                pattern = next((p for p in max_layers if p in name), None)
                if pattern is not None and idx >= max_layers[pattern]:
                    continue

            real_shape = list(f.get_tensor(name).shape)
            tiny_shape = shrink_dims(real_shape, dim_map)

            # Norm scales start at 1.0; all other weights get small noise.
            if "norm" in name and len(tiny_shape) == 1:
                weights[name] = torch.ones(tiny_shape, dtype=dtype)
            else:
                weights[name] = torch.randn(tiny_shape, generator=gen, dtype=dtype) * 0.02

    return weights
| |
| |
def main():
    """Assemble the tiny-random BAGEL repo from a real checkpoint."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--source", default="ByteDance-Seed/BAGEL-7B-MoT")
    parser.add_argument("--output", default=os.path.expanduser("~/models/tiny-random/BAGEL-7B-MoT"))
    args = parser.parse_args()

    # Resolve --source to a local directory; download from the Hub when the
    # path does not already hold a config.json.
    source_dir = args.source
    if not os.path.exists(os.path.join(source_dir, "config.json")):
        from huggingface_hub import snapshot_download
        source_dir = snapshot_download(source_dir)

    output_dir = args.output
    os.makedirs(output_dir, exist_ok=True)

    create_tiny_configs(source_dir, output_dir)

    # Tokenizer / preprocessor files are copied through unchanged when present.
    aux_files = (
        "generation_config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
    )
    for fname in aux_files:
        src = os.path.join(source_dir, fname)
        if os.path.exists(src):
            shutil.copy2(src, os.path.join(output_dir, fname))

    # LLM + ViT + connectors: shrunken dims, one layer per component.
    ema = create_tiny_weights(
        os.path.join(source_dir, "ema.safetensors"),
        dim_map=EMA_DIM_MAP,
        max_layers={"language_model": MAX_LLM_LAYERS, "vit_model": MAX_VIT_LAYERS},
        seed=42,
    )
    save_file(ema, os.path.join(output_dir, "ema.safetensors"))

    # VAE: full-size dims (architecture hardcoded downstream), all layers kept.
    vae = create_tiny_weights(
        os.path.join(source_dir, "ae.safetensors"),
        dim_map=VAE_DIM_MAP,
        max_layers={},
        seed=43,
    )
    save_file(vae, os.path.join(output_dir, "ae.safetensors"))

    # Index file: map every tensor to its shard and record the total byte size.
    weight_map = {name: "ema.safetensors" for name in ema}
    weight_map.update({name: "ae.safetensors" for name in vae})
    total_size = sum(
        t.numel() * t.element_size()
        for t in list(ema.values()) + list(vae.values())
    )
    with open(os.path.join(output_dir, "model.safetensors.index.json"), "w") as fh:
        json.dump({"metadata": {"total_size": total_size}, "weight_map": weight_map}, fh, indent=4)


if __name__ == "__main__":
    main()
| ``` |
|
|