"""
Quick validation script for a converted HYV3 outer-format checkpoint.

Checks:
1. model.safetensors.index.json structure and completeness
2. All expected weight keys exist (dense layer 0, MoE layers 1-79)
3. Expert tensor shapes (fused 3D format)
4. All referenced shard files exist and are non-empty
5. Spot-check: load a few shards and verify tensor shapes/dtypes
6. No duplicate or orphan keys

Usage:
    python check_converted.py <output_dir> [--spot-check N]

Example:
    python check_converted.py pretrain_base/hf_outer
    python check_converted.py pretrain_base/hf_outer --spot-check 5
"""
|
|
import argparse
import json
import os
import sys
import time
from collections import defaultdict
|
|
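# Per-layer weight suffixes expected for every dense (non-MoE) layer.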
DENSE_SUFFIXES = [
    "input_layernorm.weight",
    "post_attention_layernorm.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "self_attn.q_norm.weight",
    "self_attn.k_norm.weight",
    "mlp.gate_proj.weight",
    "mlp.up_proj.weight",
    "mlp.down_proj.weight",
]
|
|
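# Per-layer weight suffixes expected for every MoE layer.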
MOE_SUFFIXES = [
    "input_layernorm.weight",
    "post_attention_layernorm.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "self_attn.q_norm.weight",
    "self_attn.k_norm.weight",
    # MoE-specific: router gate, fused expert tensors, shared experts.
    "mlp.gate.weight",
    "mlp.e_score_correction_bias",
    "mlp.experts.gate_up_proj",
    "mlp.experts.down_proj",
    "mlp.shared_experts.gate_proj.weight",
    "mlp.shared_experts.up_proj.weight",
    "mlp.shared_experts.down_proj.weight",
]
|
|
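# Extra suffixes that appear only on MTP (multi-token prediction) layers,
# which otherwise carry the same per-layer keys as MoE layers.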
MTP_EXTRA_SUFFIXES = [
    "eh_proj.weight",
    "enorm.weight",
    "final_layernorm.weight",
    "hnorm.weight",
]
|
|
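# Keys that occur once per model rather than once per layer.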
GLOBAL_KEYS = [
    "model.embed_tokens.weight",
    "model.norm.weight",
    "lm_head.weight",
]
|
|
|
|
def load_config(output_dir):
    """Load config.json and return it as a dict (None if missing)."""
    config_path = os.path.join(output_dir, "config.json")
    if not os.path.exists(config_path):
        print(f"[ERROR] config.json not found in {output_dir}")
        return None
    with open(config_path) as f:
        return json.load(f)
|
|
|
|
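# model.safetensors.index.json layout (HF sharded-checkpoint convention):
#   {"metadata": {"total_size": <bytes>},
#    "weight_map": {"<tensor name>": "<shard filename>", ...}}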
def check_index_json(output_dir):
    """Check model.safetensors.index.json for structure and completeness."""
    index_path = os.path.join(output_dir, "model.safetensors.index.json")
    if not os.path.exists(index_path):
        print("[ERROR] model.safetensors.index.json not found")
        return None, []

    with open(index_path) as f:
        index = json.load(f)

    errors = []

    # Metadata must carry the checkpoint's total byte size.
    if "metadata" not in index:
        errors.append("Missing 'metadata' in index.json")
    elif "total_size" not in index["metadata"]:
        errors.append("Missing 'total_size' in metadata")

    # Without a weight_map the remaining checks cannot run.
    if "weight_map" not in index:
        errors.append("Missing 'weight_map' in index.json")
        return index, errors

    weight_map = index["weight_map"]
    total_size = index.get("metadata", {}).get("total_size", 0)

    print(f" Index keys : {len(weight_map)}")
    print(f" Total size : {total_size / 1e9:.2f} GB")

    if len(weight_map) == 0:
        errors.append("weight_map is empty")

    return index, errors
|
|
|
|
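# Expected layout: layers [0, first_k_dense_replace) are dense, layers
# [first_k_dense_replace, num_hidden_layers) are MoE, and MTP layers (if any)
# are appended after the regular layers as model.layers.{num_hidden_layers + i}.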
def check_expected_keys(weight_map, config):
    """Check that all expected keys exist in the weight_map."""
    errors = []
    warnings = []

    num_layers = config.get("num_hidden_layers", 80)
    first_k_dense = config.get("first_k_dense_replace", 1)
    num_mtp_layers = config.get("num_nextn_predict_layers", 0)

    # Per-model keys.
    for key in GLOBAL_KEYS:
        if key not in weight_map:
            errors.append(f"Missing global key: {key}")

    # Per-layer keys, grouped by suffix so a projection missing from many
    # layers reports as one error instead of one per layer.
    missing_by_type = defaultdict(list)
    for layer_idx in range(num_layers):
        prefix = f"model.layers.{layer_idx}."
        if layer_idx < first_k_dense:
            # Dense layer.
            suffixes = DENSE_SUFFIXES
        else:
            # MoE layer.
            suffixes = MOE_SUFFIXES

        for suffix in suffixes:
            full_key = prefix + suffix
            if full_key not in weight_map:
                missing_by_type[suffix].append(layer_idx)

    # MTP layers: MoE suffixes plus the MTP-only projections/norms.
    mtp_missing_by_type = defaultdict(list)
    for mtp_idx in range(num_mtp_layers):
        layer_idx = num_layers + mtp_idx
        prefix = f"model.layers.{layer_idx}."
        mtp_suffixes = MOE_SUFFIXES + MTP_EXTRA_SUFFIXES
        for suffix in mtp_suffixes:
            full_key = prefix + suffix
            if full_key not in weight_map:
                mtp_missing_by_type[suffix].append(layer_idx)

    for suffix, layers in sorted(mtp_missing_by_type.items()):
        errors.append(f"Missing MTP key '{suffix}' in layers: {layers}")

    for suffix, layers in sorted(missing_by_type.items()):
        if len(layers) <= 5:
            layer_str = str(layers)
        else:
            layer_str = f"{layers[:3]}...({len(layers)} total)"
        errors.append(f"Missing '{suffix}' in layers: {layer_str}")

    # Warn about keys that fall outside every known prefix.
    known_prefixes = set()
    for layer_idx in range(num_layers + num_mtp_layers):
        known_prefixes.add(f"model.layers.{layer_idx}.")
    known_prefixes.add("model.embed_tokens.")
    known_prefixes.add("model.norm.")
    known_prefixes.add("lm_head.")
    known_prefixes.add("model.mtp_layers.")

    unexpected = []
    for key in weight_map:
        if not any(key.startswith(p) for p in known_prefixes):
            unexpected.append(key)

    if unexpected:
        if len(unexpected) <= 5:
            for k in unexpected:
                warnings.append(f"Unexpected key: {k}")
        else:
            warnings.append(
                f"{len(unexpected)} unexpected keys found (first 3: {unexpected[:3]})"
            )

    return errors, warnings
|
|
|
|
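# Both directions are checked: every shard the index references must exist
# and be non-empty, and every *.safetensors file on disk should be referenced
# by the index (or be empty residue from the cross-shard merge, which only
# warrants a warning).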
def check_shard_files(output_dir, weight_map):
    """Check that all referenced shard files exist and are non-empty."""
    errors = []
    warnings = []

    shard_files = sorted(set(weight_map.values()))
    print(f" Shard files : {len(shard_files)}")

    missing = []
    empty = []
    total_disk_size = 0

    for sf in shard_files:
        path = os.path.join(output_dir, sf)
        if not os.path.exists(path):
            missing.append(sf)
        else:
            size = os.path.getsize(path)
            if size == 0:
                empty.append(sf)
            total_disk_size += size

    print(f" Disk size : {total_disk_size / 1e9:.2f} GB")

    if missing:
        errors.append(f"Missing shard files ({len(missing)}): {missing[:5]}")
    if empty:
        errors.append(f"Empty shard files ({len(empty)}): {empty[:5]}")

    # Orphans: *.safetensors files on disk that the index never references.
    all_safetensors = set(
        f for f in os.listdir(output_dir)
        if f.endswith(".safetensors")
    )
    referenced = set(shard_files)
    orphans = all_safetensors - referenced
    if orphans:
        # Orphans at or below this size are assumed to hold no real weights:
        # empty residue left behind by the cross-shard merge, not lost data.
        EMPTY_SHARD_THRESHOLD = 128
        residue_orphans = []
        real_orphans = []
        for o in sorted(orphans):
            sz = os.path.getsize(os.path.join(output_dir, o))
            if sz <= EMPTY_SHARD_THRESHOLD:
                residue_orphans.append(o)
            else:
                real_orphans.append(o)

        if residue_orphans:
            warnings.append(
                f"{len(residue_orphans)} empty residue shard(s) from cross-shard merge "
                f"(<=128 bytes each, safe to delete)"
            )
        if real_orphans:
            errors.append(
                f"Orphan shard files with data (not in index): {real_orphans[:5]}"
            )

    return errors, warnings
|
|
|
|
def check_key_distribution(weight_map):
    """Check the distribution of keys across shards."""
    shard_key_count = defaultdict(int)
    for key, shard in weight_map.items():
        shard_key_count[shard] += 1

    counts = sorted(shard_key_count.values())
    if not counts:
        # An empty weight_map is already reported by check_index_json.
        return []
    print(f" Keys/shard : min={counts[0]}, max={counts[-1]}, "
          f"median={counts[len(counts)//2]}")

    # Defensive: counts derived from weight_map cannot actually reach zero,
    # but keep the check in case the counting logic changes.
    zero_shards = [s for s, c in shard_key_count.items() if c == 0]
    if zero_shards:
        return [f"Shards with 0 keys: {zero_shards}"]
    return []
|
|
|
|
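# Expected fused expert shapes (one 3D tensor per MoE layer, experts stacked
# along dim 0):
#   mlp.experts.gate_up_proj : (num_experts, 2 * expert_hidden, hidden_size)
#   mlp.experts.down_proj    : (num_experts, hidden_size, expert_hidden)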
def spot_check_shards(output_dir, weight_map, config, num_checks=3):
    """Spot-check a few shards by loading and verifying tensor shapes."""
    errors = []

    try:
        from safetensors import safe_open
    except ImportError:
        print(" [SKIP] safetensors not installed, skipping spot-check")
        return errors

    num_experts = config.get("num_experts", 192)
    expert_hidden = config.get("expert_hidden_dim", config.get("moe_intermediate_size", 1536))
    hidden_size = config.get("hidden_size", 4096)

    # Prefer shards that hold the fused expert tensors, since those are the
    # most conversion-sensitive weights.
    expert_shards = set()
    for key, shard in weight_map.items():
        if "experts.gate_up_proj" in key or "experts.down_proj" in key:
            expert_shards.add(shard)

    # Fall back to the first few shards if no expert tensors were found.
    check_shards = sorted(expert_shards)[:num_checks]
    if not check_shards:
        check_shards = sorted(set(weight_map.values()))[:num_checks]

    print(f"\n Spot-checking {len(check_shards)} shard(s)...")

    for shard_file in check_shards:
        shard_path = os.path.join(output_dir, shard_file)
        t0 = time.time()

        try:
            with safe_open(shard_path, framework="pt", device="cpu") as f:
                keys_in_shard = list(f.keys())
                for key in keys_in_shard:
                    tensor = f.get_tensor(key)

                    # Fused expert tensors must match the expected 3D shapes.
                    if key.endswith("experts.gate_up_proj"):
                        expected_shape = (num_experts, expert_hidden * 2, hidden_size)
                        if tuple(tensor.shape) != expected_shape:
                            errors.append(
                                f"{shard_file}/{key}: shape {tuple(tensor.shape)} "
                                f"!= expected {expected_shape}"
                            )
                    elif key.endswith("experts.down_proj"):
                        expected_shape = (num_experts, hidden_size, expert_hidden)
                        if tuple(tensor.shape) != expected_shape:
                            errors.append(
                                f"{shard_file}/{key}: shape {tuple(tensor.shape)} "
                                f"!= expected {expected_shape}"
                            )

                    # Any NaN/Inf in a floating-point tensor means a bad conversion.
                    if tensor.is_floating_point():
                        if tensor.isnan().any():
                            errors.append(f"{shard_file}/{key}: contains NaN values")
                        if tensor.isinf().any():
                            errors.append(f"{shard_file}/{key}: contains Inf values")

            elapsed = time.time() - t0
            print(f" {shard_file}: {len(keys_in_shard)} keys, OK ({elapsed:.1f}s)")

        except Exception as e:
            errors.append(f"Failed to load {shard_file}: {e}")

    return errors
|
|
|
|
def main():
    parser = argparse.ArgumentParser(
        description="Validate converted HYV3 outer-format checkpoint."
    )
    parser.add_argument(
        "output_dir", type=str,
        help="Path to the converted outer-format checkpoint directory.",
    )
    parser.add_argument(
        "--spot-check", type=int, default=3, dest="spot_check",
        help="Number of shards to spot-check by loading tensors (default: 3).",
    )
    args = parser.parse_args()

    output_dir = os.path.abspath(args.output_dir)
    print(f"Validating: {output_dir}\n")

    if not os.path.isdir(output_dir):
        print(f"[ERROR] Directory not found: {output_dir}")
        sys.exit(1)

    all_errors = []
    all_warnings = []

    # [1/5] Config.
    print("[1/5] Loading config.json...")
    config = load_config(output_dir)
    if config is None:
        print("[ERROR] Cannot proceed without config.json")
        sys.exit(1)

    num_layers = config.get("num_hidden_layers", 0)
    num_experts = config.get("num_experts", 0)
    first_k_dense = config.get("first_k_dense_replace", 0)
    num_mtp = config.get("num_nextn_predict_layers", 0)
    print(f" Layers : {num_layers} ({first_k_dense} dense, {num_layers - first_k_dense} MoE)")
    print(f" MTP layers : {num_mtp}")
    print(f" Experts/layer : {num_experts}")
    print(f" Hidden size : {config.get('hidden_size', '?')}")
    print(f" Expert hidden : {config.get('expert_hidden_dim', config.get('moe_intermediate_size', '?'))}")

    # [2/5] Index structure.
    print("\n[2/5] Checking model.safetensors.index.json...")
    index, idx_errors = check_index_json(output_dir)
    all_errors.extend(idx_errors)

    if index is None or "weight_map" not in index:
        print("[ERROR] Cannot proceed without valid index.json")
        sys.exit(1)

    weight_map = index["weight_map"]

    # [3/5] Key completeness and distribution across shards.
    print("\n[3/5] Checking expected keys...")
    key_errors, key_warnings = check_expected_keys(weight_map, config)
    all_errors.extend(key_errors)
    all_warnings.extend(key_warnings)

    dist_errors = check_key_distribution(weight_map)
    all_errors.extend(dist_errors)

    # [4/5] Shard files on disk.
    print("\n[4/5] Checking shard files on disk...")
    shard_errors, shard_warnings = check_shard_files(output_dir, weight_map)
    all_errors.extend(shard_errors)
    all_warnings.extend(shard_warnings)

    # [5/5] Optional tensor-level spot-check.
    if args.spot_check > 0:
        print(f"\n[5/5] Spot-checking tensors (loading {args.spot_check} shard(s))...")
        spot_errors = spot_check_shards(output_dir, weight_map, config, args.spot_check)
        all_errors.extend(spot_errors)
    else:
        print("\n[5/5] Spot-check skipped (--spot-check 0)")

    # Summary: warnings never fail the run; any error exits non-zero.
    print(f"\n{'=' * 60}")
    if all_warnings:
        print(f"WARNINGS ({len(all_warnings)}):")
        for w in all_warnings:
            print(f" [WARN] {w}")

    if all_errors:
        print(f"ERRORS ({len(all_errors)}):")
        for e in all_errors:
            print(f" [ERROR] {e}")
        print(f"\nResult: FAILED ({len(all_errors)} error(s), {len(all_warnings)} warning(s))")
        sys.exit(1)
    else:
        print(f"Result: PASSED (0 errors, {len(all_warnings)} warning(s))")
        print(f"{'=' * 60}")
        sys.exit(0)
|
|
|
|
if __name__ == "__main__":
    main()
|
|