# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
import os
import sys
from pathlib import Path
import imageio.v2 as imageio
import numpy as np
import torch
import torch.nn.functional as F
from tqdm import tqdm
from soma import SOMALayer
from soma.geometry.rig_utils import joint_local_to_world, joint_world_to_local
from tools.vis_pyrender import (
MeshRenderer,
default_pyopengl_platform,
look_at,
set_pyopengl_platform,
)
# --------------------------------------------------------------------------------
# Joint Names & Mapping (from nvhuman_layer/joint_names.py)
# --------------------------------------------------------------------------------
# fmt: off
nvskel93_name = [
"Hips", "Spine1", "Spine2", "Chest", "Neck1", "Neck2", "Head", "HeadEnd", "Jaw",
"LeftEye", "RightEye", "LeftShoulder", "LeftArm", "LeftForeArm", "LeftHand",
"LeftHandThumb1", "LeftHandThumb2", "LeftHandThumb3", "LeftHandThumbEnd",
"LeftHandIndex1", "LeftHandIndex2", "LeftHandIndex3", "LeftHandIndex4", "LeftHandIndexEnd",
"LeftHandMiddle1", "LeftHandMiddle2", "LeftHandMiddle3", "LeftHandMiddle4", "LeftHandMiddleEnd",
"LeftHandRing1", "LeftHandRing2", "LeftHandRing3", "LeftHandRing4", "LeftHandRingEnd",
"LeftHandPinky1", "LeftHandPinky2", "LeftHandPinky3", "LeftHandPinky4", "LeftHandPinkyEnd",
"LeftForeArmTwist1", "LeftForeArmTwist2", "LeftArmTwist1", "LeftArmTwist2",
"RightShoulder", "RightArm", "RightForeArm", "RightHand",
"RightHandThumb1", "RightHandThumb2", "RightHandThumb3", "RightHandThumbEnd",
"RightHandIndex1", "RightHandIndex2", "RightHandIndex3", "RightHandIndex4", "RightHandIndexEnd",
"RightHandMiddle1", "RightHandMiddle2", "RightHandMiddle3", "RightHandMiddle4", "RightHandMiddleEnd",
"RightHandRing1", "RightHandRing2", "RightHandRing3", "RightHandRing4", "RightHandRingEnd",
"RightHandPinky1", "RightHandPinky2", "RightHandPinky3", "RightHandPinky4", "RightHandPinkyEnd",
"RightForeArmTwist1", "RightForeArmTwist2", "RightArmTwist1", "RightArmTwist2",
"LeftLeg", "LeftShin", "LeftFoot", "LeftToeBase", "LeftToeEnd",
"LeftShinTwist1", "LeftShinTwist2", "LeftLegTwist1", "LeftLegTwist2",
"RightLeg", "RightShin", "RightFoot", "RightToeBase", "RightToeEnd",
"RightShinTwist1", "RightShinTwist2", "RightLegTwist1", "RightLegTwist2",
]
nvskel77_name = [
"Hips", "Spine1", "Spine2", "Chest", "Neck1", "Neck2", "Head", "HeadEnd", "Jaw",
"LeftEye", "RightEye",
"LeftShoulder", "LeftArm", "LeftForeArm", "LeftHand",
"LeftHandThumb1", "LeftHandThumb2", "LeftHandThumb3", "LeftHandThumbEnd",
"LeftHandIndex1", "LeftHandIndex2", "LeftHandIndex3", "LeftHandIndex4", "LeftHandIndexEnd",
"LeftHandMiddle1", "LeftHandMiddle2", "LeftHandMiddle3", "LeftHandMiddle4", "LeftHandMiddleEnd",
"LeftHandRing1", "LeftHandRing2", "LeftHandRing3", "LeftHandRing4", "LeftHandRingEnd",
"LeftHandPinky1", "LeftHandPinky2", "LeftHandPinky3", "LeftHandPinky4", "LeftHandPinkyEnd",
"RightShoulder", "RightArm", "RightForeArm", "RightHand",
"RightHandThumb1", "RightHandThumb2", "RightHandThumb3", "RightHandThumbEnd",
"RightHandIndex1", "RightHandIndex2", "RightHandIndex3", "RightHandIndex4", "RightHandIndexEnd",
"RightHandMiddle1", "RightHandMiddle2", "RightHandMiddle3", "RightHandMiddle4", "RightHandMiddleEnd",
"RightHandRing1", "RightHandRing2", "RightHandRing3", "RightHandRing4", "RightHandRingEnd",
"RightHandPinky1", "RightHandPinky2", "RightHandPinky3", "RightHandPinky4", "RightHandPinkyEnd",
"LeftLeg", "LeftShin", "LeftFoot", "LeftToeBase", "LeftToeEnd",
"RightLeg", "RightShin", "RightFoot", "RightToeBase", "RightToeEnd",
]
# fmt: on
nvskel93to77_idx = [nvskel93_name.index(name) for name in nvskel77_name]
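# Example: nvskel77 joint i corresponds to nvskel93 joint nvskel93to77_idx[i],
# e.g. nvskel93_name[nvskel93to77_idx[11]] == nvskel77_name[11] == "LeftShoulder".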
color_map = {
    "soma": (0.4, 0.8, 0.4, 1.0),  # light green
    "mhr": (0.98, 0.65, 0.15, 1.0),  # orange
    "anny": (0.25, 0.75, 1.0, 1.0),  # light blue
    "smpl": (0.55, 0.15, 0.85, 1.0),  # purple
    "smplx": (0.55, 0.15, 0.85, 1.0),  # purple
    "garment": (0.15, 0.15, 1.0, 1.0),  # blue
}
def get_smooth_noise(T, dim, device, num_keyframes=None, mode="normal"):
    """Sample `num_keyframes` random keyframes and linearly interpolate them
    into a smooth (T, dim) trajectory."""
    if num_keyframes is None:
        num_keyframes = max(3, T // 30)
    if mode == "normal":
        keyframes = torch.randn(1, dim, num_keyframes, device=device)
    elif mode == "uniform":
        keyframes = torch.rand(1, dim, num_keyframes, device=device)
    else:
        raise ValueError(f"Unknown mode: {mode!r} (expected 'normal' or 'uniform')")
    res = F.interpolate(keyframes, size=T, mode="linear", align_corners=True)[0].T
    return res
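# Usage sketch (illustrative values): a smooth (120, 10) trajectory, e.g. for
# animating shape coefficients over 120 frames:
#   coeffs = get_smooth_noise(T=120, dim=10, device=torch.device("cpu"))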
def save_video(frames, path, fps=30):
imageio.mimsave(path, frames, fps=fps)
print(f"Saved {path}")
def main():
parser = argparse.ArgumentParser(description="SOMA pyrender demo")
parser.add_argument("--data-root", default="assets", help="Path to SOMA assets")
parser.add_argument(
"--motion-file",
default="assets/example_animation.npy",
help="Path to motion file (.npy). If None, uses a dummy motion.",
)
parser.add_argument("--device", default="cuda:0")
parser.add_argument("--output-dir", default="out/vis_identity_model")
parser.add_argument("--image-size", type=int, default=1920)
parser.add_argument("--pyopengl-platform", default=default_pyopengl_platform())
parser.add_argument("--random-shape", action="store_true", default=False)
parser.add_argument(
"--identity-model-type",
default="soma,mhr,anny,smpl,smplx,garment",
help="Comma-separated list of identity models to use. Options: soma, mhr, anny, smpl, smplx garment (default: soma,mhr,anny,smpl,smplx,garment)",
)
parser.add_argument(
"--pose-batch-size",
type=int,
default=0,
help="Run forward pass in batches of this many poses to reduce GPU memory. 0 = process all frames at once (default). Try 32 or 64 if OOM.",
)
parser.add_argument(
"--low-lod",
action="store_true",
default=False,
help="Use low level-of-detail mesh (fewer vertices/faces)",
)
parser.add_argument(
"--apply-correctives",
action="store_true",
default=False,
help="Apply pose corrective offsets (default: False)",
)
parser.add_argument(
"--gender",
default="neutral",
help="Gender of the model (default: neutral). Only used for smpl and smplx models.",
)
args = parser.parse_args()
identity_models = [m.strip().lower() for m in args.identity_model_type.split(",")]
valid_models = {"soma", "mhr", "anny", "smpl", "smplx", "garment"}
invalid_models = set(identity_models) - valid_models
if invalid_models:
raise ValueError(
f"Invalid identity model type(s): {invalid_models}. Valid options: {valid_models}"
)
args.identity_models = identity_models
repo_root = Path(__file__).resolve().parents[1]
if str(repo_root) not in sys.path:
sys.path.insert(0, str(repo_root))
set_pyopengl_platform(args.pyopengl_platform)
device = torch.device(args.device if torch.cuda.is_available() else "cpu")
os.makedirs(args.output_dir, exist_ok=True)
print(f"Initializing models: {', '.join(args.identity_models)}...")
models = {}
for identity_model_type in args.identity_models:
if identity_model_type == "smpl":
identity_model_kwargs = {
"gender": args.gender,
}
else:
identity_model_kwargs = {}
models[identity_model_type] = SOMALayer(
data_root=args.data_root,
low_lod=args.low_lod,
device=str(device),
identity_model_type=identity_model_type,
mode="warp",
identity_model_kwargs=identity_model_kwargs,
).to(device)
reference_model = models[args.identity_models[0]]
if args.motion_file and os.path.exists(args.motion_file):
print(f"Loading motion from {args.motion_file}...")
        motion_full = torch.from_numpy(np.load(args.motion_file)).float().to(device)
        joint_rot_mats_local = motion_full[..., :3, :3]  # per-joint local rotation matrices
        root_trans = motion_full[..., 1, :3, 3]  # translation of joint 1 (Hips)
else:
print("No motion file provided or file not found. Using dummy motion (T-pose rotation).")
T = 30
joint_rot_mats_local = (
torch.eye(3, device=device).unsqueeze(0).unsqueeze(0).repeat(T, 78, 1, 1)
)
angle = torch.linspace(0, 2 * np.pi, T, device=device)
cos = torch.cos(angle)
sin = torch.sin(angle)
zeros = torch.zeros_like(angle)
ones = torch.ones_like(angle)
rot_y = torch.stack(
[
torch.stack([cos, zeros, sin], dim=-1),
torch.stack([zeros, ones, zeros], dim=-1),
torch.stack([-sin, zeros, cos], dim=-1),
],
dim=-2,
) # (T, 3, 3)
joint_rot_mats_local[:, 1] = rot_y # Rotate Hips
root_trans = torch.zeros(T, 3, device=device)
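    # Motion files may carry the full rig (root + the 93 nvskel joints = 94
    # matrices); if so, select the root plus the 77-joint subset used here.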
if joint_rot_mats_local.shape[1] == 94:
subset_idx = [0] + [i + 1 for i in nvskel93to77_idx]
joint_rot_mats_local = joint_rot_mats_local[:, subset_idx]
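    # Re-express the motion relative to the model's rest pose: compose local
    # rotations out to world space, cancel the rest-pose world orientation
    # (right-multiply by its transpose), then convert back to local rotations.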
correction = reference_model.t_pose_world[:, :3, :3].transpose(-2, -1)
joint_rot_mats_world = joint_local_to_world(
joint_rot_mats_local, reference_model.joint_parent_ids
)
joint_rot_mats_world = joint_rot_mats_world @ correction
joint_rot_mats_local = joint_world_to_local(
joint_rot_mats_world, reference_model.joint_parent_ids
)
T = joint_rot_mats_local.shape[0]
    global_orient = joint_rot_mats_local[:T, 1]  # (T, 3, 3) - Hips is index 1
    body_pose = joint_rot_mats_local[:T, 2:]  # (T, 76, 3, 3) - remaining joints
    pose = torch.cat([global_orient.unsqueeze(1), body_pose], dim=1)  # (T, 77, 3, 3)
# Prepare Identity Parameters
identity_coeffs_map = {}
for model_type, model in models.items():
n = model.identity_model.num_identity_coeffs
if model_type == "anny":
anny_im = model.identity_model.identity_model
if args.random_shape:
phenotypes = {
k: get_smooth_noise(T, 1, device, mode="uniform").squeeze(-1)
for k in anny_im.phenotype_labels
}
else:
phenotypes = {
k: torch.ones(T, device=device) * 0.5 for k in anny_im.phenotype_labels
}
local_changes = {k: torch.zeros(T, device=device) for k in anny_im.local_change_labels}
identity_coeffs_map["anny"] = (phenotypes, local_changes)
elif model_type == "mhr":
n_scale = model.identity_model.num_scale_params
if args.random_shape:
coeffs = get_smooth_noise(T, n, device)
scale = get_smooth_noise(T, n_scale, device, mode="normal") * 0.2
else:
coeffs = torch.zeros(T, n, device=device)
scale = torch.zeros(T, n_scale, device=device)
identity_coeffs_map[model_type] = (coeffs, scale)
else:
if args.random_shape:
coeffs = get_smooth_noise(T, n, device)
else:
coeffs = torch.zeros(T, n, device=device)
identity_coeffs_map[model_type] = (coeffs, None)
transl = root_trans[:T]
    # Forward pass using the prepare_identity() + pose() API.
    # When identity is constant (not --random-shape), prepare_identity() is called
    # once per model and only pose() runs per batch, skipping the expensive
    # identity model + skeleton transfer on every frame.
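    # Two-phase sketch (a minimal illustration using this script's objects):
    #   model.prepare_identity(coeffs, scale)       # cache identity-dependent state once
    #   out = model.pose(pose_b, transl=transl_b, pose2rot=False)  # cheap per-batch call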
pose_batch_size = args.pose_batch_size if args.pose_batch_size > 0 else T
print(f"Running forward pass (pose_batch_size={pose_batch_size})...")
outputs = {}
with torch.no_grad():
if not args.random_shape:
for model_type, model in models.items():
coeffs, scale = identity_coeffs_map[model_type]
if isinstance(coeffs, dict):
coeffs_single = {k: v[:1] for k, v in coeffs.items()}
scale_single = {k: v[:1] for k, v in scale.items()} if scale else None
else:
coeffs_single = coeffs[:1]
scale_single = scale[:1] if scale is not None else None
model.prepare_identity(coeffs_single, scale_single)
for start in range(0, T, pose_batch_size):
end = min(start + pose_batch_size, T)
pose_b = pose[start:end]
transl_b = transl[start:end]
for model_type, model in models.items():
if args.random_shape:
coeffs, scale = identity_coeffs_map[model_type]
if isinstance(coeffs, dict):
coeffs_b = {k: v[start:end] for k, v in coeffs.items()}
scale_b = {k: v[start:end] for k, v in scale.items()} if scale else None
else:
coeffs_b = coeffs[start:end]
scale_b = scale[start:end] if scale is not None else None
model.prepare_identity(coeffs_b, scale_b)
out_b = model.pose(
pose_b,
transl=transl_b,
pose2rot=False,
apply_correctives=args.apply_correctives,
)
if model_type not in outputs:
outputs[model_type] = {"vertices": [], "joints": []}
outputs[model_type]["vertices"].append(out_b["vertices"])
outputs[model_type]["joints"].append(out_b["joints"])
for model_type in list(outputs.keys()):
outputs[model_type]["vertices"] = torch.cat(outputs[model_type]["vertices"], dim=0)
outputs[model_type]["joints"] = torch.cat(outputs[model_type]["joints"], dim=0)
    # Render (model-first loop with streaming video writer)
print("Rendering videos...")
suffix = "rand_shape" if args.random_shape else "fixed_shape"
faces = {
model_type: models[model_type].faces.detach().cpu().numpy()
for model_type in args.identity_models
}
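    # Fixed camera: 6 units in front of the subject at a height of 1 unit
    # (typically meters for human models), looking at the torso with +Y up.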
cam_pose = look_at(
eye=np.array([0.0, 1.0, 6.0]),
target=np.array([0.0, 1.0, 0.0]),
up=np.array([0.0, 1.0, 0.0]),
)
light_dir = np.array([0.0, -0.5, -1.0])
renderer = MeshRenderer(image_size=args.image_size, light_intensity=5)
for model_type in args.identity_models:
out_path = f"{args.output_dir}/{model_type}_{suffix}.mp4"
renderer.setup_mesh(
faces=faces[model_type],
mesh_color=color_map[model_type],
cam_pose=cam_pose,
light_dir=light_dir,
metallic=0.0,
roughness=0.5,
base_color_factor=[0.9, 0.9, 0.9, 1.0],
)
writer = imageio.get_writer(out_path, fps=30)
for t in tqdm(range(T), desc=model_type):
verts = outputs[model_type]["vertices"][t].detach().cpu().numpy()
img = renderer.render_frame(verts)
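            # render_frame appears to return BGR (OpenCV convention); reverse
            # the channel axis to RGB before handing the frame to imageio.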
writer.append_data(img[..., ::-1])
writer.close()
print(f"Saved {out_path}")
renderer.delete()
if __name__ == "__main__":
main()