Spaces:

Daankular
/

Image2Model

Running on Zero

App Files Files Community

Image2Model / pipeline /tpose_smpl.py

Daankular

Initial local files

14c3d13 14 days ago

raw

history blame contribute delete

16.5 kB

	"""
	tpose_smpl.py -- T-pose a humanoid GLB via inverse Linear Blend Skinning.

	Pipeline:
	1. Render front view and run HMR2 -> SMPL body_pose + betas
	2. Read rigged.glb: mesh verts (rig world space), skinning weights, T-pose joints
	3. Compute FK transforms in rig world space using HMR2 body_pose
	4. Apply inverse LBS: v_tpose = (Sum_j W_j * A_j)^-1 * v_posed
	5. Map T-posed verts back to original mesh coordinate space, preserve UV/texture
	6. Optionally export SKEL bone mesh in T-pose

	Usage:
	python tpose_smpl.py --body /tmp/triposg_textured.glb \
	--rig /tmp/rig_out/rigged.glb \
	--out /tmp/tposed_surface.glb \
	[--skel_out /tmp/tposed_bones.glb] \
	[--debug_dir /tmp/tpose_debug]
	"""

	import os, sys, argparse, struct, json, warnings
	warnings.filterwarnings('ignore')

	import numpy as np
	import cv2
	import torch
	import trimesh
	from trimesh.visual.texture import TextureVisuals
	from trimesh.visual.material import PBRMaterial
	from scipy.spatial.transform import Rotation as R

	sys.path.insert(0, '/root/MV-Adapter')
	SMPL_NEUTRAL = '/root/body_models/smpl/SMPL_NEUTRAL.pkl'
	SKEL_DIR = '/root/body_models/skel'

	SMPL_PARENTS = [-1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9,
	12, 13, 14, 16, 17, 18, 19, 20, 21]


	# ---- Step 1: Render front view -----------------------------------------------

	def render_front(body_glb, H=1024, W=768, device='cuda'):
	from mvadapter.utils.mesh_utils import (
	NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, render,
	)
	ctx = NVDiffRastContextWrapper(device=device, context_type='cuda')
	mesh_mv = load_mesh(body_glb, rescale=True, device=device)
	camera = get_orthogonal_camera(
	elevation_deg=[0], distance=[1.8],
	left=-0.55, right=0.55, bottom=-0.55, top=0.55,
	azimuth_deg=[-90], device=device,
	)
	out = render(ctx, mesh_mv, camera, height=H, width=W,
	render_attr=True, render_depth=False, render_normal=False,
	attr_background=0.5)
	img_np = (out.attr[0].cpu().numpy() * 255).clip(0, 255).astype(np.uint8)
	return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)


	# ---- Step 2: HMR2 pose estimation --------------------------------------------

	def run_hmr2(img_bgr, device='cuda'):
	from pathlib import Path
	from hmr2.configs import CACHE_DIR_4DHUMANS
	from hmr2.models import load_hmr2, DEFAULT_CHECKPOINT, download_models
	from hmr2.utils import recursive_to
	from hmr2.datasets.vitdet_dataset import ViTDetDataset
	from hmr2.utils.utils_detectron2 import DefaultPredictor_Lazy
	from detectron2.config import LazyConfig
	import hmr2 as hmr2_pkg

	download_models(CACHE_DIR_4DHUMANS)
	model, model_cfg = load_hmr2(DEFAULT_CHECKPOINT)
	model = model.to(device).eval()

	cfg_path = Path(hmr2_pkg.__file__).parent / 'configs' / 'cascade_mask_rcnn_vitdet_h_75ep.py'
	det_cfg = LazyConfig.load(str(cfg_path))
	det_cfg.train.init_checkpoint = (
	'https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h'
	'/f328730692/model_final_f05665.pkl'
	)
	for i in range(3):
	det_cfg.model.roi_heads.box_predictors[i].test_score_thresh = 0.25
	detector = DefaultPredictor_Lazy(det_cfg)

	det_out = detector(img_bgr)
	instances = det_out['instances']
	valid = (instances.pred_classes == 0) & (instances.scores > 0.5)
	boxes = instances.pred_boxes.tensor[valid].cpu().numpy()
	if len(boxes) == 0:
	raise RuntimeError('HMR2: no person detected in render')

	areas = (boxes[:,2]-boxes[:,0]) * (boxes[:,3]-boxes[:,1])
	boxes = boxes[areas.argmax():areas.argmax()+1]

	dataset = ViTDetDataset(model_cfg, img_bgr, boxes)
	dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0)
	for batch in dataloader:
	batch = recursive_to(batch, device)
	with torch.no_grad():
	out = model(batch)
	sp = out['pred_smpl_params']
	return {
	'body_pose': sp['body_pose'][0].cpu(), # (23, 3, 3)
	'betas': sp['betas'][0].cpu(), # (10,)
	}


	# ---- Step 3: Read all data from rigged.glb -----------------------------------

	def read_rigged_glb(rig_glb):
	"""
	Returns dict with:
	verts : (N, 3) mesh vertices in rig world space
	j_idx : (N, 4) joint indices
	w_arr : (N, 4) skinning weights
	J_bind : (24, 3) T-pose joint world positions
	"""
	with open(rig_glb, 'rb') as fh:
	raw = fh.read()
	ch_len, _ = struct.unpack_from('<II', raw, 12)
	gltf = json.loads(raw[20:20+ch_len])
	bin_data = raw[20+ch_len+8:]

	def _read(acc_i):
	acc = gltf['accessors'][acc_i]
	bv = gltf['bufferViews'][acc['bufferView']]
	off = bv.get('byteOffset', 0) + acc.get('byteOffset', 0)
	cnt = acc['count']
	n = {'SCALAR':1,'VEC2':2,'VEC3':3,'VEC4':4,'MAT4':16}[acc['type']]
	fmt = {5121:'B',5123:'H',5125:'I',5126:'f'}[acc['componentType']]
	nb = {'B':1,'H':2,'I':4,'f':4}[fmt]
	return np.frombuffer(bin_data[off:off+cntnnb],
	dtype=np.dtype(fmt)).reshape(cnt, n)

	prim = gltf['meshes'][0]['primitives'][0]['attributes']
	verts = _read(prim['POSITION']).astype(np.float64) # (N, 3)
	j_idx = _read(prim['JOINTS_0']).astype(int) # (N, 4)
	w_arr = _read(prim['WEIGHTS_0']).astype(np.float64) # (N, 4)
	row_sum = w_arr.sum(axis=1, keepdims=True)
	w_arr /= np.where(row_sum > 0, row_sum, 1.0)

	# Read T-pose joint world positions by accumulating node translations
	nodes = gltf['nodes']
	skin = gltf['skins'][0]
	j_nodes = skin['joints'] # [0, 1, ..., 23]
	J_bind = np.zeros((24, 3), dtype=np.float64)
	for ji, ni in enumerate(j_nodes):
	t_local = np.array(nodes[ni].get('translation', [0, 0, 0]))
	p = SMPL_PARENTS[ji]
	J_bind[ji] = (J_bind[p] if p >= 0 else np.zeros(3)) + t_local

	print(' Rig verts: %d Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
	len(verts),
	verts[:,1].min(), verts[:,1].max(),
	verts[:,0].min(), verts[:,0].max()))
	print(' J_bind pelvis: (%.3f, %.3f, %.3f) L_shoulder: (%.3f, %.3f, %.3f)' % (
	J_bind[0], J_bind[16]))
	return {'verts': verts, 'j_idx': j_idx, 'w_arr': w_arr, 'J_bind': J_bind}


	# ---- Step 4: FK in rig world space -> A matrices -----------------------------

	_FLIP_X = np.diag([-1.0, 1.0, 1.0]) # X-axis mirror matrix


	def _adapt_rotmat_to_flipped_x(R_smpl):
	"""
	Convert an SO(3) rotation matrix from SMPL convention (left=+X)
	to rig convention (left=-X). F @ R @ F where F = diag(-1,1,1).
	"""
	return _FLIP_X @ R_smpl @ _FLIP_X


	def compute_rig_fk_transforms(J_bind, body_pose_rotmats):
	"""
	Compute A_j = G_j_posed * IBM_j in rig world space.
	A_j maps T-pose -> posed, so A_j^{-1} maps posed -> T-pose.

	HMR2 returns rotations in SMPL convention (left shoulder at +X).
	The rig uses the opposite convention (left shoulder at -X).
	We convert by conjugating with the X-flip matrix before building FK.

	J_bind : (24, 3) T-pose joint world positions from rig
	body_pose_rotmats: (23, 3, 3) HMR2 body pose rotation matrices (joints 1-23)
	Returns A: (24, 4, 4)
	"""
	G = [None] * 24
	for j in range(24):
	p = SMPL_PARENTS[j]
	# Convert rotation from SMPL (+X=left) to rig (-X=left) convention
	R_smpl = body_pose_rotmats[j-1].numpy() if j >= 1 else np.eye(3)
	R_j = _adapt_rotmat_to_flipped_x(R_smpl)

	if p < 0:
	t_j = J_bind[j] # root: absolute world position
	else:
	t_j = J_bind[j] - J_bind[p]

	L = np.eye(4, dtype=np.float64)
	L[:3, :3] = R_j
	L[:3, 3] = t_j

	G[j] = L if p < 0 else G[p] @ L

	G = np.stack(G)

	A = np.zeros((24, 4, 4), dtype=np.float64)
	for j in range(24):
	IBM = np.eye(4, dtype=np.float64)
	IBM[:3, 3] = -J_bind[j]
	A[j] = G[j] @ IBM

	return A


	# ---- Step 5: Inverse LBS -----------------------------------------------------

	def inverse_lbs(verts, j_idx, w_arr, A):
	"""
	v_tpose = (Sum_j W_j * A_j)^{-1} * v_posed
	All inputs in rig world space.
	Returns (N, 3) T-posed vertices.
	"""
	N = len(verts)
	# Blend forward transforms
	T_fwd = np.zeros((N, 4, 4), dtype=np.float64)
	for k in range(4):
	ji = j_idx[:, k]
	w = w_arr[:, k]
	mask = w > 1e-6
	if mask.any():
	T_fwd[mask] += w[mask, None, None] * A[ji[mask]]

	T_inv = np.linalg.inv(T_fwd)
	v_h = np.concatenate([verts, np.ones((N, 1))], axis=1)
	v_tp = np.einsum('nij,nj->ni', T_inv, v_h)[:, :3]

	disp = np.linalg.norm(v_tp - verts, axis=1)
	print(' inverse LBS: mean_disp=%.4f max_disp=%.4f' % (disp.mean(), disp.max()))
	return v_tp


	# ---- Step 6: Map T-posed rig verts back to original mesh space ---------------

	def rig_to_original_space(rig_verts_tposed, rig_verts_original, orig_mesh_verts):
	"""
	Rig verts are a scaled + translated version of the original mesh verts.
	Recover the (scale, offset) from the mapping:
	rig_vert = orig_vert * scale + offset

	Estimates scale from height ratio, offset from floor alignment.
	Returns T-posed vertices in original mesh coordinate space.
	"""
	rig_h = rig_verts_original[:, 1].max() - rig_verts_original[:, 1].min()
	orig_h = orig_mesh_verts[:, 1].max() - orig_mesh_verts[:, 1].min()
	scale = rig_h / max(orig_h, 1e-6)

	# The rig aligns: orig * scale, then v[:,1] -= v[:,1].min() (floor at 0)
	# and v[:,0] += smpl_joints[0,0] - cx; v[:,2] += smpl_joints[0,2] - cz
	# We can recover offset from comparing means/floors
	# offset = rig_floor_Y - (orig_floor_Y * scale)
	rig_floor = rig_verts_original[:, 1].min()
	orig_floor = orig_mesh_verts[:, 1].min()
	y_offset = rig_floor - orig_floor * scale

	# X, Z: center offset
	rig_cx = (rig_verts_original[:, 0].max() + rig_verts_original[:, 0].min()) * 0.5
	orig_cx = (orig_mesh_verts[:, 0].max() + orig_mesh_verts[:, 0].min()) * 0.5
	x_offset = rig_cx - orig_cx * scale

	rig_cz = (rig_verts_original[:, 2].max() + rig_verts_original[:, 2].min()) * 0.5
	orig_cz = (orig_mesh_verts[:, 2].max() + orig_mesh_verts[:, 2].min()) * 0.5
	z_offset = rig_cz - orig_cz * scale

	print(' rig->orig: scale=%.4f offset=[%.3f, %.3f, %.3f]' % (scale, x_offset, y_offset, z_offset))

	# Invert: orig_vert = (rig_vert - offset) / scale
	# For T-posed verts: they're in rig space but T-posed, so same inversion
	tposed_orig = np.zeros_like(rig_verts_tposed)
	tposed_orig[:, 0] = (rig_verts_tposed[:, 0] - x_offset) / scale
	tposed_orig[:, 1] = (rig_verts_tposed[:, 1] - y_offset) / scale
	tposed_orig[:, 2] = (rig_verts_tposed[:, 2] - z_offset) / scale
	return tposed_orig


	# ---- SKEL bone geometry ------------------------------------------------------

	def export_skel_bones(betas, out_path, gender='male'):
	try:
	from skel.skel_model import SKEL
	except ImportError:
	print(' [skel] Not installed')
	return None
	skel_file = os.path.join(SKEL_DIR, 'skel_%s.pkl' % gender)
	if not os.path.exists(skel_file):
	print(' [skel] Weights not found: %s' % skel_file)
	return None
	try:
	skel_model = SKEL(gender=gender, model_path=SKEL_DIR)
	betas_t = betas.unsqueeze(0)[:, :10]
	poses_zero = torch.zeros(1, 46)
	trans_zero = torch.zeros(1, 3)
	with torch.no_grad():
	out = skel_model(poses=poses_zero, betas=betas_t, trans=trans_zero, skelmesh=True)
	bone_verts = out.skel_verts[0].numpy()
	bone_faces = skel_model.skel_f.numpy()
	mesh = trimesh.Trimesh(vertices=bone_verts, faces=bone_faces, process=False)
	mesh.export(out_path)
	print(' [skel] Bone mesh -> %s (%d verts)' % (out_path, len(bone_verts)))
	return out_path
	except Exception as e:
	print(' [skel] Export failed: %s' % e)
	return None


	# ---- Main --------------------------------------------------------------------

	def tpose_smpl(body_glb, out_glb, rig_glb=None, debug_dir=None, skel_out=None):
	device = 'cuda'

	if not rig_glb or not os.path.exists(rig_glb):
	raise RuntimeError('--rig is required: provide the rigged.glb from the Rig step.')

	print('[tpose_smpl] Rendering front view ...')
	img_bgr = render_front(body_glb, device=device)
	if debug_dir:
	cv2.imwrite(os.path.join(debug_dir, 'tpose_render.png'), img_bgr)

	print('[tpose_smpl] Running HMR2 pose estimation ...')
	hmr2_out = run_hmr2(img_bgr, device=device)
	print(' betas: %s' % hmr2_out['betas'].numpy().round(3))

	print('[tpose_smpl] Reading rigged GLB (rig world space) ...')
	rig_data = read_rigged_glb(rig_glb)

	print('[tpose_smpl] Loading original mesh for UV/texture ...')
	scene = trimesh.load(body_glb)
	if isinstance(scene, trimesh.Scene):
	geom_name = list(scene.geometry.keys())[0]
	orig_mesh = scene.geometry[geom_name]
	else:
	orig_mesh = scene; geom_name = None

	orig_verts = np.array(orig_mesh.vertices, dtype=np.float64)
	uvs = np.array(orig_mesh.visual.uv, dtype=np.float64)
	orig_tex = orig_mesh.visual.material.baseColorTexture
	print(' Orig mesh: %d verts Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
	len(orig_verts),
	orig_verts[:,1].min(), orig_verts[:,1].max(),
	orig_verts[:,0].min(), orig_verts[:,0].max()))

	print('[tpose_smpl] Computing FK transforms in rig world space ...')
	body_pose_rotmats = hmr2_out['body_pose'] # (23, 3, 3)
	A = compute_rig_fk_transforms(rig_data['J_bind'], body_pose_rotmats)

	# Verify zero-pose gives identity (sanity check)
	A_zero = compute_rig_fk_transforms(rig_data['J_bind'],
	torch.zeros(23, 3, 3) + torch.eye(3))
	v_test = rig_data['verts'][:3]
	v_h = np.concatenate([v_test, np.ones((3,1))], axis=1)
	T_fwd_test = np.zeros((3, 4, 4))
	for k in range(4):
	ji = rig_data['j_idx'][:3, k]; w = rig_data['w_arr'][:3, k]
	T_fwd_test += w[:, None, None] * A_zero[ji]
	identity_err = np.abs(T_fwd_test - np.eye(4)).max()
	print(' zero-pose identity check: max_err=%.6f (expect ~0)' % identity_err)

	print('[tpose_smpl] Applying inverse LBS ...')
	rig_verts_tposed = inverse_lbs(
	rig_data['verts'], rig_data['j_idx'], rig_data['w_arr'], A)

	print('[tpose_smpl] T-posed rig verts: Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
	rig_verts_tposed[:,1].min(), rig_verts_tposed[:,1].max(),
	rig_verts_tposed[:,0].min(), rig_verts_tposed[:,0].max()))

	print('[tpose_smpl] Mapping back to original mesh coordinate space ...')
	tposed_orig = rig_to_original_space(
	rig_verts_tposed, rig_data['verts'], orig_verts)

	print('[tpose_smpl] T-posed orig: Y: [%.3f, %.3f] X: [%.3f, %.3f]' % (
	tposed_orig[:,1].min(), tposed_orig[:,1].max(),
	tposed_orig[:,0].min(), tposed_orig[:,0].max()))

	orig_mesh.vertices = tposed_orig
	orig_mesh.visual = TextureVisuals(uv=uvs,
	material=PBRMaterial(baseColorTexture=orig_tex))

	if geom_name and isinstance(scene, trimesh.Scene):
	scene.geometry[geom_name] = orig_mesh
	scene.export(out_glb)
	else:
	orig_mesh.export(out_glb)

	print('[tpose_smpl] Saved: %s (%d KB)' % (out_glb, os.path.getsize(out_glb)//1024))

	if skel_out:
	print('[tpose_smpl] Exporting SKEL bone geometry ...')
	export_skel_bones(hmr2_out['betas'], skel_out)

	return out_glb


	if __name__ == '__main__':
	ap = argparse.ArgumentParser()
	ap.add_argument('--body', required=True)
	ap.add_argument('--out', required=True)
	ap.add_argument('--rig', required=True, help='Rigged GLB from rig step')
	ap.add_argument('--skel_out', default=None, help='SKEL BSM bone mesh output')
	ap.add_argument('--debug_dir', default=None)
	args = ap.parse_args()
	os.makedirs(args.debug_dir, exist_ok=True) if args.debug_dir else None
	tpose_smpl(args.body, args.out, rig_glb=args.rig,
	debug_dir=args.debug_dir, skel_out=args.skel_out)