""" tpose_smpl.py -- T-pose a humanoid GLB via inverse Linear Blend Skinning. Pipeline: 1. Render front view and run HMR2 -> SMPL body_pose + betas 2. Read rigged.glb: mesh verts (rig world space), skinning weights, T-pose joints 3. Compute FK transforms in rig world space using HMR2 body_pose 4. Apply inverse LBS: v_tpose = (Sum_j W_j * A_j)^-1 * v_posed 5. Map T-posed verts back to original mesh coordinate space, preserve UV/texture 6. Optionally export SKEL bone mesh in T-pose Usage: python tpose_smpl.py --body /tmp/triposg_textured.glb \ --rig /tmp/rig_out/rigged.glb \ --out /tmp/tposed_surface.glb \ [--skel_out /tmp/tposed_bones.glb] \ [--debug_dir /tmp/tpose_debug] """ import os, sys, argparse, struct, json, warnings warnings.filterwarnings('ignore') import numpy as np import cv2 import torch import trimesh from trimesh.visual.texture import TextureVisuals from trimesh.visual.material import PBRMaterial from scipy.spatial.transform import Rotation as R sys.path.insert(0, '/root/MV-Adapter') SMPL_NEUTRAL = '/root/body_models/smpl/SMPL_NEUTRAL.pkl' SKEL_DIR = '/root/body_models/skel' SMPL_PARENTS = [-1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 12, 13, 14, 16, 17, 18, 19, 20, 21] # ---- Step 1: Render front view ----------------------------------------------- def render_front(body_glb, H=1024, W=768, device='cuda'): from mvadapter.utils.mesh_utils import ( NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, render, ) ctx = NVDiffRastContextWrapper(device=device, context_type='cuda') mesh_mv = load_mesh(body_glb, rescale=True, device=device) camera = get_orthogonal_camera( elevation_deg=[0], distance=[1.8], left=-0.55, right=0.55, bottom=-0.55, top=0.55, azimuth_deg=[-90], device=device, ) out = render(ctx, mesh_mv, camera, height=H, width=W, render_attr=True, render_depth=False, render_normal=False, attr_background=0.5) img_np = (out.attr[0].cpu().numpy() * 255).clip(0, 255).astype(np.uint8) return cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR) # ---- Step 2: HMR2 pose estimation -------------------------------------------- def run_hmr2(img_bgr, device='cuda'): from pathlib import Path from hmr2.configs import CACHE_DIR_4DHUMANS from hmr2.models import load_hmr2, DEFAULT_CHECKPOINT, download_models from hmr2.utils import recursive_to from hmr2.datasets.vitdet_dataset import ViTDetDataset from hmr2.utils.utils_detectron2 import DefaultPredictor_Lazy from detectron2.config import LazyConfig import hmr2 as hmr2_pkg download_models(CACHE_DIR_4DHUMANS) model, model_cfg = load_hmr2(DEFAULT_CHECKPOINT) model = model.to(device).eval() cfg_path = Path(hmr2_pkg.__file__).parent / 'configs' / 'cascade_mask_rcnn_vitdet_h_75ep.py' det_cfg = LazyConfig.load(str(cfg_path)) det_cfg.train.init_checkpoint = ( 'https://dl.fbaipublicfiles.com/detectron2/ViTDet/COCO/cascade_mask_rcnn_vitdet_h' '/f328730692/model_final_f05665.pkl' ) for i in range(3): det_cfg.model.roi_heads.box_predictors[i].test_score_thresh = 0.25 detector = DefaultPredictor_Lazy(det_cfg) det_out = detector(img_bgr) instances = det_out['instances'] valid = (instances.pred_classes == 0) & (instances.scores > 0.5) boxes = instances.pred_boxes.tensor[valid].cpu().numpy() if len(boxes) == 0: raise RuntimeError('HMR2: no person detected in render') areas = (boxes[:,2]-boxes[:,0]) * (boxes[:,3]-boxes[:,1]) boxes = boxes[areas.argmax():areas.argmax()+1] dataset = ViTDetDataset(model_cfg, img_bgr, boxes) dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0) for batch in dataloader: batch = recursive_to(batch, device) with torch.no_grad(): out = model(batch) sp = out['pred_smpl_params'] return { 'body_pose': sp['body_pose'][0].cpu(), # (23, 3, 3) 'betas': sp['betas'][0].cpu(), # (10,) } # ---- Step 3: Read all data from rigged.glb ----------------------------------- def read_rigged_glb(rig_glb): """ Returns dict with: verts : (N, 3) mesh vertices in rig world space j_idx : (N, 4) joint indices w_arr : (N, 4) skinning weights J_bind : (24, 3) T-pose joint world positions """ with open(rig_glb, 'rb') as fh: raw = fh.read() ch_len, _ = struct.unpack_from(' 0, row_sum, 1.0) # Read T-pose joint world positions by accumulating node translations nodes = gltf['nodes'] skin = gltf['skins'][0] j_nodes = skin['joints'] # [0, 1, ..., 23] J_bind = np.zeros((24, 3), dtype=np.float64) for ji, ni in enumerate(j_nodes): t_local = np.array(nodes[ni].get('translation', [0, 0, 0])) p = SMPL_PARENTS[ji] J_bind[ji] = (J_bind[p] if p >= 0 else np.zeros(3)) + t_local print(' Rig verts: %d Y: [%.3f, %.3f] X: [%.3f, %.3f]' % ( len(verts), verts[:,1].min(), verts[:,1].max(), verts[:,0].min(), verts[:,0].max())) print(' J_bind pelvis: (%.3f, %.3f, %.3f) L_shoulder: (%.3f, %.3f, %.3f)' % ( *J_bind[0], *J_bind[16])) return {'verts': verts, 'j_idx': j_idx, 'w_arr': w_arr, 'J_bind': J_bind} # ---- Step 4: FK in rig world space -> A matrices ----------------------------- _FLIP_X = np.diag([-1.0, 1.0, 1.0]) # X-axis mirror matrix def _adapt_rotmat_to_flipped_x(R_smpl): """ Convert an SO(3) rotation matrix from SMPL convention (left=+X) to rig convention (left=-X). F @ R @ F where F = diag(-1,1,1). """ return _FLIP_X @ R_smpl @ _FLIP_X def compute_rig_fk_transforms(J_bind, body_pose_rotmats): """ Compute A_j = G_j_posed * IBM_j in rig world space. A_j maps T-pose -> posed, so A_j^{-1} maps posed -> T-pose. HMR2 returns rotations in SMPL convention (left shoulder at +X). The rig uses the opposite convention (left shoulder at -X). We convert by conjugating with the X-flip matrix before building FK. J_bind : (24, 3) T-pose joint world positions from rig body_pose_rotmats: (23, 3, 3) HMR2 body pose rotation matrices (joints 1-23) Returns A: (24, 4, 4) """ G = [None] * 24 for j in range(24): p = SMPL_PARENTS[j] # Convert rotation from SMPL (+X=left) to rig (-X=left) convention R_smpl = body_pose_rotmats[j-1].numpy() if j >= 1 else np.eye(3) R_j = _adapt_rotmat_to_flipped_x(R_smpl) if p < 0: t_j = J_bind[j] # root: absolute world position else: t_j = J_bind[j] - J_bind[p] L = np.eye(4, dtype=np.float64) L[:3, :3] = R_j L[:3, 3] = t_j G[j] = L if p < 0 else G[p] @ L G = np.stack(G) A = np.zeros((24, 4, 4), dtype=np.float64) for j in range(24): IBM = np.eye(4, dtype=np.float64) IBM[:3, 3] = -J_bind[j] A[j] = G[j] @ IBM return A # ---- Step 5: Inverse LBS ----------------------------------------------------- def inverse_lbs(verts, j_idx, w_arr, A): """ v_tpose = (Sum_j W_j * A_j)^{-1} * v_posed All inputs in rig world space. Returns (N, 3) T-posed vertices. """ N = len(verts) # Blend forward transforms T_fwd = np.zeros((N, 4, 4), dtype=np.float64) for k in range(4): ji = j_idx[:, k] w = w_arr[:, k] mask = w > 1e-6 if mask.any(): T_fwd[mask] += w[mask, None, None] * A[ji[mask]] T_inv = np.linalg.inv(T_fwd) v_h = np.concatenate([verts, np.ones((N, 1))], axis=1) v_tp = np.einsum('nij,nj->ni', T_inv, v_h)[:, :3] disp = np.linalg.norm(v_tp - verts, axis=1) print(' inverse LBS: mean_disp=%.4f max_disp=%.4f' % (disp.mean(), disp.max())) return v_tp # ---- Step 6: Map T-posed rig verts back to original mesh space --------------- def rig_to_original_space(rig_verts_tposed, rig_verts_original, orig_mesh_verts): """ Rig verts are a scaled + translated version of the original mesh verts. Recover the (scale, offset) from the mapping: rig_vert = orig_vert * scale + offset Estimates scale from height ratio, offset from floor alignment. Returns T-posed vertices in original mesh coordinate space. """ rig_h = rig_verts_original[:, 1].max() - rig_verts_original[:, 1].min() orig_h = orig_mesh_verts[:, 1].max() - orig_mesh_verts[:, 1].min() scale = rig_h / max(orig_h, 1e-6) # The rig aligns: orig * scale, then v[:,1] -= v[:,1].min() (floor at 0) # and v[:,0] += smpl_joints[0,0] - cx; v[:,2] += smpl_joints[0,2] - cz # We can recover offset from comparing means/floors # offset = rig_floor_Y - (orig_floor_Y * scale) rig_floor = rig_verts_original[:, 1].min() orig_floor = orig_mesh_verts[:, 1].min() y_offset = rig_floor - orig_floor * scale # X, Z: center offset rig_cx = (rig_verts_original[:, 0].max() + rig_verts_original[:, 0].min()) * 0.5 orig_cx = (orig_mesh_verts[:, 0].max() + orig_mesh_verts[:, 0].min()) * 0.5 x_offset = rig_cx - orig_cx * scale rig_cz = (rig_verts_original[:, 2].max() + rig_verts_original[:, 2].min()) * 0.5 orig_cz = (orig_mesh_verts[:, 2].max() + orig_mesh_verts[:, 2].min()) * 0.5 z_offset = rig_cz - orig_cz * scale print(' rig->orig: scale=%.4f offset=[%.3f, %.3f, %.3f]' % (scale, x_offset, y_offset, z_offset)) # Invert: orig_vert = (rig_vert - offset) / scale # For T-posed verts: they're in rig space but T-posed, so same inversion tposed_orig = np.zeros_like(rig_verts_tposed) tposed_orig[:, 0] = (rig_verts_tposed[:, 0] - x_offset) / scale tposed_orig[:, 1] = (rig_verts_tposed[:, 1] - y_offset) / scale tposed_orig[:, 2] = (rig_verts_tposed[:, 2] - z_offset) / scale return tposed_orig # ---- SKEL bone geometry ------------------------------------------------------ def export_skel_bones(betas, out_path, gender='male'): try: from skel.skel_model import SKEL except ImportError: print(' [skel] Not installed') return None skel_file = os.path.join(SKEL_DIR, 'skel_%s.pkl' % gender) if not os.path.exists(skel_file): print(' [skel] Weights not found: %s' % skel_file) return None try: skel_model = SKEL(gender=gender, model_path=SKEL_DIR) betas_t = betas.unsqueeze(0)[:, :10] poses_zero = torch.zeros(1, 46) trans_zero = torch.zeros(1, 3) with torch.no_grad(): out = skel_model(poses=poses_zero, betas=betas_t, trans=trans_zero, skelmesh=True) bone_verts = out.skel_verts[0].numpy() bone_faces = skel_model.skel_f.numpy() mesh = trimesh.Trimesh(vertices=bone_verts, faces=bone_faces, process=False) mesh.export(out_path) print(' [skel] Bone mesh -> %s (%d verts)' % (out_path, len(bone_verts))) return out_path except Exception as e: print(' [skel] Export failed: %s' % e) return None # ---- Main -------------------------------------------------------------------- def tpose_smpl(body_glb, out_glb, rig_glb=None, debug_dir=None, skel_out=None): device = 'cuda' if not rig_glb or not os.path.exists(rig_glb): raise RuntimeError('--rig is required: provide the rigged.glb from the Rig step.') print('[tpose_smpl] Rendering front view ...') img_bgr = render_front(body_glb, device=device) if debug_dir: cv2.imwrite(os.path.join(debug_dir, 'tpose_render.png'), img_bgr) print('[tpose_smpl] Running HMR2 pose estimation ...') hmr2_out = run_hmr2(img_bgr, device=device) print(' betas: %s' % hmr2_out['betas'].numpy().round(3)) print('[tpose_smpl] Reading rigged GLB (rig world space) ...') rig_data = read_rigged_glb(rig_glb) print('[tpose_smpl] Loading original mesh for UV/texture ...') scene = trimesh.load(body_glb) if isinstance(scene, trimesh.Scene): geom_name = list(scene.geometry.keys())[0] orig_mesh = scene.geometry[geom_name] else: orig_mesh = scene; geom_name = None orig_verts = np.array(orig_mesh.vertices, dtype=np.float64) uvs = np.array(orig_mesh.visual.uv, dtype=np.float64) orig_tex = orig_mesh.visual.material.baseColorTexture print(' Orig mesh: %d verts Y: [%.3f, %.3f] X: [%.3f, %.3f]' % ( len(orig_verts), orig_verts[:,1].min(), orig_verts[:,1].max(), orig_verts[:,0].min(), orig_verts[:,0].max())) print('[tpose_smpl] Computing FK transforms in rig world space ...') body_pose_rotmats = hmr2_out['body_pose'] # (23, 3, 3) A = compute_rig_fk_transforms(rig_data['J_bind'], body_pose_rotmats) # Verify zero-pose gives identity (sanity check) A_zero = compute_rig_fk_transforms(rig_data['J_bind'], torch.zeros(23, 3, 3) + torch.eye(3)) v_test = rig_data['verts'][:3] v_h = np.concatenate([v_test, np.ones((3,1))], axis=1) T_fwd_test = np.zeros((3, 4, 4)) for k in range(4): ji = rig_data['j_idx'][:3, k]; w = rig_data['w_arr'][:3, k] T_fwd_test += w[:, None, None] * A_zero[ji] identity_err = np.abs(T_fwd_test - np.eye(4)).max() print(' zero-pose identity check: max_err=%.6f (expect ~0)' % identity_err) print('[tpose_smpl] Applying inverse LBS ...') rig_verts_tposed = inverse_lbs( rig_data['verts'], rig_data['j_idx'], rig_data['w_arr'], A) print('[tpose_smpl] T-posed rig verts: Y: [%.3f, %.3f] X: [%.3f, %.3f]' % ( rig_verts_tposed[:,1].min(), rig_verts_tposed[:,1].max(), rig_verts_tposed[:,0].min(), rig_verts_tposed[:,0].max())) print('[tpose_smpl] Mapping back to original mesh coordinate space ...') tposed_orig = rig_to_original_space( rig_verts_tposed, rig_data['verts'], orig_verts) print('[tpose_smpl] T-posed orig: Y: [%.3f, %.3f] X: [%.3f, %.3f]' % ( tposed_orig[:,1].min(), tposed_orig[:,1].max(), tposed_orig[:,0].min(), tposed_orig[:,0].max())) orig_mesh.vertices = tposed_orig orig_mesh.visual = TextureVisuals(uv=uvs, material=PBRMaterial(baseColorTexture=orig_tex)) if geom_name and isinstance(scene, trimesh.Scene): scene.geometry[geom_name] = orig_mesh scene.export(out_glb) else: orig_mesh.export(out_glb) print('[tpose_smpl] Saved: %s (%d KB)' % (out_glb, os.path.getsize(out_glb)//1024)) if skel_out: print('[tpose_smpl] Exporting SKEL bone geometry ...') export_skel_bones(hmr2_out['betas'], skel_out) return out_glb if __name__ == '__main__': ap = argparse.ArgumentParser() ap.add_argument('--body', required=True) ap.add_argument('--out', required=True) ap.add_argument('--rig', required=True, help='Rigged GLB from rig step') ap.add_argument('--skel_out', default=None, help='SKEL BSM bone mesh output') ap.add_argument('--debug_dir', default=None) args = ap.parse_args() os.makedirs(args.debug_dir, exist_ok=True) if args.debug_dir else None tpose_smpl(args.body, args.out, rig_glb=args.rig, debug_dir=args.debug_dir, skel_out=args.skel_out)