# NOTE: Hugging Face Spaces page header ("Spaces: / Running on Zero") removed
# from the extracted source so the file parses as Python.
"""
rig_yolo.py — Rig a humanoid mesh using YOLO-pose joint detection.

Instead of estimating T-pose rotations (which failed), detect where joints
actually ARE in the mesh's current pose and use those positions as the bind pose.

Pipeline:
  1. Render front view (azimuth=-90, same camera as triposg_app.py views)
  2. YOLOv8x-pose -> COCO-17 2D keypoints
  3. Unproject to 3D in original mesh coordinate space
  4. Map COCO-17 -> SMPL-24 (interpolate spine, collar, hand, foot joints)
  5. LBS weights: proximity-based (k=4 nearest joints per vertex)
  6. Export rigged GLB — bind pose = current pose

Usage:
  python rig_yolo.py --body /tmp/triposg_textured.glb \
                     --out /tmp/rig_out/rigged.glb \
                     [--debug_dir /tmp/rig_debug]
"""
| import os, sys, argparse, warnings | |
| warnings.filterwarnings('ignore') | |
| import numpy as np | |
| import cv2 | |
| import trimesh | |
| from scipy.spatial import cKDTree | |
| sys.path.insert(0, '/root/MV-Adapter') | |
# ── Camera constants — MUST match triposg_app.py ──────────────────────────────
# Orthographic frustum extents (world units, symmetric about the origin).
ORTHO_LEFT, ORTHO_RIGHT = -0.55, 0.55
ORTHO_BOT, ORTHO_TOP = -0.55, 0.55
# Render resolution in pixels (width, height).
RENDER_W, RENDER_H = 768, 1024
FRONT_AZ = -90  # azimuth that gives front view
# Orthographic proj scale: 2/(right-left) = 1.818...
# (maps normalized "std-space" coordinates to NDC in [-1, 1])
PROJ_SCALE = 2.0 / (ORTHO_RIGHT - ORTHO_LEFT)
# Parent joint index for each of the 24 SMPL joints; -1 marks the pelvis root.
# Order matches SMPL_JOINT_NAMES below (e.g. left_hip's parent is pelvis=0).
SMPL_PARENTS = [-1,0,0,0,1,2,3,4,5,6,7,8,9,9,9,
                12,13,14,16,17,18,19,20,21]
# Canonical SMPL-24 joint names, indexed consistently with SMPL_PARENTS.
SMPL_JOINT_NAMES = [
    'pelvis','left_hip','right_hip','spine1',
    'left_knee','right_knee','spine2',
    'left_ankle','right_ankle','spine3',
    'left_foot','right_foot','neck',
    'left_collar','right_collar','head',
    'left_shoulder','right_shoulder',
    'left_elbow','right_elbow',
    'left_wrist','right_wrist',
    'left_hand','right_hand',
]
# COCO-17 order (as emitted by YOLOv8-pose keypoints).
COCO_NAMES = ['nose','L_eye','R_eye','L_ear','R_ear',
              'L_shoulder','R_shoulder','L_elbow','R_elbow','L_wrist','R_wrist',
              'L_hip','R_hip','L_knee','R_knee','L_ankle','R_ankle']
| # ββ Step 0: Load mesh directly from GLB (correct UV channel) βββββββββββββββββ | |
# ── Step 0: Load mesh directly from GLB (correct UV channel) ─────────────────
def load_mesh_from_gltf(body_glb):
    """
    Load mesh from GLB using pygltflib, reading the UV channel the material
    actually references (TEXCOORD_0 or TEXCOORD_1).

    Only the first primitive of the first mesh is read; the first material's
    baseColorTexture decides which TEXCOORD channel to use.

    Returns: verts (N,3) float64, faces (F,3) int32,
             uv (N,2) float32 or None, texture_pil PIL.Image or None
    """
    import pygltflib
    from PIL import Image as PILImage
    import io
    gltf = pygltflib.GLTF2().load(body_glb)
    blob = gltf.binary_blob()
    # componentType → (numpy dtype, bytes per element)
    _DTYPE = {5120: np.int8, 5121: np.uint8, 5122: np.int16,
              5123: np.uint16, 5125: np.uint32, 5126: np.float32}
    # accessor.type → number of components per element
    _NCOMP = {'SCALAR': 1, 'VEC2': 2, 'VEC3': 3, 'VEC4': 4, 'MAT4': 16}
    def read_accessor(idx):
        # Decode accessor `idx` from the binary blob into a (count, n_comp)
        # array (or 1-D for SCALAR). Handles both tightly-packed and
        # interleaved bufferViews.
        if idx is None:
            return None
        acc = gltf.accessors[idx]
        bv = gltf.bufferViews[acc.bufferView]
        dtype = _DTYPE[acc.componentType]
        n_comp = _NCOMP[acc.type]
        bv_off = bv.byteOffset or 0
        acc_off = acc.byteOffset or 0
        elem_bytes = np.dtype(dtype).itemsize * n_comp
        # byteStride of 0/None or equal to elem_bytes means tightly packed.
        stride = bv.byteStride if (bv.byteStride and bv.byteStride != elem_bytes) else elem_bytes
        if stride == elem_bytes:
            # Contiguous: one frombuffer over the whole accessor range.
            start = bv_off + acc_off
            size = acc.count * elem_bytes
            arr = np.frombuffer(blob[start:start + size], dtype=dtype)
        else:
            # interleaved buffer: pull one element per stride step
            rows = []
            for i in range(acc.count):
                start = bv_off + acc_off + i * stride
                rows.append(np.frombuffer(blob[start:start + elem_bytes], dtype=dtype))
            arr = np.concatenate(rows)
        return arr.reshape(acc.count, n_comp) if n_comp > 1 else arr
    # ── Find which texCoord index the material references ──────────────────────
    texcoord_idx = 0
    if gltf.materials:
        pbr = gltf.materials[0].pbrMetallicRoughness
        if pbr and pbr.baseColorTexture:
            # texCoord may be absent/None → default to channel 0.
            texcoord_idx = getattr(pbr.baseColorTexture, 'texCoord', 0) or 0
    print(f' material uses TEXCOORD_{texcoord_idx}')
    # ── Read primitive ─────────────────────────────────────────────────────────
    prim = gltf.meshes[0].primitives[0]
    attrs = prim.attributes
    verts = read_accessor(attrs.POSITION).astype(np.float64)
    idx_data = read_accessor(prim.indices).flatten()
    faces = idx_data.reshape(-1, 3).astype(np.int32)
    # Read the correct UV channel; fall back to TEXCOORD_0
    uv_acc_idx = getattr(attrs, f'TEXCOORD_{texcoord_idx}', None)
    if uv_acc_idx is None and texcoord_idx != 0:
        uv_acc_idx = getattr(attrs, 'TEXCOORD_0', None)
    uv_raw = read_accessor(uv_acc_idx)
    uv = uv_raw.astype(np.float32) if uv_raw is not None else None
    print(f' verts={len(verts)} faces={len(faces)} uv={len(uv) if uv is not None else None}')
    # ── Extract embedded texture ───────────────────────────────────────────────
    # Walk material → texture → image → bufferView; any missing link simply
    # leaves texture_pil as None (texture is optional for rigging).
    texture_pil = None
    try:
        pbr = gltf.materials[0].pbrMetallicRoughness
        if pbr and pbr.baseColorTexture is not None:
            tex_idx = pbr.baseColorTexture.index
            if tex_idx is not None and tex_idx < len(gltf.textures):
                src_idx = gltf.textures[tex_idx].source
                if src_idx is not None and src_idx < len(gltf.images):
                    img_obj = gltf.images[src_idx]
                    if img_obj.bufferView is not None:
                        # Image bytes are embedded in the binary blob.
                        bv = gltf.bufferViews[img_obj.bufferView]
                        bv_off = bv.byteOffset or 0
                        img_bytes = blob[bv_off:bv_off + bv.byteLength]
                        texture_pil = PILImage.open(io.BytesIO(img_bytes)).convert('RGBA')
                        print(f' texture: {texture_pil.size}')
    except Exception as e:
        # Best-effort: a missing/corrupt texture must not abort the pipeline.
        print(f' texture extraction failed: {e}')
    return verts, faces, uv, texture_pil
| # ββ Step 1: Render front view βββββββββββββββββββββββββββββββββββββββββββββββββ | |
# ── Step 1: Render front view ─────────────────────────────────────────────────
def render_front(body_glb, debug_dir=None):
    """
    Render front view using MV-Adapter.
    Returns (img_bgr, scale_factor) where scale_factor = max_abs / 0.5
    (used to convert std-space back to original mesh space).

    Camera parameters intentionally mirror triposg_app.py so that pixel
    coordinates can later be unprojected back to mesh space.
    Requires CUDA (nvdiffrast context). If debug_dir is set, the render is
    saved there as front_render.png.
    """
    from mvadapter.utils.mesh_utils import (
        NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, render,
    )
    ctx = NVDiffRastContextWrapper(device='cuda', context_type='cuda')
    # rescale=True normalizes the mesh into the std [-0.5, 0.5] cube;
    # return_transform=True hands back the scale needed to invert that.
    mesh_mv, _offset, scale_factor = load_mesh(
        body_glb, rescale=True, return_transform=True, device='cuda')
    camera = get_orthogonal_camera(
        elevation_deg=[0], distance=[1.8],
        left=ORTHO_LEFT, right=ORTHO_RIGHT,
        bottom=ORTHO_BOT, top=ORTHO_TOP,
        azimuth_deg=[FRONT_AZ], device='cuda')
    out = render(ctx, mesh_mv, camera,
                 height=RENDER_H, width=RENDER_W,
                 render_attr=True, render_depth=False, render_normal=False,
                 attr_background=0.5)
    # attr is float RGB in [0,1]; convert to uint8 BGR for OpenCV / YOLO.
    img_np = (out.attr[0].cpu().numpy() * 255).clip(0, 255).astype(np.uint8)
    img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
    if debug_dir:
        cv2.imwrite(os.path.join(debug_dir, 'front_render.png'), img_bgr)
    print(f' render: {RENDER_W}x{RENDER_H}, scale_factor={scale_factor:.4f}')
    return img_bgr, scale_factor
| # ββ Step 2: YOLO-pose keypoints βββββββββββββββββββββββββββββββββββββββββββββββ | |
| def detect_keypoints(img_bgr, debug_dir=None): | |
| """ | |
| Run YOLOv8x-pose on the rendered image. | |
| Returns (17, 3) array: [pixel_x, pixel_y, confidence] for COCO-17 joints. | |
| Picks the largest detected bounding box (the character body). | |
| """ | |
| from ultralytics import YOLO | |
| model = YOLO('yolov8x-pose.pt') | |
| results = model(img_bgr, verbose=False) | |
| if not results or results[0].keypoints is None or len(results[0].boxes) == 0: | |
| raise RuntimeError('YOLO: no person detected in front render') | |
| r = results[0] | |
| boxes = r.boxes.xyxy.cpu().numpy() | |
| areas = (boxes[:,2]-boxes[:,0]) * (boxes[:,3]-boxes[:,1]) | |
| idx = int(areas.argmax()) | |
| kp_xy = r.keypoints[idx].xy[0].cpu().numpy() # (17, 2) pixel | |
| kp_conf = r.keypoints[idx].conf[0].cpu().numpy() # (17,) confidence | |
| kp = np.concatenate([kp_xy, kp_conf[:,None]], axis=1) # (17, 3) | |
| print(' YOLO detections: %d boxes, using largest' % len(boxes)) | |
| for i, name in enumerate(COCO_NAMES): | |
| if kp_conf[i] > 0.3: | |
| print(' [%d] %-14s px=(%.0f, %.0f) conf=%.2f' % ( | |
| i, name, kp_xy[i,0], kp_xy[i,1], kp_conf[i])) | |
| if debug_dir: | |
| vis = img_bgr.copy() | |
| for i in range(17): | |
| if kp_conf[i] > 0.3: | |
| x, y = int(kp_xy[i,0]), int(kp_xy[i,1]) | |
| cv2.circle(vis, (x, y), 6, (0, 255, 0), -1) | |
| cv2.putText(vis, COCO_NAMES[i][:4], (x+4, y-4), | |
| cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,255,0), 1) | |
| cv2.imwrite(os.path.join(debug_dir, 'yolo_keypoints.png'), vis) | |
| return kp | |
| # ββ Step 3: Unproject 2D β 3D ββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def unproject_to_3d(kp_2d_conf, scale_factor, mesh_verts_orig): | |
| """ | |
| Convert COCO-17 pixel positions to 3D positions in original mesh space. | |
| MV-Adapter orthographic camera at azimuth=-90 maps: | |
| pixel_x β orig_x (character lateral axis) | |
| pixel_y β orig_y (character height axis, flipped from pixel) | |
| orig_z estimated from k-nearest mesh vertices in image space | |
| Forward projection (for reference): | |
| std_x = orig_x / scale_factor | |
| NDC_x = PROJ_SCALE * std_x | |
| pixel_x = (NDC_x + 1) / 2 * W | |
| std_z = orig_y / scale_factor (mesh Y β std Z β image vertical) | |
| NDC_y = -PROJ_SCALE * std_z (Y-flipped by proj matrix) | |
| pixel_y = (NDC_y + 1) / 2 * H | |
| Inverse: | |
| orig_x = (2*px/W - 1) / PROJ_SCALE * scale_factor | |
| orig_y = -(2*py/H - 1) / PROJ_SCALE * scale_factor | |
| """ | |
| W, H = RENDER_W, RENDER_H | |
| # Project all mesh vertices to image space (for Z lookup) | |
| verts_px_x = ((mesh_verts_orig[:,0] / scale_factor * PROJ_SCALE) + 1.0) / 2.0 * W | |
| verts_px_y = ((-mesh_verts_orig[:,1] / scale_factor * PROJ_SCALE) + 1.0) / 2.0 * H | |
| joints_3d = np.full((17, 3), np.nan) | |
| for i in range(17): | |
| px, py, conf = kp_2d_conf[i] | |
| if conf < 0.15 or px < 1 or py < 1: | |
| continue | |
| orig_x = (2.0*px/W - 1.0) / PROJ_SCALE * scale_factor | |
| orig_y = -(2.0*py/H - 1.0) / PROJ_SCALE * scale_factor | |
| # Z: median of k-nearest mesh vertices in image space | |
| dist_2d = np.hypot(verts_px_x - px, verts_px_y - py) | |
| k = 30 | |
| near_idx = np.argpartition(dist_2d, k-1)[:k] | |
| orig_z = float(np.median(mesh_verts_orig[near_idx, 2])) | |
| joints_3d[i] = [orig_x, orig_y, orig_z] | |
| return joints_3d | |
| # ββ Step 4: COCO-17 β SMPL-24 ββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def coco17_to_smpl24(coco_3d, mesh_verts): | |
| """ | |
| Build 24 SMPL joint positions from COCO-17 detections. | |
| Spine / collar / hand / foot joints are interpolated. | |
| Low-confidence (NaN) COCO joints fall back to mesh geometry. | |
| """ | |
| def lerp(a, b, t): | |
| return a + t * (b - a) | |
| def valid(i): | |
| return not np.any(np.isnan(coco_3d[i])) | |
| # Fill NaN joints from mesh geometry (centroid fallback) | |
| c = coco_3d.copy() | |
| centroid = mesh_verts.mean(axis=0) | |
| for i in range(17): | |
| if not valid(i): | |
| c[i] = centroid | |
| # Key anchor points | |
| L_shoulder = c[5] | |
| R_shoulder = c[6] | |
| L_hip = c[11] | |
| R_hip = c[12] | |
| pelvis = lerp(L_hip, R_hip, 0.5) | |
| mid_shoulder = lerp(L_shoulder, R_shoulder, 0.5) | |
| # Neck: midpoint of shoulders, raised slightly (~ collar bone level) | |
| neck = mid_shoulder + np.array([0.0, 0.04 * (mid_shoulder[1] - pelvis[1]), 0.0]) | |
| J = np.zeros((24, 3), dtype=np.float64) | |
| J[0] = pelvis # pelvis | |
| J[1] = L_hip # left_hip | |
| J[2] = R_hip # right_hip | |
| J[3] = lerp(pelvis, neck, 0.25) # spine1 | |
| J[4] = c[13] # left_knee | |
| J[5] = c[14] # right_knee | |
| J[6] = lerp(pelvis, neck, 0.5) # spine2 | |
| J[7] = c[15] # left_ankle | |
| J[8] = c[16] # right_ankle | |
| J[9] = lerp(pelvis, neck, 0.75) # spine3 | |
| J[12] = neck # neck | |
| # Feet: project ankle downward toward mesh floor | |
| mesh_floor_y = mesh_verts[:,1].min() | |
| foot_y = mesh_floor_y + 0.02 * (c[15][1] - mesh_floor_y) # 2% above floor | |
| J[10] = np.array([c[15][0], foot_y, c[15][2]]) # left_foot | |
| J[11] = np.array([c[16][0], foot_y, c[16][2]]) # right_foot | |
| J[13] = lerp(neck, L_shoulder, 0.5) # left_collar | |
| J[14] = lerp(neck, R_shoulder, 0.5) # right_collar | |
| J[15] = c[0] # head (nose as proxy) | |
| J[16] = L_shoulder # left_shoulder | |
| J[17] = R_shoulder # right_shoulder | |
| J[18] = c[7] # left_elbow | |
| J[19] = c[8] # right_elbow | |
| J[20] = c[9] # left_wrist | |
| J[21] = c[10] # right_wrist | |
| # Hands: extrapolate one step beyond wrist in elbowβwrist direction | |
| for side, (elbow_i, wrist_i, hand_i) in enumerate([(7,9,22), (8,10,23)]): | |
| elbow = c[elbow_i]; wrist = c[wrist_i] | |
| bone = wrist - elbow | |
| blen = np.linalg.norm(bone) | |
| if blen > 1e-3: | |
| J[hand_i] = wrist + bone / blen * 0.05 | |
| else: | |
| J[hand_i] = wrist | |
| print(' SMPL-24 joints:') | |
| print(' pelvis : (%.3f, %.3f, %.3f)' % tuple(J[0])) | |
| print(' L_hip : (%.3f, %.3f, %.3f)' % tuple(J[1])) | |
| print(' R_hip : (%.3f, %.3f, %.3f)' % tuple(J[2])) | |
| print(' neck : (%.3f, %.3f, %.3f)' % tuple(J[12])) | |
| print(' L_shoulder: (%.3f, %.3f, %.3f)' % tuple(J[16])) | |
| print(' R_shoulder: (%.3f, %.3f, %.3f)' % tuple(J[17])) | |
| print(' head : (%.3f, %.3f, %.3f)' % tuple(J[15])) | |
| return J.astype(np.float32) | |
| # ββ Step 5: LBS skinning weights βββββββββββββββββββββββββββββββββββββββββββββ | |
| def compute_skinning_weights(mesh_verts, joints, k=4): | |
| """ | |
| Proximity-based LBS weights: each vertex gets k-nearest joint weights | |
| via inverse-distance weighting. | |
| Returns (N, 24) float32 full weight matrix. | |
| """ | |
| N = len(mesh_verts) | |
| tree = cKDTree(joints) | |
| dists, idxs = tree.query(mesh_verts, k=k, workers=-1) | |
| # Clamp minimum distance to avoid division by zero | |
| inv_d = 1.0 / np.maximum(dists, 1e-6) | |
| inv_d /= inv_d.sum(axis=1, keepdims=True) | |
| W_full = np.zeros((N, 24), dtype=np.float32) | |
| for ki in range(k): | |
| W_full[np.arange(N), idxs[:, ki]] += inv_d[:, ki].astype(np.float32) | |
| # Normalize (should already be normalized, but just in case) | |
| row_sum = W_full.sum(axis=1, keepdims=True) | |
| W_full /= np.where(row_sum > 0, row_sum, 1.0) | |
| print(' weights: max_joint=%d mean_support=%.2f joints/vert' % ( | |
| W_full.argmax(axis=1).max(), | |
| (W_full > 0.01).sum(axis=1).mean())) | |
| return W_full | |
| # ββ Skeleton mesh builder βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def make_skeleton_mesh(joints, radius=0.008): | |
| """ | |
| Build a mesh of hexagonal-prism cylinders connecting parentβchild joints. | |
| Returns (verts, faces) as float32 / int32 numpy arrays. | |
| """ | |
| SEG = 6 # hexagonal cross-section | |
| angles = np.linspace(0, 2 * np.pi, SEG, endpoint=False) | |
| circle = np.stack([np.cos(angles), np.sin(angles)], axis=1) # (SEG, 2) | |
| all_verts, all_faces = [], [] | |
| vert_offset = 0 | |
| for i, parent in enumerate(SMPL_PARENTS): | |
| if parent == -1: | |
| continue | |
| p0 = joints[parent].astype(np.float64) | |
| p1 = joints[i].astype(np.float64) | |
| bone_vec = p1 - p0 | |
| length = np.linalg.norm(bone_vec) | |
| if length < 1e-4: | |
| continue | |
| z_axis = bone_vec / length | |
| ref = np.array([0., 1., 0.]) if abs(z_axis[1]) < 0.9 else np.array([1., 0., 0.]) | |
| x_axis = np.cross(ref, z_axis) | |
| x_axis /= np.linalg.norm(x_axis) | |
| y_axis = np.cross(z_axis, x_axis) | |
| # Bottom ring at p0, top ring at p1 | |
| offsets = radius * (circle[:, 0:1] * x_axis + circle[:, 1:2] * y_axis) | |
| bottom = p0 + offsets # (SEG, 3) | |
| top = p1 + offsets # (SEG, 3) | |
| all_verts.append(np.vstack([bottom, top]).astype(np.float32)) | |
| for j in range(SEG): | |
| j1 = (j + 1) % SEG | |
| b0, b1 = vert_offset + j, vert_offset + j1 | |
| t0, t1 = vert_offset + SEG + j, vert_offset + SEG + j1 | |
| all_faces.extend([[b0, b1, t0], [b1, t1, t0]]) | |
| vert_offset += 2 * SEG | |
| if not all_verts: | |
| return np.zeros((0, 3), np.float32), np.zeros((0, 3), np.int32) | |
| return np.vstack(all_verts), np.array(all_faces, dtype=np.int32) | |
| # ββ Step 6: Export rigged GLB βββββββββββββββββββββββββββββββββββββββββββββββββ | |
# ── Step 6: Export rigged GLB ─────────────────────────────────────────────────
def export_rigged_glb(verts, faces, uv, texture_pil, joints, skin_weights,
                      out_path, skel_verts=None, skel_faces=None):
    """
    Export skinned GLB using pygltflib.

    bind pose = current pose (joints at detected positions).
    IBM[j] = Translation(-J_world[j]) (pure offset, no rotation).
    If skel_verts/skel_faces are provided, a second mesh (bright green skeleton
    sticks) is embedded alongside the body mesh.

    Args:
        verts:        (N, 3) body vertices.
        faces:        (F, 3) triangle indices.
        uv:           (N, 2) texcoords or None (dummy zeros written if None).
        texture_pil:  PIL image for the baseColor texture, or None.
        joints:       (24, 3) SMPL joint positions (bind pose).
        skin_weights: (N, 24) per-vertex joint weights; top-4 kept per vertex.
        out_path:     output .glb path.
        skel_verts/skel_faces: optional extra unskinned skeleton-stick mesh.
    """
    from pygltflib import (GLTF2, Scene, Node, Mesh, Primitive, Accessor,
                           BufferView, Buffer, Material, Texture,
                           Image as GImage, Sampler, Skin, Asset)
    from pygltflib import (ARRAY_BUFFER, ELEMENT_ARRAY_BUFFER, FLOAT,
                           UNSIGNED_INT, UNSIGNED_SHORT, LINEAR,
                           LINEAR_MIPMAP_LINEAR, REPEAT, SCALAR, VEC2,
                           VEC3, VEC4, MAT4)
    gltf = GLTF2()
    gltf.asset = Asset(version='2.0', generator='rig_yolo.py')
    blobs = []
    def _add(data, comp, acc_type, target=None):
        # Append `data` as a padded bufferView + accessor; return accessor idx.
        b = data.tobytes()
        pad = (4 - len(b) % 4) % 4  # glTF requires 4-byte alignment
        off = sum(len(x) for x in blobs)
        blobs.append(b + b'\x00' * pad)
        bv = len(gltf.bufferViews)
        gltf.bufferViews.append(BufferView(
            buffer=0, byteOffset=off, byteLength=len(b), target=target))
        ac = len(gltf.accessors)
        # BUGFIX: accessor min/max must be PER-COMPONENT arrays (one entry per
        # component: 3 for VEC3, 4 for VEC4, 16 for MAT4), not a single scalar.
        # The glTF 2.0 spec REQUIRES correct min/max on POSITION accessors;
        # loaders use them for bounding boxes and validators reject scalars.
        per_comp = data.reshape(len(data), -1)
        gltf.accessors.append(Accessor(
            bufferView=bv, byteOffset=0, componentType=comp,
            type=acc_type, count=len(data),
            min=[float(v) for v in per_comp.min(axis=0)],
            max=[float(v) for v in per_comp.max(axis=0)]))
        return ac
    # ── Body geometry ──────────────────────────────────────────────────────────
    pos_acc = _add(verts.astype(np.float32), FLOAT, VEC3, ARRAY_BUFFER)
    # Vertex normals: accumulate unit face normals per vertex, then normalize.
    v0, v1, v2 = verts[faces[:, 0]], verts[faces[:, 1]], verts[faces[:, 2]]
    fn = np.cross(v1 - v0, v2 - v0)
    fn /= (np.linalg.norm(fn, axis=1, keepdims=True) + 1e-8)
    vn = np.zeros_like(verts)
    for i in range(3):
        np.add.at(vn, faces[:, i], fn)
    vn /= (np.linalg.norm(vn, axis=1, keepdims=True) + 1e-8)
    nor_acc = _add(vn.astype(np.float32), FLOAT, VEC3, ARRAY_BUFFER)
    if uv is None:
        uv = np.zeros((len(verts), 2), np.float32)  # dummy UVs keep layout uniform
    uv_acc = _add(uv.astype(np.float32), FLOAT, VEC2, ARRAY_BUFFER)
    idx_acc = _add(faces.astype(np.uint32).flatten(), UNSIGNED_INT, SCALAR,
                   ELEMENT_ARRAY_BUFFER)
    # Skinning: top-4 joints per vertex (glTF JOINTS_0/WEIGHTS_0 are VEC4)
    top4_idx = np.argsort(-skin_weights, axis=1)[:, :4].astype(np.uint16)
    top4_w = np.take_along_axis(skin_weights, top4_idx.astype(np.int64), axis=1)
    top4_w = top4_w.astype(np.float32)
    top4_w /= top4_w.sum(axis=1, keepdims=True).clip(1e-8, None)
    j_acc = _add(top4_idx, UNSIGNED_SHORT, VEC4, ARRAY_BUFFER)
    w_acc = _add(top4_w, FLOAT, VEC4, ARRAY_BUFFER)
    # ── Texture / body material ────────────────────────────────────────────────
    if texture_pil is not None:
        import io
        buf = io.BytesIO()
        texture_pil.save(buf, format='PNG')
        ib = buf.getvalue()
        off = sum(len(x) for x in blobs)
        pad = (4 - len(ib) % 4) % 4
        blobs.append(ib + b'\x00' * pad)
        # Image bufferView has no target and no accessor.
        gltf.bufferViews.append(
            BufferView(buffer=0, byteOffset=off, byteLength=len(ib)))
        gltf.images.append(
            GImage(mimeType='image/png', bufferView=len(gltf.bufferViews) - 1))
        gltf.samplers.append(
            Sampler(magFilter=LINEAR, minFilter=LINEAR_MIPMAP_LINEAR,
                    wrapS=REPEAT, wrapT=REPEAT))
        gltf.textures.append(Texture(sampler=0, source=0))
        gltf.materials.append(Material(
            name='body',
            pbrMetallicRoughness={
                'baseColorTexture': {'index': 0},
                'metallicFactor': 0.0,
                'roughnessFactor': 0.8},
            doubleSided=True))
    else:
        gltf.materials.append(Material(name='body', doubleSided=True))
    body_prim = Primitive(
        attributes={'POSITION': pos_acc, 'NORMAL': nor_acc,
                    'TEXCOORD_0': uv_acc, 'JOINTS_0': j_acc, 'WEIGHTS_0': w_acc},
        indices=idx_acc, material=0)
    gltf.meshes.append(Mesh(name='body', primitives=[body_prim]))
    # ── Optional skeleton mesh ─────────────────────────────────────────────────
    skel_mesh_idx = None
    if skel_verts is not None and len(skel_verts) > 0:
        sv = skel_verts.astype(np.float32)
        sf = skel_faces.astype(np.int32)
        sv0, sv1, sv2 = sv[sf[:, 0]], sv[sf[:, 1]], sv[sf[:, 2]]
        sfn = np.cross(sv1 - sv0, sv2 - sv0)
        sfn /= (np.linalg.norm(sfn, axis=1, keepdims=True) + 1e-8)
        svn = np.zeros_like(sv)
        for i in range(3):
            np.add.at(svn, sf[:, i], sfn)
        svn /= (np.linalg.norm(svn, axis=1, keepdims=True) + 1e-8)
        s_pos_acc = _add(sv, FLOAT, VEC3, ARRAY_BUFFER)
        s_nor_acc = _add(svn.astype(np.float32), FLOAT, VEC3, ARRAY_BUFFER)
        s_idx_acc = _add(sf.astype(np.uint32).flatten(), UNSIGNED_INT, SCALAR,
                         ELEMENT_ARRAY_BUFFER)
        # Lime-green material for skeleton sticks
        mat_idx = len(gltf.materials)
        gltf.materials.append(Material(
            name='skeleton',
            pbrMetallicRoughness={
                'baseColorFactor': [0.2, 1.0, 0.3, 1.0],
                'metallicFactor': 0.0,
                'roughnessFactor': 0.5},
            doubleSided=True))
        skel_mesh_idx = len(gltf.meshes)
        skel_prim = Primitive(
            attributes={'POSITION': s_pos_acc, 'NORMAL': s_nor_acc},
            indices=s_idx_acc, material=mat_idx)
        gltf.meshes.append(Mesh(name='skeleton', primitives=[skel_prim]))
    # ── Skeleton nodes ─────────────────────────────────────────────────────────
    # Node translations are LOCAL (relative to parent); root carries world pos.
    jnodes = []
    for i, (name, parent) in enumerate(zip(SMPL_JOINT_NAMES, SMPL_PARENTS)):
        t = joints[i].tolist() if parent == -1 else (joints[i] - joints[parent]).tolist()
        n = Node(name=name, translation=t, children=[])
        jnodes.append(len(gltf.nodes))
        gltf.nodes.append(n)
    for i, p in enumerate(SMPL_PARENTS):
        if p != -1:
            gltf.nodes[jnodes[p]].children.append(jnodes[i])
    # Inverse bind matrices: IBM[j] = Translation(-J_world[j])
    # glTF MAT4 is column-major; numpy .tobytes() is row-major.
    # glTF reads the numpy buffer as the TRANSPOSE of what numpy stores.
    # So we set the translation in the last ROW of the numpy matrix → glTF
    # reads that as the last COLUMN (translation column) of a 4x4 mat.
    ibms = np.stack([np.eye(4, dtype=np.float32) for _ in range(len(joints))])
    for i in range(len(joints)):
        ibms[i, 3, :3] = -joints[i]
    ibm_acc = _add(ibms.astype(np.float32), FLOAT, MAT4)
    skin_idx = len(gltf.skins)
    gltf.skins.append(Skin(
        name='smpl_skin', skeleton=jnodes[0],
        joints=jnodes, inverseBindMatrices=ibm_acc))
    # ── Scene graph: root → (joint hierarchy, skinned body, skeleton sticks) ──
    mesh_node = len(gltf.nodes)
    gltf.nodes.append(Node(name='body_mesh', mesh=0, skin=skin_idx))
    root_children = [jnodes[0], mesh_node]
    if skel_mesh_idx is not None:
        skel_node_idx = len(gltf.nodes)
        gltf.nodes.append(Node(name='skeleton_mesh', mesh=skel_mesh_idx))
        root_children.append(skel_node_idx)
    root_node = len(gltf.nodes)
    gltf.nodes.append(Node(name='root', children=root_children))
    gltf.scenes.append(Scene(name='Scene', nodes=[root_node]))
    gltf.scene = 0
    bin_data = b''.join(blobs)
    gltf.buffers.append(Buffer(byteLength=len(bin_data)))
    gltf.set_binary_blob(bin_data)
    gltf.save_binary(out_path)
    print(' rigged GLB -> %s (%d KB)' % (out_path, os.path.getsize(out_path) // 1024))
| # ββ Main ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def rig_yolo(body_glb, out_glb, debug_dir=None): | |
| """ | |
| Rig body_glb and write to out_glb. | |
| Returns (out_glb, out_skel_glb) where out_skel_glb includes visible | |
| skeleton bone sticks alongside the body mesh. | |
| """ | |
| os.makedirs(os.path.dirname(out_glb) or '.', exist_ok=True) | |
| if debug_dir: | |
| os.makedirs(debug_dir, exist_ok=True) | |
| print('[rig_yolo] Rendering front view ...') | |
| img_bgr, scale_factor = render_front(body_glb, debug_dir) | |
| print('[rig_yolo] Running YOLO-pose ...') | |
| kp = detect_keypoints(img_bgr, debug_dir) | |
| print('[rig_yolo] Loading original mesh (pygltflib, correct UV channel) ...') | |
| verts, faces, uv, texture_pil = load_mesh_from_gltf(body_glb) | |
| print('[rig_yolo] Unprojecting YOLO keypoints to 3D ...') | |
| coco_3d = unproject_to_3d(kp, scale_factor, verts) | |
| print('[rig_yolo] Building SMPL-24 skeleton ...') | |
| joints = coco17_to_smpl24(coco_3d, verts) | |
| print('[rig_yolo] Computing skinning weights ...') | |
| skin_weights = compute_skinning_weights(verts, joints, k=4) | |
| print('[rig_yolo] Exporting rigged GLB (no skeleton) ...') | |
| export_rigged_glb(verts, faces, uv, texture_pil, joints, skin_weights, out_glb) | |
| print('[rig_yolo] Building skeleton mesh ...') | |
| skel_verts, skel_faces = make_skeleton_mesh(joints) | |
| out_skel_glb = out_glb.replace('.glb', '_skel.glb') | |
| print('[rig_yolo] Exporting rigged GLB (with skeleton) ...') | |
| export_rigged_glb(verts, faces, uv, texture_pil, joints, skin_weights, | |
| out_skel_glb, skel_verts=skel_verts, skel_faces=skel_faces) | |
| print('[rig_yolo] Done.') | |
| return out_glb, out_skel_glb | |
if __name__ == '__main__':
    # CLI entry point: rig a textured GLB and report both output paths.
    parser = argparse.ArgumentParser()
    parser.add_argument('--body', required=True, help='Input textured GLB')
    parser.add_argument('--out', required=True, help='Output rigged GLB')
    parser.add_argument('--debug_dir', default=None, help='Save debug renders here')
    opts = parser.parse_args()
    rigged, rigged_skel = rig_yolo(opts.body, opts.out, opts.debug_dir)
    print('Rigged: ', rigged)
    print('Rigged + skel: ', rigged_skel)