# Page-scrape residue from the Hugging Face Spaces listing ("Spaces: Running on
# Zero") — kept here as a comment so the file remains valid Python.
| """ | |
| face_swap_render.py β Paint reference face onto TripoSG UV texture using | |
| MV-Adapter's UV-baking pipeline. | |
| Pipeline: | |
| 1. Load mesh with same params as triposg_app.py render stage | |
| 2. Create orthographic camera matching render_front.png (azimuth=-90) | |
| 3. Detect face landmarks in render_front.png + reference photo via InsightFace | |
| 4. norm_crop reference β canonical 512Γ512 frontal face | |
| 5. Estimate 4-DOF similarity (canonical β render) and warpAffine | |
| β produces face_on_render.png: reference face at correct render-space coords | |
| 6. uv_render_attr(images=face_on_render) β projects render image into UV space | |
| No inverse transform, no scale mismatch β the render-space coordinate system | |
| is shared between the camera projection and the UV lookup. | |
| 7. Blend projected face into original texture with geometry mask guard. | |
| 8. Save updated GLB | |
| Usage: | |
| python face_swap_render.py \ | |
| --body /tmp/triposg_textured.glb \ | |
| --face /tmp/triposg_face_ref.png \ | |
| --render /tmp/render_front.png \ | |
| --out /tmp/face_swapped.glb \ | |
| [--blend 0.93] [--uv_size 4096] [--debug_dir /tmp] | |
| """ | |
| import os, sys, argparse, warnings | |
| warnings.filterwarnings('ignore') | |
| import numpy as np | |
| import cv2 | |
| import torch | |
| import torch.nn.functional as F | |
| from PIL import Image | |
| import trimesh | |
| from trimesh.visual.texture import TextureVisuals | |
| from trimesh.visual.material import PBRMaterial | |
| from insightface.utils import face_align as insightface_align | |
| sys.path.insert(0, '/root/MV-Adapter') | |
| from mvadapter.utils.mesh_utils import ( | |
| NVDiffRastContextWrapper, load_mesh, get_orthogonal_camera, | |
| ) | |
| from mvadapter.utils.mesh_utils.uv import ( | |
| uv_precompute, uv_render_geometry, uv_render_attr, | |
| ) | |
| from insightface.app import FaceAnalysis | |
| def _detect_largest_face(img_bgr, app): | |
| faces = app.get(img_bgr) | |
| if not faces: | |
| return None | |
| faces.sort(key=lambda f: (f.bbox[2]-f.bbox[0])*(f.bbox[3]-f.bbox[1]), reverse=True) | |
| return faces[0] | |
def _build_front_face_uv_mask(mesh_t, tex_H, tex_W, neck_frac=0.84):
    """
    Rasterize a soft UV-space mask over the mesh's front-facing head triangles.

    Vertices above a neck cut (at ``neck_frac`` of the bounding-box height) whose
    z is at or above the head's 40th z-percentile count as "face"; triangles with
    all three vertices qualifying are filled into texture space, then the mask is
    dilated, eroded, and blurred for a soft edge. Excludes back-of-head and hair.
    """
    verts = np.array(mesh_t.vertices, dtype=np.float64)
    faces = np.array(mesh_t.faces, dtype=np.int32)
    uvs = np.array(mesh_t.visual.uv, dtype=np.float64)

    # Neck cut: keep only vertices in the top (1 - neck_frac) band of the bbox.
    y_lo, y_hi = verts[:, 1].min(), verts[:, 1].max()
    neck_y = float(y_lo + (y_hi - y_lo) * neck_frac)
    head_idx = np.where(verts[:, 1] > neck_y)[0]
    head_verts = verts[head_idx]

    # Front half: z >= 40th percentile — generous so jaw/cheek toward ears stay.
    # (No lateral cut: it would split UV islands through the eyes/mouth.)
    z_thresh = float(np.percentile(head_verts[:, 2], 40))
    is_front = head_verts[:, 2] >= z_thresh
    if is_front.sum() < 30:  # degenerate split — fall back to the whole head
        is_front = np.ones(len(head_verts), bool)

    keep_vert = np.zeros(len(verts), bool)
    keep_vert[head_idx[is_front]] = True
    tri_keep = keep_vert[faces].all(axis=1)  # triangle kept only if all 3 verts kept
    kept_tris = faces[tri_keep]
    print(f' Geometry mask: {tri_keep.sum()} front-face triangles selected '
          f'(neck_y={neck_y:.3f}, z_thresh={z_thresh:.3f})')

    # Fill each kept triangle into texture space (flip V: trimesh UV origin is
    # bottom-left, image origin is top-left).
    geom_mask = np.zeros((tex_H, tex_W), dtype=np.float32)
    polys = [
        np.column_stack([uvs[tri, 0] * tex_W,
                         (1.0 - uvs[tri, 1]) * tex_H]).astype(np.int32)
        for tri in kept_tris
    ]
    if polys:
        cv2.fillPoly(geom_mask, polys, 1.0)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    geom_mask = cv2.dilate(geom_mask, kernel, iterations=2)  # close intra-tri gaps
    geom_mask = cv2.erode(geom_mask, kernel, iterations=1)   # retreat from island edges
    geom_mask = cv2.GaussianBlur(geom_mask, (31, 31), 8)     # soft transition
    return geom_mask
def face_swap_render(body_glb: str, face_img_path: str, render_img_path: str, out_glb: str,
                     blend: float = 0.93, uv_size: int = 4096,
                     neck_frac: float = 0.76, debug_dir: "str | None" = None) -> str:
    """
    Paint the reference face onto the GLB's UV texture via a shared render-space
    coordinate system (orthographic render <-> UV lookup).

    Args:
        body_glb: Path to the textured input GLB.
        face_img_path: Reference face photo (any size; landmarks are detected).
        render_img_path: Front render of the mesh; its camera params are assumed
            to match the hard-coded orthographic camera below — TODO confirm
            against the render stage.
        out_glb: Output GLB path.
        blend: NOTE(review): printed in the blending log line but never applied —
            the final alpha is solely the projected mask x geometry mask (the
            inline comment says full blend inside the face is deliberate).
            Confirm whether it should scale alpha.
        uv_size: Resolution for the UV geometry precompute pass.
        neck_frac: Bounding-box height fraction used as the neck cut for the
            geometry mask.
        debug_dir: If set, intermediate images are written there.

    Returns:
        The output GLB path (``out_glb``).

    Raises:
        RuntimeError: If no face is detected in the render or reference image.
    """
    device = 'cuda'
    # ── Step 1: Load mesh ────────────────────────────────────────────────────
    print(f'[fsr] Loading mesh: {body_glb}')
    ctx = NVDiffRastContextWrapper(device=device, context_type='cuda')
    mesh_mv = load_mesh(body_glb, rescale=True, device=device)
    scene_t = trimesh.load(body_glb)
    if isinstance(scene_t, trimesh.Scene):
        # GLB usually loads as a Scene; take the first geometry entry.
        geom_name = list(scene_t.geometry.keys())[0]
        mesh_t = scene_t.geometry[geom_name]
    else:
        mesh_t = scene_t; geom_name = None
    orig_tex = np.array(mesh_t.visual.material.baseColorTexture, dtype=np.float32) / 255.0
    uvs = np.array(mesh_t.visual.uv, dtype=np.float64)
    tex_H, tex_W = orig_tex.shape[:2]
    print(f' UV size: {tex_W}Γ{tex_H}')
    # ── Step 1b: Geometry mask (front-face UV islands only) ──────────────────
    print('[fsr] Building geometry front-face UV mask ...')
    geom_uv_mask = _build_front_face_uv_mask(mesh_t, tex_H, tex_W, neck_frac)
    # ── Step 2: Orthographic camera matching render_front.png ────────────────
    render_img = cv2.imread(render_img_path)
    H_r, W_r = render_img.shape[:2]
    print(f' Render size: {W_r}Γ{H_r}')
    camera = get_orthogonal_camera(
        elevation_deg=[0], distance=[1.8],
        left=-0.55, right=0.55, bottom=-0.55, top=0.55,
        azimuth_deg=[-90], device=device,
    )
    print(f'[fsr] Precomputing UV geometry ({uv_size}Γ{uv_size}) ...')
    uv_pre = uv_precompute(ctx, mesh_mv, height=uv_size, width=uv_size)
    uv_geom = uv_render_geometry(
        ctx, mesh_mv, camera,
        view_height=H_r, view_width=W_r,
        uv_precompute_output=uv_pre,
        compute_depth_grad=False,
    )
    # ── Step 3: Face landmark detection ──────────────────────────────────────
    print('[fsr] Detecting face landmarks ...')
    app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))
    ref_bgr = cv2.imread(face_img_path)
    render_face = _detect_largest_face(render_img, app)
    if render_face is None:
        raise RuntimeError(f'No face detected in render: {render_img_path}')
    ref_face = _detect_largest_face(ref_bgr, app)
    if ref_face is None:
        raise RuntimeError(f'No face detected in reference: {face_img_path}')
    render_kps = render_face.kps  # (5, 2) landmark points, render-pixel coords
    ref_kps = ref_face.kps
    print(f' render kps: x={render_kps[:,0].min():.0f}-{render_kps[:,0].max():.0f}'
          f' y={render_kps[:,1].min():.0f}-{render_kps[:,1].max():.0f}')
    # ── Step 4: norm_crop -> canonical 512x512 frontal face ──────────────────
    CANONICAL_SIZE = 512
    aligned_bgr = insightface_align.norm_crop(ref_bgr, ref_kps, image_size=CANONICAL_SIZE)
    # Fixed ArcFace 5-point template (defined at 112x112), scaled to CANONICAL_SIZE.
    ARCFACE_112 = np.array([
        [38.2946, 51.6963],
        [73.5318, 51.5014],
        [56.0252, 71.7366],
        [41.5493, 92.3655],
        [70.7299, 92.2041],
    ], dtype=np.float32)
    canonical_kps = ARCFACE_112 * (CANONICAL_SIZE / 112.0)
    # ── Step 5: Forward warp: canonical -> render space ──────────────────────
    # 4-DOF similarity (scale + rotation + translation) with all 5 kps.
    # FORWARD direction: canonical_kps -> render_kps so that warpAffine places
    # the face at exactly the render-space coordinates, downsampling cleanly.
    fwd_M, inliers = cv2.estimateAffinePartial2D(
        canonical_kps.astype(np.float32),
        render_kps.astype(np.float32),
        method=cv2.LMEDS,
    )
    print(f' Forward warp M:\n{fwd_M}')
    face_on_render_bgr = cv2.warpAffine(
        aligned_bgr, fwd_M, (W_r, H_r),
        flags=cv2.INTER_LANCZOS4,
        borderMode=cv2.BORDER_CONSTANT, borderValue=0,
    )
    face_on_render_rgb = cv2.cvtColor(face_on_render_bgr,
                                      cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
    # ── Step 6: Render-space face hull mask ──────────────────────────────────
    # Only paint UV texels that correspond to pixels inside the face region.
    hull_pts = cv2.convexHull(render_kps.astype(np.float32)).squeeze(1)
    hull_cx, hull_cy = hull_pts.mean(axis=0)
    # Expand the landmark hull 4x about its centroid (covers forehead/chin,
    # which the 5 kps alone do not reach).
    hull_expanded = (hull_pts - [hull_cx, hull_cy]) * 4.0 + [hull_cx, hull_cy]
    face_mask_render = np.zeros((H_r, W_r), dtype=np.float32)
    cv2.fillPoly(face_mask_render, [hull_expanded.astype(np.int32)], 1.0)
    # Restrict to where the warped face actually has content (near-black border
    # pixels introduced by warpAffine are excluded).
    face_content = (face_on_render_bgr.mean(axis=2) > 3.0 / 255.0).astype(np.float32)
    face_mask_render = face_mask_render * face_content
    face_mask_render = cv2.GaussianBlur(face_mask_render, (51, 51), 15)
    # ── Step 7: Project face-on-render into UV space ─────────────────────────
    # uv_render_attr uses uv_pos_ndc as a lookup: for each UV texel, sample the
    # render-space image at that texel's render NDC position.
    # Since face_on_render is already in render-space coords, this is exact.
    print('[fsr] Projecting face into UV space via uv_render_attr ...')
    face_t = torch.tensor(face_on_render_rgb, device=device).unsqueeze(0)  # (1,H,W,3)
    mask_t = torch.tensor(face_mask_render[None], device=device)
    uv_attr_out = uv_render_attr(
        images=face_t,
        masks=mask_t,
        uv_render_geometry_output=uv_geom,
    )
    uv_face_img = uv_attr_out.uv_attr_proj[0].cpu().numpy()  # (uv, uv, 3)
    uv_face_mask = uv_attr_out.uv_mask_proj[0].cpu().numpy()  # (uv, uv)
    # Rescale to tex resolution if needed
    if uv_size != tex_H or uv_size != tex_W:
        uv_face_img_rs = cv2.resize(uv_face_img, (tex_W, tex_H), interpolation=cv2.INTER_LINEAR)
        uv_face_mask_rs = cv2.resize(uv_face_mask, (tex_W, tex_H), interpolation=cv2.INTER_LINEAR)
    else:
        uv_face_img_rs = uv_face_img
        uv_face_mask_rs = uv_face_mask
    # ── Step 7b: Apply geometry mask — kill back-of-head / ear UV islands ────
    uv_face_mask_rs = uv_face_mask_rs * geom_uv_mask
    # Final blend alpha — use full blend=1.0 inside the face region so no
    # original texture leaks through and creates duplicate features.
    # NOTE(review): the `blend` argument is therefore not applied here (it is
    # only echoed in the log line below) — confirm whether it should scale alpha.
    alpha = np.clip(uv_face_mask_rs, 0, 1)[..., None]
    painted_px = int((alpha[..., 0] > 0.01).sum())
    print(f' Painted texels: {painted_px}')
    if debug_dir:
        cv2.imwrite(os.path.join(debug_dir, 'fsr_aligned_ref.png'), aligned_bgr)
        cv2.imwrite(os.path.join(debug_dir, 'fsr_face_on_render.png'), face_on_render_bgr)
        cv2.imwrite(os.path.join(debug_dir, 'fsr_face_mask_render.png'),
                    (face_mask_render * 255).astype(np.uint8))
        cv2.imwrite(os.path.join(debug_dir, 'fsr_geom_mask.png'),
                    (geom_uv_mask * 255).astype(np.uint8))
        cv2.imwrite(os.path.join(debug_dir, 'fsr_uv_mask.png'),
                    (uv_face_mask_rs * 255).astype(np.uint8))
        Image.fromarray((uv_face_img_rs * 255).clip(0, 255).astype(np.uint8)).save(
            os.path.join(debug_dir, 'fsr_uv_face.png'))
        print(f' Debug files saved to {debug_dir}')
    # ── Step 8: Blend into original texture ──────────────────────────────────
    print(f'[fsr] Blending (blend={blend}) ...')
    new_tex = uv_face_img_rs * alpha + orig_tex * (1.0 - alpha)
    # ── Step 9: Save GLB ─────────────────────────────────────────────────────
    new_pil = Image.fromarray((new_tex * 255).clip(0, 255).astype(np.uint8))
    mesh_t.visual = TextureVisuals(uv=uvs, material=PBRMaterial(baseColorTexture=new_pil))
    if geom_name and isinstance(scene_t, trimesh.Scene):
        scene_t.geometry[geom_name] = mesh_t
        scene_t.export(out_glb)
    else:
        mesh_t.export(out_glb)
    print(f'[fsr] Saved: {out_glb} ({os.path.getsize(out_glb)//1024} KB)')
    return out_glb
if __name__ == '__main__':
    # CLI entry point: thin argparse wrapper around face_swap_render().
    parser = argparse.ArgumentParser()
    parser.add_argument('--body', required=True)
    parser.add_argument('--face', required=True)
    parser.add_argument('--render', required=True, help='Front render (e.g. render_front.png)')
    parser.add_argument('--out', required=True)
    parser.add_argument('--blend', type=float, default=0.93)
    parser.add_argument('--uv_size', type=int, default=4096)
    parser.add_argument('--neck_frac', type=float, default=0.76)
    parser.add_argument('--debug_dir', default=None)
    cli = parser.parse_args()
    face_swap_render(
        cli.body, cli.face, cli.render, cli.out,
        blend=cli.blend,
        uv_size=cli.uv_size,
        neck_frac=cli.neck_frac,
        debug_dir=cli.debug_dir,
    )