aastha-malik committed on
Commit
5dd008b
·
1 Parent(s): 38e41fd

Add Gradio app, requirements, and system packages

Browse files
Files changed (3) hide show
  1. app.py +276 -0
  2. packages.txt +2 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys, shutil, types, subprocess
2
+ import numpy as np
3
+ import cv2
4
+ import gradio as gr
5
+
6
# ── Paths ────────────────────────────────────────────────────────────
# Scratch locations for weights and per-run files; /tmp is used presumably
# because it is the writable location on the hosting container — confirm.
MODEL_DIR = "/tmp/models"      # downloaded model weights live here
WORK_DIR = "/tmp/workspace"    # intermediate videos + final outputs
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(f"{WORK_DIR}/temp", exist_ok=True)     # transient working files
os.makedirs(f"{WORK_DIR}/outputs", exist_ok=True)  # final swapped videos
12
+
13
# ── Torchvision compatibility patch ──────────────────────────────────
# Registers a stand-in "torchvision.transforms.functional_tensor" module
# exposing rgb_to_grayscale, so imports of that (removed) module resolve.
# NOTE(review): presumably needed by older gfpgan/basicsr releases that
# still import functional_tensor — confirm against the installed versions.
try:
    from torchvision.transforms import functional as TF
    module = types.ModuleType("torchvision.transforms.functional_tensor")
    module.rgb_to_grayscale = TF.rgb_to_grayscale
    sys.modules["torchvision.transforms.functional_tensor"] = module
except Exception:
    # torchvision missing or its API changed — skip the shim; any consumer
    # that truly needs it will fail at its own import with a clearer error.
    pass
21
+
22
# ── Model download ───────────────────────────────────────────────────
INSWAPPER_PATH = f"{MODEL_DIR}/inswapper_128.onnx"
GFPGAN_PATH = f"{MODEL_DIR}/GFPGANv1.4.pth"

def download_models():
    """Fetch the face-swap (inswapper) and face-restoration (GFPGAN) weights.

    Idempotent: each file is downloaded only when it is not already present
    in MODEL_DIR, so repeated app restarts skip the download.
    """
    from huggingface_hub import hf_hub_download
    if not os.path.exists(INSWAPPER_PATH):
        print("Downloading inswapper_128.onnx ...")
        hf_hub_download(
            repo_id="ezioruan/inswapper_128.onnx",
            filename="inswapper_128.onnx",
            local_dir=MODEL_DIR,
        )
        print("inswapper ready.")
    if not os.path.exists(GFPGAN_PATH):
        print("Downloading GFPGANv1.4.pth ...")
        # Fix: the original shelled out to `wget`, which is not declared in
        # packages.txt (only ffmpeg and libgl1) and so may be absent from the
        # container. Use the stdlib instead.
        import urllib.request
        tmp_path = GFPGAN_PATH + ".part"
        urllib.request.urlretrieve(
            "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth",
            tmp_path,
        )
        # Atomic rename: a failed/interrupted download never leaves a
        # half-written file at GFPGAN_PATH (which would skip re-download).
        os.replace(tmp_path, GFPGAN_PATH)
        print("GFPGAN ready.")

download_models()
46
+
47
# ── Load models ──────────────────────────────────────────────────────
import insightface
from insightface.app import FaceAnalysis
from gfpgan import GFPGANer
import onnxruntime as ort

# Prefer CUDA when onnxruntime reports it as available; otherwise CPU only.
PROVIDERS = (
    ["CUDAExecutionProvider", "CPUExecutionProvider"]
    if "CUDAExecutionProvider" in ort.get_available_providers()
    else ["CPUExecutionProvider"]
)
print(f"Using providers: {PROVIDERS}")

# Face detection/embedding model ("buffalo_l" bundle); used both on the
# source photo and on every video frame in process().
face_app = FaceAnalysis(name="buffalo_l", providers=PROVIDERS)
face_app.prepare(ctx_id=0, det_size=(640, 640))

# The inswapper ONNX model performs the actual face replacement.
swapper = insightface.model_zoo.get_model(INSWAPPER_PATH, providers=PROVIDERS)

# GFPGAN sharpens/restores faces after the swap; upscale=1 keeps the frame
# size unchanged, and no background upsampler is attached.
enhancer = GFPGANer(
    model_path=GFPGAN_PATH,
    upscale=1,
    arch="clean",
    channel_multiplier=2,
    bg_upsampler=None,
)

print("All models loaded.")
74
+
75
# ── Helpers ──────────────────────────────────────────────────────────
def download_youtube(url: str, out_path: str):
    """Download a YouTube video and leave an H.264/AAC MP4 at *out_path*.

    Downloads to a temporary "_raw" file first, then re-encodes with ffmpeg
    so the result is reliably readable downstream, and removes the raw file.
    """
    import yt_dlp

    raw_path = out_path.replace(".mp4", "_raw.mp4")

    options = {
        "outtmpl": raw_path,
        "format": "best[height<=480]/best",
        "http_headers": {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
            )
        },
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    # Normalise container/codec (H.264 video, AAC audio).
    cmd = [
        "ffmpeg", "-y", "-i", raw_path,
        "-vcodec", "libx264", "-acodec", "aac", "-preset", "fast",
        out_path, "-loglevel", "error",
    ]
    subprocess.run(cmd, check=True)

    if os.path.exists(raw_path):
        os.remove(raw_path)
99
+
100
+
101
def to_h264(src: str, dst: str):
    """Re-encode *src* into an H.264/AAC MP4 at *dst* via ffmpeg."""
    cmd = [
        "ffmpeg", "-y", "-i", src,
        "-vcodec", "libx264", "-acodec", "aac", "-preset", "fast",
        dst, "-loglevel", "error",
    ]
    subprocess.run(cmd, check=True)
108
+
109
+
110
# ── Core processing ──────────────────────────────────────────────────
def process(face_image, video_file, youtube_url, trim_seconds, progress=gr.Progress(track_tqdm=True)):
    """Swap the face from *face_image* onto every face in the input video.

    Parameters
    ----------
    face_image : str | None
        Filepath of the source face photo (``gr.Image(type="filepath")``).
    video_file : str | None
        Filepath of an uploaded video; ignored when a YouTube URL is given.
    youtube_url : str | None
        Optional YouTube URL used instead of an uploaded file.
    trim_seconds : int | float
        If > 0, only the first N seconds of the video are processed.
    progress : gr.Progress
        Gradio progress reporter.

    Returns
    -------
    tuple[str | None, str]
        (path to the swapped video, status message) on success, or
        (None, error message) on failure — errors never propagate to Gradio.
    """
    if face_image is None:
        return None, "Please upload a source face image."
    if video_file is None and not (youtube_url or "").strip():
        return None, "Please upload a video file or paste a YouTube URL."

    try:
        progress(0.0, desc="Detecting source face...")

        # Source face: retry detection on a 640x640 resize if the first
        # pass finds nothing (helps with very large/small photos).
        source_img = cv2.imread(face_image)
        source_faces = face_app.get(source_img)
        if not source_faces:
            source_img_r = cv2.resize(source_img, (640, 640))
            source_faces = face_app.get(source_img_r)
        if not source_faces:
            return None, "No face detected β€” use a clear, front-facing photo."

        # Keep the largest detected face (by bounding-box area) and
        # L2-normalise its embedding.
        source_face = sorted(
            source_faces,
            key=lambda f: (f.bbox[2] - f.bbox[0]) * (f.bbox[3] - f.bbox[1]),
            reverse=True,
        )[0]
        source_face.embedding /= np.linalg.norm(source_face.embedding)

        # Prepare video: download from YouTube, or normalise the upload to
        # H.264 so OpenCV can decode it.
        progress(0.05, desc="Preparing video...")
        raw_video = f"{WORK_DIR}/temp/input.mp4"

        if (youtube_url or "").strip():
            progress(0.05, desc="Downloading YouTube video...")
            download_youtube(youtube_url.strip(), raw_video)
        else:
            shutil.copy(video_file, raw_video)
            converted = f"{WORK_DIR}/temp/input_h264.mp4"
            to_h264(raw_video, converted)
            shutil.move(converted, raw_video)

        # Verify OpenCV can actually read at least one frame.
        cap_check = cv2.VideoCapture(raw_video)
        ok, _ = cap_check.read()
        cap_check.release()
        if not ok:
            return None, "Could not read the video. Try a different file or URL."

        # Optional trim to the first N seconds.
        input_video = raw_video
        if trim_seconds and int(trim_seconds) > 0:
            trimmed = f"{WORK_DIR}/temp/input_trimmed.mp4"
            subprocess.run(
                ["ffmpeg", "-y", "-i", raw_video,
                 "-t", str(int(trim_seconds)),
                 "-c:v", "libx264", "-c:a", "aac",
                 trimmed, "-loglevel", "error"],
                check=True,
            )
            input_video = trimmed

        # Video info. Fix: capture/writer are now released via try/finally
        # so an exception mid-run no longer leaks them.
        cap = cv2.VideoCapture(input_video)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps or fps <= 0:
                fps = 25.0  # fix: some containers report 0 fps, which breaks VideoWriter
            total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            # Frame pipeline
            temp_out = f"{WORK_DIR}/temp/no_audio.mp4"
            final_out = f"{WORK_DIR}/outputs/face_swapped.mp4"

            writer = cv2.VideoWriter(
                temp_out, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h)
            )
            try:
                for i in range(total):
                    ret, frame = cap.read()
                    if not ret:
                        break  # frame count from metadata can overshoot
                    progress(0.1 + 0.8 * (i / total), desc=f"Frame {i+1}/{total}")

                    faces = face_app.get(frame)
                    result = frame.copy()

                    # Swap every detected face with the source face.
                    for face in faces:
                        result = swapper.get(result, face, source_face, paste_back=True)

                    # Best-effort enhancement: on GFPGAN failure, keep the
                    # raw swapped frame instead of aborting the whole video.
                    try:
                        _, _, result = enhancer.enhance(
                            result, has_aligned=False, only_center_face=False, paste_back=True
                        )
                    except Exception:
                        pass

                    writer.write(result)
            finally:
                writer.release()  # always flush the partial output
        finally:
            cap.release()

        # Merge the original audio track. Deliberately no check=True: if the
        # input has no audio stream ffmpeg fails, and we fall back to the
        # silent video below.
        progress(0.92, desc="Merging audio...")
        subprocess.run(
            ["ffmpeg", "-y",
             "-i", temp_out, "-i", input_video,
             "-map", "0:v:0", "-map", "1:a:0",
             "-c:v", "copy", "-c:a", "aac", "-shortest",
             final_out, "-loglevel", "error"],
        )
        if not os.path.exists(final_out):
            shutil.copy(temp_out, final_out)

        progress(1.0, desc="Done!")
        size = os.path.getsize(final_out) / (1024 * 1024)
        return final_out, f"Done! {total} frames | {size:.1f} MB output"

    except Exception as e:
        # Top-level boundary: surface the error in the Status box rather
        # than crashing the Gradio worker.
        return None, f"Error: {e}"
226
+
227
+
228
# ── Gradio UI ────────────────────────────────────────────────────────
# Two-column layout: inputs + run button on the left, status/result on
# the right. Launched directly at module import (script entry point).
with gr.Blocks(title="Face Fusion") as demo:

    # Header and CPU-performance caveat.
    gr.Markdown("""
# 🎭 Face Fusion β€” AI Video Face Swap
Swap any face into a video using **InsightFace + inswapper_128 + GFPGAN**.

> **Note:** This Space runs on CPU β€” processing takes ~1–3 min per 10 seconds of video.
> For faster results, run the notebook on Kaggle with a free GPU.
""")

    with gr.Row():
        # Left column: all inputs.
        with gr.Column():
            face_input = gr.Image(
                label="Source Face Photo",
                type="filepath",  # process() expects a filesystem path, not an array
                height=220,
            )
            youtube_input = gr.Textbox(
                label="YouTube URL (optional)",
                placeholder="https://www.youtube.com/watch?v=...",
            )
            gr.Markdown("**β€” or β€”**")
            video_input = gr.Video(label="Upload Video File")
            trim_input = gr.Slider(
                label="Trim to first N seconds (0 = full video)",
                minimum=0, maximum=60, step=5, value=10,
            )
            run_btn = gr.Button("Run Face Swap", variant="primary", size="lg")

        # Right column: status message and output video.
        with gr.Column():
            status_box = gr.Textbox(label="Status", interactive=False, lines=2)
            video_out = gr.Video(label="Output Video", height=400)

    # Usage tips footer.
    gr.Markdown("""
---
**Tips for best results**
- Clear, front-facing photo β€” no sunglasses or heavy shadows
- Keep video under 15 seconds for reasonable CPU processing time
- Single-face videos give the cleanest swap
""")

    # Wire the button to the processing pipeline.
    run_btn.click(
        fn=process,
        inputs=[face_input, video_input, youtube_input, trim_input],
        outputs=[video_out, status_box],
    )

demo.launch()
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ libgl1
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ insightface==0.7.3
2
+ onnxruntime
3
+ opencv-python-headless
4
+ gfpgan
5
+ huggingface_hub
6
+ yt-dlp
7
+ gradio