# roop_unleashed01/ui/tabs/faceswap_tab.py
# (Hugging Face upload header preserved: uploaded by Boka73, "upload 58 files",
#  commit e20a9b5 verified)
import os
import pathlib
import shutil
import tempfile

import gradio as gr

import roop.globals
import roop.utilities as util
import ui.globals
from roop.capturer import get_video_frame, get_video_frame_total, get_image_frame
from roop.face_util import extract_face_images, create_blank_image
from roop.FaceSet import FaceSet
from roop.ProcessEntry import ProcessEntry
from roop.ProcessOptions import ProcessOptions
from roop.utilities import clean_dir
last_image = None
IS_INPUT = True
SELECTED_FACE_INDEX = 0
SELECTED_INPUT_FACE_INDEX = 0
SELECTED_TARGET_FACE_INDEX = 0
input_faces = None
target_faces = None
face_selection = None
previewimage = None
selected_preview_index = 0
is_processing = False
list_files_process: list[ProcessEntry] = []
model_swap_choices = ["InSwapper 128", "ReSwapper 128", "ReSwapper 256"]
no_face_choices = [
"Use untouched original frame",
"Retry rotated",
"Skip Frame",
"Skip Frame if no similar face",
"Use last swapped",
]
swap_choices = [
"First found",
"All input faces",
"All input faces (random)",
"All female",
"All male",
"All faces",
"Selected face",
]
current_video_fps = 50
manual_masking = False
def faceswap_tab():
    """Build the "Face Swap" tab UI and wire all of its event handlers."""
    global no_face_choices, previewimage

    with gr.Tab("🎭 Face Swap"):
        # --- top row: face galleries + controls (left), frame preview (right) ---
        with gr.Row(variant="panel"):
            with gr.Column(scale=2):
                with gr.Row():
                    input_faces = gr.Gallery(
                        label="Input faces gallery",
                        allow_preview=False,
                        preview=False,
                        height=138,
                        columns=64,
                        object_fit="scale-down",
                        interactive=False,
                    )
                    target_faces = gr.Gallery(
                        label="Target faces gallery",
                        allow_preview=False,
                        preview=False,
                        height=138,
                        columns=64,
                        object_fit="scale-down",
                        interactive=False,
                    )
                with gr.Row():
                    bt_move_left_input = gr.Button("⬅ Move left", size="sm")
                    bt_move_right_input = gr.Button("➡ Move right", size="sm")
                    bt_move_left_target = gr.Button("⬅ Move left", size="sm")
                    bt_move_right_target = gr.Button("➡ Move right", size="sm")
                with gr.Row():
                    bt_remove_selected_input_face = gr.Button(
                        "❌ Remove selected", size="sm"
                    )
                    bt_clear_input_faces = gr.Button(
                        "💥 Clear all", variant="stop", size="sm"
                    )
                    bt_remove_selected_target_face = gr.Button(
                        "❌ Remove selected", size="sm"
                    )
                    bt_add_local = gr.Button("Add local files from", size="sm")
                with gr.Row():
                    with gr.Column(scale=2):
                        # --- mask tuning controls ---
                        with gr.Accordion(label="Advanced Masking", open=False):
                            chk_showmaskoffsets = gr.Checkbox(
                                label="Show mask overlay in preview",
                                value=False,
                                interactive=True,
                            )
                            chk_restoreoriginalmouth = gr.Checkbox(
                                label="Restore original mouth area",
                                value=False,
                                interactive=True,
                            )
                            mask_top = gr.Slider(
                                0,
                                1.0,
                                value=0,
                                label="Offset Face Top",
                                step=0.01,
                                interactive=True,
                            )
                            mask_bottom = gr.Slider(
                                0,
                                1.0,
                                value=0,
                                label="Offset Face Bottom",
                                step=0.01,
                                interactive=True,
                            )
                            mask_left = gr.Slider(
                                0,
                                1.0,
                                value=0,
                                label="Offset Face Left",
                                step=0.01,
                                interactive=True,
                            )
                            mask_right = gr.Slider(
                                0,
                                1.0,
                                value=0,
                                label="Offset Face Right",
                                step=0.01,
                                interactive=True,
                            )
                            mask_erosion = gr.Slider(
                                1.0,
                                3.0,
                                value=1.0,
                                label="Erosion Iterations",
                                step=1.00,
                                interactive=True,
                            )
                            mask_blur = gr.Slider(
                                10.0,
                                50.0,
                                value=20.0,
                                label="Blur size",
                                step=1.00,
                                interactive=True,
                            )
                            bt_toggle_masking = gr.Button(
                                "Toggle manual masking", variant="secondary", size="sm"
                            )
                        selected_mask_engine = gr.Dropdown(
                            ["None", "Clip2Seg", "DFL XSeg"],
                            value="None",
                            label="Face masking engine",
                        )
                        clip_text = gr.Textbox(
                            label="List of objects to mask and restore back on fake face",
                            value="cup,hands,hair,banana",
                            interactive=False,
                        )
                        bt_preview_mask = gr.Button(
                            "👥 Show Mask Preview", variant="secondary"
                        )
                    with gr.Column(scale=2):
                        # --- source/target file pickers ---
                        local_folder = gr.Textbox(
                            show_label=False, placeholder="/content/", interactive=True
                        )
                        with gr.Row(variant="panel"):
                            bt_srcfiles = gr.Files(
                                label="Source Images or Facesets",
                                file_count="multiple",
                                file_types=["image", ".fsz", ".webp"],
                                elem_id="filelist",
                                height=233,
                            )
                            bt_destfiles = gr.Files(
                                label="Target File(s)",
                                file_count="multiple",
                                file_types=["image", "video", ".webp"],
                                elem_id="filelist",
                                height=233,
                            )
                        with gr.Row(variant="panel"):
                            ui.globals.ui_selected_swap_model = gr.Dropdown(
                                model_swap_choices,
                                value=model_swap_choices[0],
                                label="Specify Face Swap Model",
                            )
                            forced_fps = gr.Slider(
                                minimum=0,
                                maximum=120,
                                value=0,
                                label="Video FPS",
                                info="Overrides detected fps if not 0",
                                step=1.0,
                                interactive=True,
                                container=True,
                            )
            with gr.Column(scale=2):
                # --- preview image and the (initially hidden) manual mask editor ---
                previewimage = gr.Image(
                    label="Preview Image",
                    height=576,
                    interactive=False,
                    visible=True,
                    format=get_gradio_output_format(),
                )
                maskimage = gr.ImageEditor(
                    label="Manual mask Image",
                    sources=["clipboard"],
                    transforms="",
                    type="numpy",
                    brush=gr.Brush(
                        color_mode="fixed", colors=["rgba(255, 255, 255, 1"]
                    ),
                    interactive=True,
                    visible=False,
                )
                with gr.Row(variant="panel"):
                    fake_preview = gr.Checkbox(label="Face swap frames", value=False)
                    bt_refresh_preview = gr.Button(
                        "🔄 Refresh", variant="secondary", size="sm"
                    )
                    bt_use_face_from_preview = gr.Button(
                        "Use Face from this Frame", variant="primary", size="sm"
                    )
                with gr.Row():
                    preview_frame_num = gr.Slider(
                        1,
                        1,
                        value=1,
                        label="Frame Number",
                        info="0:00:00",
                        step=1.0,
                        interactive=True,
                    )
                with gr.Row():
                    text_frame_clip = gr.Markdown("Processing frame range [0 - 0]")
                    set_frame_start = gr.Button("⬅ Set as Start", size="sm")
                    set_frame_end = gr.Button("➡ Set as End", size="sm")
        # --- hidden row shown while the user picks one of several detected faces ---
        with gr.Row(visible=False) as dynamic_face_selection:
            with gr.Column(scale=2):
                face_selection = gr.Gallery(
                    label="Detected faces",
                    allow_preview=False,
                    preview=False,
                    height=138,
                    object_fit="cover",
                    columns=32,
                )
            with gr.Column():
                bt_faceselect = gr.Button("☑ Use selected face", size="sm")
                bt_cancelfaceselect = gr.Button("Done", size="sm")
            with gr.Column():
                gr.Markdown(" ")
        # --- swap configuration rows ---
        with gr.Row(variant="panel"):
            with gr.Column(scale=1):
                selected_face_detection = gr.Dropdown(
                    swap_choices,
                    value="First found",
                    label="Specify face selection for swapping",
                )
            with gr.Column(scale=1):
                num_swap_steps = gr.Slider(
                    1,
                    5,
                    value=1,
                    step=1.0,
                    label="Number of swapping steps",
                    info="More steps may increase likeness",
                )
            with gr.Column(scale=2):
                ui.globals.ui_selected_enhancer = gr.Dropdown(
                    [
                        "None",
                        "Codeformer",
                        "DMDNet",
                        "GFPGAN",
                        "GPEN",
                        "Restoreformer++",
                    ],
                    value="None",
                    label="Select post-processing",
                )
        with gr.Row(variant="panel"):
            with gr.Column(scale=1):
                max_face_distance = gr.Slider(
                    0.01,
                    1.0,
                    value=0.65,
                    label="Max Face Similarity Threshold",
                    info="0.0 = identical 1.0 = no similarity",
                )
            with gr.Column(scale=1):
                ui.globals.ui_upscale = gr.Dropdown(
                    ["128px", "256px", "512px"],
                    value="128px",
                    label="Subsample upscale to",
                    interactive=True,
                )
            with gr.Column(scale=2):
                ui.globals.ui_blend_ratio = gr.Slider(
                    0.0,
                    1.0,
                    value=0.65,
                    label="Original/Enhanced image blend ratio",
                    info="Only used with active post-processing",
                )
        with gr.Row(variant="panel"):
            with gr.Column(scale=1):
                video_swapping_method = gr.Dropdown(
                    ["Extract Frames to media", "In-Memory processing"],
                    value="In-Memory processing",
                    label="Select video processing method",
                    interactive=True,
                )
                no_face_action = gr.Dropdown(
                    choices=no_face_choices,
                    value=no_face_choices[0],
                    label="Action on no face detected",
                    interactive=True,
                )
                vr_mode = gr.Checkbox(label="VR Mode", value=False)
            with gr.Column(scale=1):
                with gr.Group():
                    autorotate = gr.Checkbox(
                        label="Auto rotate horizontal Faces", value=True
                    )
                    roop.globals.skip_audio = gr.Checkbox(
                        label="Skip audio", value=False
                    )
                    roop.globals.keep_frames = gr.Checkbox(
                        label="Keep Frames (relevant only when extracting frames)",
                        value=False,
                    )
                    roop.globals.wait_after_extraction = gr.Checkbox(
                        label="Wait for user key press before creating video ",
                        value=False,
                    )
        # --- start/stop buttons and output widgets ---
        with gr.Row(variant="panel"):
            with gr.Column():
                bt_start = gr.Button("▶ Start", variant="primary")
            with gr.Column():
                bt_stop = gr.Button("⏹ Stop", variant="secondary", interactive=False)
                gr.Button("👀 Open Output Folder", size="sm").click(
                    fn=lambda: util.open_folder(roop.globals.output_path)
                )
            with gr.Column(scale=2):
                output_method = gr.Dropdown(
                    ["File", "Virtual Camera", "Both"],
                    value="File",
                    label="Select Output Method",
                    interactive=True,
                )
        with gr.Row(variant="panel"):
            with gr.Column():
                resultfiles = gr.Files(label="Processed File(s)", interactive=False)
            with gr.Column():
                resultimage = gr.Image(
                    type="filepath", label="Final Image", interactive=False
                )
                resultvideo = gr.Video(
                    label="Final Video", interactive=False, visible=False
                )

        # --- event wiring ---
        # Components re-sent to on_preview_frame_changed on every preview refresh.
        previewinputs = [
            ui.globals.ui_selected_swap_model,
            preview_frame_num,
            bt_destfiles,
            fake_preview,
            ui.globals.ui_selected_enhancer,
            selected_face_detection,
            max_face_distance,
            ui.globals.ui_blend_ratio,
            selected_mask_engine,
            clip_text,
            no_face_action,
            vr_mode,
            autorotate,
            maskimage,
            chk_showmaskoffsets,
            chk_restoreoriginalmouth,
            num_swap_steps,
            ui.globals.ui_upscale,
        ]
        previewoutputs = [previewimage, maskimage, preview_frame_num]
        input_faces.select(on_select_input_face, None, None).success(
            fn=on_preview_frame_changed, inputs=previewinputs, outputs=previewoutputs
        )
        bt_move_left_input.click(
            fn=move_selected_input, inputs=[bt_move_left_input], outputs=[input_faces]
        )
        bt_move_right_input.click(
            fn=move_selected_input, inputs=[bt_move_right_input], outputs=[input_faces]
        )
        bt_move_left_target.click(
            fn=move_selected_target, inputs=[bt_move_left_target], outputs=[target_faces]
        )
        bt_move_right_target.click(
            fn=move_selected_target, inputs=[bt_move_right_target], outputs=[target_faces]
        )
        bt_remove_selected_input_face.click(
            fn=remove_selected_input_face, outputs=[input_faces]
        )
        bt_srcfiles.change(
            fn=on_srcfile_changed,
            show_progress="full",
            inputs=bt_srcfiles,
            outputs=[dynamic_face_selection, face_selection, input_faces, bt_srcfiles],
        )
        # Mask sliders write straight into the selected face on release.
        mask_top.release(fn=on_mask_top_changed, inputs=[mask_top], show_progress="hidden")
        mask_bottom.release(
            fn=on_mask_bottom_changed, inputs=[mask_bottom], show_progress="hidden"
        )
        mask_left.release(
            fn=on_mask_left_changed, inputs=[mask_left], show_progress="hidden"
        )
        mask_right.release(
            fn=on_mask_right_changed, inputs=[mask_right], show_progress="hidden"
        )
        mask_erosion.release(
            fn=on_mask_erosion_changed, inputs=[mask_erosion], show_progress="hidden"
        )
        mask_blur.release(
            fn=on_mask_blur_changed, inputs=[mask_blur], show_progress="hidden"
        )
        selected_mask_engine.change(
            fn=on_mask_engine_changed,
            inputs=[selected_mask_engine],
            outputs=[clip_text],
            show_progress="hidden",
        )
        target_faces.select(on_select_target_face, None, None)
        bt_remove_selected_target_face.click(
            fn=remove_selected_target_face, outputs=[target_faces]
        )
        forced_fps.change(fn=on_fps_changed, inputs=[forced_fps], show_progress="hidden")
        bt_destfiles.change(
            fn=on_destfiles_changed,
            inputs=[bt_destfiles],
            outputs=[preview_frame_num, text_frame_clip],
            show_progress="hidden",
        ).success(
            fn=on_preview_frame_changed,
            inputs=previewinputs,
            outputs=previewoutputs,
            show_progress="hidden",
        )
        bt_destfiles.select(
            fn=on_destfiles_selected,
            outputs=[preview_frame_num, text_frame_clip, forced_fps],
            show_progress="hidden",
        ).success(
            fn=on_preview_frame_changed,
            inputs=previewinputs,
            outputs=previewoutputs,
            show_progress="hidden",
        )
        bt_destfiles.clear(
            fn=on_clear_destfiles, outputs=[target_faces, selected_face_detection]
        )
        resultfiles.select(
            fn=on_resultfiles_selected,
            inputs=[resultfiles],
            outputs=[resultimage, resultvideo],
        )
        face_selection.select(on_select_face, None, None)
        bt_faceselect.click(
            fn=on_selected_face,
            outputs=[input_faces, target_faces, selected_face_detection],
        )
        bt_cancelfaceselect.click(
            fn=on_end_face_selection, outputs=[dynamic_face_selection, face_selection]
        )
        bt_clear_input_faces.click(fn=on_clear_input_faces, outputs=[input_faces])
        bt_add_local.click(
            fn=on_add_local_folder, inputs=[local_folder], outputs=[bt_destfiles]
        )
        bt_preview_mask.click(
            fn=on_preview_mask,
            inputs=[
                ui.globals.ui_selected_swap_model,
                preview_frame_num,
                bt_destfiles,
                clip_text,
                selected_mask_engine,
            ],
            outputs=[previewimage],
        )
        # Stop cancels both the running swap and the post-swap result refresh.
        start_event = bt_start.click(
            fn=start_swap,
            inputs=[
                ui.globals.ui_selected_swap_model,
                output_method,
                ui.globals.ui_selected_enhancer,
                selected_face_detection,
                roop.globals.keep_frames,
                roop.globals.wait_after_extraction,
                roop.globals.skip_audio,
                max_face_distance,
                ui.globals.ui_blend_ratio,
                selected_mask_engine,
                clip_text,
                video_swapping_method,
                no_face_action,
                vr_mode,
                autorotate,
                chk_restoreoriginalmouth,
                num_swap_steps,
                ui.globals.ui_upscale,
                maskimage,
            ],
            outputs=[bt_start, bt_stop, resultfiles],
            show_progress="full",
        )
        after_swap_event = start_event.success(
            fn=on_resultfiles_finished,
            inputs=[resultfiles],
            outputs=[resultimage, resultvideo],
        )
        bt_stop.click(
            fn=stop_swap,
            cancels=[start_event, after_swap_event],
            outputs=[bt_start, bt_stop],
            queue=False,
        )
        bt_refresh_preview.click(
            fn=on_preview_frame_changed, inputs=previewinputs, outputs=previewoutputs
        )
        bt_toggle_masking.click(
            fn=on_toggle_masking,
            inputs=[previewimage, maskimage],
            outputs=[previewimage, maskimage],
        )
        fake_preview.change(
            fn=on_preview_frame_changed, inputs=previewinputs, outputs=previewoutputs
        )
        preview_frame_num.release(
            fn=on_preview_frame_changed,
            inputs=previewinputs,
            outputs=previewoutputs,
            show_progress="hidden",
        )
        bt_use_face_from_preview.click(
            fn=on_use_face_from_selected,
            show_progress="full",
            inputs=[bt_destfiles, preview_frame_num],
            outputs=[
                dynamic_face_selection,
                face_selection,
                target_faces,
                selected_face_detection,
            ],
        )
        set_frame_start.click(
            fn=on_set_frame,
            inputs=[set_frame_start, preview_frame_num],
            outputs=[text_frame_clip],
        )
        set_frame_end.click(
            fn=on_set_frame,
            inputs=[set_frame_end, preview_frame_num],
            outputs=[text_frame_clip],
        )
def on_mask_top_changed(mask_offset):
    # Slider release handler: store the top mask offset (slot 0) on the selected face.
    set_mask_offset(0, mask_offset)
def on_mask_bottom_changed(mask_offset):
    # Slider release handler: store the bottom mask offset (slot 1).
    set_mask_offset(1, mask_offset)
def on_mask_left_changed(mask_offset):
    # Slider release handler: store the left mask offset (slot 2).
    set_mask_offset(2, mask_offset)
def on_mask_right_changed(mask_offset):
    # Slider release handler: store the right mask offset (slot 3).
    set_mask_offset(3, mask_offset)
def on_mask_erosion_changed(mask_offset):
    # Slider release handler: store the erosion iteration count (slot 4).
    set_mask_offset(4, mask_offset)
def on_mask_blur_changed(mask_offset):
    # Slider release handler: store the mask blur size (slot 5).
    set_mask_offset(5, mask_offset)
def set_mask_offset(index, mask_offset):
    """Write one mask-offset value onto the currently selected input face.

    index: slot in the offsets sequence (0=top, 1=bottom, 2=left, 3=right,
    4=erosion iterations, 5=blur size); mask_offset: new slider value.
    """
    global SELECTED_INPUT_FACE_INDEX
    if len(roop.globals.INPUT_FACESETS) > SELECTED_INPUT_FACE_INDEX:
        face = roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[0]
        # BUGFIX: mask_offsets is initially assigned as a tuple when faces are
        # imported, so in-place item assignment raised TypeError. Copy to a
        # list before mutating.
        offs = list(face.mask_offsets)
        offs[index] = mask_offset
        # Keep opposing offsets from covering the whole face together.
        if offs[0] + offs[1] > 0.99:
            offs[0] = 0.99
            offs[1] = 0.0
        if offs[2] + offs[3] > 0.99:
            offs[2] = 0.99
            offs[3] = 0.0
        face.mask_offsets = offs
def on_mask_engine_changed(mask_engine):
    """Make the clip-text box editable only while Clip2Seg is the mask engine."""
    editable = mask_engine == "Clip2Seg"
    return gr.Textbox(interactive=editable)
def on_add_local_folder(folder):
    """Collect files from *folder* and feed them into the target-files widget."""
    found = util.get_local_files_from_folder(folder)
    if found is None:
        gr.Warning("Empty folder or folder not found!")
    return found
def on_srcfile_changed(srcfiles, progress=gr.Progress()):
    """Import source faces from uploaded images or .fsz faceset archives.

    Appends one FaceSet per archive (or per detected face, for plain images)
    to roop.globals.INPUT_FACESETS plus a thumbnail per set to
    ui.globals.ui_input_thumbs. Returns updates for (face-selection column,
    selection gallery, input thumbnails, source-files widget reset).
    """
    global SELECTION_FACES_DATA, IS_INPUT, input_faces, face_selection, last_image

    IS_INPUT = True

    if srcfiles is None or len(srcfiles) < 1:
        return gr.Column(visible=False), None, ui.globals.ui_input_thumbs, None

    for f in srcfiles:
        source_path = f.name
        if source_path.lower().endswith("fsz"):
            progress(0, desc="Retrieving faces from Faceset File")
            # BUGFIX: os.environ["TEMP"] only exists on Windows; use the
            # portable stdlib temp directory (which honors TEMP when set).
            unzipfolder = os.path.join(tempfile.gettempdir(), "faceset")
            if os.path.isdir(unzipfolder):
                for file in os.listdir(unzipfolder):
                    os.remove(os.path.join(unzipfolder, file))
            else:
                os.makedirs(unzipfolder)
            util.mkdir_with_umask(unzipfolder)
            util.unzip(source_path, unzipfolder)
            is_first = True
            face_set = FaceSet()
            for file in os.listdir(unzipfolder):
                if file.endswith(".png"):
                    filename = os.path.join(unzipfolder, file)
                    progress(0, desc="Extracting faceset")
                    SELECTION_FACES_DATA = extract_face_images(filename, (False, 0))
                    # Renamed from ``f`` to avoid shadowing the srcfiles loop variable.
                    for face_data in SELECTION_FACES_DATA:
                        face = face_data[0]
                        # (top, bottom, left, right, erosion, blur) defaults.
                        face.mask_offsets = (0, 0, 0, 0, 1, 20)
                        face_set.faces.append(face)
                        if is_first:
                            image = util.convert_to_gradio(face_data[1])
                            ui.globals.ui_input_thumbs.append(image)
                            is_first = False
                        face_set.ref_images.append(get_image_frame(filename))
            if len(face_set.faces) > 0:
                if len(face_set.faces) > 1:
                    face_set.AverageEmbeddings()
                roop.globals.INPUT_FACESETS.append(face_set)
        elif util.has_image_extension(source_path):
            progress(0, desc="Retrieving faces from image")
            roop.globals.source_path = source_path
            SELECTION_FACES_DATA = extract_face_images(
                roop.globals.source_path, (False, 0)
            )
            progress(0.5, desc="Retrieving faces from image")
            for face_data in SELECTION_FACES_DATA:
                face_set = FaceSet()
                face = face_data[0]
                face.mask_offsets = (0, 0, 0, 0, 1, 20)
                face_set.faces.append(face)
                image = util.convert_to_gradio(face_data[1])
                ui.globals.ui_input_thumbs.append(image)
                roop.globals.INPUT_FACESETS.append(face_set)
    progress(1.0)
    return gr.Column(visible=False), None, ui.globals.ui_input_thumbs, None
def on_select_input_face(evt: gr.SelectData):
    # Gallery click handler: remember which input face is currently selected.
    global SELECTED_INPUT_FACE_INDEX
    SELECTED_INPUT_FACE_INDEX = evt.index
def remove_selected_input_face():
    """Drop the selected input faceset and its gallery thumbnail."""
    global SELECTED_INPUT_FACE_INDEX
    idx = SELECTED_INPUT_FACE_INDEX
    if idx < len(roop.globals.INPUT_FACESETS):
        roop.globals.INPUT_FACESETS.pop(idx)
    if idx < len(ui.globals.ui_input_thumbs):
        ui.globals.ui_input_thumbs.pop(idx)
    return ui.globals.ui_input_thumbs
def move_selected_input(button_text):
    """Shift the selected input face one slot left or right in both lists."""
    global SELECTED_INPUT_FACE_INDEX
    idx = SELECTED_INPUT_FACE_INDEX
    if button_text == "⬅ Move left":
        if idx <= 0:
            return ui.globals.ui_input_thumbs
        offset = -1
    else:
        if idx >= len(ui.globals.ui_input_thumbs):
            return ui.globals.ui_input_thumbs
        offset = 1
    # Keep the faceset list and the thumbnail gallery in the same order.
    faceset = roop.globals.INPUT_FACESETS.pop(idx)
    roop.globals.INPUT_FACESETS.insert(idx + offset, faceset)
    thumb = ui.globals.ui_input_thumbs.pop(idx)
    ui.globals.ui_input_thumbs.insert(idx + offset, thumb)
    return ui.globals.ui_input_thumbs
def move_selected_target(button_text):
    """Shift the selected target face one slot left or right in both lists."""
    global SELECTED_TARGET_FACE_INDEX
    idx = SELECTED_TARGET_FACE_INDEX
    if button_text == "⬅ Move left":
        if idx <= 0:
            return ui.globals.ui_target_thumbs
        offset = -1
    else:
        if idx >= len(ui.globals.ui_target_thumbs):
            return ui.globals.ui_target_thumbs
        offset = 1
    # Keep the target-face list and the thumbnail gallery in the same order.
    face = roop.globals.TARGET_FACES.pop(idx)
    roop.globals.TARGET_FACES.insert(idx + offset, face)
    thumb = ui.globals.ui_target_thumbs.pop(idx)
    ui.globals.ui_target_thumbs.insert(idx + offset, thumb)
    return ui.globals.ui_target_thumbs
def on_select_target_face(evt: gr.SelectData):
    # Gallery click handler: remember which target face is currently selected.
    global SELECTED_TARGET_FACE_INDEX
    SELECTED_TARGET_FACE_INDEX = evt.index
def remove_selected_target_face():
    """Remove the currently selected target face and its gallery thumbnail."""
    # BUGFIX: guard each list with its *own* length. The original checked the
    # thumbnail list before popping from TARGET_FACES, which raises IndexError
    # whenever the two lists get out of sync.
    if len(roop.globals.TARGET_FACES) > SELECTED_TARGET_FACE_INDEX:
        roop.globals.TARGET_FACES.pop(SELECTED_TARGET_FACE_INDEX)
    if len(ui.globals.ui_target_thumbs) > SELECTED_TARGET_FACE_INDEX:
        ui.globals.ui_target_thumbs.pop(SELECTED_TARGET_FACE_INDEX)
    return ui.globals.ui_target_thumbs
def on_use_face_from_selected(files, frame_num):
    """Extract faces from the previewed target file/frame to use as swap targets.

    If exactly one face is found it is taken over directly and the swap mode
    switches to "Selected face"; otherwise the face-selection row is opened so
    the user can pick one.
    """
    global IS_INPUT, SELECTION_FACES_DATA

    IS_INPUT = False
    thumbs = []

    roop.globals.target_path = files[selected_preview_index].name
    if util.is_image(
        roop.globals.target_path
    ) and not roop.globals.target_path.lower().endswith(("gif")):
        # Still image: detect all faces in the file itself.
        SELECTION_FACES_DATA = extract_face_images(roop.globals.target_path, (False, 0))
        if len(SELECTION_FACES_DATA) > 0:
            for f in SELECTION_FACES_DATA:
                image = util.convert_to_gradio(f[1])
                thumbs.append(image)
        else:
            gr.Info("No faces detected!")
            roop.globals.target_path = None
    elif util.is_video(
        roop.globals.target_path
    ) or roop.globals.target_path.lower().endswith(("gif")):
        # Video/GIF: detect faces in the currently previewed frame.
        selected_frame = frame_num
        SELECTION_FACES_DATA = extract_face_images(
            roop.globals.target_path, (True, selected_frame)
        )
        if len(SELECTION_FACES_DATA) > 0:
            for f in SELECTION_FACES_DATA:
                image = util.convert_to_gradio(f[1])
                thumbs.append(image)
        else:
            gr.Info("No faces detected!")
            roop.globals.target_path = None
    else:
        gr.Info("Unknown image/video type!")
        roop.globals.target_path = None

    if len(thumbs) == 1:
        # Single face: take it over immediately, no manual selection needed.
        roop.globals.TARGET_FACES.append(SELECTION_FACES_DATA[0][0])
        ui.globals.ui_target_thumbs.append(thumbs[0])
        return (
            gr.Row(visible=False),
            None,
            ui.globals.ui_target_thumbs,
            gr.Dropdown(value="Selected face"),
        )

    return (
        gr.Row(visible=True),
        thumbs,
        gr.Gallery(visible=True),
        gr.Dropdown(visible=True),
    )
def on_select_face(evt: gr.SelectData):  # SelectData is a subclass of EventData
    # Selection-gallery click handler: remember the picked face's index.
    global SELECTED_FACE_INDEX
    SELECTED_FACE_INDEX = evt.index
def on_selected_face():
    """Take over the face picked in the selection gallery.

    Depending on IS_INPUT the face becomes either a new input faceset or a
    target face; returns the gallery/dropdown updates for the respective side.
    """
    global IS_INPUT, SELECTED_FACE_INDEX, SELECTION_FACES_DATA

    fd = SELECTION_FACES_DATA[SELECTED_FACE_INDEX]
    image = util.convert_to_gradio(fd[1])
    if IS_INPUT:
        face_set = FaceSet()
        # (top, bottom, left, right, erosion, blur) defaults for a new face.
        fd[0].mask_offsets = (0, 0, 0, 0, 1, 20)
        face_set.faces.append(fd[0])
        roop.globals.INPUT_FACESETS.append(face_set)
        ui.globals.ui_input_thumbs.append(image)
        return (
            ui.globals.ui_input_thumbs,
            gr.Gallery(visible=True),
            gr.Dropdown(visible=True),
        )
    else:
        roop.globals.TARGET_FACES.append(fd[0])
        ui.globals.ui_target_thumbs.append(image)
        return (
            gr.Gallery(visible=True),
            ui.globals.ui_target_thumbs,
            gr.Dropdown(value="Selected face"),
        )
def on_end_face_selection():
    """Hide the dynamic face-selection row and clear its gallery."""
    hidden_column = gr.Column(visible=False)
    return hidden_column, None
def on_preview_frame_changed(
    swap_model,
    frame_num,
    files,
    fake_preview,
    enhancer,
    detection,
    face_distance,
    blend_ratio,
    selected_mask_engine,
    clip_text,
    no_face_action,
    vr_mode,
    auto_rotate,
    maskimage,
    show_face_area,
    restore_original_mouth,
    num_steps,
    upsample,
):
    """Render the preview frame, optionally face-swapped with current settings.

    Returns updates for (preview image, mask editor, frame slider info text).
    """
    global SELECTED_INPUT_FACE_INDEX, manual_masking, current_video_fps

    from roop.core import live_swap, get_processing_plugins

    manual_masking = False
    mask_offsets = (0, 0, 0, 0)
    # Backfill a neutral default on facesets imported before mask offsets existed.
    if len(roop.globals.INPUT_FACESETS) > SELECTED_INPUT_FACE_INDEX:
        if not hasattr(
            roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[0],
            "mask_offsets",
        ):
            roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[
                0
            ].mask_offsets = mask_offsets
        mask_offsets = (
            roop.globals.INPUT_FACESETS[SELECTED_INPUT_FACE_INDEX].faces[0].mask_offsets
        )

    timeinfo = "0:00:00"
    if files is None or selected_preview_index >= len(files) or frame_num is None:
        return None, None, gr.Slider(info=timeinfo)

    filename = files[selected_preview_index].name
    if util.is_video(filename) or filename.lower().endswith("gif"):
        current_frame = get_video_frame(filename, frame_num)
        if current_video_fps == 0:
            current_video_fps = 1
        # Derive an hh:mm:ss.mmm timestamp for the slider caption.
        secs = (frame_num - 1) / current_video_fps
        minutes = secs / 60
        secs = secs % 60
        hours = minutes / 60
        minutes = minutes % 60
        milliseconds = (secs - int(secs)) * 1000
        timeinfo = f"{int(hours):0>2}:{int(minutes):0>2}:{int(secs):0>2}.{int(milliseconds):0>3}"
    else:
        current_frame = get_image_frame(filename)
    if current_frame is None:
        return None, None, gr.Slider(info=timeinfo)

    layers = None
    if maskimage is not None:
        layers = maskimage["layers"]

    # Plain preview (no swap) when fake preview is off or no input face exists.
    if not fake_preview or len(roop.globals.INPUT_FACESETS) < 1:
        return (
            gr.Image(value=util.convert_to_gradio(current_frame), visible=True),
            gr.ImageEditor(visible=False),
            gr.Slider(info=timeinfo),
        )

    # Publish the UI settings to the shared globals used by the processors.
    roop.globals.face_swap_mode = translate_swap_mode(detection)
    roop.globals.selected_enhancer = enhancer
    roop.globals.distance_threshold = face_distance
    roop.globals.blend_ratio = blend_ratio
    roop.globals.no_face_action = index_of_no_face_action(no_face_action)
    roop.globals.vr_mode = vr_mode
    roop.globals.autorotate_faces = auto_rotate
    roop.globals.subsample_size = int(upsample[:3])
    mask_engine = map_mask_engine(selected_mask_engine, clip_text)
    roop.globals.execution_threads = roop.globals.CFG.max_threads

    mask = layers[0] if layers is not None else None
    face_index = SELECTED_INPUT_FACE_INDEX
    if len(roop.globals.INPUT_FACESETS) <= face_index:
        face_index = 0

    options = ProcessOptions(
        swap_model,
        get_processing_plugins(mask_engine),
        roop.globals.distance_threshold,
        roop.globals.blend_ratio,
        roop.globals.face_swap_mode,
        face_index,
        clip_text,
        # BUGFIX: pass the extracted editor layer (``mask``). Previously the
        # whole ImageEditor dict (``maskimage``) was passed and ``mask`` was
        # computed but never used.
        mask,
        num_steps,
        roop.globals.subsample_size,
        show_face_area,
        restore_original_mouth,
    )
    current_frame = live_swap(current_frame, options)
    if current_frame is None:
        return gr.Image(visible=True), None, gr.Slider(info=timeinfo)
    return (
        gr.Image(value=util.convert_to_gradio(current_frame), visible=True),
        gr.ImageEditor(visible=False),
        gr.Slider(info=timeinfo),
    )
def map_mask_engine(selected_mask_engine, clip_text):
    """Translate the UI mask-engine choice into the internal processor name.

    Clip2Seg additionally requires a non-empty clip_text; anything else
    (including the "None" dropdown entry) maps to None.
    """
    if selected_mask_engine == "DFL XSeg":
        return "mask_xseg"
    if selected_mask_engine == "Clip2Seg" and clip_text:
        return "mask_clip2seg"
    return None
def on_toggle_masking(previewimage, mask):
    """Switch between the plain preview image and the manual mask editor."""
    global manual_masking
    manual_masking = not manual_masking
    if not manual_masking:
        return gr.Image(visible=True), gr.ImageEditor(visible=False)
    layers = mask["layers"]
    if len(layers) == 1:
        # Seed the editor with a blank layer matching the preview's size.
        height, width = previewimage.shape[0], previewimage.shape[1]
        layers = [create_blank_image(width, height)]
    editor_value = {"background": previewimage, "layers": layers, "composite": None}
    return gr.Image(visible=False), gr.ImageEditor(value=editor_value, visible=True)
def gen_processing_text(start, end):
    """Build the frame-range status line shown under the preview."""
    return "Processing frame range [{} - {}]".format(start, end)
def on_set_frame(sender: str, frame_num):
    """Set the processing range's start or end from the preview slider.

    The button label decides which bound is updated; the new value is clamped
    so start never exceeds end.
    """
    global selected_preview_index, list_files_process
    entry = list_files_process[selected_preview_index]
    if entry.endframe == 0:
        return gen_processing_text(0, 0)
    if sender.lower().endswith("start"):
        entry.startframe = min(frame_num, entry.endframe)
    else:
        entry.endframe = max(frame_num, entry.startframe)
    return gen_processing_text(entry.startframe, entry.endframe)
def on_preview_mask(swap_model, frame_num, files, clip_text, mask_engine):
    """Render a single-frame preview that shows only the masking result.

    Returns the processed frame for the preview image, or None when a preview
    is not possible (busy, no file, no frame, no engine).
    """
    from roop.core import live_swap, get_processing_plugins

    global is_processing

    if (
        is_processing
        or files is None
        or selected_preview_index >= len(files)
        or clip_text is None
        or frame_num is None
    ):
        return None

    filename = files[selected_preview_index].name
    if util.is_video(filename) or filename.lower().endswith("gif"):
        current_frame = get_video_frame(filename, frame_num)
    else:
        current_frame = get_image_frame(filename)
    if current_frame is None or mask_engine is None:
        return None

    # CONSISTENCY FIX: reuse the shared UI-name -> processor-name mapping
    # instead of a local copy. This also normalizes the "None" dropdown string
    # to a real None, so get_processing_plugins() does not receive a bogus
    # "None" processor key.
    mask_engine = map_mask_engine(mask_engine, clip_text)

    options = ProcessOptions(
        swap_model,
        get_processing_plugins(mask_engine),
        roop.globals.distance_threshold,
        roop.globals.blend_ratio,
        "all",
        0,
        clip_text,
        None,
        0,
        128,
        False,
        False,
        True,
    )
    current_frame = live_swap(current_frame, options)
    return util.convert_to_gradio(current_frame)
def on_clear_input_faces():
    """Forget every stored input face and empty the gallery."""
    roop.globals.INPUT_FACESETS.clear()
    ui.globals.ui_input_thumbs.clear()
    return ui.globals.ui_input_thumbs
def on_clear_destfiles():
    """Forget all target faces and reset the swap-mode dropdown."""
    ui.globals.ui_target_thumbs.clear()
    roop.globals.TARGET_FACES.clear()
    return ui.globals.ui_target_thumbs, gr.Dropdown(value="First found")
def index_of_no_face_action(dropdown_text):
    # Map the dropdown label back to its list position; roop.globals stores
    # the numeric index, not the label. Raises ValueError on unknown labels.
    global no_face_choices
    return no_face_choices.index(dropdown_text)
def translate_swap_mode(dropdown_text):
    """Map the face-selection dropdown label to the internal swap-mode key."""
    modes = {
        "Selected face": "selected",
        "First found": "first",
        "All input faces": "all_input",
        "All input faces (random)": "all_random",
        "All female": "all_female",
        "All male": "all_male",
    }
    # Any other label (e.g. "All faces") falls back to swapping all faces.
    return modes.get(dropdown_text, "all")
def start_swap(
    swap_model,
    output_method,
    enhancer,
    detection,
    keep_frames,
    wait_after_extraction,
    skip_audio,
    face_distance,
    blend_ratio,
    selected_mask_engine,
    clip_text,
    processing_method,
    no_face_action,
    vr_mode,
    autorotate,
    restore_original_mouth,
    num_swap_steps,
    upsample,
    imagemask,
    progress=gr.Progress(),
):
    """Run the batch face-swap over every queued target file.

    Generator handler: first yields button-state updates (start disabled, stop
    enabled), runs the batch, then yields the final button states plus the
    list of produced output files (or None when nothing was produced).
    """
    from ui.main import prepare_environment
    from roop.core import batch_process_regular

    global is_processing, list_files_process

    if list_files_process is None or len(list_files_process) <= 0:
        return gr.Button(variant="primary"), None, None

    if roop.globals.CFG.clear_output:
        clean_dir(roop.globals.output_path)

    if not util.is_installed("ffmpeg"):
        # Best-effort warning only; image processing can still proceed.
        msg = "ffmpeg is not installed! No video processing possible."
        gr.Warning(msg)

    prepare_environment()

    # Publish the UI settings to the shared globals used by the processors.
    roop.globals.selected_enhancer = enhancer
    roop.globals.target_path = None
    roop.globals.distance_threshold = face_distance
    roop.globals.blend_ratio = blend_ratio
    roop.globals.keep_frames = keep_frames
    roop.globals.wait_after_extraction = wait_after_extraction
    roop.globals.skip_audio = skip_audio
    roop.globals.face_swap_mode = translate_swap_mode(detection)
    roop.globals.no_face_action = index_of_no_face_action(no_face_action)
    roop.globals.vr_mode = vr_mode
    roop.globals.autorotate_faces = autorotate
    roop.globals.subsample_size = int(upsample[:3])
    mask_engine = map_mask_engine(selected_mask_engine, clip_text)

    if roop.globals.face_swap_mode == "selected":
        if len(roop.globals.TARGET_FACES) < 1:
            gr.Error("No Target Face selected!")
            return gr.Button(variant="primary"), None, None

    is_processing = True
    # Flip the buttons: start disabled, stop enabled, clear previous results.
    yield (
        gr.Button(variant="secondary", interactive=False),
        gr.Button(variant="primary", interactive=True),
        None,
    )
    roop.globals.execution_threads = roop.globals.CFG.max_threads
    roop.globals.video_encoder = roop.globals.CFG.output_video_codec
    roop.globals.video_quality = roop.globals.CFG.video_quality
    roop.globals.max_memory = (
        roop.globals.CFG.memory_limit if roop.globals.CFG.memory_limit > 0 else None
    )

    batch_process_regular(
        swap_model,
        output_method,
        list_files_process,
        mask_engine,
        clip_text,
        processing_method == "In-Memory processing",
        imagemask,
        restore_original_mouth,
        num_swap_steps,
        progress,
        SELECTED_INPUT_FACE_INDEX,
    )
    is_processing = False

    # Collect everything the run produced and re-enable the start button.
    outdir = pathlib.Path(roop.globals.output_path)
    outfiles = [str(item) for item in outdir.rglob("*") if item.is_file()]
    if len(outfiles) > 0:
        yield (
            gr.Button(variant="primary", interactive=True),
            gr.Button(variant="secondary", interactive=False),
            gr.Files(value=outfiles),
        )
    else:
        yield (
            gr.Button(variant="primary", interactive=True),
            gr.Button(variant="secondary", interactive=False),
            None,
        )
def stop_swap():
    """Signal the worker threads to abort and re-arm the start/stop buttons."""
    roop.globals.processing = False
    gr.Info("Aborting processing - please wait for the remaining threads to be stopped")
    # BUGFIX: this handler is wired with outputs=[bt_start, bt_stop] (two
    # components), but previously returned a third trailing None, which does
    # not match the declared outputs.
    return (
        gr.Button(variant="primary", interactive=True),
        gr.Button(variant="secondary", interactive=False),
    )
def on_fps_changed(fps):
    """Store a manual fps override on the currently selected target entry."""
    global selected_preview_index, list_files_process
    # Nothing to update when no file is queued or the entry has no frames yet.
    if not list_files_process:
        return
    entry = list_files_process[selected_preview_index]
    if entry.endframe < 1:
        return
    entry.fps = fps
def on_destfiles_changed(destfiles):
    """Sync the internal processing queue with the target-files widget.

    Returns updates for the frame slider and the frame-range status text,
    based on the first queued file.
    """
    global selected_preview_index, list_files_process, current_video_fps

    if destfiles is None or len(destfiles) < 1:
        list_files_process.clear()
        return gr.Slider(value=1, maximum=1, info="0:00:00"), ""

    # NOTE(review): the change event delivers the widget's *complete* current
    # file list, but entries are appended here without clearing first - adding
    # more files appears to duplicate existing queue entries. Confirm whether
    # this is intended before changing it.
    for f in destfiles:
        list_files_process.append(ProcessEntry(f.name, 0, 0, 0))

    selected_preview_index = 0
    idx = selected_preview_index

    filename = list_files_process[idx].filename
    if util.is_video(filename) or filename.lower().endswith("gif"):
        total_frames = get_video_frame_total(filename)
        if total_frames is None or total_frames < 1:
            # Unreadable video: fall back to a single frame so the UI stays usable.
            total_frames = 1
            gr.Warning(f"Corrupted video (unknown), can't detect number of frames!")
        else:
            current_video_fps = util.detect_fps(filename)
    else:
        total_frames = 1
    list_files_process[idx].endframe = total_frames
    if total_frames > 1:
        return gr.Slider(
            value=1, maximum=total_frames, info="0:00:00"
        ), gen_processing_text(
            list_files_process[idx].startframe, list_files_process[idx].endframe
        )
    return gr.Slider(value=1, maximum=total_frames, info="0:00:00"), ""
def on_destfiles_selected(evt: gr.SelectData):
    """Handle selecting a different target file in the files widget.

    Updates the preview slider range, the frame-range status text and the
    fps override slider for the newly selected entry.
    """
    global selected_preview_index, list_files_process, current_video_fps

    if evt is not None:
        selected_preview_index = evt.index
    idx = selected_preview_index

    filename = list_files_process[idx].filename
    fps = list_files_process[idx].fps
    if util.is_video(filename) or filename.lower().endswith("gif"):
        total_frames = get_video_frame_total(filename)
        current_video_fps = util.detect_fps(filename)
        # First time this entry is inspected: default to processing all frames.
        if list_files_process[idx].endframe == 0:
            list_files_process[idx].endframe = total_frames
    else:
        total_frames = 1

    if total_frames > 1:
        return (
            gr.Slider(
                value=list_files_process[idx].startframe,
                maximum=total_frames,
                info="0:00:00",
            ),
            gen_processing_text(
                list_files_process[idx].startframe, list_files_process[idx].endframe
            ),
            fps,
        )
    return (
        gr.Slider(value=1, maximum=total_frames, info="0:00:00"),
        gen_processing_text(0, 0),
        fps,
    )
def on_resultfiles_selected(evt: gr.SelectData, files):
    """Display the output file the user clicked in the results list."""
    chosen = files[evt.index]
    return display_output(chosen.name)
def on_resultfiles_finished(files):
    """Display the first processed file once a batch run completes."""
    if not files:
        return None, None
    return display_output(files[0].name)
def get_gradio_output_format():
    """Return the configured image format, translating 'jpg' to gradio's 'jpeg'."""
    fmt = roop.globals.CFG.output_image_format
    return "jpeg" if fmt == "jpg" else fmt
def display_output(filename):
    """Route *filename* to either the video player or the image widget."""
    if util.is_video(filename) and roop.globals.CFG.output_show_video:
        return gr.Image(visible=False), gr.Video(visible=True, value=filename)
    # Fallback: show a single frame (videos/GIFs use their first frame).
    if util.is_video(filename) or filename.lower().endswith("gif"):
        current_frame = get_video_frame(filename)
    else:
        current_frame = get_image_frame(filename)
    image_update = gr.Image(visible=True, value=util.convert_to_gradio(current_frame))
    return image_update, gr.Video(visible=False)