diff --git a/.gitignore b/.gitignore
index 41b03a2a..8e56275c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .idea
 .assets
 .temp
+__pycache__/
+.vscode
diff --git a/README.md b/README.md
index 1a48a109..573dc6c1 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ python run.py [options]
 -o OUTPUT_PATH, --output OUTPUT_PATH  specify the output file or directory
 --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...]  choose from the available frame processors (choices: face_enhancer, face_swapper, frame_enhancer, ...)
 --ui-layouts UI_LAYOUTS [UI_LAYOUTS ...]  choose from the available ui layouts (choices: benchmark, default, ...)
---keep-fps  preserve the frames per second (fps) of the target
+--fps-cap  cap the frames per second (fps) of the target to 30
 --keep-temp  retain temporary frames after processing
 --skip-audio  omit audio from the target
 --face-recognition {reference,many}  specify the method for face recognition
diff --git a/facefusion/core.py b/facefusion/core.py
index 24dcce75..6ef2d4ad 100755
--- a/facefusion/core.py
+++ b/facefusion/core.py
@@ -34,7 +34,7 @@ def parse_args() -> None:
 	program.add_argument('-o', '--output', help = wording.get('output_help'), dest = 'output_path')
 	program.add_argument('--frame-processors', help = wording.get('frame_processors_help').format(choices = ', '.join(list_module_names('facefusion/processors/frame/modules'))), dest = 'frame_processors', default = ['face_swapper'], nargs='+')
 	program.add_argument('--ui-layouts', help = wording.get('ui_layouts_help').format(choices = ', '.join(list_module_names('facefusion/uis/layouts'))), dest = 'ui_layouts', default = ['default'], nargs='+')
-	program.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action='store_true')
+	program.add_argument('--fps-cap', help = wording.get('fps_cap_help'), dest = 'fps_cap', action='store_true')
 	program.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action='store_true')
 	program.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action='store_true')
 	program.add_argument('--face-recognition', help = wording.get('face_recognition_help'), dest = 'face_recognition', default = 'reference', choices = facefusion.choices.face_recognition)
@@ -64,7 +64,7 @@ def parse_args() -> None:
 	facefusion.globals.headless = facefusion.globals.source_path is not None and facefusion.globals.target_path is not None and facefusion.globals.output_path is not None
 	facefusion.globals.frame_processors = args.frame_processors
 	facefusion.globals.ui_layouts = args.ui_layouts
-	facefusion.globals.keep_fps = args.keep_fps
+	facefusion.globals.fps_cap = args.fps_cap
 	facefusion.globals.keep_temp = args.keep_temp
 	facefusion.globals.skip_audio = args.skip_audio
 	facefusion.globals.face_recognition = args.face_recognition
@@ -153,13 +153,12 @@ def process_video() -> None:
 	update_status(wording.get('creating_temp'))
 	create_temp(facefusion.globals.target_path)
 	# extract frames
-	if facefusion.globals.keep_fps:
-		fps = detect_fps(facefusion.globals.target_path)
-		update_status(wording.get('extracting_frames_fps').format(fps = fps))
-		extract_frames(facefusion.globals.target_path, fps)
-	else:
-		update_status(wording.get('extracting_frames_fps').format(fps = 30))
-		extract_frames(facefusion.globals.target_path)
+	fps = detect_fps(facefusion.globals.target_path)
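+	# cap extraction at 30 fps only when --fps-cap is set and the source runs faster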
+	if facefusion.globals.fps_cap and fps > 30:
+		fps = 30
+	update_status(wording.get('extracting_frames_fps').format(fps = fps))
+	extract_frames(facefusion.globals.target_path, fps)
 	# process frame
 	temp_frame_paths = get_temp_frame_paths(facefusion.globals.target_path)
 	if temp_frame_paths:
@@ -171,25 +170,17 @@ def process_video() -> None:
 		update_status(wording.get('temp_frames_not_found'))
 		return
 	# create video
-	if facefusion.globals.keep_fps:
-		fps = detect_fps(facefusion.globals.target_path)
-		update_status(wording.get('creating_video_fps').format(fps = fps))
-		if not create_video(facefusion.globals.target_path, fps):
-			update_status(wording.get('creating_video_failed'))
-	else:
-		update_status(wording.get('creating_video_fps').format(fps = 30))
-		if not create_video(facefusion.globals.target_path):
-			update_status(wording.get('creating_video_failed'))
+	update_status(wording.get('creating_video_fps').format(fps = fps))
+	if not create_video(facefusion.globals.target_path, fps):
+		update_status(wording.get('creating_video_failed'))
 	# handle audio
 	if facefusion.globals.skip_audio:
 		move_temp(facefusion.globals.target_path, facefusion.globals.output_path)
 		update_status(wording.get('skipping_audio'))
 	else:
-		if facefusion.globals.keep_fps:
-			update_status(wording.get('restoring_audio'))
-		else:
-			update_status(wording.get('restoring_audio_issues'))
-		restore_audio(facefusion.globals.target_path, facefusion.globals.output_path)
+		update_status(wording.get('restoring_audio'))
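+		# pass fps along so restore_audio can convert trim frame indices into timestamps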
+		restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, fps)
 	# clear temp
 	update_status(wording.get('clearing_temp'))
 	clear_temp(facefusion.globals.target_path)
diff --git a/facefusion/globals.py b/facefusion/globals.py
index d88ec95e..10692f7c 100644
--- a/facefusion/globals.py
+++ b/facefusion/globals.py
@@ -8,7 +8,7 @@ output_path : Optional[str] = None
 headless : Optional[bool] = None
 frame_processors : List[str] = []
 ui_layouts : List[str] = []
-keep_fps : Optional[bool] = None
+fps_cap : Optional[bool] = None
 keep_temp : Optional[bool] = None
 skip_audio : Optional[bool] = None
 face_recognition : Optional[FaceRecognition] = None
diff --git a/facefusion/uis/components/settings.py b/facefusion/uis/components/settings.py
index 0f0efe08..b4e8519b 100644
--- a/facefusion/uis/components/settings.py
+++ b/facefusion/uis/components/settings.py
@@ -5,20 +5,20 @@ import facefusion.globals
 from facefusion import wording
 from facefusion.uis.typing import Update

-KEEP_FPS_CHECKBOX : Optional[gradio.Checkbox] = None
+FPS_CAP_CHECKBOX : Optional[gradio.Checkbox] = None
 KEEP_TEMP_CHECKBOX : Optional[gradio.Checkbox] = None
 SKIP_AUDIO_CHECKBOX : Optional[gradio.Checkbox] = None


 def render() -> None:
-	global KEEP_FPS_CHECKBOX
+	global FPS_CAP_CHECKBOX
 	global KEEP_TEMP_CHECKBOX
 	global SKIP_AUDIO_CHECKBOX

 	with gradio.Box():
-		KEEP_FPS_CHECKBOX = gradio.Checkbox(
-			label = wording.get('keep_fps_checkbox_label'),
-			value = facefusion.globals.keep_fps
+		FPS_CAP_CHECKBOX = gradio.Checkbox(
+			label = wording.get('fps_cap_checkbox_label'),
+			value = facefusion.globals.fps_cap
 		)
 		KEEP_TEMP_CHECKBOX = gradio.Checkbox(
 			label = wording.get('keep_temp_checkbox_label'),
@@ -31,7 +31,7 @@ def render() -> None:


 def listen() -> None:
-	KEEP_FPS_CHECKBOX.change(lambda value: update_checkbox('keep_fps', value), inputs = KEEP_FPS_CHECKBOX, outputs = KEEP_FPS_CHECKBOX)
+	FPS_CAP_CHECKBOX.change(lambda value: update_checkbox('fps_cap', value), inputs = FPS_CAP_CHECKBOX, outputs = FPS_CAP_CHECKBOX)
 	KEEP_TEMP_CHECKBOX.change(lambda value: update_checkbox('keep_temp', value), inputs = KEEP_TEMP_CHECKBOX, outputs = KEEP_TEMP_CHECKBOX)
 	SKIP_AUDIO_CHECKBOX.change(lambda value: update_checkbox('skip_audio', value), inputs = SKIP_AUDIO_CHECKBOX, outputs = SKIP_AUDIO_CHECKBOX)
diff --git a/facefusion/utilities.py b/facefusion/utilities.py
index 41d2d32c..b25f8421 100644
--- a/facefusion/utilities.py
+++ b/facefusion/utilities.py
@@ -51,11 +51,12 @@ def extract_frames(target_path : str, fps : float = 30) -> bool:
 	trim_frame_end = facefusion.globals.trim_frame_end
 	commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', str(temp_frame_quality), '-pix_fmt', 'rgb24' ]
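+	# setpts=(PTS-STARTPTS) rebases timestamps to zero after the trim filter, so the fps filter samples the trimmed range from its start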
 	if trim_frame_start is not None and trim_frame_end is not None:
-		commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps)])
+		commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
 	elif trim_frame_start is not None:
-		commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps)])
+		commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
 	elif trim_frame_end is not None:
-		commands.extend(['-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps)])
+		commands.extend(['-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
 	else:
 		commands.extend(['-vf', 'fps=' + str(fps)])
 	commands.extend([os.path.join(temp_directory_path, '%04d.' + facefusion.globals.temp_frame_format)])
@@ -75,18 +76,37 @@ def create_video(target_path : str, fps : float = 30) -> bool:
 	return run_ffmpeg(commands)


-def restore_audio(target_path : str, output_path : str) -> None:
+def restore_audio(target_path : str, output_path : str, fps : float) -> None:
 	trim_frame_start = facefusion.globals.trim_frame_start
 	trim_frame_end = facefusion.globals.trim_frame_end
 	temp_output_path = get_temp_output_path(target_path)
-	commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', target_path ]
-	if trim_frame_start is not None and trim_frame_end is not None:
-		commands.extend([ '-filter:v', 'select=between(n,' + str(trim_frame_start) + ',' + str(trim_frame_end) + ')' ])
-	elif trim_frame_start is not None:
-		commands.extend([ '-filter:v', 'select=gt(n,' + str(trim_frame_start) + ')' ])
-	elif trim_frame_end is not None:
-		commands.extend([ '-filter:v', 'select=lt(n,' + str(trim_frame_end) + ')' ])
-	commands.extend([ '-c:a', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-y', output_path ])
+
+	# create a temp audio file extracted from the target video
+	# note: the temp mp3 files are written next to the target and are not cleaned up here
+	temp_target_audio_path = os.path.join(os.path.dirname(target_path), Path(target_path).stem + '.mp3')
+	commands = ['-hwaccel', 'auto', '-i', target_path, '-vn', '-y', temp_target_audio_path ]
+	done = run_ffmpeg(commands)
+	if not done:
+		move_temp(target_path, output_path)
+		return
+
+	# trim the audio file to the selected frame range (frame indices converted to milliseconds via fps)
+	temp_target_audio_trimmed_path = temp_target_audio_path.replace('.mp3', '_trimmed.mp3')
+	if trim_frame_start is None:
+		trim_frame_start = 0
+	start_time_ms = round(trim_frame_start / fps, 3) * 1000
+	commands = ['-hwaccel', 'auto', '-ss', str(start_time_ms) + 'ms' ]
+	if trim_frame_end is not None:
+		end_time_ms = round(trim_frame_end / fps, 3) * 1000
+		commands.extend([ '-to', str(end_time_ms) + 'ms' ])
+	commands.extend(['-i', temp_target_audio_path, '-vn', '-c', 'copy', '-y', temp_target_audio_trimmed_path])
+	done = run_ffmpeg(commands)
+	if not done:
+		move_temp(target_path, output_path)
+		return
+
+	# add the trimmed audio to the temp output video
+	commands = ['-hwaccel', 'auto', '-i', temp_output_path, '-i', temp_target_audio_trimmed_path, '-c:v', 'copy', '-map', '0:v', '-map', '1:a', '-y', output_path ]
 	done = run_ffmpeg(commands)
 	if not done:
 		move_temp(target_path, output_path)
diff --git a/facefusion/wording.py b/facefusion/wording.py
index 8a8dc717..c8245fa9 100644
--- a/facefusion/wording.py
+++ b/facefusion/wording.py
@@ -7,7 +7,7 @@ WORDING =\
 	'output_help': 'specify the output file or directory',
 	'frame_processors_help': 'choose from the available frame processors (choices: {choices}, ...)',
 	'ui_layouts_help': 'choose from the available ui layouts (choices: {choices}, ...)',
-	'keep_fps_help': 'preserve the frames per second (fps) of the target',
+	'fps_cap_help': 'cap the frames per second (fps) of the target to 30',
 	'keep_temp_help': 'retain temporary frames after processing',
 	'skip_audio_help': 'omit audio from the target',
 	'face_recognition_help': 'specify the method for face recognition',
@@ -68,7 +68,7 @@ WORDING =\
 	'preview_image_label': 'PREVIEW',
 	'preview_frame_slider_label': 'PREVIEW FRAME',
 	'frame_processors_checkbox_group_label': 'FRAME PROCESSORS',
-	'keep_fps_checkbox_label': 'KEEP FPS',
+	'fps_cap_checkbox_label': 'FPS CAP',
 	'keep_temp_checkbox_label': 'KEEP TEMP',
 	'skip_audio_checkbox_label': 'SKIP AUDIO',
 	'temp_frame_format_dropdown_label': 'TEMP FRAME FORMAT',
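
Note: restore_audio now issues three ffmpeg invocations instead of one. As a rough sketch, with hypothetical values fps = 25.0, trim_frame_start = 50 and trim_frame_end = 250, the argument lists passed to run_ffmpeg are:

    ['-hwaccel', 'auto', '-i', target_path, '-vn', '-y', temp_target_audio_path]
    ['-hwaccel', 'auto', '-ss', '2000.0ms', '-to', '10000.0ms', '-i', temp_target_audio_path, '-vn', '-c', 'copy', '-y', temp_target_audio_trimmed_path]
    ['-hwaccel', 'auto', '-i', temp_output_path, '-i', temp_target_audio_trimmed_path, '-c:v', 'copy', '-map', '0:v', '-map', '1:a', '-y', output_path]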