Fix audio sync

This commit is contained in:
Xpert104 2023-08-21 01:08:36 -04:00
parent b050624eee
commit 87e5ba4dcc
7 changed files with 56 additions and 45 deletions

2
.gitignore vendored
View File

@@ -1,3 +1,5 @@
.idea
.assets
.temp
__pycache__/
.vscode

View File

@@ -37,7 +37,7 @@ python run.py [options]
-o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory
--frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] choose from the available frame processors (choices: face_enhancer, face_swapper, frame_enhancer, ...)
--ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] choose from the available ui layouts (choices: benchmark, default, ...)
--keep-fps preserve the frames per second (fps) of the target
--fps-cap cap the frames per second (fps) of the target to 30
--keep-temp retain temporary frames after processing
--skip-audio omit audio from the target
--face-recognition {reference,many} specify the method for face recognition

View File

@@ -34,7 +34,7 @@ def parse_args() -> None:
program.add_argument('-o', '--output', help = wording.get('output_help'), dest = 'output_path')
program.add_argument('--frame-processors', help = wording.get('frame_processors_help').format(choices = ', '.join(list_module_names('facefusion/processors/frame/modules'))), dest = 'frame_processors', default = ['face_swapper'], nargs='+')
program.add_argument('--ui-layouts', help = wording.get('ui_layouts_help').format(choices = ', '.join(list_module_names('facefusion/uis/layouts'))), dest = 'ui_layouts', default = ['default'], nargs='+')
program.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action='store_true')
program.add_argument('--fps-cap', help = wording.get('fps_cap_help'), dest = 'fps_cap', action='store_true')
program.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action='store_true')
program.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action='store_true')
program.add_argument('--face-recognition', help = wording.get('face_recognition_help'), dest = 'face_recognition', default = 'reference', choices = facefusion.choices.face_recognition)
@@ -64,7 +64,7 @@ def parse_args() -> None:
facefusion.globals.headless = facefusion.globals.source_path is not None and facefusion.globals.target_path is not None and facefusion.globals.output_path is not None
facefusion.globals.frame_processors = args.frame_processors
facefusion.globals.ui_layouts = args.ui_layouts
facefusion.globals.keep_fps = args.keep_fps
facefusion.globals.fps_cap = args.fps_cap
facefusion.globals.keep_temp = args.keep_temp
facefusion.globals.skip_audio = args.skip_audio
facefusion.globals.face_recognition = args.face_recognition
@@ -153,13 +153,11 @@ def process_video() -> None:
update_status(wording.get('creating_temp'))
create_temp(facefusion.globals.target_path)
# extract frames
if facefusion.globals.keep_fps:
fps = detect_fps(facefusion.globals.target_path)
update_status(wording.get('extracting_frames_fps').format(fps = fps))
extract_frames(facefusion.globals.target_path, fps)
else:
update_status(wording.get('extracting_frames_fps').format(fps = 30))
extract_frames(facefusion.globals.target_path)
fps = detect_fps(facefusion.globals.target_path)
if facefusion.globals.fps_cap and fps > 30:
fps = 30
update_status(wording.get('extracting_frames_fps').format(fps = fps))
extract_frames(facefusion.globals.target_path, fps)
# process frame
temp_frame_paths = get_temp_frame_paths(facefusion.globals.target_path)
if temp_frame_paths:
@@ -171,25 +169,17 @@ def process_video() -> None:
update_status(wording.get('temp_frames_not_found'))
return
# create video
if facefusion.globals.keep_fps:
fps = detect_fps(facefusion.globals.target_path)
update_status(wording.get('creating_video_fps').format(fps = fps))
if not create_video(facefusion.globals.target_path, fps):
update_status(wording.get('creating_video_failed'))
else:
update_status(wording.get('creating_video_fps').format(fps = 30))
if not create_video(facefusion.globals.target_path):
update_status(wording.get('creating_video_failed'))
update_status(wording.get('creating_video_fps').format(fps = fps))
if not create_video(facefusion.globals.target_path, fps):
update_status(wording.get('creating_video_failed'))
# handle audio
if facefusion.globals.skip_audio:
move_temp(facefusion.globals.target_path, facefusion.globals.output_path)
update_status(wording.get('skipping_audio'))
else:
if facefusion.globals.keep_fps:
update_status(wording.get('restoring_audio'))
else:
update_status(wording.get('restoring_audio_issues'))
restore_audio(facefusion.globals.target_path, facefusion.globals.output_path)
update_status(wording.get('restoring_audio'))
restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, fps)
# clear temp
update_status(wording.get('clearing_temp'))
clear_temp(facefusion.globals.target_path)

View File

@@ -8,7 +8,7 @@ output_path : Optional[str] = None
headless : Optional[bool] = None
frame_processors : List[str] = []
ui_layouts : List[str] = []
keep_fps : Optional[bool] = None
fps_cap : Optional[bool] = None
keep_temp : Optional[bool] = None
skip_audio : Optional[bool] = None
face_recognition : Optional[FaceRecognition] = None

View File

@@ -5,20 +5,20 @@ import facefusion.globals
from facefusion import wording
from facefusion.uis.typing import Update
KEEP_FPS_CHECKBOX : Optional[gradio.Checkbox] = None
FPS_CAP_CHECKBOX : Optional[gradio.Checkbox] = None
KEEP_TEMP_CHECKBOX : Optional[gradio.Checkbox] = None
SKIP_AUDIO_CHECKBOX : Optional[gradio.Checkbox] = None
def render() -> None:
global KEEP_FPS_CHECKBOX
global FPS_CAP_CHECKBOX
global KEEP_TEMP_CHECKBOX
global SKIP_AUDIO_CHECKBOX
with gradio.Box():
KEEP_FPS_CHECKBOX = gradio.Checkbox(
label = wording.get('keep_fps_checkbox_label'),
value = facefusion.globals.keep_fps
FPS_CAP_CHECKBOX = gradio.Checkbox(
label = wording.get('fps_cap_checkbox_label'),
value = facefusion.globals.fps_cap
)
KEEP_TEMP_CHECKBOX = gradio.Checkbox(
label = wording.get('keep_temp_checkbox_label'),
@@ -31,7 +31,7 @@ def render() -> None:
def listen() -> None:
KEEP_FPS_CHECKBOX.change(lambda value: update_checkbox('keep_fps', value), inputs = KEEP_FPS_CHECKBOX, outputs = KEEP_FPS_CHECKBOX)
FPS_CAP_CHECKBOX.change(lambda value: update_checkbox('fps_cap', value), inputs = FPS_CAP_CHECKBOX, outputs = FPS_CAP_CHECKBOX)
KEEP_TEMP_CHECKBOX.change(lambda value: update_checkbox('keep_temp', value), inputs = KEEP_TEMP_CHECKBOX, outputs = KEEP_TEMP_CHECKBOX)
SKIP_AUDIO_CHECKBOX.change(lambda value: update_checkbox('skip_audio', value), inputs = SKIP_AUDIO_CHECKBOX, outputs = SKIP_AUDIO_CHECKBOX)

View File

@@ -51,11 +51,11 @@ def extract_frames(target_path : str, fps : float = 30) -> bool:
trim_frame_end = facefusion.globals.trim_frame_end
commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', str(temp_frame_quality), '-pix_fmt', 'rgb24' ]
if trim_frame_start is not None and trim_frame_end is not None:
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps)])
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
elif trim_frame_start is not None:
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps)])
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
elif trim_frame_end is not None:
commands.extend(['-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps)])
commands.extend(['-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
else:
commands.extend(['-vf', 'fps=' + str(fps)])
commands.extend([os.path.join(temp_directory_path, '%04d.' + facefusion.globals.temp_frame_format)])
@@ -75,18 +75,37 @@ def create_video(target_path : str, fps : float = 30) -> bool:
return run_ffmpeg(commands)
def restore_audio(target_path : str, output_path : str) -> None:
def restore_audio(target_path : str, output_path : str, fps: int) -> None:
trim_frame_start = facefusion.globals.trim_frame_start
trim_frame_end = facefusion.globals.trim_frame_end
temp_output_path = get_temp_output_path(target_path)
commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', target_path ]
if trim_frame_start is not None and trim_frame_end is not None:
commands.extend([ '-filter:v', 'select=between(n,' + str(trim_frame_start) + ',' + str(trim_frame_end) + ')' ])
elif trim_frame_start is not None:
commands.extend([ '-filter:v', 'select=gt(n,' + str(trim_frame_start) + ')' ])
elif trim_frame_end is not None:
commands.extend([ '-filter:v', 'select=lt(n,' + str(trim_frame_end) + ')' ])
commands.extend([ '-c:a', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-y', output_path ])
# Create temp audio file extracted from target video
temp_target_audio_path = os.path.join(os.path.dirname(target_path), Path(target_path).stem + '.mp3')
commands = ['-hwaccel', 'auto', '-i', target_path, '-vn', '-y', temp_target_audio_path ]
done = run_ffmpeg(commands)
if not done:
move_temp(target_path, output_path)
return
# Trim audio file
temp_target_audio_trimmed_path = temp_target_audio_path.replace('.mp3', '_trimmed.mp3')
if trim_frame_start is None:
trim_frame_start = 0
start_time_ms = round(trim_frame_start/fps, 3) * 1000
commands = ['-hwaccel', 'auto', '-ss', str(start_time_ms) + 'ms' ]
if trim_frame_end is not None:
end_time_ms = round(trim_frame_end/fps, 3) * 1000
commands.extend([ '-to', str(end_time_ms) + 'ms'])
commands.extend(['-i', temp_target_audio_path, '-vn', '-c', 'copy', '-y', temp_target_audio_trimmed_path])
print(commands)
done = run_ffmpeg(commands)
if not done:
move_temp(target_path, output_path)
return
# Add audio to temp output
commands = ['-hwaccel', 'auto', '-i', temp_output_path, '-i', temp_target_audio_trimmed_path, '-c:v', 'copy', '-map', '0:v', '-map', '1:a', '-y', output_path ]
done = run_ffmpeg(commands)
if not done:
move_temp(target_path, output_path)

View File

@@ -7,7 +7,7 @@ WORDING =\
'output_help': 'specify the output file or directory',
'frame_processors_help': 'choose from the available frame processors (choices: {choices}, ...)',
'ui_layouts_help': 'choose from the available ui layouts (choices: {choices}, ...)',
'keep_fps_help': 'preserve the frames per second (fps) of the target',
'fps_cap_help': 'cap the frames per second (fps) of the target to 30',
'keep_temp_help': 'retain temporary frames after processing',
'skip_audio_help': 'omit audio from the target',
'face_recognition_help': 'specify the method for face recognition',
@@ -68,7 +68,7 @@ WORDING =\
'preview_image_label': 'PREVIEW',
'preview_frame_slider_label': 'PREVIEW FRAME',
'frame_processors_checkbox_group_label': 'FRAME PROCESSORS',
'keep_fps_checkbox_label': 'KEEP FPS',
'fps_cap_checkbox_label': 'FPS CAP',
'keep_temp_checkbox_label': 'KEEP TEMP',
'skip_audio_checkbox_label': 'SKIP AUDIO',
'temp_frame_format_dropdown_label': 'TEMP FRAME FORMAT',