Fix audio sync
This commit is contained in:
parent
b050624eee
commit
87e5ba4dcc
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,5 @@
|
||||
.idea
|
||||
.assets
|
||||
.temp
|
||||
__pycache__/
|
||||
.vscode
|
||||
|
@ -37,7 +37,7 @@ python run.py [options]
|
||||
-o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory
|
||||
--frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] choose from the available frame processors (choices: face_enhancer, face_swapper, frame_enhancer, ...)
|
||||
--ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] choose from the available ui layouts (choices: benchmark, default, ...)
|
||||
--keep-fps preserve the frames per second (fps) of the target
|
||||
--fps-cap cap the frames per second (fps) of the target to 30
|
||||
--keep-temp retain temporary frames after processing
|
||||
--skip-audio omit audio from the target
|
||||
--face-recognition {reference,many} specify the method for face recognition
|
||||
|
@ -34,7 +34,7 @@ def parse_args() -> None:
|
||||
program.add_argument('-o', '--output', help = wording.get('output_help'), dest = 'output_path')
|
||||
program.add_argument('--frame-processors', help = wording.get('frame_processors_help').format(choices = ', '.join(list_module_names('facefusion/processors/frame/modules'))), dest = 'frame_processors', default = ['face_swapper'], nargs='+')
|
||||
program.add_argument('--ui-layouts', help = wording.get('ui_layouts_help').format(choices = ', '.join(list_module_names('facefusion/uis/layouts'))), dest = 'ui_layouts', default = ['default'], nargs='+')
|
||||
program.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action='store_true')
|
||||
program.add_argument('--fps-cap', help = wording.get('fps_cap_help'), dest = 'fps_cap', action='store_true')
|
||||
program.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action='store_true')
|
||||
program.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action='store_true')
|
||||
program.add_argument('--face-recognition', help = wording.get('face_recognition_help'), dest = 'face_recognition', default = 'reference', choices = facefusion.choices.face_recognition)
|
||||
@ -64,7 +64,7 @@ def parse_args() -> None:
|
||||
facefusion.globals.headless = facefusion.globals.source_path is not None and facefusion.globals.target_path is not None and facefusion.globals.output_path is not None
|
||||
facefusion.globals.frame_processors = args.frame_processors
|
||||
facefusion.globals.ui_layouts = args.ui_layouts
|
||||
facefusion.globals.keep_fps = args.keep_fps
|
||||
facefusion.globals.fps_cap = args.fps_cap
|
||||
facefusion.globals.keep_temp = args.keep_temp
|
||||
facefusion.globals.skip_audio = args.skip_audio
|
||||
facefusion.globals.face_recognition = args.face_recognition
|
||||
@ -153,13 +153,11 @@ def process_video() -> None:
|
||||
update_status(wording.get('creating_temp'))
|
||||
create_temp(facefusion.globals.target_path)
|
||||
# extract frames
|
||||
if facefusion.globals.keep_fps:
|
||||
fps = detect_fps(facefusion.globals.target_path)
|
||||
update_status(wording.get('extracting_frames_fps').format(fps = fps))
|
||||
extract_frames(facefusion.globals.target_path, fps)
|
||||
else:
|
||||
update_status(wording.get('extracting_frames_fps').format(fps = 30))
|
||||
extract_frames(facefusion.globals.target_path)
|
||||
fps = detect_fps(facefusion.globals.target_path)
|
||||
if facefusion.globals.fps_cap and fps > 30:
|
||||
fps = 30
|
||||
update_status(wording.get('extracting_frames_fps').format(fps = fps))
|
||||
extract_frames(facefusion.globals.target_path, fps)
|
||||
# process frame
|
||||
temp_frame_paths = get_temp_frame_paths(facefusion.globals.target_path)
|
||||
if temp_frame_paths:
|
||||
@ -171,25 +169,17 @@ def process_video() -> None:
|
||||
update_status(wording.get('temp_frames_not_found'))
|
||||
return
|
||||
# create video
|
||||
if facefusion.globals.keep_fps:
|
||||
fps = detect_fps(facefusion.globals.target_path)
|
||||
update_status(wording.get('creating_video_fps').format(fps = fps))
|
||||
if not create_video(facefusion.globals.target_path, fps):
|
||||
update_status(wording.get('creating_video_failed'))
|
||||
else:
|
||||
update_status(wording.get('creating_video_fps').format(fps = 30))
|
||||
if not create_video(facefusion.globals.target_path):
|
||||
update_status(wording.get('creating_video_failed'))
|
||||
update_status(wording.get('creating_video_fps').format(fps = fps))
|
||||
if not create_video(facefusion.globals.target_path, fps):
|
||||
update_status(wording.get('creating_video_failed'))
|
||||
|
||||
# handle audio
|
||||
if facefusion.globals.skip_audio:
|
||||
move_temp(facefusion.globals.target_path, facefusion.globals.output_path)
|
||||
update_status(wording.get('skipping_audio'))
|
||||
else:
|
||||
if facefusion.globals.keep_fps:
|
||||
update_status(wording.get('restoring_audio'))
|
||||
else:
|
||||
update_status(wording.get('restoring_audio_issues'))
|
||||
restore_audio(facefusion.globals.target_path, facefusion.globals.output_path)
|
||||
update_status(wording.get('restoring_audio'))
|
||||
restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, fps)
|
||||
# clear temp
|
||||
update_status(wording.get('clearing_temp'))
|
||||
clear_temp(facefusion.globals.target_path)
|
||||
|
@ -8,7 +8,7 @@ output_path : Optional[str] = None
|
||||
headless : Optional[bool] = None
|
||||
frame_processors : List[str] = []
|
||||
ui_layouts : List[str] = []
|
||||
keep_fps : Optional[bool] = None
|
||||
fps_cap : Optional[bool] = None
|
||||
keep_temp : Optional[bool] = None
|
||||
skip_audio : Optional[bool] = None
|
||||
face_recognition : Optional[FaceRecognition] = None
|
||||
|
@ -5,20 +5,20 @@ import facefusion.globals
|
||||
from facefusion import wording
|
||||
from facefusion.uis.typing import Update
|
||||
|
||||
KEEP_FPS_CHECKBOX : Optional[gradio.Checkbox] = None
|
||||
FPS_CAP_CHECKBOX : Optional[gradio.Checkbox] = None
|
||||
KEEP_TEMP_CHECKBOX : Optional[gradio.Checkbox] = None
|
||||
SKIP_AUDIO_CHECKBOX : Optional[gradio.Checkbox] = None
|
||||
|
||||
|
||||
def render() -> None:
|
||||
global KEEP_FPS_CHECKBOX
|
||||
global FPS_CAP_CHECKBOX
|
||||
global KEEP_TEMP_CHECKBOX
|
||||
global SKIP_AUDIO_CHECKBOX
|
||||
|
||||
with gradio.Box():
|
||||
KEEP_FPS_CHECKBOX = gradio.Checkbox(
|
||||
label = wording.get('keep_fps_checkbox_label'),
|
||||
value = facefusion.globals.keep_fps
|
||||
FPS_CAP_CHECKBOX = gradio.Checkbox(
|
||||
label = wording.get('fps_cap_checkbox_label'),
|
||||
value = facefusion.globals.fps_cap
|
||||
)
|
||||
KEEP_TEMP_CHECKBOX = gradio.Checkbox(
|
||||
label = wording.get('keep_temp_checkbox_label'),
|
||||
@ -31,7 +31,7 @@ def render() -> None:
|
||||
|
||||
|
||||
def listen() -> None:
	# Wire each processing-option checkbox to update_checkbox so toggling it
	# writes the new value into the matching facefusion.globals attribute.
	# The checkbox is both input and output so gradio re-renders its state.
	KEEP_FPS_CHECKBOX.change(lambda value: update_checkbox('keep_fps', value), inputs = KEEP_FPS_CHECKBOX, outputs = KEEP_FPS_CHECKBOX)
	FPS_CAP_CHECKBOX.change(lambda value: update_checkbox('fps_cap', value), inputs = FPS_CAP_CHECKBOX, outputs = FPS_CAP_CHECKBOX)
	KEEP_TEMP_CHECKBOX.change(lambda value: update_checkbox('keep_temp', value), inputs = KEEP_TEMP_CHECKBOX, outputs = KEEP_TEMP_CHECKBOX)
	SKIP_AUDIO_CHECKBOX.change(lambda value: update_checkbox('skip_audio', value), inputs = SKIP_AUDIO_CHECKBOX, outputs = SKIP_AUDIO_CHECKBOX)
||||
|
||||
|
@ -51,11 +51,11 @@ def extract_frames(target_path : str, fps : float = 30) -> bool:
|
||||
trim_frame_end = facefusion.globals.trim_frame_end
|
||||
commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', str(temp_frame_quality), '-pix_fmt', 'rgb24' ]
|
||||
if trim_frame_start is not None and trim_frame_end is not None:
|
||||
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps)])
|
||||
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
|
||||
elif trim_frame_start is not None:
|
||||
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps)])
|
||||
commands.extend(['-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
|
||||
elif trim_frame_end is not None:
|
||||
commands.extend(['-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps)])
|
||||
commands.extend(['-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) + ',setpts=(PTS-STARTPTS)'])
|
||||
else:
|
||||
commands.extend(['-vf', 'fps=' + str(fps)])
|
||||
commands.extend([os.path.join(temp_directory_path, '%04d.' + facefusion.globals.temp_frame_format)])
|
||||
@ -75,18 +75,37 @@ def create_video(target_path : str, fps : float = 30) -> bool:
|
||||
return run_ffmpeg(commands)
|
||||
|
||||
def restore_audio(target_path : str, output_path : str, fps : float) -> None:
	"""
	Mux the audio of the target video back into the processed video.

	Extracts the target audio to a temporary mp3 next to the target file,
	trims it to the configured trim frame range (frame indices converted to
	milliseconds via fps), then muxes it with the processed temp video into
	output_path. Whenever an ffmpeg step fails, the processed video is moved
	to output_path without audio instead.

	:param target_path: path of the original target video (audio source)
	:param output_path: destination path for the final video
	:param fps: frames per second used to convert trim frame indices to time
	"""
	trim_frame_start = facefusion.globals.trim_frame_start
	trim_frame_end = facefusion.globals.trim_frame_end
	temp_output_path = get_temp_output_path(target_path)
	temp_target_audio_path = os.path.join(os.path.dirname(target_path), Path(target_path).stem + '.mp3')
	temp_target_audio_trimmed_path = temp_target_audio_path.replace('.mp3', '_trimmed.mp3')
	try:
		# extract the audio track of the target video into a temp audio file
		commands = [ '-hwaccel', 'auto', '-i', target_path, '-vn', '-y', temp_target_audio_path ]
		if not run_ffmpeg(commands):
			move_temp(target_path, output_path)
			return
		# trim the audio file to the selected frame range
		if trim_frame_start is None:
			trim_frame_start = 0
		# compute milliseconds directly to avoid float artifacts from round(seconds, 3) * 1000
		start_time_ms = round(trim_frame_start / fps * 1000, 3)
		commands = [ '-hwaccel', 'auto', '-ss', str(start_time_ms) + 'ms' ]
		if trim_frame_end is not None:
			end_time_ms = round(trim_frame_end / fps * 1000, 3)
			commands.extend([ '-to', str(end_time_ms) + 'ms' ])
		commands.extend([ '-i', temp_target_audio_path, '-vn', '-c', 'copy', '-y', temp_target_audio_trimmed_path ])
		if not run_ffmpeg(commands):
			move_temp(target_path, output_path)
			return
		# mux the trimmed audio into the processed video without re-encoding video
		commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', temp_target_audio_trimmed_path, '-c:v', 'copy', '-map', '0:v', '-map', '1:a', '-y', output_path ]
		if not run_ffmpeg(commands):
			move_temp(target_path, output_path)
	finally:
		# remove the intermediate audio files (previously leaked next to the target file)
		for temp_audio_path in [ temp_target_audio_path, temp_target_audio_trimmed_path ]:
			if os.path.isfile(temp_audio_path):
				os.remove(temp_audio_path)
||||
|
@ -7,7 +7,7 @@ WORDING =\
|
||||
'output_help': 'specify the output file or directory',
|
||||
'frame_processors_help': 'choose from the available frame processors (choices: {choices}, ...)',
|
||||
'ui_layouts_help': 'choose from the available ui layouts (choices: {choices}, ...)',
|
||||
'keep_fps_help': 'preserve the frames per second (fps) of the target',
|
||||
'fps_cap_help': 'cap the frames per second (fps) of the target to 30',
|
||||
'keep_temp_help': 'retain temporary frames after processing',
|
||||
'skip_audio_help': 'omit audio from the target',
|
||||
'face_recognition_help': 'specify the method for face recognition',
|
||||
@ -68,7 +68,7 @@ WORDING =\
|
||||
'preview_image_label': 'PREVIEW',
|
||||
'preview_frame_slider_label': 'PREVIEW FRAME',
|
||||
'frame_processors_checkbox_group_label': 'FRAME PROCESSORS',
|
||||
'keep_fps_checkbox_label': 'KEEP FPS',
|
||||
'fps_cap_checkbox_label': 'FPS CAP',
|
||||
'keep_temp_checkbox_label': 'KEEP TEMP',
|
||||
'skip_audio_checkbox_label': 'SKIP AUDIO',
|
||||
'temp_frame_format_dropdown_label': 'TEMP FRAME FORMAT',
|
||||
|
Loading…
Reference in New Issue
Block a user