Introduce more helpers to suit the lip-syncer needs

This commit is contained in:
henryruhs 2024-02-01 21:06:23 +01:00
parent 0276f33184
commit 2013653760
9 changed files with 60 additions and 80 deletions

View File

@ -14,5 +14,5 @@ def create_float_range(start : float, stop : float, step : float) -> List[float]
return (numpy.around(numpy.arange(start, stop + step, step), decimals = 2)).tolist()
def get_first_item(__list__ : Any) -> Any:
def get_first(__list__ : Any) -> Any:
return next(iter(__list__), None)

View File

@ -19,7 +19,7 @@ from facefusion.face_store import get_reference_faces, append_reference_face
from facefusion import face_analyser, face_masker, content_analyser, config, metadata, logger, wording
from facefusion.content_analyser import analyse_image, analyse_video
from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module
from facefusion.common_helper import create_metavar, get_first_item
from facefusion.common_helper import create_metavar, get_first
from facefusion.execution_helper import encode_execution_providers, decode_execution_providers
from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps
from facefusion.memory import limit_system_memory
@ -297,8 +297,8 @@ def process_video(start_time : float) -> None:
move_temp(facefusion.globals.target_path, facefusion.globals.output_path)
else:
if 'lip_syncer' in facefusion.globals.frame_processors:
audio_path = get_first_item(filter_audio_paths(facefusion.globals.source_paths))
if not audio_path or not replace_audio(facefusion.globals.target_path, audio_path, facefusion.globals.output_path):
source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths))
if not source_audio_path or not replace_audio(facefusion.globals.target_path, source_audio_path, facefusion.globals.output_path):
logger.warn(wording.get('restoring_audio_skipped'), __name__.upper())
move_temp(facefusion.globals.target_path, facefusion.globals.output_path)
else:

View File

@ -63,51 +63,39 @@ def is_directory(directory_path : str) -> bool:
def is_audio(audio_path : str) -> bool:
if is_file(audio_path):
return filetype.helpers.is_audio(audio_path)
return False
return is_file(audio_path) and filetype.helpers.is_audio(audio_path)
def are_audios(audio_paths : List[str]) -> bool:
def has_audio(audio_paths : List[str]) -> bool:
if audio_paths:
return all(is_audio(audio_path) for audio_path in audio_paths)
return any(is_audio(audio_path) for audio_path in audio_paths)
return False
def is_image(image_path : str) -> bool:
if is_file(image_path):
return filetype.helpers.is_image(image_path)
return False
return is_file(image_path) and filetype.helpers.is_image(image_path)
def are_images(image_paths : List[str]) -> bool:
def has_image(image_paths: List[str]) -> bool:
if image_paths:
return all(is_image(image_path) for image_path in image_paths)
return any(is_image(image_path) for image_path in image_paths)
return False
def is_video(video_path : str) -> bool:
if is_file(video_path):
return filetype.helpers.is_video(video_path)
return False
return is_file(video_path) and filetype.helpers.is_video(video_path)
def filter_audio_paths(paths : List[str]) -> List[Optional[str]]:
audio_paths = []
def filter_audio_paths(paths : List[str]) -> List[str]:
if paths:
for path in paths:
if is_audio(path):
audio_paths.append(path)
return audio_paths
return [ path for path in paths if is_audio(path) ]
return []
def filter_image_paths(paths : List[str]) -> List[Optional[str]]:
image_paths = []
def filter_image_paths(paths : List[str]) -> List[str]:
if paths:
for path in paths:
if is_image(path):
image_paths.append(path)
return image_paths
return [ path for path in paths if is_image(path) ]
return []
def resolve_relative_path(path : str) -> str:

View File

@ -16,7 +16,7 @@ from facefusion.face_helper import warp_face_by_kps, paste_back
from facefusion.face_store import get_reference_faces
from facefusion.content_analyser import clear_content_analyser
from facefusion.typing import Face, FaceSet, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, Embedding
from facefusion.filesystem import is_file, is_image, is_video, resolve_relative_path, filter_image_paths
from facefusion.filesystem import is_file, is_image, has_image, is_video, filter_image_paths, resolve_relative_path
from facefusion.download import conditional_download, is_download_done
from facefusion.vision import read_image, read_static_image, read_static_images, write_image
from facefusion.processors.frame import globals as frame_processors_globals
@ -173,11 +173,12 @@ def post_check() -> bool:
def pre_process(mode : ProcessMode) -> bool:
source_images = filter_image_paths(facefusion.globals.source_paths)
if not source_images:
if not has_image(facefusion.globals.source_paths):
logger.error(wording.get('select_image_source') + wording.get('exclamation_mark'), NAME)
return False
for source_frame in read_static_images(source_images):
source_image_paths = filter_image_paths(facefusion.globals.source_paths)
source_frames = read_static_images(source_image_paths)
for source_frame in source_frames:
if not get_one_face(source_frame):
logger.error(wording.get('no_source_face_detected') + wording.get('exclamation_mark'), NAME)
return False

View File

@ -14,7 +14,7 @@ from facefusion.face_helper import paste_back, warp_face_by_kps, warp_face_by_bb
from facefusion.face_store import get_reference_faces
from facefusion.content_analyser import clear_content_analyser
from facefusion.typing import Face, FaceSet, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, AudioFrame
from facefusion.filesystem import is_file, resolve_relative_path
from facefusion.filesystem import is_file, has_audio, resolve_relative_path
from facefusion.download import conditional_download, is_download_done
from facefusion.audio import read_static_audio, get_audio_frame
from facefusion.filesystem import is_image, is_video, filter_audio_paths
@ -22,7 +22,7 @@ from facefusion.vision import read_image, write_image, detect_video_fps, read_st
from facefusion.processors.frame import globals as frame_processors_globals
from facefusion.processors.frame import choices as frame_processors_choices
from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, clear_face_occluder, create_region_mask, clear_face_parser
from facefusion.common_helper import get_first_item
from facefusion.common_helper import get_first
FRAME_PROCESSOR = None
MODEL_MATRIX = None
@ -102,8 +102,7 @@ def post_check() -> bool:
def pre_process(mode : ProcessMode) -> bool:
audio_path = get_first_item(filter_audio_paths(facefusion.globals.source_paths))
if not audio_path:
if not has_audio(facefusion.globals.source_paths):
logger.error(wording.get('select_audio_source') + wording.get('exclamation_mark'), NAME)
return False
if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path):
@ -178,11 +177,7 @@ def normalize_crop_frame(crop_frame : VisionFrame) -> VisionFrame:
def get_reference_frame(source_face : Face, target_face : Face, temp_frame : VisionFrame) -> VisionFrame:
audio_path = get_first_item(filter_audio_paths(facefusion.globals.source_paths))
audio_frame = get_audio_frame(audio_path, detect_video_fps(facefusion.globals.target_path), facefusion.globals.reference_frame_number)
if audio_frame is not None:
return lip_sync(audio_frame, target_face, temp_frame)
return temp_frame
pass
def process_frame(audio_frame : AudioFrame, reference_faces : FaceSet, temp_frame : VisionFrame) -> VisionFrame:
@ -205,7 +200,7 @@ def process_frame(audio_frame : AudioFrame, reference_faces : FaceSet, temp_fram
def process_frames(source_paths : List[str], temp_frame_paths : List[str], update_progress : Update_Process) -> None:
reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None
source_audio_path = get_first_item(filter_audio_paths(source_paths))
source_audio_path = get_first(filter_audio_paths(source_paths))
video_fps = detect_video_fps(facefusion.globals.target_path)
for temp_frame_path in temp_frame_paths:
frame_number = int(os.path.basename(temp_frame_path).split(".")[0])
@ -219,7 +214,7 @@ def process_frames(source_paths : List[str], temp_frame_paths : List[str], updat
def process_image(source_paths : List[str], target_path : str, output_path : str) -> None:
reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None
source_audio_path = get_first_item(filter_audio_paths(source_paths))
source_audio_path = get_first(filter_audio_paths(source_paths))
audio_frame = get_audio_frame(source_audio_path, 25, 0)
if audio_frame is not None:
target_frame = read_static_image(target_path)

View File

@ -4,8 +4,8 @@ import gradio
import facefusion.globals
from facefusion import wording
from facefusion.uis.typing import File
from facefusion.common_helper import get_first_item
from facefusion.filesystem import are_audios, are_images, filter_audio_paths, filter_image_paths
from facefusion.common_helper import get_first
from facefusion.filesystem import has_audio, has_image, filter_audio_paths, filter_image_paths
from facefusion.uis.core import register_ui_component
SOURCE_FILE : Optional[gradio.File] = None
@ -18,8 +18,8 @@ def render() -> None:
global SOURCE_AUDIO
global SOURCE_IMAGE
are_source_audios = are_audios(facefusion.globals.source_paths)
are_source_images = are_images(facefusion.globals.source_paths)
has_source_audio = has_audio(facefusion.globals.source_paths)
has_source_image = has_image(facefusion.globals.source_paths)
SOURCE_FILE = gradio.File(
file_count = 'multiple',
file_types =
@ -31,19 +31,19 @@ def render() -> None:
'.webp'
],
label = wording.get('uis.source_file'),
value = facefusion.globals.source_paths if are_source_audios or are_source_images else None
value = facefusion.globals.source_paths if has_source_audio or has_source_image else None
)
source_file_names = [ source_file_value['name'] for source_file_value in SOURCE_FILE.value ] if SOURCE_FILE.value else None
source_audio_path = get_first_item(filter_audio_paths(source_file_names))
source_image_path = get_first_item(filter_image_paths(source_file_names))
source_audio_path = get_first(filter_audio_paths(source_file_names))
source_image_path = get_first(filter_image_paths(source_file_names))
SOURCE_AUDIO = gradio.Audio(
value = source_audio_path if are_source_audios else None,
visible = are_source_audios,
value = source_audio_path if has_source_audio else None,
visible = has_source_audio,
show_label = False
)
SOURCE_IMAGE = gradio.Image(
value = source_image_path if are_source_images else None,
visible = are_source_images,
value = source_image_path if has_source_image else None,
visible = has_source_image,
show_label = False
)
register_ui_component('source_audio', SOURCE_AUDIO)
@ -56,12 +56,12 @@ def listen() -> None:
def update(files : List[File]) -> Tuple[gradio.Audio, gradio.Image]:
file_names = [ file.name for file in files ] if files else None
audio_path = get_first_item(filter_audio_paths(file_names))
image_path = get_first_item(filter_image_paths(file_names))
has_audio = bool(audio_path)
has_image = bool(image_path)
if audio_path or image_path:
has_source_audio = has_audio(file_names)
has_source_image = has_image(file_names)
if has_source_audio or has_source_image:
source_audio_path = get_first(filter_audio_paths(file_names))
source_image_path = get_first(filter_image_paths(file_names))
facefusion.globals.source_paths = file_names
return gradio.Audio(value = audio_path, visible = has_audio), gradio.Image(value = image_path, visible = has_image)
return gradio.Audio(value = source_audio_path, visible = has_source_audio), gradio.Image(value = source_image_path, visible = has_source_image)
facefusion.globals.source_paths = None
return gradio.Audio(value = None, visible = False), gradio.Image(value = None, visible = False)

View File

@ -24,11 +24,11 @@ WORDING : Dict[str, Any] =\
'processing_video_failed': 'Processing to video failed',
'model_download_not_done': 'Download of the model is not done',
'model_file_not_present': 'File of the model is not present',
'select_image_source': 'Select an image for source path',
'select_audio_source': 'Select an audio for source path',
'select_image_source': 'Select a image for source path',
'select_audio_source': 'Select a audio for source path',
'select_video_target': 'Select a video for target path',
'select_image_or_video_target': 'Select an image or video for target path',
'select_file_or_directory_output': 'Select an file or directory for output path',
'select_image_or_video_target': 'Select a image or video for target path',
'select_file_or_directory_output': 'Select a file or directory for output path',
'no_source_face_detected': 'No source face detected',
'frame_processor_not_loaded': 'Frame processor {frame_processor} could not be loaded',
'frame_processor_not_implemented': 'Frame processor {frame_processor} not implemented correctly',

View File

@ -106,9 +106,3 @@ def test_read_audio_buffer() -> None:
assert isinstance(read_audio_buffer('.assets/examples/source.mp3', 1, 1), bytes)
assert isinstance(read_audio_buffer('.assets/examples/source.wav', 1, 1), bytes)
assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None
def test_replace_audio() -> None:
# todo: testing
# todo: question: does this work with trim start and trim end?
pass

View File

@ -1,7 +1,7 @@
import pytest
from facefusion.download import conditional_download
from facefusion.filesystem import is_file, is_directory, is_audio, are_audios, is_image, are_images, is_video, filter_audio_paths, filter_image_paths, list_directory
from facefusion.filesystem import is_file, is_directory, is_audio, has_audio, is_image, has_image, is_video, filter_audio_paths, filter_image_paths, list_directory
@pytest.fixture(scope = 'module', autouse = True)
@ -32,10 +32,11 @@ def test_is_audio() -> None:
assert is_audio('invalid') is False
def test_are_audios() -> None:
assert are_audios([ '.assets/examples/source.mp3' ]) is True
assert are_audios([ '.assets/examples/source.mp3', '.assets/examples/target-240p.mp4' ]) is False
assert are_audios([ 'invalid' ]) is False
def test_has_audio() -> None:
assert has_audio([ '.assets/examples/source.mp3' ]) is True
assert has_audio([ '.assets/examples/source.mp3', '.assets/examples/source.jpg' ]) is True
assert has_audio([ '.assets/examples/source.jpg', '.assets/examples/source.jpg' ]) is False
assert has_audio([ 'invalid' ]) is False
def test_is_image() -> None:
@ -44,10 +45,11 @@ def test_is_image() -> None:
assert is_image('invalid') is False
def test_are_images() -> None:
assert are_images([ '.assets/examples/source.jpg' ]) is True
assert are_images([ '.assets/examples/source.jpg', '.assets/examples/target-240p.mp4' ]) is False
assert are_images([ 'invalid' ]) is False
def test_has_image() -> None:
assert has_image([ '.assets/examples/source.jpg' ]) is True
assert has_image([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) is True
assert has_image([ '.assets/examples/source.mp3', '.assets/examples/source.mp3' ]) is False
assert has_image([ 'invalid' ]) is False
def test_is_video() -> None: