diff --git a/app.py b/app.py index 5ba8ec1..678e0e7 100644 --- a/app.py +++ b/app.py @@ -2,6 +2,7 @@ from refacer import Refacer import argparse import ngrok +import os parser = argparse.ArgumentParser(description='Refacer') parser.add_argument("--max_num_faces", type=int, help="Max number of faces on UI", default=5) @@ -51,6 +52,7 @@ def run(*vars): origins=vars[1:(num_faces+1)] destinations=vars[(num_faces+1):(num_faces*2)+1] thresholds=vars[(num_faces*2)+1:] + upscaler=vars[-1] faces = [] for k in range(0,num_faces): @@ -61,11 +63,15 @@ def run(*vars): 'threshold':thresholds[k] }) - return refacer.reface(video_path,faces) + return refacer.reface(video_path,faces,upscaler) origin = [] destination = [] thresholds = [] +upscaler = [] +upscaler_models = ['None'] +upscaler_models += [file for file in os.listdir('upscaler_models') if file.endswith('.onnx')] +print(upscaler_models) with gr.Blocks() as demo: with gr.Row(): @@ -81,10 +87,12 @@ def run(*vars): destination.append(gr.Image(label="Destination face")) with gr.Row(): thresholds.append(gr.Slider(label="Threshold",minimum=0.0,maximum=1.0,value=0.2)) + with gr.Row(): + upscaler.append(gr.Radio(label="Face upscaler", choices=upscaler_models, value=upscaler_models[0], interactive=True)) with gr.Row(): button=gr.Button("Reface", variant="primary") - button.click(fn=run,inputs=[video]+origin+destination+thresholds,outputs=[video2]) + button.click(fn=run,inputs=[video]+origin+destination+thresholds+upscaler,outputs=[video2]) if args.ngrok is not None: connect(args.ngrok, args.server_port, {'region': args.ngrok_region, 'authtoken_from_env': False}) diff --git a/esrgan_onnx.py b/esrgan_onnx.py new file mode 100644 index 0000000..46eddae --- /dev/null +++ b/esrgan_onnx.py @@ -0,0 +1,24 @@ +import numpy as np + +class ESRGAN: + def __init__(self, session): + self.session = session + self.model_input = self.session.get_inputs()[0].name + + def _pre_process(self, image_array): + image_array = image_array.transpose(2, 0, 
1).astype('float32') / 255.0 + image_array = np.expand_dims(image_array, axis=0) + return image_array + + def _post_process(self, result): + result = np.clip(result.transpose(1, 2, 0), 0, 1) * 255.0 + return result.astype(np.uint8) + + def get(self, image_array): + input_size = image_array.shape[1] + image_array = self._pre_process(image_array) + ort_inputs = {self.model_input: image_array} + result = self.session.run(None, ort_inputs)[0][0] + result = self._post_process(result) + scale_factor = int(result.shape[1] / input_size) + return result, scale_factor diff --git a/gfpgan_onnx.py b/gfpgan_onnx.py new file mode 100644 index 0000000..7faa063 --- /dev/null +++ b/gfpgan_onnx.py @@ -0,0 +1,31 @@ +import cv2 +import numpy as np + +class GFPGAN: + def __init__(self, session): + self.session = session + self.model_input = self.session.get_inputs()[0].name + + def _pre_process(self, image_array): + image_array = cv2.resize(image_array, (512, 512)) + image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB) + image_array = image_array.astype('float32') / 255.0 + image_array = (image_array - 0.5) / 0.5 + image_array = np.expand_dims(image_array, axis=0).transpose(0, 3, 1, 2) + return image_array + + def _post_process(self, result): + result = np.clip(result, -1, 1) + result = (result + 1) / 2 + result = result.transpose(1, 2, 0) * 255.0 + result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR) + return result.astype(np.uint8) + + def get(self, image_array): + input_size = image_array.shape[1] + image_array = self._pre_process(image_array) + ort_inputs = {self.model_input: image_array} + result = self.session.run(None, ort_inputs)[0][0] + result = self._post_process(result) + scale_factor = int(result.shape[1] / input_size) + return result, scale_factor diff --git a/refacer.py b/refacer.py index 5be6d70..f9b619f 100644 --- a/refacer.py +++ b/refacer.py @@ -20,6 +20,9 @@ from insightface.utils.storage import ensure_available import re import subprocess +import numpy as np +from 
esrgan_onnx import ESRGAN +from gfpgan_onnx import GFPGAN class RefacerMode(Enum): CPU, CUDA, COREML, TENSORRT = range(1, 5) @@ -93,6 +96,10 @@ def __init_apps(self): model_path = 'inswapper_128.onnx' sess_swap = rt.InferenceSession(model_path, self.sess_options, providers=self.providers) self.face_swapper = INSwapper(model_path,sess_swap) + self.face_swapper_input_size = self.face_swapper.input_size[0] + #print("INSwapper resolution = ",self.face_swapper_input_size) + + def prepare_faces(self, faces): self.replacement_faces=[] @@ -149,10 +156,58 @@ def __get_faces(self,frame,max_num=0): ret.append(face) return ret + def paste_upscale(self, bgr_fake, M, img): + upsk_face, self.scale_factor = self.face_upscaler_model.get(bgr_fake) + M_scale = M * self.scale_factor + target_img = img + IM = cv2.invertAffineTransform(M_scale) + + face_matte = np.full((target_img.shape[0],target_img.shape[1]), 255, dtype=np.uint8) + + ##Generate a white square the same size as upsk_face + img_matte = np.full((upsk_face.shape[0],upsk_face.shape[1]), 255, dtype=np.uint8) + ##Transform white square back to target_img + img_matte = cv2.warpAffine(img_matte, IM, (target_img.shape[1], target_img.shape[0]), flags=cv2.INTER_NEAREST, borderValue=0.0) + ##Blacken the edges of img_matte by 1 pixel (so the mask is not expanded on the image edges) + img_matte[:1,:] = img_matte[-1:,:] = img_matte[:,:1] = img_matte[:,-1:] = 0 + #Detect the affine transformed white area + mask_h_inds, mask_w_inds = np.where(img_matte==255) + #Calculate the height and width of the transformed white area and their geometric mean (mask_size) + mask_h = np.max(mask_h_inds) - np.min(mask_h_inds) + mask_w = np.max(mask_w_inds) - np.min(mask_w_inds) + mask_size = int(np.sqrt(mask_h*mask_w)) + #Calculate the kernel size for eroding img_matte by kernel (insightface empirical guess for best size was max(mask_size//10,10)) + k = max(mask_size//12, 8) + kernel = np.ones((k,k),np.uint8) + img_matte = cv2.erode(img_matte,kernel,iterations = 
1) + #Calculate the kernel size for blurring img_matte by blur_size (insightface empirical guess for best size was max(mask_size//20, 5)) + k = max(mask_size//24, 4) + kernel_size = (k, k) + blur_size = tuple(2*i+1 for i in kernel_size) + img_matte = cv2.GaussianBlur(img_matte, blur_size, 0) + + #Normalize images to float values and reshape + img_matte = img_matte.astype(np.float32)/255 + face_matte = face_matte.astype(np.float32)/255 + img_matte = np.minimum(face_matte, img_matte) + img_matte = np.reshape(img_matte, [img_matte.shape[0],img_matte.shape[1],1]) + ##Transform upscaled face back to target_img + paste_face = cv2.warpAffine(upsk_face, IM, (target_img.shape[1], target_img.shape[0]), borderMode=cv2.BORDER_REPLICATE) + ##Re-assemble image + paste_face = img_matte * paste_face + paste_face = paste_face + (1-img_matte) * target_img.astype(np.float32) + return paste_face.astype(np.uint8) + def process_first_face(self,frame): faces = self.__get_faces(frame,max_num=1) if len(faces) != 0: - frame = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=True) + if not self.upscale_en: + #print('\nRun native paste_back') + frame = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=True) + else: + #print('\nRun upscale') + bgr_fake, M = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=False) + frame = self.paste_upscale(bgr_fake,M,frame) return frame def process_faces(self,frame): @@ -161,7 +216,13 @@ def process_faces(self,frame): for i in range(len(faces) - 1, -1, -1): sim = self.rec_app.compute_sim(rep_face[0], faces[i].embedding) if sim>=rep_face[2]: - frame = self.face_swapper.get(frame, faces[i], rep_face[1], paste_back=True) + if not self.upscale_en: + #print('\nRun native paste_back') + frame = self.face_swapper.get(frame, faces[i], rep_face[1], paste_back=True) + else: + #print('\nRun upscale') + bgr_fake, M = self.face_swapper.get(frame, faces[i], rep_face[1], 
paste_back=False) + frame = self.paste_upscale(bgr_fake,M,frame) del faces[i] break return frame @@ -182,7 +243,19 @@ def reface_group(self, faces, frames, output): for result in results: output.write(result) - def reface(self, video_path, faces): + def reface(self, video_path, faces, upscaler): + self.upscale_en = False + if upscaler != 'None': + self.upscale_en = True + model_path = osp.join('upscaler_models',upscaler) + sess_upsk = rt.InferenceSession(model_path, self.sess_options, providers=self.providers) + if 'GFPGAN' in str(upscaler): + self.face_upscaler_model = GFPGAN(sess_upsk) + #print('\nGFPGAN upscaling.') + else: + self.face_upscaler_model = ESRGAN(sess_upsk) + #print('\nESRGAN upscaling.') + #else: print('\nNot upscaling.') self.__check_video_has_audio(video_path) output_video_path = os.path.join('out',Path(video_path).name) self.prepare_faces(faces) diff --git a/upscaler_models/Put ESRGAN and GFPGAN ONNX models here.txt b/upscaler_models/Put ESRGAN and GFPGAN ONNX models here.txt new file mode 100644 index 0000000..98d57d5 --- /dev/null +++ b/upscaler_models/Put ESRGAN and GFPGAN ONNX models here.txt @@ -0,0 +1,2 @@ +ESRGAN models can have any filename that ends in .onnx and does not contain GFPGAN +GFPGAN model filenames must contain GFPGAN and end in .onnx (for example GFPGANv1.4.onnx) \ No newline at end of file