import json
import math

import cv2
import numpy as np
import tensorflow as tf
from PIL import Image
from scipy.signal import convolve2d
from skimage import transform as trans
from skimage.color import rgb2yuv, yuv2rgb
from tensorflow.keras.models import model_from_json

from networks.layers import AdaIN, AdaptiveAttention


def save_model_internal(model, path, name, num):
    """Serialize a model's architecture to JSON and save its weights as a numbered .h5 file."""
    json_model = model.to_json()
    with open(path + name + '.json', 'w') as json_file:
        json_file.write(json_model)

    model.save_weights(path + name + '_' + str(num) + '.h5')


def load_model_internal(path, name, num):
    """Load a model from its architecture JSON and a numbered .h5 weight file."""
    with open(path + name + '.json', 'r') as json_file:
        model_dict = json_file.read()

    mod = model_from_json(model_dict, custom_objects={'AdaIN': AdaIN, 'AdaptiveAttention': AdaptiveAttention})
    mod.load_weights(path + name + '_' + str(num) + '.h5')

    return mod
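

# A minimal usage sketch (not part of the original file): the two helpers above form a
# save/load round trip keyed on (path, name, num). The checkpoint directory, model name
# and step number below are hypothetical.
def _example_model_round_trip(model):
    save_model_internal(model, './checkpoints/', 'generator', 100000)
    return load_model_internal('./checkpoints/', 'generator', 100000)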


def save_training_meta(state_dict, path, num):
    """Save a training-state dictionary as numbered JSON."""
    with open(path + str(num) + '.json', 'w') as json_file:
        json.dump(state_dict, json_file, indent=2)


def load_training_meta(path, num):
    """Load the numbered training-state JSON written by save_training_meta."""
    with open(path + str(num) + '.json', 'r') as json_file:
        state_dict = json.load(json_file)
    return state_dict


def log_info(sw, results_dict, iteration):
    """Write every entry of results_dict as a TensorBoard scalar at the given iteration."""
    with sw.as_default():
        for key, value in results_dict.items():
            tf.summary.scalar(key, value, step=iteration)
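

# Usage sketch for log_info (the log directory and scalar names are hypothetical;
# the writer comes from the standard tf.summary API).
def _example_logging():
    sw = tf.summary.create_file_writer('./logs/run_0')
    log_info(sw, {'g_loss': 0.42, 'd_loss': 0.58}, iteration=0)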


# Five-point landmark templates (left eye, right eye, nose, left mouth corner,
# right mouth corner) for different head poses, from left profile to right
# profile, in a 112x112 reference frame.
src1 = np.array([[51.642, 50.115], [57.617, 49.990], [35.740, 69.007],
                 [51.157, 89.050], [57.025, 89.702]],
                dtype=np.float32)

src2 = np.array([[45.031, 50.118], [65.568, 50.872], [39.677, 68.111],
                 [45.177, 86.190], [64.246, 86.758]],
                dtype=np.float32)

src3 = np.array([[39.730, 51.138], [72.270, 51.138], [56.000, 68.493],
                 [42.463, 87.010], [69.537, 87.010]],
                dtype=np.float32)

src4 = np.array([[46.845, 50.872], [67.382, 50.118], [72.737, 68.111],
                 [48.167, 86.758], [67.236, 86.190]],
                dtype=np.float32)

src5 = np.array([[54.796, 49.990], [60.771, 50.115], [76.673, 69.007],
                 [55.388, 89.702], [61.257, 89.050]],
                dtype=np.float32)

src = np.array([src1, src2, src3, src4, src5])
src_map = {112: src, 224: src * 2}

# ArcFace's canonical five-point template in a 112x112 frame, expanded to
# shape (1, 5, 2) so it can be indexed like the multi-pose src array above.
arcface_src = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)

arcface_src = np.expand_dims(arcface_src, axis=0)


def extract_face(img, bb, absolute_center, mode='arcface', extention_rate=0.05, debug=False):
    """Extract a square face crop from a 128x128 image given a bounding box.

    The image is zero-padded by 60 pixels on each side so the crop can extend
    beyond the original borders, and the crop is re-centred on absolute_center.
    """
    # Shift the box and centre point into the padded coordinate frame.
    x1, y1, x2, y2 = bb + 60
    adjusted_absolute_center = (absolute_center[0] + 60, absolute_center[1] + 60)
    if debug:
        print(bb + 60)
        x1, y1, x2, y2 = bb
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
        cv2.circle(img, absolute_center, 1, (255, 0, 255), 2)
        Image.fromarray(img).show()
        x1, y1, x2, y2 = bb + 60

    padded_img = np.zeros(shape=(248, 248, 3), dtype=np.uint8)
    padded_img[60:-60, 60:-60, :] = img

    if debug:
        cv2.rectangle(padded_img, (x1, y1), (x2, y2), (0, 255, 255), 3)
        cv2.circle(padded_img, adjusted_absolute_center, 1, (255, 255, 255), 2)
        Image.fromarray(padded_img).show()

    y_len = abs(y1 - y2)
    x_len = abs(x1 - x2)

    # Make the box square by averaging its side lengths, then widen it slightly.
    new_len = (y_len + x_len) // 2
    extension = int(new_len * extention_rate)

    x_adjust = (x_len - new_len) // 2
    y_adjust = (y_len - new_len) // 2

    x_1_adjusted = x1 + x_adjust - extension
    x_2_adjusted = x2 - x_adjust + extension

    # ArcFace crops favour the upper part of the box; other modes the lower part.
    if mode == 'arcface':
        y_1_adjusted = y1 - extension
        y_2_adjusted = y2 - 2 * y_adjust + extension
    else:
        y_1_adjusted = y1 + 2 * y_adjust - extension
        y_2_adjusted = y2 + extension

    # Translate the box so it is centred on the adjusted centre point.
    move_x = adjusted_absolute_center[0] - (x_1_adjusted + x_2_adjusted) // 2
    move_y = adjusted_absolute_center[1] - (y_1_adjusted + y_2_adjusted) // 2

    x_1_adjusted += move_x
    x_2_adjusted += move_x
    y_1_adjusted += move_y
    y_2_adjusted += move_y

    return padded_img[y_1_adjusted:y_2_adjusted, x_1_adjusted:x_2_adjusted]


def distance(a, b):
    """Euclidean distance between two 2D points."""
    return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)


def euclidean_distance(a, b):
    """Euclidean distance between two 2D points (kept alongside distance() for existing callers)."""
    x1, y1 = a[0], a[1]
    x2, y2 = b[0], b[1]
    return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)


def align_face(img, landmarks, debug=False):
    """Rotate a 128x128 face crop so the eyes are horizontal.

    Returns the rotated image and the estimated absolute centre of the face
    (eye-centre x, midpoint of the eye centre and nose in y).
    """
    nose, right_eye, left_eye = landmarks

    left_eye_x = left_eye[0]
    left_eye_y = left_eye[1]

    right_eye_x = right_eye[0]
    right_eye_y = right_eye[1]

    center_eye = ((left_eye[0] + right_eye[0]) // 2, (left_eye[1] + right_eye[1]) // 2)

    # Build a right triangle from the two eyes; the third point determines
    # whether the image must be rotated clockwise or counter-clockwise.
    if left_eye_y < right_eye_y:
        point_3rd = (right_eye_x, left_eye_y)
        direction = -1
    else:
        point_3rd = (left_eye_x, right_eye_y)
        direction = 1

    if debug:
        cv2.circle(img, point_3rd, 1, (255, 0, 0), 1)
        cv2.circle(img, center_eye, 1, (255, 0, 0), 1)

        cv2.line(img, right_eye, left_eye, (0, 0, 0), 1)
        cv2.line(img, left_eye, point_3rd, (0, 0, 0), 1)
        cv2.line(img, right_eye, point_3rd, (0, 0, 0), 1)

    # Rotation angle of the eye line, via the law of cosines.
    a = euclidean_distance(left_eye, point_3rd)
    b = euclidean_distance(right_eye, left_eye)
    c = euclidean_distance(right_eye, point_3rd)

    cos_a = (b * b + c * c - a * a) / (2 * b * c)
    angle = np.arccos(cos_a)
    angle = (angle * 180) / np.pi

    if direction == -1:
        angle = 90 - angle
        ang = math.radians(direction * angle)
    else:
        ang = math.radians(direction * angle)
        angle = 0 - angle

    # Rotate about the centre of the 128x128 crop.
    M = cv2.getRotationMatrix2D((64, 64), angle, 1)
    new_img = cv2.warpAffine(img, M, (128, 128),
                             flags=cv2.INTER_CUBIC)

    # Rotate the nose and eye-centre landmarks by the same angle.
    rotated_nose = (int((nose[0] - 64) * np.cos(ang) - (nose[1] - 64) * np.sin(ang) + 64),
                    int((nose[0] - 64) * np.sin(ang) + (nose[1] - 64) * np.cos(ang) + 64))

    rotated_center_eye = (int((center_eye[0] - 64) * np.cos(ang) - (center_eye[1] - 64) * np.sin(ang) + 64),
                          int((center_eye[0] - 64) * np.sin(ang) + (center_eye[1] - 64) * np.cos(ang) + 64))

    absolute_center = (rotated_center_eye[0], (rotated_nose[1] + rotated_center_eye[1]) // 2)

    if debug:
        cv2.circle(new_img, rotated_nose, 1, (0, 0, 255), 1)
        cv2.circle(new_img, rotated_center_eye, 1, (0, 0, 255), 1)
        cv2.circle(new_img, absolute_center, 1, (0, 0, 255), 1)

    return new_img, absolute_center
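

# Sketch of how align_face and extract_face appear to fit together (assumptions:
# 'crop_128' is a 128x128 detection crop, 'landmarks' is (nose, right_eye, left_eye)
# in pixel coordinates, and 'bb' is the face bounding box as a NumPy array).
def _example_align_and_extract(crop_128, landmarks, bb):
    rotated, center = align_face(crop_128, landmarks)
    return extract_face(rotated, bb, center, mode='arcface')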


def estimate_norm(lmk, image_size=112, mode='arcface', shrink_factor=1.0):
    """Estimate the similarity transform that best maps lmk onto a source template.

    Tries each pose template and returns the 2x3 matrix with the lowest
    alignment error, along with the index of the best template.
    """
    assert lmk.shape == (5, 2)
    tform = trans.SimilarityTransform()
    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
    min_M = []
    min_index = []
    min_error = float('inf')
    src_factor = image_size / 112
    if mode == 'arcface':
        # Shrinking pulls the template points towards the centre (56, 56),
        # leaving more background around the face in the aligned crop.
        src = arcface_src * shrink_factor + (1 - shrink_factor) * 56
        src = src * src_factor
    else:
        src = src_map[image_size] * src_factor
    for i in np.arange(src.shape[0]):
        tform.estimate(lmk, src[i])
        M = tform.params[0:2, :]
        results = np.dot(M, lmk_tran.T)
        results = results.T
        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1)))

        if error < min_error:
            min_error = error
            min_M = M
            min_index = i
    return min_M, min_index


def inverse_estimate_norm(lmk, t_lmk, image_size=112, mode='arcface', shrink_factor=1.0):
    """Estimate the similarity transform mapping the transformed landmarks t_lmk back onto lmk.

    Mirrors estimate_norm; the error is still scored against each source
    template to pick the best pose index.
    """
    assert lmk.shape == (5, 2)
    tform = trans.SimilarityTransform()
    lmk_tran = np.insert(lmk, 2, values=np.ones(5), axis=1)
    min_M = []
    min_index = []
    min_error = float('inf')
    src_factor = image_size / 112
    if mode == 'arcface':
        src = arcface_src * shrink_factor + (1 - shrink_factor) * 56
        src = src * src_factor
    else:
        src = src_map[image_size] * src_factor
    for i in np.arange(src.shape[0]):
        tform.estimate(t_lmk, lmk)
        M = tform.params[0:2, :]
        results = np.dot(M, lmk_tran.T)
        results = results.T
        error = np.sum(np.sqrt(np.sum((results - src[i]) ** 2, axis=1)))

        if error < min_error:
            min_error = error
            min_M = M
            min_index = i
    return min_M, min_index


def norm_crop(img, landmark, image_size=112, mode='arcface', shrink_factor=1.0):
    """
    Align and crop the image based on the facial landmarks in the image. The alignment is done with
    a similarity transformation onto a set of source coordinates.
    :param img: Image to transform.
    :param landmark: Five landmark coordinates in the image.
    :param image_size: Desired output size after transformation.
    :param mode: 'arcface' aligns the face for use with the ArcFace facial recognition model. Useful for
    both facial recognition tasks and face swapping tasks.
    :param shrink_factor: Shrink factor applied to the source landmark coordinates. Lower values include
    more border information around the face, which is useful when face swaps should keep more background.
    The default value of 1.0 aligns the image for the ArcFace recognition model but usually omits part of
    the chin, while a value of 0.0 would collapse all source points to the middle of the image, rendering
    the alignment procedure useless.

    If an image is processed with a shrink factor of 0.85 and the identity embedding is then extracted
    with ArcFace, a central crop of factor 0.85 yields the same cropped result as using a shrink factor
    of 1.0. Since this reduces resolution, the recommendation is to process images at output resolutions
    higher than 112 when using ArcFace, so that no information is lost when resampling after the central
    crop.
    :return: The transformed image.
    """
    M, pose_index = estimate_norm(landmark, image_size, mode, shrink_factor=shrink_factor)
    warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
    return warped
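

# Sketch of the shrink-factor workflow described in the docstring above (the sizes and
# the landmark source are assumptions): align at 256x256 with shrink_factor=0.85, then
# take a central crop of the same factor to recover the ArcFace-style framing.
def _example_norm_crop(img, lm):
    aligned = norm_crop(img, lm, image_size=256, mode='arcface', shrink_factor=0.85)
    margin = int(256 * (1 - 0.85)) // 2  # pixels trimmed from each side
    face = aligned[margin:256 - margin, margin:256 - margin]
    return cv2.resize(face, (112, 112), interpolation=cv2.INTER_AREA)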


def transform_landmark_points(M, points):
    """Apply a 2x3 affine matrix to five landmark points."""
    lmk_tran = np.insert(points, 2, values=np.ones(5), axis=1)
    transformed_lmk = np.dot(M, lmk_tran.T)
    transformed_lmk = transformed_lmk.T

    return transformed_lmk


def multi_convolver(image, kernel, iterations):
    """Convolve a single-channel image with a named kernel a given number of times."""
    if kernel == "Sharpen":
        kernel = np.array([[0, -1, 0],
                           [-1, 5, -1],
                           [0, -1, 0]])
    elif kernel == "Unsharp_mask":
        # 5x5 unsharp-masking kernel (negated Gaussian with an amplified centre).
        kernel = np.array([[1, 4, 6, 4, 1],
                           [4, 16, 24, 16, 4],
                           [6, 24, -476, 24, 6],
                           [4, 16, 24, 16, 4],
                           [1, 4, 6, 4, 1]]) * (-1 / 256)
    elif kernel == "Blur":
        kernel = (1 / 16.0) * np.array([[1., 2., 1.],
                                        [2., 4., 2.],
                                        [1., 2., 1.]])
    for i in range(iterations):
        image = convolve2d(image, kernel, 'same', boundary='fill', fillvalue=0)
    return image


def convolve_rgb(image, kernel, iterations=1):
    """Convolve only the luma (Y) channel of an RGB image, preserving chroma."""
    img_yuv = rgb2yuv(image)
    img_yuv[:, :, 0] = multi_convolver(img_yuv[:, :, 0], kernel,
                                       iterations)
    final_image = yuv2rgb(img_yuv)

    return final_image.astype('float32')
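

# Example use of the luma-only convolution (assumption: the input is a float RGB
# image in [0, 1], as expected by skimage's rgb2yuv; the result is clipped because
# sharpening can push values slightly outside that range).
def _example_sharpen(rgb_image):
    sharpened = convolve_rgb(rgb_image, 'Sharpen', iterations=1)
    return np.clip(sharpened, 0.0, 1.0)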


def generate_mask_from_landmarks(lms, im_size):
    """Paint a rough face mask by drawing thick white circles around the five landmarks."""
    blend_mask_lm = np.zeros(shape=(im_size, im_size, 3), dtype='float32')

    # Eyes and the point between them.
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int(lms[0][0]), int(lms[0][1])), 12, (255, 255, 255), 30)
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int(lms[1][0]), int(lms[1][1])), 12, (255, 255, 255), 30)
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int((lms[0][0] + lms[1][0]) / 2), int((lms[0][1] + lms[1][1]) / 2)),
                               16, (255, 255, 255), 65)

    # Nose, plus the region between the eyes at nose height.
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int(lms[2][0]), int(lms[2][1])), 5, (255, 255, 255), 5)
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int((lms[0][0] + lms[1][0]) / 2), int(lms[2][1])), 16, (255, 255, 255), 100)

    # Mouth corners and the point between them.
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int(lms[3][0]), int(lms[3][1])), 6, (255, 255, 255), 30)
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int(lms[4][0]), int(lms[4][1])), 6, (255, 255, 255), 30)
    blend_mask_lm = cv2.circle(blend_mask_lm,
                               (int((lms[3][0] + lms[4][0]) / 2), int((lms[3][1] + lms[4][1]) / 2)),
                               16, (255, 255, 255), 40)
    return blend_mask_lm


def display_distance_text(im, distance, lms, im_w, im_h, scale=2):
    """Draw the identity distance as text: a darker pass first, then a lighter pass for legibility."""
    blended_insert = cv2.putText(im, str(distance)[:4],
                                 (int(lms[4] * im_w * 0.5), int(lms[5] * im_h * 0.8)),
                                 cv2.FONT_HERSHEY_SIMPLEX, scale * 0.5, (0.08, 0.16, 0.08), int(scale * 2))
    blended_insert = cv2.putText(blended_insert, str(distance)[:4],
                                 (int(lms[4] * im_w * 0.5), int(lms[5] * im_h * 0.8)),
                                 cv2.FONT_HERSHEY_SIMPLEX, scale * 0.5, (0.3, 0.7, 0.32), int(scale * 1))
    return blended_insert


def get_lm(annotation, im_w, im_h):
    """Scale the five normalized landmark pairs in an annotation row to pixel coordinates."""
    lm_align = np.array([[annotation[4] * im_w, annotation[5] * im_h],
                         [annotation[6] * im_w, annotation[7] * im_h],
                         [annotation[8] * im_w, annotation[9] * im_h],
                         [annotation[10] * im_w, annotation[11] * im_h],
                         [annotation[12] * im_w, annotation[13] * im_h]],
                        dtype=np.float32)
    return lm_align