diff --git a/head_segmentation/face_parsing/.gitignore b/head_segmentation/face_parsing/.gitignore
new file mode 100644
index 0000000..0b46e1f
--- /dev/null
+++ b/head_segmentation/face_parsing/.gitignore
@@ -0,0 +1,5 @@
+*.mp4
+*.jpg
+*.png
+*.zip
+*.avi
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/LICENSE b/head_segmentation/face_parsing/LICENSE
new file mode 100644
index 0000000..bfae0b0
--- /dev/null
+++ b/head_segmentation/face_parsing/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2019 zll
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/head_segmentation/face_parsing/README.md b/head_segmentation/face_parsing/README.md
new file mode 100644
index 0000000..849d55e
--- /dev/null
+++ b/head_segmentation/face_parsing/README.md
@@ -0,0 +1,68 @@
+# face-parsing.PyTorch
+
+
+
+
+
+
+
+### Contents
+- [Training](#training)
+- [Demo](#demo)
+- [References](#references)
+
+## Training
+
+1. Prepare training data:
+ -- download [CelebAMask-HQ dataset](https://github.com/switchablenorms/CelebAMask-HQ)
+
+ -- change file path in the `prepropess_data.py` and run
+```Shell
+python prepropess_data.py
+```
+
+2. Train the model using CelebAMask-HQ dataset:
+Just run the train script:
+```
+ $ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 train.py
+```
+
+If you do not wish to train the model, you can download [our pre-trained model](https://drive.google.com/open?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812) and save it in `res/cp`.
+
+
+## Demo
+1. Evaluate the trained model using:
+```Shell
+# evaluate using GPU
+python test.py
+```
+
+## Face makeup using parsing maps
+[**face-makeup.PyTorch**](https://github.com/zllrunning/face-makeup.PyTorch)
+
+
+
+| | Hair | Lip |
+| --- | --- | --- |
+| Original Input | | |
+| Color | | |
+
+
+
+
+
+## References
+- [BiSeNet](https://github.com/CoinCheung/BiSeNet)
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/evaluate.py b/head_segmentation/face_parsing/evaluate.py
new file mode 100644
index 0000000..cb0864d
--- /dev/null
+++ b/head_segmentation/face_parsing/evaluate.py
@@ -0,0 +1,95 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+from logger import setup_logger
+from model import BiSeNet
+from face_dataset import FaceMask
+
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import torch.nn.functional as F
+import torch.distributed as dist
+
+import os
+import os.path as osp
+import logging
+import time
+import numpy as np
+from tqdm import tqdm
+import math
+from PIL import Image
+import torchvision.transforms as transforms
+import cv2
+
+def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):
+    """Blend a colorized parsing map over an image and optionally save it.
+
+    Args:
+        im: Input image; converted with ``np.array`` and treated as RGB.
+        parsing_anno: 2-D array of integer class labels.
+        stride: Scale factor applied to ``parsing_anno`` on both axes
+            (nearest-neighbour interpolation).
+        save_im: When true, the blended image is written to ``save_path``.
+        save_path: Output path used when ``save_im`` is true.
+
+    Returns:
+        None. The blended image is only written to disk.
+    """
+    # Palette indexed by class label; label 0 keeps the white background.
+    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
+                   [255, 0, 85], [255, 0, 170],
+                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
+                   [0, 255, 85], [0, 255, 170],
+                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
+                   [0, 85, 255], [0, 170, 255],
+                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
+                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
+                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]
+
+    base_rgb = np.array(im).copy().astype(np.uint8)
+    label_map = cv2.resize(parsing_anno.copy().astype(np.uint8), None,
+                           fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)
+
+    # Start from an all-white canvas and paint every label above 0.
+    height, width = label_map.shape[0], label_map.shape[1]
+    color_map = np.full((height, width, 3), 255.0)
+    for class_id in range(1, np.max(label_map) + 1):
+        color_map[label_map == class_id] = part_colors[class_id]
+    color_map = color_map.astype(np.uint8)
+
+    # The image is switched to OpenCV's BGR order before blending 40/60.
+    blended = cv2.addWeighted(cv2.cvtColor(base_rgb, cv2.COLOR_RGB2BGR), 0.4, color_map, 0.6, 0)
+
+    if save_im:
+        cv2.imwrite(save_path, blended, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
+
+def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):
+    """Run BiSeNet face parsing on every file in ``dspth`` and save overlays.
+
+    Args:
+        respth: Directory that receives one overlay image per input file
+            (created when missing).
+        dspth: Directory whose entries are all opened as images.
+        cp: Checkpoint file name, resolved relative to ``res/cp``.
+
+    The network is moved to CUDA, so a GPU is required.
+    """
+    # exist_ok makes the call safe when the directory is already there.
+    os.makedirs(respth, exist_ok=True)
+
+    n_classes = 19
+    net = BiSeNet(n_classes=n_classes)
+    net.cuda()
+    save_pth = osp.join('res/cp', cp)
+    net.load_state_dict(torch.load(save_pth))
+    net.eval()
+
+    to_tensor = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+    ])
+    with torch.no_grad():
+        for image_path in os.listdir(dspth):
+            # Force three channels: grayscale or RGBA inputs would not match
+            # the 3-channel Normalize statistics above.
+            img = Image.open(osp.join(dspth, image_path)).convert('RGB')
+            image = img.resize((512, 512), Image.BILINEAR)
+            img = to_tensor(image)
+            img = torch.unsqueeze(img, 0)
+            img = img.cuda()
+            # The first network output is the full-resolution prediction.
+            out = net(img)[0]
+            parsing = out.squeeze(0).cpu().numpy().argmax(0)
+
+            vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))
+
+
+
+
+
+
+
+# Script entry point: set up logging under ./res, then run evaluate() with its defaults.
+if __name__ == "__main__":
+ setup_logger('./res')
+ evaluate()
diff --git a/head_segmentation/face_parsing/face_dataset.py b/head_segmentation/face_parsing/face_dataset.py
new file mode 100644
index 0000000..a1ece7f
--- /dev/null
+++ b/head_segmentation/face_parsing/face_dataset.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+import torch
+from torch.utils.data import Dataset
+import torchvision.transforms as transforms
+
+import os.path as osp
+import os
+from PIL import Image
+import numpy as np
+import json
+import cv2
+
+from transform import *
+
+
+
+class FaceMask(Dataset):
+    """Image/label dataset read from ``<rootpth>/CelebA-HQ-img`` and ``<rootpth>/mask``.
+
+    Each item is a normalized image tensor plus its label map as an ``int64``
+    array with a leading singleton axis. The augmentation pipeline is applied
+    in ``'train'`` mode only.
+    """
+
+    def __init__(self, rootpth, cropsize=(640, 480), mode='train', *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert mode in ('train', 'val', 'test')
+        self.rootpth = rootpth
+        self.mode = mode
+        self.ignore_lb = 255
+
+        # Image file names; label file names are derived from these.
+        self.imgs = os.listdir(osp.join(self.rootpth, 'CelebA-HQ-img'))
+
+        # Tensor conversion followed by per-channel normalization.
+        normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
+        self.to_tensor = transforms.Compose([transforms.ToTensor(), normalize])
+
+        # Joint image/label augmentations used for training.
+        augmentations = [
+            ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
+            HorizontalFlip(),
+            RandomScale((0.75, 1.0, 1.25, 1.5, 1.75, 2.0)),
+            RandomCrop(cropsize),
+        ]
+        self.trans_train = Compose(augmentations)
+
+    def __getitem__(self, idx):
+        fname = self.imgs[idx]
+        image = Image.open(osp.join(self.rootpth, 'CelebA-HQ-img', fname))
+        image = image.resize((512, 512), Image.BILINEAR)
+        # The label file reuses the image name with its last three characters
+        # replaced by "png".
+        mask = Image.open(osp.join(self.rootpth, 'mask', fname[:-3] + 'png')).convert('P')
+        if self.mode == 'train':
+            sample = self.trans_train(dict(im=image, lb=mask))
+            image, mask = sample['im'], sample['lb']
+        image = self.to_tensor(image)
+        mask = np.array(mask).astype(np.int64)[np.newaxis, :]
+        return image, mask
+
+    def __len__(self):
+        return len(self.imgs)
+
+
+# Pre-processing script: merges the per-attribute part masks found under
+# face_sep_mask/<folder>/ into one label image per face, written to mask_path.
+# The paths below are hard-coded and have to be adjusted before running.
+if __name__ == "__main__":
+ face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'
+ face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'
+ mask_path = '/home/zll/data/CelebAMask-HQ/mask'
+ # counter: part-mask files found on disk; total: part-mask files looked for.
+ counter = 0
+ total = 0
+ # 15 folders with 2000 consecutive image ids each.
+ for i in range(15):
+ # files = os.listdir(osp.join(face_sep_mask, str(i)))
+
+ # Attribute names in label order; enumerate(atts, 1) below makes 'skin' label 1.
+ atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
+ 'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
+
+ for j in range(i*2000, (i+1)*2000):
+
+ # Label image starts as all zeros; attributes later in atts overwrite earlier ones.
+ mask = np.zeros((512, 512))
+
+ for l, att in enumerate(atts, 1):
+ total += 1
+ # Part masks are named <5-digit zero-padded id>_<attribute>.png
+ file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])
+ path = osp.join(face_sep_mask, str(i), file_name)
+
+ if os.path.exists(path):
+ counter += 1
+ sep_mask = np.array(Image.open(path).convert('P'))
+ # print(np.unique(sep_mask))
+
+ # NOTE(review): 225 is presumably the palette index the mask foreground
+ # gets after convert('P') - confirm against the data.
+ mask[sep_mask == 225] = l
+ cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
+ print(j)
+
+ print(counter, total)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/head_segmentation/face_parsing/logger.py b/head_segmentation/face_parsing/logger.py
new file mode 100644
index 0000000..d3f9ddc
--- /dev/null
+++ b/head_segmentation/face_parsing/logger.py
@@ -0,0 +1,23 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+
+import os.path as osp
+import time
+import sys
+import logging
+
+import torch.distributed as dist
+
+
+def setup_logger(logpth):
+    """Configure root logging to a timestamped file in ``logpth`` plus the console.
+
+    Args:
+        logpth: Directory for the ``BiSeNet-<timestamp>.log`` file. It is
+            created when it does not exist yet.
+
+    The level is INFO, except in an initialized distributed run where every
+    rank other than 0 logs at ERROR only.
+    """
+    import os  # local import: the module itself only imports os.path
+
+    # basicConfig opens the log file right away and fails on a missing directory.
+    if logpth:
+        os.makedirs(logpth, exist_ok=True)
+
+    logfile = 'BiSeNet-{}.log'.format(time.strftime('%Y-%m-%d-%H-%M-%S'))
+    logfile = osp.join(logpth, logfile)
+    FORMAT = '%(levelname)s %(filename)s(%(lineno)d): %(message)s'
+    log_level = logging.INFO
+    if dist.is_initialized() and dist.get_rank() != 0:
+        log_level = logging.ERROR
+    logging.basicConfig(level=log_level, format=FORMAT, filename=logfile)
+    # Mirror records to the console in addition to the file.
+    logging.root.addHandler(logging.StreamHandler())
+
+
diff --git a/head_segmentation/face_parsing/loss.py b/head_segmentation/face_parsing/loss.py
new file mode 100644
index 0000000..f8f65aa
--- /dev/null
+++ b/head_segmentation/face_parsing/loss.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+import numpy as np
+
+
+class OhemCELoss(nn.Module):
+    """Cross-entropy loss with online hard example mining.
+
+    Per-pixel losses are sorted in descending order. If the loss at position
+    ``n_min`` still exceeds ``-log(thresh)``, every loss above that threshold
+    is averaged; otherwise the ``n_min`` largest losses are averaged.
+
+    Args:
+        thresh: Probability threshold; stored as ``-log(thresh)``.
+        n_min: Minimum number of pixel losses that are kept.
+        ignore_lb: Label value excluded from the cross-entropy.
+    """
+
+    def __init__(self, thresh, n_min, ignore_lb=255, *args, **kwargs):
+        super(OhemCELoss, self).__init__()
+        thresh_t = -torch.log(torch.tensor(thresh, dtype=torch.float))
+        # Keep the threshold on CUDA when a GPU exists, but do not make a GPU
+        # a requirement just to construct the loss.
+        self.thresh = thresh_t.cuda() if torch.cuda.is_available() else thresh_t
+        self.n_min = n_min
+        self.ignore_lb = ignore_lb
+        self.criteria = nn.CrossEntropyLoss(ignore_index=ignore_lb, reduction='none')
+
+    def forward(self, logits, labels):
+        """Return the mean of the selected hard-example losses."""
+        loss = self.criteria(logits, labels).view(-1)
+        loss, _ = torch.sort(loss, descending=True)
+        # Compare on the device the losses live on.
+        thresh = self.thresh.to(loss.device)
+        # With n_min or fewer pixels there is no element at index n_min;
+        # fall through to the "keep the n_min largest" branch (i.e. keep all).
+        if loss.numel() > self.n_min and loss[self.n_min] > thresh:
+            loss = loss[loss > thresh]
+        else:
+            loss = loss[:self.n_min]
+        return torch.mean(loss)
+
+
+class SoftmaxFocalLoss(nn.Module):
+    """Focal loss over softmax probabilities.
+
+    The log-probabilities are scaled by ``(1 - p) ** gamma`` before being
+    passed to ``NLLLoss``; pixels labelled ``ignore_lb`` are ignored.
+    """
+
+    def __init__(self, gamma, ignore_lb=255, *args, **kwargs):
+        super().__init__()
+        self.gamma = gamma
+        self.nll = nn.NLLLoss(ignore_index=ignore_lb)
+
+    def forward(self, logits, labels):
+        probs = F.softmax(logits, dim=1)
+        log_probs = F.log_softmax(logits, dim=1)
+        # Down-weight classes that are already predicted with high probability.
+        modulator = torch.pow(1. - probs, self.gamma)
+        return self.nll(modulator * log_probs, labels)
+
+
+if __name__ == '__main__':
+    # Smoke test (needs CUDA): two one-layer conv nets, two OHEM losses,
+    # one backward pass through the summed loss.
+    torch.manual_seed(15)
+    hard_min = 16*20*20//16
+    criteria1 = OhemCELoss(thresh=0.7, n_min=hard_min).cuda()
+    criteria2 = OhemCELoss(thresh=0.7, n_min=hard_min).cuda()
+
+    # Build the nets one after the other so their random init order is fixed.
+    nets = []
+    for _ in range(2):
+        net = nn.Sequential(
+            nn.Conv2d(3, 19, kernel_size=3, stride=2, padding=1),
+        )
+        net.cuda()
+        net.train()
+        nets.append(net)
+    net1, net2 = nets
+
+    with torch.no_grad():
+        inten = torch.randn(16, 3, 20, 20).cuda()
+        lbs = torch.randint(0, 19, [16, 20, 20]).cuda()
+        # Second sample consists of ignored labels only.
+        lbs[1, :, :] = 255
+
+    # The stride-2 convs halve the resolution; upsample back to the input size.
+    target_size = inten.size()[2:]
+    logits1 = F.interpolate(net1(inten), target_size, mode='bilinear')
+    logits2 = F.interpolate(net2(inten), target_size, mode='bilinear')
+
+    loss = criteria1(logits1, lbs) + criteria2(logits2, lbs)
+    print(loss.detach().cpu())
+    loss.backward()
diff --git a/head_segmentation/face_parsing/main.py b/head_segmentation/face_parsing/main.py
new file mode 100644
index 0000000..27532f0
--- /dev/null
+++ b/head_segmentation/face_parsing/main.py
@@ -0,0 +1,189 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+from model import BiSeNet
+
+import torch
+
+import os
+import os.path as osp
+import numpy as np
+from PIL import Image
+import torchvision.transforms as transforms
+import cv2
+
+import sieve
+
+@sieve.Model(
+    name="face-parsing-head-segmentation",
+    python_packages=[
+        "torch",
+        "Pillow",
+        "numpy",
+        "opencv-python-headless",
+        "torchvision",
+    ],
+    system_packages=[
+        "ffmpeg",
+        "libx264-dev",
+        "zip",
+    ],
+    python_version="3.10",
+    cuda_version="11.8",
+    gpu=sieve.gpu.L4(),
+)
+class HeadSegmentationModel:
+    """Sieve model that produces BiSeNet face-parsing label maps for images and videos."""
+
+    def __setup__(self):
+        """Load the 19-class BiSeNet checkpoint onto the GPU and build the input transform."""
+        n_classes = 19
+        net = BiSeNet(n_classes=n_classes)
+        net.cuda()
+        checkpoint_path = '79999_iter.pth'
+        net.load_state_dict(torch.load(checkpoint_path))
+        net.eval()
+
+        self.net = net
+        self.to_tensor = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+        ])
+
+    def __predict__(self, file: sieve.File, debug_viz: bool = False) -> sieve.File:
+        """Segment an image or a video.
+
+        Args:
+            file: Input file; the type is decided by its extension
+                (jpg, jpeg, png, mp4, avi, mov - compared case-insensitively).
+            debug_viz: Also render a color overlay of the segmentation.
+
+        Returns:
+            For images, the label map as a PNG; for videos, ``masks.zip`` with
+            one label-map PNG per frame. With ``debug_viz`` a tuple is returned
+            whose second element is the overlay image / video.
+
+        Raises:
+            ValueError: The extension is not one of the supported ones.
+        """
+        file_path = file.path
+        # Lower-case the extension so "photo.JPG" or "clip.MP4" are accepted too.
+        file_extension = os.path.splitext(file_path)[1].lower()
+        if file_extension in ('.jpg', '.jpeg', '.png'):
+            print("This is an image file.")
+            return self._segment_image(file_path, debug_viz)
+        if file_extension in ('.mp4', '.avi', '.mov'):
+            print("This is a video file.")
+            return self._segment_video(file_path, debug_viz)
+        raise ValueError("Unsupported file format, must be one of: jpg, jpeg, png, mp4, avi, mov")
+
+    def _segment_video(self, file_path, debug_viz):
+        """Write one label-map PNG per frame, zip them, and optionally encode an overlay video."""
+        import shutil
+        import subprocess
+        import time
+
+        st = time.time()
+        masks_dir = "masks"
+
+        video = cv2.VideoCapture(file_path)
+        out_viz = None
+        try:
+            frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+            frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = video.get(cv2.CAP_PROP_FPS)
+            size = (frame_width, frame_height)
+
+            if debug_viz:
+                if os.path.exists("temp_viz.mp4"):
+                    os.remove("temp_viz.mp4")
+                out_viz = cv2.VideoWriter('temp_viz.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
+
+            # Start from an empty output directory on every call.
+            shutil.rmtree(masks_dir, ignore_errors=True)
+            os.makedirs(masks_dir)
+
+            counter = 0
+            while True:
+                ret, frame = video.read()
+                if not ret:
+                    break
+                frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                vis_parsing_anno = self.get_parsing_anno(frame_image)
+                if out_viz is not None:
+                    # vis_parsing_maps already returns a BGR frame; converting it
+                    # again would swap red and blue in the written video.
+                    out_viz.write(vis_parsing_maps(frame_image, vis_parsing_anno))
+
+                cv2.imwrite(f"{masks_dir}/{counter:06d}.png", vis_parsing_anno)
+                counter += 1
+        finally:
+            # Release the capture/writer even when a frame fails to process.
+            video.release()
+            if out_viz is not None:
+                out_viz.release()
+
+        if os.path.exists('masks.zip'):
+            os.remove('masks.zip')
+        # Produces masks.zip with entries under "masks/", like `zip -r masks.zip masks`.
+        shutil.make_archive("masks", "zip", root_dir=os.getcwd(), base_dir=masks_dir)
+
+        print("time to process: ", time.time() - st)
+
+        if debug_viz:
+            # Re-encode the mp4v preview as H.264; check=True surfaces ffmpeg
+            # failures instead of returning a path to a file that was not written.
+            subprocess.run(
+                ["ffmpeg", "-loglevel", "error", "-y", "-i", "temp_viz.mp4",
+                 "-c:v", "libx264", "-crf", "17", "segmentation_map_viz.mp4"],
+                check=True,
+            )
+            return (sieve.File(path="masks.zip"), sieve.File(path="segmentation_map_viz.mp4"))
+
+        return sieve.File(path="masks.zip")
+
+    def _segment_image(self, file_path, debug_viz):
+        """Write the label map of one image and, optionally, its color overlay."""
+        # Grayscale / RGBA inputs would not match the 3-channel Normalize.
+        image = Image.open(file_path).convert("RGB")
+        # The label map holds class ids, so it has to be stored losslessly;
+        # JPEG compression would alter the ids.
+        save_path = "save_path.png"
+        save_path_viz = "save_path_viz.jpg"
+        for stale in (save_path, save_path_viz):
+            if os.path.exists(stale):
+                os.remove(stale)
+
+        vis_parsing_anno = self.get_parsing_anno(image)
+        cv2.imwrite(save_path, vis_parsing_anno)
+
+        if debug_viz:
+            # vis_parsing_maps returns BGR, which is what cv2.imwrite expects.
+            cv2.imwrite(save_path_viz, vis_parsing_maps(image, vis_parsing_anno))
+            return (sieve.File(path=save_path), sieve.File(path=save_path_viz))
+
+        return sieve.File(path=save_path)
+
+    def get_parsing_anno(self, image):
+        """Return the per-pixel class ids (uint8, 2-D) predicted for a 3-channel image."""
+        img = self.to_tensor(image)
+        img = torch.unsqueeze(img, 0)
+        img = img.cuda()
+        with torch.no_grad():
+            # The first network output is the full-resolution prediction.
+            out = self.net(img)[0]
+        parsing = out.squeeze(0).argmax(0).cpu().numpy()
+        return parsing.astype(np.uint8)
+
+
+def vis_parsing_maps(im, vis_parsing_anno):
+    """Return the image blended 40/60 with a colorized version of the label map.
+
+    Args:
+        im: Input image; converted with ``np.array`` and treated as RGB.
+        vis_parsing_anno: 2-D array of integer class labels with the same
+            height and width as ``im``.
+
+    Returns:
+        ``uint8`` array of shape (H, W, 3). The image is converted to BGR
+        channel order before blending.
+    """
+    # Palette indexed by class label; label 0 keeps the white background.
+    part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
+                   [255, 0, 85], [255, 0, 170],
+                   [0, 255, 0], [85, 255, 0], [170, 255, 0],
+                   [0, 255, 85], [0, 255, 170],
+                   [0, 0, 255], [85, 0, 255], [170, 0, 255],
+                   [0, 85, 255], [0, 170, 255],
+                   [255, 255, 0], [255, 255, 85], [255, 255, 170],
+                   [255, 0, 255], [255, 85, 255], [255, 170, 255],
+                   [0, 255, 255], [85, 255, 255], [170, 255, 255]]
+
+    base_rgb = np.array(im).copy().astype(np.uint8)
+
+    # Start from an all-white canvas and paint every label above 0.
+    height, width = vis_parsing_anno.shape[0], vis_parsing_anno.shape[1]
+    color_map = np.full((height, width, 3), 255.0)
+    for class_id in range(1, np.max(vis_parsing_anno) + 1):
+        color_map[vis_parsing_anno == class_id] = part_colors[class_id]
+    color_map = color_map.astype(np.uint8)
+
+    return cv2.addWeighted(cv2.cvtColor(base_rgb, cv2.COLOR_RGB2BGR), 0.4, color_map, 0.6, 0)
+
+
+# Manual local run against a hard-coded, machine-specific video path.
+# NOTE(review): only __predict__ is called here; self.net is assigned in __setup__,
+# which presumably has to run first - confirm how sieve instantiates the model locally.
+if __name__ == "__main__":
+ a = HeadSegmentationModel()
+ a.__predict__(sieve.File(path="/home/abhinav_ayalur_gmail_com/examples/head_segmentation/face_parsing/hdtr.mp4"), debug_viz=False)
diff --git a/head_segmentation/face_parsing/makeup.py b/head_segmentation/face_parsing/makeup.py
new file mode 100644
index 0000000..b03f141
--- /dev/null
+++ b/head_segmentation/face_parsing/makeup.py
@@ -0,0 +1,130 @@
+import cv2
+import os
+import numpy as np
+from skimage.filters import gaussian
+
+
+def sharpen(img):
+    """Sharpen a color image by adding back 1.5x its difference to a Gaussian blur.
+
+    Args:
+        img: Image array with the color channels on the last axis and values
+            on a 0-255 scale.
+
+    Returns:
+        ``uint8`` array of the same shape, clipped to 0-255.
+    """
+    img = img * 1.0
+    try:
+        # Current scikit-image names the color-axis argument ``channel_axis``.
+        gauss_out = gaussian(img, sigma=5, channel_axis=-1)
+    except TypeError:
+        # Older releases only understand ``multichannel``.
+        gauss_out = gaussian(img, sigma=5, multichannel=True)
+
+    alpha = 1.5
+    img_out = ((img - gauss_out) * alpha + img) / 255.0
+
+    # Clamp to [0, 1] before scaling back to the 0-255 range.
+    img_out = np.clip(img_out, 0, 1) * 255
+    return np.array(img_out, dtype=np.uint8)
+
+
+def hair(image, parsing, part=17, color=[230, 50, 20]):
+    """Recolor the pixels of one parsing class towards a target color.
+
+    Args:
+        image: BGR ``uint8`` image.
+        parsing: Label map with the same height and width as ``image``.
+        part: Class id to recolor. For 12 and 13 hue and saturation are
+            replaced, for every other id only the hue; id 17 is additionally
+            sharpened.
+        color: Target color as ``[b, g, r]``.
+
+    Returns:
+        Recolored image; pixels outside ``part`` are copied from ``image``.
+    """
+    b, g, r = color  # [10, 50, 250] # [10, 250, 10]
+    tar_color = np.zeros_like(image)
+    for channel, value in enumerate((b, g, r)):
+        tar_color[:, :, channel] = value
+
+    image_hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
+    tar_hsv = cv2.cvtColor(tar_color, cv2.COLOR_BGR2HSV)
+
+    # Number of leading HSV channels taken over from the target color.
+    n_replaced = 2 if part in (12, 13) else 1
+    image_hsv[:, :, 0:n_replaced] = tar_hsv[:, :, 0:n_replaced]
+
+    changed = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2BGR)
+    if part == 17:
+        changed = sharpen(changed)
+
+    # Restore everything that does not belong to the selected class.
+    outside = parsing != part
+    changed[outside] = image[outside]
+    # changed = cv2.resize(changed, (512, 512))
+    return changed
+
+#
+# def lip(image, parsing, part=17, color=[230, 50, 20]):
+# b, g, r = color #[10, 50, 250] # [10, 250, 10]
+# tar_color = np.zeros_like(image)
+# tar_color[:, :, 0] = b
+# tar_color[:, :, 1] = g
+# tar_color[:, :, 2] = r
+#
+# image_lab = cv2.cvtColor(image, cv2.COLOR_BGR2Lab)
+# il, ia, ib = cv2.split(image_lab)
+#
+# tar_lab = cv2.cvtColor(tar_color, cv2.COLOR_BGR2Lab)
+# tl, ta, tb = cv2.split(tar_lab)
+#
+# image_lab[:, :, 0] = np.clip(il - np.mean(il) + tl, 0, 100)
+# image_lab[:, :, 1] = np.clip(ia - np.mean(ia) + ta, -127, 128)
+# image_lab[:, :, 2] = np.clip(ib - np.mean(ib) + tb, -127, 128)
+#
+#
+# changed = cv2.cvtColor(image_lab, cv2.COLOR_Lab2BGR)
+#
+# if part == 17:
+# changed = sharpen(changed)
+#
+# changed[parsing != part] = image[parsing != part]
+# # changed = cv2.resize(changed, (512, 512))
+# return changed
+
+
+if __name__ == '__main__':
+    # Demo: recolor parts of one test image using its saved parsing map.
+    # Class ids used by the parsing map:
+    # 1 face
+    # 10 nose
+    # 11 teeth
+    # 12 upper lip
+    # 13 lower lip
+    # 17 hair
+    num = 116
+    table = {
+        'hair': 17,
+        'upper_lip': 12,
+        'lower_lip': 13
+    }
+    image_path = '/home/zll/data/CelebAMask-HQ/test-img/{}.jpg'.format(num)
+    parsing_path = 'res/test_res/{}.png'.format(num)
+
+    image = cv2.imread(image_path)
+    ori = image.copy()
+    parsing = np.array(cv2.imread(parsing_path, 0))
+    # cv2.resize takes dsize as (width, height) while image.shape is
+    # (height, width, channels); passing shape[0:2] transposes the map for
+    # non-square images.
+    parsing = cv2.resize(parsing, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)
+
+    parts = [table['hair'], table['upper_lip'], table['lower_lip']]
+    # colors = [[20, 20, 200], [100, 100, 230], [100, 100, 230]]
+    # zip() stops at the shorter list, so with one color only the hair is recolored.
+    colors = [[100, 200, 100]]
+    for part, color in zip(parts, colors):
+        image = hair(image, parsing, part, color)
+    cv2.imwrite('res/makeup/116_ori.png', cv2.resize(ori, (512, 512)))
+    cv2.imwrite('res/makeup/116_2.png', cv2.resize(image, (512, 512)))
+
+    cv2.imshow('image', cv2.resize(ori, (512, 512)))
+    cv2.imshow('color', cv2.resize(image, (512, 512)))
+
+    # cv2.imshow('image', ori)
+    # cv2.imshow('color', image)
+
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/head_segmentation/face_parsing/model.py b/head_segmentation/face_parsing/model.py
new file mode 100644
index 0000000..040f41f
--- /dev/null
+++ b/head_segmentation/face_parsing/model.py
@@ -0,0 +1,283 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision
+
+from resnet import Resnet18
+# from modules.bn import InPlaceABNSync as BatchNorm2d
+
+
+class ConvBNReLU(nn.Module):
+    """Bias-free ``Conv2d`` followed by ``BatchNorm2d`` and ReLU."""
+
+    def __init__(self, in_chan, out_chan, ks=3, stride=1, padding=1, *args, **kwargs):
+        super().__init__()
+        self.conv = nn.Conv2d(in_chan, out_chan, kernel_size=ks, stride=stride,
+                              padding=padding, bias=False)
+        self.bn = nn.BatchNorm2d(out_chan)
+        self.init_weight()
+
+    def forward(self, x):
+        return F.relu(self.bn(self.conv(x)))
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+class BiSeNetOutput(nn.Module):
+    """Output head: 3x3 ``ConvBNReLU`` followed by a bias-free 1x1 conv to ``n_classes`` channels."""
+
+    def __init__(self, in_chan, mid_chan, n_classes, *args, **kwargs):
+        super().__init__()
+        self.conv = ConvBNReLU(in_chan, mid_chan, ks=3, stride=1, padding=1)
+        self.conv_out = nn.Conv2d(mid_chan, n_classes, kernel_size=1, bias=False)
+        self.init_weight()
+
+    def forward(self, x):
+        return self.conv_out(self.conv(x))
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+    def get_params(self):
+        """Split parameters into (weights of conv/linear layers, biases and batch-norm parameters)."""
+        wd_params, nowd_params = [], []
+        for module in self.modules():
+            if isinstance(module, (nn.Linear, nn.Conv2d)):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params.extend(module.parameters())
+        return wd_params, nowd_params
+
+
+class AttentionRefinementModule(nn.Module):
+    """Rescales conv features with a per-channel gate computed from their global average."""
+
+    def __init__(self, in_chan, out_chan, *args, **kwargs):
+        super().__init__()
+        self.conv = ConvBNReLU(in_chan, out_chan, ks=3, stride=1, padding=1)
+        self.conv_atten = nn.Conv2d(out_chan, out_chan, kernel_size=1, bias=False)
+        self.bn_atten = nn.BatchNorm2d(out_chan)
+        self.sigmoid_atten = nn.Sigmoid()
+        self.init_weight()
+
+    def forward(self, x):
+        feat = self.conv(x)
+        # Pool over the full spatial extent -> one value per channel.
+        pooled = F.avg_pool2d(feat, feat.size()[2:])
+        gate = self.sigmoid_atten(self.bn_atten(self.conv_atten(pooled)))
+        return feat * gate
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+
+class ContextPath(nn.Module):
+    """ResNet-18 backbone whose last two feature maps are refined and merged top-down."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self.resnet = Resnet18()
+        self.arm16 = AttentionRefinementModule(256, 128)
+        self.arm32 = AttentionRefinementModule(512, 128)
+        self.conv_head32 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+        self.conv_head16 = ConvBNReLU(128, 128, ks=3, stride=1, padding=1)
+        self.conv_avg = ConvBNReLU(512, 128, ks=1, stride=1, padding=0)
+
+        self.init_weight()
+
+    def forward(self, x):
+        feat8, feat16, feat32 = self.resnet(x)
+        size8 = tuple(feat8.size()[2:])
+        size16 = tuple(feat16.size()[2:])
+        size32 = tuple(feat32.size()[2:])
+
+        # Global context: pool feat32 to 1x1, project, broadcast back to its size.
+        context = self.conv_avg(F.avg_pool2d(feat32, feat32.size()[2:]))
+        context_up = F.interpolate(context, size32, mode='nearest')
+
+        # Coarsest level: refined features plus global context, upsampled to feat16's size.
+        merged32 = self.arm32(feat32) + context_up
+        feat32_up = self.conv_head32(F.interpolate(merged32, size16, mode='nearest'))
+
+        # Middle level: refined features plus the upsampled coarse level, brought to feat8's size.
+        merged16 = self.arm16(feat16) + feat32_up
+        feat16_up = self.conv_head16(F.interpolate(merged16, size8, mode='nearest'))
+
+        return feat8, feat16_up, feat32_up  # x8, x8, x16
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+    def get_params(self):
+        """Split parameters into (weights of conv/linear layers, biases and batch-norm parameters)."""
+        wd_params, nowd_params = [], []
+        for module in self.modules():
+            if isinstance(module, nn.Linear) or isinstance(module, nn.Conv2d):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params.extend(module.parameters())
+        return wd_params, nowd_params
+
+
+### This is not used, since I replace this with the resnet feature with the same size
+class SpatialPath(nn.Module):
+    """Three stride-2 ``ConvBNReLU`` layers followed by a 1x1 projection to 128 channels."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self.conv1 = ConvBNReLU(3, 64, ks=7, stride=2, padding=3)
+        self.conv2 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
+        self.conv3 = ConvBNReLU(64, 64, ks=3, stride=2, padding=1)
+        self.conv_out = ConvBNReLU(64, 128, ks=1, stride=1, padding=0)
+        self.init_weight()
+
+    def forward(self, x):
+        # Apply the four stages in sequence.
+        for stage in (self.conv1, self.conv2, self.conv3, self.conv_out):
+            x = stage(x)
+        return x
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+    def get_params(self):
+        """Split parameters into (weights of conv/linear layers, biases and batch-norm parameters)."""
+        wd_params, nowd_params = [], []
+        for module in self.modules():
+            if isinstance(module, (nn.Linear, nn.Conv2d)):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params.extend(module.parameters())
+        return wd_params, nowd_params
+
+
+class FeatureFusionModule(nn.Module):
+    """Fuses two feature maps by concatenation, a 1x1 conv block and channel re-weighting."""
+
+    def __init__(self, in_chan, out_chan, *args, **kwargs):
+        super().__init__()
+        squeezed = out_chan//4
+        self.convblk = ConvBNReLU(in_chan, out_chan, ks=1, stride=1, padding=0)
+        self.conv1 = nn.Conv2d(out_chan, squeezed, kernel_size=1, stride=1, padding=0, bias=False)
+        self.conv2 = nn.Conv2d(squeezed, out_chan, kernel_size=1, stride=1, padding=0, bias=False)
+        self.relu = nn.ReLU(inplace=True)
+        self.sigmoid = nn.Sigmoid()
+        self.init_weight()
+
+    def forward(self, fsp, fcp):
+        feat = self.convblk(torch.cat([fsp, fcp], dim=1))
+        # Channel gate: global average -> bottleneck 1x1 convs -> sigmoid.
+        gate = F.avg_pool2d(feat, feat.size()[2:])
+        gate = self.relu(self.conv1(gate))
+        gate = self.sigmoid(self.conv2(gate))
+        # Gated features are added back onto the ungated ones.
+        return feat * gate + feat
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+    def get_params(self):
+        """Split parameters into (weights of conv/linear layers, biases and batch-norm parameters)."""
+        wd_params, nowd_params = [], []
+        for module in self.modules():
+            if isinstance(module, (nn.Linear, nn.Conv2d)):
+                wd_params.append(module.weight)
+                if module.bias is not None:
+                    nowd_params.append(module.bias)
+            elif isinstance(module, nn.BatchNorm2d):
+                nowd_params.extend(module.parameters())
+        return wd_params, nowd_params
+
+
+class BiSeNet(nn.Module):
+    """Segmentation network: context path, feature fusion and three output heads.
+
+    ``forward`` returns three logit maps, all upsampled to the input
+    resolution: the fused prediction plus two predictions taken directly from
+    the context-path outputs.
+    """
+
+    def __init__(self, n_classes, *args, **kwargs):
+        super().__init__()
+        self.cp = ContextPath()
+        ## here self.sp is deleted
+        self.ffm = FeatureFusionModule(256, 256)
+        self.conv_out = BiSeNetOutput(256, 256, n_classes)
+        self.conv_out16 = BiSeNetOutput(128, 64, n_classes)
+        self.conv_out32 = BiSeNetOutput(128, 64, n_classes)
+        self.init_weight()
+
+    def forward(self, x):
+        in_size = tuple(x.size()[2:])
+
+        def upsample(logits):
+            return F.interpolate(logits, in_size, mode='bilinear', align_corners=True)
+
+        # The backbone feature returned first stands in for a separate spatial path.
+        feat_res8, feat_cp8, feat_cp16 = self.cp(x)
+        feat_fuse = self.ffm(feat_res8, feat_cp8)
+
+        feat_out = self.conv_out(feat_fuse)
+        feat_out16 = self.conv_out16(feat_cp8)
+        feat_out32 = self.conv_out32(feat_cp16)
+
+        return upsample(feat_out), upsample(feat_out16), upsample(feat_out32)
+
+    def init_weight(self):
+        """Kaiming-normal init (a=1) for direct ``Conv2d`` children; biases are zeroed."""
+        for layer in self.children():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            nn.init.kaiming_normal_(layer.weight, a=1)
+            if layer.bias is not None:
+                nn.init.constant_(layer.bias, 0)
+
+    def get_params(self):
+        """Collect the children's parameter groups.
+
+        Returns:
+            ``(wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params)``;
+            the ``lr_mul_*`` lists hold the parameters of the fusion module
+            and the output heads, the other two those of every other child.
+        """
+        wd_params, nowd_params = [], []
+        lr_mul_wd_params, lr_mul_nowd_params = [], []
+        for child in self.children():
+            child_wd, child_nowd = child.get_params()
+            if isinstance(child, (FeatureFusionModule, BiSeNetOutput)):
+                lr_mul_wd_params.extend(child_wd)
+                lr_mul_nowd_params.extend(child_nowd)
+            else:
+                wd_params.extend(child_wd)
+                nowd_params.extend(child_nowd)
+        return wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params
+
+
+# Smoke test (moves the net to CUDA): forward a random batch through a
+# 19-class BiSeNet, print the main output's shape and exercise get_params().
+if __name__ == "__main__":
+ net = BiSeNet(19)
+ net.cuda()
+ net.eval()
+ in_ten = torch.randn(16, 3, 640, 480).cuda()
+ out, out16, out32 = net(in_ten)
+ print(out.shape)
+
+ net.get_params()
diff --git a/head_segmentation/face_parsing/modules/__init__.py b/head_segmentation/face_parsing/modules/__init__.py
new file mode 100644
index 0000000..8a098de
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/__init__.py
@@ -0,0 +1,5 @@
+from .bn import ABN, InPlaceABN, InPlaceABNSync
+from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
+from .misc import GlobalAvgPool2d, SingleGPU
+from .residual import IdentityResidualBlock
+from .dense import DenseModule
diff --git a/head_segmentation/face_parsing/modules/bn.py b/head_segmentation/face_parsing/modules/bn.py
new file mode 100644
index 0000000..cd3928b
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/bn.py
@@ -0,0 +1,130 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as functional
+
+try:
+ from queue import Queue
+except ImportError:
+ from Queue import Queue
+
+from .functions import *
+
+
+class ABN(nn.Module):
+ """Activated Batch Normalization
+
+ This gathers a `BatchNorm2d` and an activation function in a single module
+ """
+
+ def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
+ """Creates an Activated Batch Normalization module
+
+ Parameters
+ ----------
+ num_features : int
+ Number of feature channels in the input and output.
+ eps : float
+ Small constant to prevent numerical issues.
+ momentum : float
+ Momentum factor applied when updating the running statistics.
+ affine : bool
+ If `True` apply learned scale and shift transformation after normalization.
+ activation : str
+ Name of the activation function, one of: `relu`, `leaky_relu`, `elu` or `none`.
+ slope : float
+ Negative slope for the `leaky_relu` activation.
+ """
+ super(ABN, self).__init__()
+ self.num_features = num_features
+ self.affine = affine
+ self.eps = eps
+ self.momentum = momentum
+ self.activation = activation
+ self.slope = slope
+ if self.affine:
+ self.weight = nn.Parameter(torch.ones(num_features))
+ self.bias = nn.Parameter(torch.zeros(num_features))
+ else:
+ self.register_parameter('weight', None)  # keep the attribute defined (as None) when not affine
+ self.register_parameter('bias', None)
+ self.register_buffer('running_mean', torch.zeros(num_features))  # buffers: stored with the module but not trainable
+ self.register_buffer('running_var', torch.ones(num_features))
+ self.reset_parameters()
+
+ def reset_parameters(self):  # restore running stats to (0, 1) and, if affine, weight/bias to (1, 0)
+ nn.init.constant_(self.running_mean, 0)
+ nn.init.constant_(self.running_var, 1)
+ if self.affine:
+ nn.init.constant_(self.weight, 1)
+ nn.init.constant_(self.bias, 0)
+
+ def forward(self, x):  # batch norm followed by the configured activation
+ x = functional.batch_norm(x, self.running_mean, self.running_var, self.weight, self.bias,
+ self.training, self.momentum, self.eps)
+
+ if self.activation == ACT_RELU:
+ return functional.relu(x, inplace=True)
+ elif self.activation == ACT_LEAKY_RELU:
+ return functional.leaky_relu(x, negative_slope=self.slope, inplace=True)
+ elif self.activation == ACT_ELU:
+ return functional.elu(x, inplace=True)
+ else:  # ACT_NONE or any unrecognised name: no activation is applied
+ return x
+
+ def __repr__(self):
+ rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
+ ' affine={affine}, activation={activation}'
+ if self.activation == "leaky_relu":  # slope is only printed for leaky_relu
+ rep += ', slope={slope})'
+ else:
+ rep += ')'
+ return rep.format(name=self.__class__.__name__, **self.__dict__)  # format fields are filled from the instance attributes
+
+
+class InPlaceABN(ABN):
+ """InPlace Activated Batch Normalization"""
+
+ def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, activation="leaky_relu", slope=0.01):
+ """Creates an InPlace Activated Batch Normalization module
+
+ Parameters
+ ----------
+ num_features : int
+ Number of feature channels in the input and output.
+ eps : float
+ Small constant to prevent numerical issues.
+ momentum : float
+ Momentum factor applied when updating the running statistics.
+ affine : bool
+ If `True` apply learned scale and shift transformation after normalization.
+ activation : str
+ Name of the activation functions, one of: `leaky_relu`, `elu` or `none`.
+ slope : float
+ Negative slope for the `leaky_relu` activation.
+ """
+ super(InPlaceABN, self).__init__(num_features, eps, momentum, affine, activation, slope)  # same state as ABN; only forward differs
+
+ def forward(self, x):  # delegates to the inplace_abn autograd function imported from .functions
+ return inplace_abn(x, self.weight, self.bias, self.running_mean, self.running_var,
+ self.training, self.momentum, self.eps, self.activation, self.slope)
+
+
+class InPlaceABNSync(ABN):
+ """InPlace Activated Batch Normalization with cross-GPU synchronization
+ This assumes that it will be replicated across GPUs using the same mechanism as in `nn.DistributedDataParallel`.
+ """
+
+ def forward(self, x):  # no __init__ here: construction is inherited from ABN unchanged
+ return inplace_abn_sync(x, self.weight, self.bias, self.running_mean, self.running_var,
+ self.training, self.momentum, self.eps, self.activation, self.slope)  # equal_batches is not passed, so the function's default applies
+
+ def __repr__(self):  # same body as ABN.__repr__
+ rep = '{name}({num_features}, eps={eps}, momentum={momentum},' \
+ ' affine={affine}, activation={activation}'
+ if self.activation == "leaky_relu":
+ rep += ', slope={slope})'
+ else:
+ rep += ')'
+ return rep.format(name=self.__class__.__name__, **self.__dict__)
+
+
diff --git a/head_segmentation/face_parsing/modules/deeplab.py b/head_segmentation/face_parsing/modules/deeplab.py
new file mode 100644
index 0000000..fd25b78
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/deeplab.py
@@ -0,0 +1,84 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as functional
+
+from models._util import try_index
+from .bn import ABN
+
+
+class DeeplabV3(nn.Module):  # head with four parallel (dilated) conv branches plus a global-pooling branch
+ def __init__(self,
+ in_channels,
+ out_channels,
+ hidden_channels=256,
+ dilations=(12, 24, 36),
+ norm_act=ABN,
+ pooling_size=None):
+ super(DeeplabV3, self).__init__()
+ self.pooling_size = pooling_size  # None selects true global pooling in every mode
+
+ self.map_convs = nn.ModuleList([  # one 1x1 conv and three dilated 3x3 convs; padding == dilation keeps the spatial size
+ nn.Conv2d(in_channels, hidden_channels, 1, bias=False),
+ nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[0], padding=dilations[0]),
+ nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[1], padding=dilations[1]),
+ nn.Conv2d(in_channels, hidden_channels, 3, bias=False, dilation=dilations[2], padding=dilations[2])
+ ])
+ self.map_bn = norm_act(hidden_channels * 4)  # applied to the channel-wise concatenation of the four branches
+
+ self.global_pooling_conv = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
+ self.global_pooling_bn = norm_act(hidden_channels)
+
+ self.red_conv = nn.Conv2d(hidden_channels * 4, out_channels, 1, bias=False)
+ self.pool_red_conv = nn.Conv2d(hidden_channels, out_channels, 1, bias=False)
+ self.red_bn = norm_act(out_channels)
+
+ self.reset_parameters(self.map_bn.activation, self.map_bn.slope)  # norm_act instances must expose .activation and .slope
+
+ def reset_parameters(self, activation, slope):  # Xavier-init every conv, reset every ABN to weight 1 / bias 0
+ gain = nn.init.calculate_gain(activation, slope)
+ for m in self.modules():  # modules() walks the whole subtree, including this module
+ if isinstance(m, nn.Conv2d):
+ nn.init.xavier_normal_(m.weight.data, gain)
+ if hasattr(m, "bias") and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, ABN):
+ if hasattr(m, "weight") and m.weight is not None:
+ nn.init.constant_(m.weight, 1)
+ if hasattr(m, "bias") and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, x):
+ # Map convolutions
+ out = torch.cat([m(x) for m in self.map_convs], dim=1)
+ out = self.map_bn(out)
+ out = self.red_conv(out)
+
+ # Global pooling
+ pool = self._global_pooling(x)
+ pool = self.global_pooling_conv(pool)
+ pool = self.global_pooling_bn(pool)
+ pool = self.pool_red_conv(pool)
+ if self.training or self.pooling_size is None:  # in this case the pooled map is 1x1, so tile it to x's spatial size
+ pool = pool.repeat(1, 1, x.size(2), x.size(3))
+
+ out += pool  # in-place sum of the two branches
+ out = self.red_bn(out)
+ return out
+
+ def _global_pooling(self, x):  # 1x1 global mean, or (eval with pooling_size set) a sliding-window mean padded back to x's size
+ if self.training or self.pooling_size is None:
+ pool = x.view(x.size(0), x.size(1), -1).mean(dim=-1)
+ pool = pool.view(x.size(0), x.size(1), 1, 1)
+ else:
+ pooling_size = (min(try_index(self.pooling_size, 0), x.shape[2]),  # try_index comes from models._util; presumably indexes a tuple or passes a scalar through -- confirm
+ min(try_index(self.pooling_size, 1), x.shape[3]))
+ padding = (  # order is (left, right, top, bottom); each pair sums to kernel - 1, undoing the stride-1 pooling shrink
+ (pooling_size[1] - 1) // 2,
+ (pooling_size[1] - 1) // 2 if pooling_size[1] % 2 == 1 else (pooling_size[1] - 1) // 2 + 1,
+ (pooling_size[0] - 1) // 2,
+ (pooling_size[0] - 1) // 2 if pooling_size[0] % 2 == 1 else (pooling_size[0] - 1) // 2 + 1
+ )
+
+ pool = functional.avg_pool2d(x, pooling_size, stride=1)
+ pool = functional.pad(pool, pad=padding, mode="replicate")
+ return pool
diff --git a/head_segmentation/face_parsing/modules/dense.py b/head_segmentation/face_parsing/modules/dense.py
new file mode 100644
index 0000000..9638d6e
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/dense.py
@@ -0,0 +1,42 @@
+from collections import OrderedDict
+
+import torch
+import torch.nn as nn
+
+from .bn import ABN
+
+
+class DenseModule(nn.Module):  # stack of bottleneck layers whose outputs are concatenated with the input
+ def __init__(self, in_channels, growth, layers, bottleneck_factor=4, norm_act=ABN, dilation=1):
+ super(DenseModule, self).__init__()
+ self.in_channels = in_channels  # keeps the original input width; the local in_channels below is what grows
+ self.growth = growth
+ self.layers = layers
+
+ self.convs1 = nn.ModuleList()
+ self.convs3 = nn.ModuleList()
+ for i in range(self.layers):
+ self.convs1.append(nn.Sequential(OrderedDict([  # norm/activation then 1x1 conv to growth * bottleneck_factor channels
+ ("bn", norm_act(in_channels)),
+ ("conv", nn.Conv2d(in_channels, self.growth * bottleneck_factor, 1, bias=False))
+ ])))
+ self.convs3.append(nn.Sequential(OrderedDict([  # norm/activation then 3x3 conv producing `growth` new channels
+ ("bn", norm_act(self.growth * bottleneck_factor)),
+ ("conv", nn.Conv2d(self.growth * bottleneck_factor, self.growth, 3, padding=dilation, bias=False,
+ dilation=dilation))
+ ])))
+ in_channels += self.growth  # input width of the following layer, matching the concatenation done in forward
+
+ @property
+ def out_channels(self):  # channel count of the tensor returned by forward
+ return self.in_channels + self.growth * self.layers
+
+ def forward(self, x):
+ inputs = [x]
+ for i in range(self.layers):
+ x = torch.cat(inputs, dim=1)  # each layer sees the input plus all earlier layer outputs
+ x = self.convs1[i](x)
+ x = self.convs3[i](x)
+ inputs += [x]
+
+ return torch.cat(inputs, dim=1)
diff --git a/head_segmentation/face_parsing/modules/functions.py b/head_segmentation/face_parsing/modules/functions.py
new file mode 100644
index 0000000..093615f
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/functions.py
@@ -0,0 +1,234 @@
+from os import path
+import torch
+import torch.distributed as dist
+import torch.autograd as autograd
+import torch.cuda.comm as comm
+from torch.autograd.function import once_differentiable
+from torch.utils.cpp_extension import load
+
+_src_path = path.join(path.dirname(path.abspath(__file__)), "src")  # "src" directory located next to this file
+_backend = load(name="inplace_abn",  # JIT-builds and loads the C++/CUDA extension when this module is imported
+ extra_cflags=["-O3"],
+ sources=[path.join(_src_path, f) for f in [
+ "inplace_abn.cpp",
+ "inplace_abn_cpu.cpp",
+ "inplace_abn_cuda.cu",
+ "inplace_abn_cuda_half.cu"
+ ]],
+ extra_cuda_cflags=["--expt-extended-lambda"])
+
+# Activation names
+ACT_RELU = "relu"  # not handled by _act_forward/_act_backward below
+ACT_LEAKY_RELU = "leaky_relu"
+ACT_ELU = "elu"
+ACT_NONE = "none"
+
+
+def _check(fn, *args, **kwargs):  # call fn(*args, **kwargs) and raise RuntimeError if the result is falsy
+ success = fn(*args, **kwargs)
+ if not success:
+ raise RuntimeError("CUDA Error encountered in {}".format(fn))
+
+
+def _broadcast_shape(x):  # list with x's size at dim 1 and 1 at every other dim
+ out_size = []
+ for i, s in enumerate(x.size()):
+ if i != 1:
+ out_size.append(1)
+ else:
+ out_size.append(s)
+ return out_size
+
+
+def _reduce(x):  # sum over every dim except dim 1
+ if len(x.size()) == 2:
+ return x.sum(dim=0)
+ else:
+ n, c = x.size()[0:2]
+ return x.contiguous().view((n, c, -1)).sum(2).sum(0)  # flatten trailing dims, then sum them and dim 0
+
+
+def _count_samples(x):  # product of all dim sizes except dim 1
+ count = 1
+ for i, s in enumerate(x.size()):
+ if i != 1:
+ count *= s
+ return count
+
+
+def _act_forward(ctx, x):  # apply ctx.activation to x through the backend; returns nothing
+ if ctx.activation == ACT_LEAKY_RELU:
+ _backend.leaky_relu_forward(x, ctx.slope)
+ elif ctx.activation == ACT_ELU:
+ _backend.elu_forward(x)
+ elif ctx.activation == ACT_NONE:  # any other name (ACT_RELU included) also leaves x untouched
+ pass
+
+
+def _act_backward(ctx, x, dx):  # backend activation backward; the backend binding describes it as "backward computation and inversion"
+ if ctx.activation == ACT_LEAKY_RELU:
+ _backend.leaky_relu_backward(x, dx, ctx.slope)
+ elif ctx.activation == ACT_ELU:
+ _backend.elu_backward(x, dx)
+ elif ctx.activation == ACT_NONE:  # nothing to undo
+ pass
+
+
+class InPlaceABN(autograd.Function):  # in-place batch norm + activation; backward works from the saved output
+ @staticmethod
+ def forward(ctx, x, weight, bias, running_mean, running_var,
+ training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
+ # Save context
+ ctx.training = training
+ ctx.momentum = momentum
+ ctx.eps = eps
+ ctx.activation = activation
+ ctx.slope = slope
+ ctx.affine = weight is not None and bias is not None  # affine only when both tensors are given
+
+ # Prepare inputs
+ count = _count_samples(x)  # number of values per channel
+ x = x.contiguous()
+ weight = weight.contiguous() if ctx.affine else x.new_empty(0)  # empty placeholder tensors when not affine
+ bias = bias.contiguous() if ctx.affine else x.new_empty(0)
+
+ if ctx.training:
+ mean, var = _backend.mean_var(x)
+
+ # Update running stats
+ running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
+ running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))  # count / (count - 1) rescales var; NOTE(review): count == 1 makes the divisor zero
+
+ # Mark in-place modified tensors
+ ctx.mark_dirty(x, running_mean, running_var)
+ else:
+ mean, var = running_mean.contiguous(), running_var.contiguous()  # eval: normalize with the running statistics
+ ctx.mark_dirty(x)
+
+ # BN forward + activation
+ _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
+ _act_forward(ctx, x)
+
+ # Output
+ ctx.var = var
+ ctx.save_for_backward(x, var, weight, bias)  # x is saved after the backend calls above, i.e. as the output
+ return x
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, dz):
+ z, var, weight, bias = ctx.saved_tensors
+ dz = dz.contiguous()
+
+ # Undo activation
+ _act_backward(ctx, z, dz)
+
+ if ctx.training:
+ edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
+ else:
+ # TODO: implement simplified CUDA backward for inference mode
+ edz = dz.new_zeros(dz.size(1))  # eval mode: both reduction terms are set to zero
+ eydz = dz.new_zeros(dz.size(1))
+
+ dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
+ dweight = eydz * weight.sign() if ctx.affine else None  # sign(): the CPU forward scales by abs(weight) + eps
+ dbias = edz if ctx.affine else None
+
+ return dx, dweight, dbias, None, None, None, None, None, None, None  # one entry per forward input after ctx (10 in total)
+
+class InPlaceABNSync(autograd.Function):
+ @classmethod
+ def forward(cls, ctx, x, weight, bias, running_mean, running_var,
+ training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
+ # Save context
+ ctx.training = training
+ ctx.momentum = momentum
+ ctx.eps = eps
+ ctx.activation = activation
+ ctx.slope = slope
+ ctx.affine = weight is not None and bias is not None
+
+ # Prepare inputs
+ ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1
+
+ #count = _count_samples(x)
+ batch_size = x.new_tensor([x.shape[0]],dtype=torch.long)
+
+ x = x.contiguous()
+ weight = weight.contiguous() if ctx.affine else x.new_empty(0)
+ bias = bias.contiguous() if ctx.affine else x.new_empty(0)
+
+ if ctx.training:
+ mean, var = _backend.mean_var(x)
+ if ctx.world_size>1:
+ # get global batch size
+ if equal_batches:
+ batch_size *= ctx.world_size
+ else:
+ dist.all_reduce(batch_size, dist.ReduceOp.SUM)
+
+ ctx.factor = x.shape[0]/float(batch_size.item())
+
+ mean_all = mean.clone() * ctx.factor
+ dist.all_reduce(mean_all, dist.ReduceOp.SUM)
+
+ var_all = (var + (mean - mean_all) ** 2) * ctx.factor
+ dist.all_reduce(var_all, dist.ReduceOp.SUM)
+
+ mean = mean_all
+ var = var_all
+
+ # Update running stats
+ running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
+ count = batch_size.item() * x.view(x.shape[0],x.shape[1],-1).shape[-1]
+ running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))
+
+ # Mark in-place modified tensors
+ ctx.mark_dirty(x, running_mean, running_var)
+ else:
+ mean, var = running_mean.contiguous(), running_var.contiguous()
+ ctx.mark_dirty(x)
+
+ # BN forward + activation
+ _backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
+ _act_forward(ctx, x)
+
+ # Output
+ ctx.var = var
+ ctx.save_for_backward(x, var, weight, bias)
+ return x
+
+ @staticmethod
+ @once_differentiable
+ def backward(ctx, dz):
+ z, var, weight, bias = ctx.saved_tensors
+ dz = dz.contiguous()
+
+ # Undo activation
+ _act_backward(ctx, z, dz)
+
+ if ctx.training:
+ edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
+ edz_local = edz.clone()
+ eydz_local = eydz.clone()
+
+ if ctx.world_size>1:
+ edz *= ctx.factor
+ dist.all_reduce(edz, dist.ReduceOp.SUM)
+
+ eydz *= ctx.factor
+ dist.all_reduce(eydz, dist.ReduceOp.SUM)
+ else:
+ edz_local = edz = dz.new_zeros(dz.size(1))
+ eydz_local = eydz = dz.new_zeros(dz.size(1))
+
+ dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
+ dweight = eydz_local * weight.sign() if ctx.affine else None
+ dbias = edz_local if ctx.affine else None
+
+ return dx, dweight, dbias, None, None, None, None, None, None, None
+
+inplace_abn = InPlaceABN.apply  # functional entry points; call these rather than the Function classes
+inplace_abn_sync = InPlaceABNSync.apply
+
+__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]  # the only names exported by `from .functions import *`
diff --git a/head_segmentation/face_parsing/modules/misc.py b/head_segmentation/face_parsing/modules/misc.py
new file mode 100644
index 0000000..3c50b69
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/misc.py
@@ -0,0 +1,21 @@
+import torch.nn as nn
+import torch
+import torch.distributed as dist
+
+class GlobalAvgPool2d(nn.Module):
+ def __init__(self):
+ """Global average pooling over the input's spatial dimensions"""
+ super(GlobalAvgPool2d, self).__init__()
+
+ def forward(self, inputs):  # result has shape (size(0), size(1)): trailing dims are flattened and averaged
+ in_size = inputs.size()
+ return inputs.view((in_size[0], in_size[1], -1)).mean(dim=2)
+
+class SingleGPU(nn.Module):  # wrapper that moves the input to the CUDA device before calling the wrapped module
+ def __init__(self, module):
+ super(SingleGPU, self).__init__()
+ self.module=module
+
+ def forward(self, input):
+ return self.module(input.cuda(non_blocking=True))  # the transfer is requested as non-blocking
+
diff --git a/head_segmentation/face_parsing/modules/residual.py b/head_segmentation/face_parsing/modules/residual.py
new file mode 100644
index 0000000..b7d51ad
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/residual.py
@@ -0,0 +1,88 @@
+from collections import OrderedDict
+
+import torch.nn as nn
+
+from .bn import ABN
+
+
+class IdentityResidualBlock(nn.Module):
+ def __init__(self,
+ in_channels,
+ channels,
+ stride=1,
+ dilation=1,
+ groups=1,
+ norm_act=ABN,
+ dropout=None):
+ """Configurable identity-mapping residual block
+
+ Parameters
+ ----------
+ in_channels : int
+ Number of input channels.
+ channels : list of int
+ Number of channels in the internal feature maps. Can either have two or three elements: if two construct
+ a residual block with two `3 x 3` convolutions, otherwise construct a bottleneck block with `1 x 1`, then
+ `3 x 3` then `1 x 1` convolutions.
+ stride : int
+ Stride of the first convolution (`3 x 3`, or `1 x 1` in bottleneck blocks) and of the projection shortcut.
+ dilation : int
+ Dilation to apply to the `3 x 3` convolutions.
+ groups : int
+ Number of convolution groups. This is used to create ResNeXt-style blocks and is only compatible with
+ bottleneck blocks.
+ norm_act : callable
+ Function to create normalization / activation Module.
+ dropout: callable
+ Function to create Dropout Module.
+ """
+ super(IdentityResidualBlock, self).__init__()
+
+ # Check parameters for inconsistencies
+ if len(channels) != 2 and len(channels) != 3:
+ raise ValueError("channels must contain either two or three values")
+ if len(channels) == 2 and groups != 1:
+ raise ValueError("groups > 1 are only valid if len(channels) == 3")
+
+ is_bottleneck = len(channels) == 3
+ need_proj_conv = stride != 1 or in_channels != channels[-1]  # the shortcut needs a conv whenever the shape changes
+
+ self.bn1 = norm_act(in_channels)  # applied to the input before the conv stack
+ if not is_bottleneck:
+ layers = [
+ ("conv1", nn.Conv2d(in_channels, channels[0], 3, stride=stride, padding=dilation, bias=False,
+ dilation=dilation)),
+ ("bn2", norm_act(channels[0])),
+ ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
+ dilation=dilation))
+ ]
+ if dropout is not None:
+ layers = layers[0:2] + [("dropout", dropout())] + layers[2:]  # dropout goes just before the last conv
+ else:
+ layers = [
+ ("conv1", nn.Conv2d(in_channels, channels[0], 1, stride=stride, padding=0, bias=False)),
+ ("bn2", norm_act(channels[0])),
+ ("conv2", nn.Conv2d(channels[0], channels[1], 3, stride=1, padding=dilation, bias=False,
+ groups=groups, dilation=dilation)),
+ ("bn3", norm_act(channels[1])),
+ ("conv3", nn.Conv2d(channels[1], channels[2], 1, stride=1, padding=0, bias=False))
+ ]
+ if dropout is not None:
+ layers = layers[0:4] + [("dropout", dropout())] + layers[4:]  # dropout goes just before the last conv
+ self.convs = nn.Sequential(OrderedDict(layers))
+
+ if need_proj_conv:
+ self.proj_conv = nn.Conv2d(in_channels, channels[-1], 1, stride=stride, padding=0, bias=False)  # attribute exists only when a projection is needed
+
+ def forward(self, x):
+ if hasattr(self, "proj_conv"):
+ bn1 = self.bn1(x)
+ shortcut = self.proj_conv(bn1)  # projection shortcut is taken from the normalized input
+ else:
+ shortcut = x.clone()  # copy made before bn1 runs; presumably because norm_act may modify x in place -- confirm
+ bn1 = self.bn1(x)
+
+ out = self.convs(bn1)
+ out.add_(shortcut)  # in-place residual addition
+
+ return out
diff --git a/head_segmentation/face_parsing/modules/src/checks.h b/head_segmentation/face_parsing/modules/src/checks.h
new file mode 100644
index 0000000..e761a6f
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/checks.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include
+
+// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
+#ifndef AT_CHECK
+#define AT_CHECK AT_ASSERT
+#endif
+
+#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")  // fails unless x is a CUDA tensor
+#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")  // fails if x is a CUDA tensor
+#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")  // fails unless x is contiguous
+
+#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)  // expands to two statements: do not use as the body of an unbraced if/else
+#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)  // expands to two statements: do not use as the body of an unbraced if/else
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn.cpp b/head_segmentation/face_parsing/modules/src/inplace_abn.cpp
new file mode 100644
index 0000000..0a6b112
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/inplace_abn.cpp
@@ -0,0 +1,95 @@
+#include
+
+#include
+
+#include "inplace_abn.h"
+
+std::vector mean_var(at::Tensor x) {  // dispatch on x: CUDA half, other CUDA dtype, or CPU
+ if (x.is_cuda()) {
+ if (x.type().scalarType() == at::ScalarType::Half) {
+ return mean_var_cuda_h(x);
+ } else {
+ return mean_var_cuda(x);
+ }
+ } else {
+ return mean_var_cpu(x);
+ }
+}
+
+at::Tensor forward(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps) {  // normalization forward; dispatches on x: CUDA half, other CUDA dtype, or CPU
+ if (x.is_cuda()) {
+ if (x.type().scalarType() == at::ScalarType::Half) {
+ return forward_cuda_h(x, mean, var, weight, bias, affine, eps);
+ } else {
+ return forward_cuda(x, mean, var, weight, bias, affine, eps);
+ }
+ } else {
+ return forward_cpu(x, mean, var, weight, bias, affine, eps);
+ }
+}
+
+std::vector edz_eydz(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps) {  // first backward step; dispatches on z: CUDA half, other CUDA dtype, or CPU
+ if (z.is_cuda()) {
+ if (z.type().scalarType() == at::ScalarType::Half) {
+ return edz_eydz_cuda_h(z, dz, weight, bias, affine, eps);
+ } else {
+ return edz_eydz_cuda(z, dz, weight, bias, affine, eps);
+ }
+ } else {
+ return edz_eydz_cpu(z, dz, weight, bias, affine, eps);
+ }
+}
+
+at::Tensor backward(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps) {  // second backward step; dispatches on z: CUDA half, other CUDA dtype, or CPU
+ if (z.is_cuda()) {
+ if (z.type().scalarType() == at::ScalarType::Half) {
+ return backward_cuda_h(z, dz, var, weight, bias, edz, eydz, affine, eps);
+ } else {
+ return backward_cuda(z, dz, var, weight, bias, edz, eydz, affine, eps);
+ }
+ } else {
+ return backward_cpu(z, dz, var, weight, bias, edz, eydz, affine, eps);
+ }
+}
+
+void leaky_relu_forward(at::Tensor z, float slope) {  // in-place leaky ReLU on z (trailing-underscore ATen op)
+ at::leaky_relu_(z, slope);
+}
+
+void leaky_relu_backward(at::Tensor z, at::Tensor dz, float slope) {  // dispatches on z: CUDA half, other CUDA dtype, or CPU
+ if (z.is_cuda()) {
+ if (z.type().scalarType() == at::ScalarType::Half) {
+ return leaky_relu_backward_cuda_h(z, dz, slope);
+ } else {
+ return leaky_relu_backward_cuda(z, dz, slope);
+ }
+ } else {
+ return leaky_relu_backward_cpu(z, dz, slope);
+ }
+}
+
+void elu_forward(at::Tensor z) {  // in-place ELU on z with ATen's default parameters
+ at::elu_(z);
+}
+
+void elu_backward(at::Tensor z, at::Tensor dz) {  // dispatches on device only: unlike the other wrappers there is no half-precision branch
+ if (z.is_cuda()) {
+ return elu_backward_cuda(z, dz);
+ } else {
+ return elu_backward_cpu(z, dz);
+ }
+}
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {  // Python bindings for the dispatch wrappers defined in this file
+ m.def("mean_var", &mean_var, "Mean and variance computation");
+ m.def("forward", &forward, "In-place forward computation");
+ m.def("edz_eydz", &edz_eydz, "First part of backward computation");
+ m.def("backward", &backward, "Second part of backward computation");
+ m.def("leaky_relu_forward", &leaky_relu_forward, "Leaky relu forward computation");
+ m.def("leaky_relu_backward", &leaky_relu_backward, "Leaky relu backward computation and inversion");
+ m.def("elu_forward", &elu_forward, "Elu forward computation");
+ m.def("elu_backward", &elu_backward, "Elu backward computation and inversion");
+}
diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn.h b/head_segmentation/face_parsing/modules/src/inplace_abn.h
new file mode 100644
index 0000000..17afd11
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/inplace_abn.h
@@ -0,0 +1,88 @@
+#pragma once
+
+#include
+
+#include
+
+std::vector mean_var_cpu(at::Tensor x);
+std::vector mean_var_cuda(at::Tensor x);
+std::vector mean_var_cuda_h(at::Tensor x);
+
+at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps);
+at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps);
+at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps);
+
+std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps);
+std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps);
+std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps);
+
+at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps);
+at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps);
+at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps);
+
+void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope);
+void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope);
+void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope);
+
+void elu_backward_cpu(at::Tensor z, at::Tensor dz);
+void elu_backward_cuda(at::Tensor z, at::Tensor dz);
+
+static void get_dims(at::Tensor x, int64_t& num, int64_t& chn, int64_t& sp) {  // outputs: num = size(0), chn = size(1), sp = product of the remaining dims
+ num = x.size(0);
+ chn = x.size(1);
+ sp = 1;  // stays 1 when x has only two dimensions
+ for (int64_t i = 2; i < x.ndimension(); ++i)
+ sp *= x.size(i);
+}
+
+/*
+ * Specialized CUDA reduction functions for BN
+ */
+#ifdef __CUDACC__
+
+#include "utils/cuda.cuh"
+
+template
+__device__ T reduce(Op op, int plane, int N, int S) {  // block-wide sum of op(batch, plane, x) over batch in [0, N) and x in [0, S)
+ T sum = (T)0;
+ for (int batch = 0; batch < N; ++batch) {
+ for (int x = threadIdx.x; x < S; x += blockDim.x) {  // each thread takes positions threadIdx.x, threadIdx.x + blockDim.x, ...
+ sum += op(batch, plane, x);
+ }
+ }
+
+ // sum over NumThreads within a warp
+ sum = warpSum(sum);  // warpSum is not defined in this header; presumably provided by utils/cuda.cuh
+
+ // 'transpose', and reduce within warp again
+ __shared__ T shared[32];  // one slot per warp
+ __syncthreads();
+ if (threadIdx.x % WARP_SIZE == 0) {  // first lane of each warp stores that warp's partial sum
+ shared[threadIdx.x / WARP_SIZE] = sum;
+ }
+ if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) {
+ // zero out the other entries in shared
+ shared[threadIdx.x] = (T)0;
+ }
+ __syncthreads();
+ if (threadIdx.x / WARP_SIZE == 0) {  // the first warp combines the per-warp partial sums
+ sum = warpSum(shared[threadIdx.x]);
+ if (threadIdx.x == 0) {
+ shared[0] = sum;
+ }
+ }
+ __syncthreads();
+
+ // Everyone picks it up, should be broadcast into the whole gradInput
+ return shared[0];
+}
+#endif
diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp b/head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp
new file mode 100644
index 0000000..ffc6d38
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/inplace_abn_cpu.cpp
@@ -0,0 +1,119 @@
+#include
+
+#include
+
+#include "utils/checks.h"
+#include "inplace_abn.h"
+
+at::Tensor reduce_sum(at::Tensor x) {  // sum over every dimension except dim 1
+ if (x.ndimension() == 2) {
+ return x.sum(0);
+ } else {
+ auto x_view = x.view({x.size(0), x.size(1), -1});  // flatten all trailing dims into one
+ return x_view.sum(-1).sum(0);
+ }
+}
+
+at::Tensor broadcast_to(at::Tensor v, at::Tensor x) {  // view v so that it broadcasts against x along dim 1
+ if (x.ndimension() == 2) {
+ return v;  // v is returned unchanged for a 2-D x
+ } else {
+ std::vector broadcast_size = {1, -1};
+ for (int64_t i = 2; i < x.ndimension(); ++i)
+ broadcast_size.push_back(1);  // one singleton entry per trailing dim of x
+
+ return v.view(broadcast_size);
+ }
+}
+
+int64_t count(at::Tensor x) {  // product of all dim sizes except dim 1
+ int64_t count = x.size(0);
+ for (int64_t i = 2; i < x.ndimension(); ++i)
+ count *= x.size(i);
+
+ return count;
+}
+
+at::Tensor invert_affine(at::Tensor z, at::Tensor weight, at::Tensor bias, bool affine, float eps) {  // undo the scale and shift that forward_cpu applies
+ if (affine) {
+ return (z - broadcast_to(bias, z)) / broadcast_to(at::abs(weight) + eps, z);  // same abs(weight) + eps scale as forward_cpu
+ } else {
+ return z;
+ }
+}
+
+std::vector mean_var_cpu(at::Tensor x) {  // returns {mean, var} reduced over every dimension except dim 1
+ auto num = count(x);
+ auto mean = reduce_sum(x) / num;
+ auto diff = x - broadcast_to(mean, x);
+ auto var = reduce_sum(diff.pow(2)) / num;  // divides by num, i.e. the biased variance
+
+ return {mean, var};
+}
+
+at::Tensor forward_cpu(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps) {  // normalizes x in place and returns it
+ auto gamma = affine ? at::abs(weight) + eps : at::ones_like(var);  // scale is abs(weight) + eps when affine, ones otherwise
+ auto mul = at::rsqrt(var + eps) * gamma;
+
+ x.sub_(broadcast_to(mean, x));
+ x.mul_(broadcast_to(mul, x));
+ if (affine) x.add_(broadcast_to(bias, x));
+
+ return x;
+}
+
+std::vector edz_eydz_cpu(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,
+ bool affine, float eps) {  // returns {sum of dz, sum of y * dz}, reduced over every dimension except dim 1
+ auto edz = reduce_sum(dz);
+ auto y = invert_affine(z, weight, bias, affine, eps);  // y is z with the affine transform undone
+ auto eydz = reduce_sum(y * dz);
+
+ return {edz, eydz};
+}
+
+at::Tensor backward_cpu(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps) {  // computes dx from the output z and the reductions edz / eydz
+ auto y = invert_affine(z, weight, bias, affine, eps);
+ auto mul = affine ? at::rsqrt(var + eps) * (at::abs(weight) + eps) : at::rsqrt(var + eps);  // same scale factor as forward_cpu
+
+ auto num = count(z);
+ auto dx = (dz - broadcast_to(edz / num, dz) - y * broadcast_to(eydz / num, dz)) * broadcast_to(mul, dz);  // edz and eydz are divided by num before use
+ return dx;
+}
+
+void leaky_relu_backward_cpu(at::Tensor z, at::Tensor dz, float slope) {  // modifies both z and dz in place where z < 0
+ CHECK_CPU_INPUT(z);
+ CHECK_CPU_INPUT(dz);
+
+ AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cpu", ([&] {
+ int64_t count = z.numel();
+ auto *_z = z.data();
+ auto *_dz = dz.data();
+
+ for (int64_t i = 0; i < count; ++i) {
+ if (_z[i] < 0) {  // entries with z >= 0 are left unchanged
+ _z[i] *= 1 / slope;  // inverts the forward scaling; assumes slope != 0
+ _dz[i] *= slope;  // derivative of leaky ReLU on the negative side
+ }
+ }
+ }));
+}
+
+void elu_backward_cpu(at::Tensor z, at::Tensor dz) {  // modifies both z and dz in place where z < 0
+ CHECK_CPU_INPUT(z);
+ CHECK_CPU_INPUT(dz);
+
+ AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cpu", ([&] {
+ int64_t count = z.numel();
+ auto *_z = z.data();
+ auto *_dz = dz.data();
+
+ for (int64_t i = 0; i < count; ++i) {
+ if (_z[i] < 0) {  // entries with z >= 0 are left unchanged
+ _dz[i] *= (_z[i] + 1.f);  // ELU derivative on the negative side is z + 1; must read z before it is inverted
+ _z[i] = log1p(_z[i]);  // invert z = exp(y) - 1 to recover the pre-activation value
+ }
+ }
+ }));
+}
diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu
new file mode 100644
index 0000000..b157b06
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda.cu
@@ -0,0 +1,333 @@
+#include
+
+#include
+#include
+
+#include
+
+#include "utils/checks.h"
+#include "utils/cuda.cuh"
+#include "inplace_abn.h"
+
+#include
+
+// Operations for reduce
+template
+struct SumOp {  // functor handed to reduce(): yields one tensor element per (batch, plane, n)
+ __device__ SumOp(const T *t, int c, int s)
+ : tensor(t), chn(c), sp(s) {}
+ __device__ __forceinline__ T operator()(int batch, int plane, int n) {
+ return tensor[(batch * chn + plane) * sp + n];  // flat index into a contiguous (batch, chn, sp) buffer
+ }
+ const T *tensor;  // input buffer (not owned)
+ const int chn;  // number of channels
+ const int sp;  // elements per (batch, channel) slice
+};
+
+template
+struct VarOp {  // functor handed to reduce(): yields the squared deviation from a fixed mean
+ __device__ VarOp(T m, const T *t, int c, int s)
+ : mean(m), tensor(t), chn(c), sp(s) {}
+ __device__ __forceinline__ T operator()(int batch, int plane, int n) {
+ T val = tensor[(batch * chn + plane) * sp + n];  // flat index into a contiguous (batch, chn, sp) buffer
+ return (val - mean) * (val - mean);
+ }
+ const T mean;  // mean supplied by the caller
+ const T *tensor;  // input buffer (not owned)
+ const int chn;  // number of channels
+ const int sp;  // elements per (batch, channel) slice
+};
+
+template
+struct GradOp {  // functor handed to reduce(): yields the pair (dz, y * dz) per element
+ __device__ GradOp(T _weight, T _bias, const T *_z, const T *_dz, int c, int s)
+ : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
+ __device__ __forceinline__ Pair operator()(int batch, int plane, int n) {
+ T _y = (z[(batch * chn + plane) * sp + n] - bias) / weight;  // undo the affine step: y = (z - bias) / weight
+ T _dz = dz[(batch * chn + plane) * sp + n];
+ return Pair(_dz, _y * _dz);  // v1 = dz, v2 = y * dz
+ }
+ const T weight;  // scale used to invert the affine step
+ const T bias;  // offset used to invert the affine step
+ const T *z;  // output buffer (not owned)
+ const T *dz;  // gradient buffer (not owned)
+ const int chn;  // number of channels
+ const int sp;  // elements per (batch, channel) slice
+};
+
+/***********
+ * mean_var
+ ***********/
+
+template
+__global__ void mean_var_kernel(const T *x, T *mean, T *var, int num, int chn, int sp) {  // writes mean[plane] and var[plane] for the channel handled by this block
+ int plane = blockIdx.x;  // one block per channel
+ T norm = T(1) / T(num * sp);  // 1 / (elements per channel); the variance below divides by N, not N - 1
+ // reduce() is defined outside this view; presumably a block-wide reduction over the channel - confirm
+ T _mean = reduce>(SumOp(x, chn, sp), plane, num, sp) * norm;
+ __syncthreads();  // barrier between the two reductions
+ T _var = reduce>(VarOp(_mean, x, chn, sp), plane, num, sp) * norm;  // second pass: mean of squared deviations
+ // Only thread 0 of the block stores the results.
+ if (threadIdx.x == 0) {
+ mean[plane] = _mean;
+ var[plane] = _var;
+ }
+}
+
+std::vector mean_var_cuda(at::Tensor x) {  // host launcher: returns {mean, var}, each of length chn
+ CHECK_CUDA_INPUT(x);  // x must be a contiguous CUDA tensor
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(x, num, chn, sp);
+
+ // Prepare output tensors: uninitialised, same options (dtype/device) as x
+ auto mean = at::empty({chn}, x.options());
+ auto var = at::empty({chn}, x.options());
+
+ // Run kernel: one block per channel, on the current CUDA stream
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));  // getNumThreads takes an int, so sp is narrowed here
+ auto stream = at::cuda::getCurrentCUDAStream();
+ AT_DISPATCH_FLOATING_TYPES(x.type(), "mean_var_cuda", ([&] {
+ mean_var_kernel<<>>(
+ x.data(),
+ mean.data(),
+ var.data(),
+ num, chn, sp);
+ }));
+
+ return {mean, var};
+}
+
+/**********
+ * forward
+ **********/
+
+template
+__global__ void forward_kernel(T *x, const T *mean, const T *var, const T *weight, const T *bias,  // normalises x in place for the channel handled by this block
+ bool affine, float eps, int num, int chn, int sp) {
+ int plane = blockIdx.x;  // one block per channel
+ // Per-channel constants are read once, before the element loop.
+ T _mean = mean[plane];
+ T _var = var[plane];
+ T _weight = affine ? abs(weight[plane]) + eps : T(1);  // weight/bias are only dereferenced when affine is true
+ T _bias = affine ? bias[plane] : T(0);
+ // Combined scale: (|weight| + eps) / sqrt(var + eps), or 1 / sqrt(var + eps) without affine
+ T mul = rsqrt(_var + eps) * _weight;
+ // Threads stride over the sp axis in steps of blockDim.x, for every batch entry.
+ for (int batch = 0; batch < num; ++batch) {
+ for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+ T _x = x[(batch * chn + plane) * sp + n];
+ T _y = (_x - _mean) * mul + _bias;
+ // The result overwrites the input element.
+ x[(batch * chn + plane) * sp + n] = _y;
+ }
+ }
+}
+
+at::Tensor forward_cuda(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,  // host launcher: normalises x in place and returns it
+ bool affine, float eps) {
+ CHECK_CUDA_INPUT(x);  // every tensor is checked, including weight/bias when affine is false
+ CHECK_CUDA_INPUT(mean);
+ CHECK_CUDA_INPUT(var);
+ CHECK_CUDA_INPUT(weight);
+ CHECK_CUDA_INPUT(bias);
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(x, num, chn, sp);
+
+ // Run kernel: one block per channel, on the current CUDA stream
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ AT_DISPATCH_FLOATING_TYPES(x.type(), "forward_cuda", ([&] {
+ forward_kernel<<>>(
+ x.data(),
+ mean.data(),
+ var.data(),
+ weight.data(),
+ bias.data(),
+ affine, eps, num, chn, sp);
+ }));
+ // Same tensor object that was passed in; its contents were modified by the kernel.
+ return x;
+}
+
+/***********
+ * edz_eydz
+ ***********/
+
+template
+__global__ void edz_eydz_kernel(const T *z, const T *dz, const T *weight, const T *bias,  // writes edz[plane] and eydz[plane] for the channel handled by this block
+ T *edz, T *eydz, bool affine, float eps, int num, int chn, int sp) {
+ int plane = blockIdx.x;  // one block per channel
+ // Without affine the inversion in GradOp reduces to y = z.
+ T _weight = affine ? abs(weight[plane]) + eps : 1.f;
+ T _bias = affine ? bias[plane] : 0.f;
+ // reduce() is defined outside this view; presumably a block-wide reduction of GradOp's (dz, y * dz) pairs - confirm
+ Pair res = reduce, GradOp>(GradOp(_weight, _bias, z, dz, chn, sp), plane, num, sp);
+ __syncthreads();
+ // Only thread 0 of the block stores the results.
+ if (threadIdx.x == 0) {
+ edz[plane] = res.v1;  // reduction of dz
+ eydz[plane] = res.v2;  // reduction of y * dz
+ }
+}
+
+std::vector edz_eydz_cuda(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,  // host launcher: returns {edz, eydz}, each of length chn
+ bool affine, float eps) {
+ CHECK_CUDA_INPUT(z);
+ CHECK_CUDA_INPUT(dz);
+ CHECK_CUDA_INPUT(weight);
+ CHECK_CUDA_INPUT(bias);
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(z, num, chn, sp);
+ // Outputs are uninitialised and share z's options (dtype/device).
+ auto edz = at::empty({chn}, z.options());
+ auto eydz = at::empty({chn}, z.options());
+
+ // Run kernel: one block per channel, on the current CUDA stream
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ AT_DISPATCH_FLOATING_TYPES(z.type(), "edz_eydz_cuda", ([&] {
+ edz_eydz_kernel<<>>(
+ z.data(),
+ dz.data(),
+ weight.data(),
+ bias.data(),
+ edz.data(),
+ eydz.data(),
+ affine, eps, num, chn, sp);
+ }));
+
+ return {edz, eydz};
+}
+
+/***********
+ * backward
+ ***********/
+
+template
+__global__ void backward_kernel(const T *z, const T *dz, const T *var, const T *weight, const T *bias, const T *edz,  // writes dx for the channel handled by this block
+ const T *eydz, T *dx, bool affine, float eps, int num, int chn, int sp) {
+ int plane = blockIdx.x;  // one block per channel
+ // Per-channel constants are read once, before the element loop.
+ T _weight = affine ? abs(weight[plane]) + eps : 1.f;
+ T _bias = affine ? bias[plane] : 0.f;
+ T _var = var[plane];
+ T _edz = edz[plane];
+ T _eydz = eydz[plane];
+ // Same scale as the forward pass: (|weight| + eps) / sqrt(var + eps)
+ T _mul = _weight * rsqrt(_var + eps);
+ T count = T(num * sp);  // elements per channel
+ // Threads stride over the sp axis in steps of blockDim.x, for every batch entry.
+ for (int batch = 0; batch < num; ++batch) {
+ for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+ T _dz = dz[(batch * chn + plane) * sp + n];
+ T _y = (z[(batch * chn + plane) * sp + n] - _bias) / _weight;  // undo the affine step
+ // dx = (dz - edz/count - y * eydz/count) * mul
+ dx[(batch * chn + plane) * sp + n] = (_dz - _edz / count - _y * _eydz / count) * _mul;
+ }
+ }
+}
+
+at::Tensor backward_cuda(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,  // host launcher: returns a new tensor dx shaped like z
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
+ CHECK_CUDA_INPUT(z);
+ CHECK_CUDA_INPUT(dz);
+ CHECK_CUDA_INPUT(var);
+ CHECK_CUDA_INPUT(weight);
+ CHECK_CUDA_INPUT(bias);
+ CHECK_CUDA_INPUT(edz);
+ CHECK_CUDA_INPUT(eydz);
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(z, num, chn, sp);
+ // dx is a fresh zero-filled tensor; z and dz are only read by the kernel.
+ auto dx = at::zeros_like(z);
+
+ // Run kernel: one block per channel, on the current CUDA stream
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ AT_DISPATCH_FLOATING_TYPES(z.type(), "backward_cuda", ([&] {
+ backward_kernel<<>>(
+ z.data(),
+ dz.data(),
+ var.data(),
+ weight.data(),
+ bias.data(),
+ edz.data(),
+ eydz.data(),
+ dx.data(),
+ affine, eps, num, chn, sp);
+ }));
+
+ return dx;
+}
+
+/**************
+ * activations
+ **************/
+
+template
+inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {  // in-place update of z and dz over count elements
+ // Create thrust pointers
+ thrust::device_ptr th_z = thrust::device_pointer_cast(z);
+ thrust::device_ptr th_dz = thrust::device_pointer_cast(dz);
+ // Both passes run on the current CUDA stream. Pass 1 uses z as the stencil, so it must run before z is rewritten.
+ auto stream = at::cuda::getCurrentCUDAStream();
+ thrust::transform_if(thrust::cuda::par.on(stream),  // pass 1: dz *= slope where z < 0
+ th_dz, th_dz + count, th_z, th_dz,
+ [slope] __device__ (const T& dz) { return dz * slope; },
+ [] __device__ (const T& z) { return z < 0; });
+ thrust::transform_if(thrust::cuda::par.on(stream),  // pass 2: z /= slope where z < 0
+ th_z, th_z + count, th_z,
+ [slope] __device__ (const T& z) { return z / slope; },
+ [] __device__ (const T& z) { return z < 0; });
+}
+
+void leaky_relu_backward_cuda(at::Tensor z, at::Tensor dz, float slope) {  // host entry point: modifies z and dz in place
+ CHECK_CUDA_INPUT(z);  // both tensors must be contiguous CUDA tensors
+ CHECK_CUDA_INPUT(dz);
+ // Element count is taken from z only; dz is not checked for a matching size.
+ int64_t count = z.numel();
+ // Dispatch on z's floating-point dtype.
+ AT_DISPATCH_FLOATING_TYPES(z.type(), "leaky_relu_backward_cuda", ([&] {
+ leaky_relu_backward_impl(z.data(), dz.data(), slope, count);
+ }));
+}
+
+template
+inline void elu_backward_impl(T *z, T *dz, int64_t count) {  // in-place update of z and dz over count elements
+ // Create thrust pointers
+ thrust::device_ptr th_z = thrust::device_pointer_cast(z);
+ thrust::device_ptr th_dz = thrust::device_pointer_cast(dz);
+ // Both passes run on the current CUDA stream. Pass 1 reads the original z, so it must run before z is rewritten.
+ auto stream = at::cuda::getCurrentCUDAStream();
+ thrust::transform_if(thrust::cuda::par.on(stream),  // pass 1: dz *= (z + 1) where z < 0 (z is both second input and stencil)
+ th_dz, th_dz + count, th_z, th_z, th_dz,
+ [] __device__ (const T& dz, const T& z) { return dz * (z + 1.); },
+ [] __device__ (const T& z) { return z < 0; });
+ thrust::transform_if(thrust::cuda::par.on(stream),  // pass 2: z = log1p(z) where z < 0
+ th_z, th_z + count, th_z,
+ [] __device__ (const T& z) { return log1p(z); },
+ [] __device__ (const T& z) { return z < 0; });
+}
+
+// Host entry point for the in-place ELU backward step on CUDA tensors.
+// z and dz must be contiguous CUDA tensors; both are modified in place by
+// elu_backward_impl. The element count is taken from z.
+void elu_backward_cuda(at::Tensor z, at::Tensor dz) {
+  CHECK_CUDA_INPUT(z);
+  CHECK_CUDA_INPUT(dz);
+
+  int64_t count = z.numel();
+
+  // The dispatch name appears in dtype error messages, so it must name this
+  // function (it was previously a copy of "leaky_relu_backward_cuda").
+  AT_DISPATCH_FLOATING_TYPES(z.type(), "elu_backward_cuda", ([&] {
+    elu_backward_impl(z.data(), dz.data(), count);
+  }));
+}
diff --git a/head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu
new file mode 100644
index 0000000..bb63e73
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/inplace_abn_cuda_half.cu
@@ -0,0 +1,275 @@
+#include
+
+#include
+
+#include
+
+#include "utils/checks.h"
+#include "utils/cuda.cuh"
+#include "inplace_abn.h"
+
+#include
+
+// Operations for reduce
+struct SumOpH {  // half-precision input variant: yields each element converted to float
+ __device__ SumOpH(const half *t, int c, int s)
+ : tensor(t), chn(c), sp(s) {}
+ __device__ __forceinline__ float operator()(int batch, int plane, int n) {
+ return __half2float(tensor[(batch * chn + plane) * sp + n]);  // flat index into a contiguous (batch, chn, sp) buffer
+ }
+ const half *tensor;  // input buffer (not owned)
+ const int chn;  // number of channels
+ const int sp;  // elements per (batch, channel) slice
+};
+
+struct VarOpH {  // half-precision input variant: yields the squared deviation from a fixed float mean
+ __device__ VarOpH(float m, const half *t, int c, int s)
+ : mean(m), tensor(t), chn(c), sp(s) {}
+ __device__ __forceinline__ float operator()(int batch, int plane, int n) {
+ const auto t = __half2float(tensor[(batch * chn + plane) * sp + n]);  // convert to float before subtracting
+ return (t - mean) * (t - mean);
+ }
+ const float mean;  // mean supplied by the caller
+ const half *tensor;  // input buffer (not owned)
+ const int chn;  // number of channels
+ const int sp;  // elements per (batch, channel) slice
+};
+
+struct GradOpH {  // half-precision input variant: yields the pair (dz, y * dz) computed in float
+ __device__ GradOpH(float _weight, float _bias, const half *_z, const half *_dz, int c, int s)
+ : weight(_weight), bias(_bias), z(_z), dz(_dz), chn(c), sp(s) {}
+ __device__ __forceinline__ Pair operator()(int batch, int plane, int n) {
+ float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - bias) / weight;  // undo the affine step: y = (z - bias) / weight
+ float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
+ return Pair(_dz, _y * _dz);  // v1 = dz, v2 = y * dz
+ }
+ const float weight;  // scale used to invert the affine step
+ const float bias;  // offset used to invert the affine step
+ const half *z;  // output buffer (not owned)
+ const half *dz;  // gradient buffer (not owned)
+ const int chn;  // number of channels
+ const int sp;  // elements per (batch, channel) slice
+};
+
+/***********
+ * mean_var
+ ***********/
+
+__global__ void mean_var_kernel_h(const half *x, float *mean, float *var, int num, int chn, int sp) {  // half input, float statistics
+ int plane = blockIdx.x;  // one block per channel
+ float norm = 1.f / static_cast(num * sp);  // 1 / (elements per channel); the variance divides by N, not N - 1
+ // reduce() is defined outside this view; presumably a block-wide reduction over the channel - confirm
+ float _mean = reduce(SumOpH(x, chn, sp), plane, num, sp) * norm;
+ __syncthreads();  // barrier between the two reductions
+ float _var = reduce(VarOpH(_mean, x, chn, sp), plane, num, sp) * norm;  // second pass: mean of squared deviations
+ // Only thread 0 of the block stores the results.
+ if (threadIdx.x == 0) {
+ mean[plane] = _mean;
+ var[plane] = _var;
+ }
+}
+
+std::vector mean_var_cuda_h(at::Tensor x) {  // host launcher for half input: returns {mean, var} as float tensors of length chn
+ CHECK_CUDA_INPUT(x);  // checks device and contiguity only; x's dtype is not verified here
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(x, num, chn, sp);
+
+ // Prepare output tensors: x's options with the dtype forced to float
+ auto mean = at::empty({chn},x.options().dtype(at::kFloat));
+ auto var = at::empty({chn},x.options().dtype(at::kFloat));
+
+ // Run kernel: one block per channel, on the current CUDA stream (no dtype dispatch)
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ mean_var_kernel_h<<>>(
+ reinterpret_cast(x.data()),
+ mean.data(),
+ var.data(),
+ num, chn, sp);
+
+ return {mean, var};
+}
+
+/**********
+ * forward
+ **********/
+
+__global__ void forward_kernel_h(half *x, const float *mean, const float *var, const float *weight, const float *bias,  // normalises half x in place using float statistics
+ bool affine, float eps, int num, int chn, int sp) {
+ int plane = blockIdx.x;  // one block per channel
+ // Per-channel constants are read once, before the element loop.
+ const float _mean = mean[plane];
+ const float _var = var[plane];
+ const float _weight = affine ? abs(weight[plane]) + eps : 1.f;  // weight/bias are only dereferenced when affine is true
+ const float _bias = affine ? bias[plane] : 0.f;
+ // Combined scale: (|weight| + eps) / sqrt(var + eps)
+ const float mul = rsqrt(_var + eps) * _weight;
+ // Threads stride over the sp axis in steps of blockDim.x, for every batch entry.
+ for (int batch = 0; batch < num; ++batch) {
+ for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+ half *x_ptr = x + (batch * chn + plane) * sp + n;
+ float _x = __half2float(*x_ptr);  // arithmetic is done in float
+ float _y = (_x - _mean) * mul + _bias;
+ // Convert back to half and overwrite the input element.
+ *x_ptr = __float2half(_y);
+ }
+ }
+}
+
+at::Tensor forward_cuda_h(at::Tensor x, at::Tensor mean, at::Tensor var, at::Tensor weight, at::Tensor bias,  // host launcher for half x: normalises in place and returns x
+ bool affine, float eps) {
+ CHECK_CUDA_INPUT(x);  // device/contiguity checks only; dtypes are not verified here
+ CHECK_CUDA_INPUT(mean);
+ CHECK_CUDA_INPUT(var);
+ CHECK_CUDA_INPUT(weight);
+ CHECK_CUDA_INPUT(bias);
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(x, num, chn, sp);
+
+ // Run kernel: one block per channel, on the current CUDA stream (no dtype dispatch)
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ forward_kernel_h<<>>(
+ reinterpret_cast(x.data()),
+ mean.data(),
+ var.data(),
+ weight.data(),
+ bias.data(),
+ affine, eps, num, chn, sp);
+ // Same tensor object that was passed in; its contents were modified by the kernel.
+ return x;
+}
+
+__global__ void edz_eydz_kernel_h(const half *z, const half *dz, const float *weight, const float *bias,  // half inputs, float outputs edz[plane] / eydz[plane]
+ float *edz, float *eydz, bool affine, float eps, int num, int chn, int sp) {
+ int plane = blockIdx.x;  // one block per channel
+ // Without affine the inversion in GradOpH reduces to y = z.
+ float _weight = affine ? abs(weight[plane]) + eps : 1.f;
+ float _bias = affine ? bias[plane] : 0.f;
+ // reduce() is defined outside this view; presumably a block-wide reduction of GradOpH's (dz, y * dz) pairs - confirm
+ Pair res = reduce, GradOpH>(GradOpH(_weight, _bias, z, dz, chn, sp), plane, num, sp);
+ __syncthreads();
+ // Only thread 0 of the block stores the results.
+ if (threadIdx.x == 0) {
+ edz[plane] = res.v1;  // reduction of dz
+ eydz[plane] = res.v2;  // reduction of y * dz
+ }
+}
+
+std::vector edz_eydz_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor weight, at::Tensor bias,  // host launcher for half z/dz: returns {edz, eydz} as float tensors
+ bool affine, float eps) {
+ CHECK_CUDA_INPUT(z);  // device/contiguity checks only; dtypes are not verified here
+ CHECK_CUDA_INPUT(dz);
+ CHECK_CUDA_INPUT(weight);
+ CHECK_CUDA_INPUT(bias);
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(z, num, chn, sp);
+ // Outputs are uninitialised; z's options with the dtype forced to float.
+ auto edz = at::empty({chn},z.options().dtype(at::kFloat));
+ auto eydz = at::empty({chn},z.options().dtype(at::kFloat));
+
+ // Run kernel: one block per channel, on the current CUDA stream (no dtype dispatch)
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ edz_eydz_kernel_h<<>>(
+ reinterpret_cast(z.data()),
+ reinterpret_cast(dz.data()),
+ weight.data(),
+ bias.data(),
+ edz.data(),
+ eydz.data(),
+ affine, eps, num, chn, sp);
+
+ return {edz, eydz};
+}
+
+__global__ void backward_kernel_h(const half *z, const half *dz, const float *var, const float *weight, const float *bias, const float *edz,  // half z/dz/dx, float per-channel inputs
+ const float *eydz, half *dx, bool affine, float eps, int num, int chn, int sp) {
+ int plane = blockIdx.x;  // one block per channel
+ // Per-channel constants are read once, before the element loop.
+ float _weight = affine ? abs(weight[plane]) + eps : 1.f;
+ float _bias = affine ? bias[plane] : 0.f;
+ float _var = var[plane];
+ float _edz = edz[plane];
+ float _eydz = eydz[plane];
+ // Same scale as the forward pass: (|weight| + eps) / sqrt(var + eps)
+ float _mul = _weight * rsqrt(_var + eps);
+ float count = float(num * sp);  // elements per channel
+ // Threads stride over the sp axis in steps of blockDim.x, for every batch entry.
+ for (int batch = 0; batch < num; ++batch) {
+ for (int n = threadIdx.x; n < sp; n += blockDim.x) {
+ float _dz = __half2float(dz[(batch * chn + plane) * sp + n]);
+ float _y = (__half2float(z[(batch * chn + plane) * sp + n]) - _bias) / _weight;  // undo the affine step
+ // dx = (dz - edz/count - y * eydz/count) * mul, computed in float and stored as half
+ dx[(batch * chn + plane) * sp + n] = __float2half((_dz - _edz / count - _y * _eydz / count) * _mul);
+ }
+ }
+}
+
+at::Tensor backward_cuda_h(at::Tensor z, at::Tensor dz, at::Tensor var, at::Tensor weight, at::Tensor bias,  // host launcher for half z/dz: returns a new tensor dx shaped like z
+ at::Tensor edz, at::Tensor eydz, bool affine, float eps) {
+ CHECK_CUDA_INPUT(z);  // device/contiguity checks only; dtypes are not verified here
+ CHECK_CUDA_INPUT(dz);
+ CHECK_CUDA_INPUT(var);
+ CHECK_CUDA_INPUT(weight);
+ CHECK_CUDA_INPUT(bias);
+ CHECK_CUDA_INPUT(edz);
+ CHECK_CUDA_INPUT(eydz);
+
+ // Extract dimensions (get_dims is defined outside this view)
+ int64_t num, chn, sp;
+ get_dims(z, num, chn, sp);
+ // dx is a fresh zero-filled tensor with z's dtype and shape.
+ auto dx = at::zeros_like(z);
+
+ // Run kernel: one block per channel, on the current CUDA stream (no dtype dispatch)
+ dim3 blocks(chn);
+ dim3 threads(getNumThreads(sp));
+ auto stream = at::cuda::getCurrentCUDAStream();
+ backward_kernel_h<<>>(
+ reinterpret_cast(z.data()),
+ reinterpret_cast(dz.data()),
+ var.data(),
+ weight.data(),
+ bias.data(),
+ edz.data(),
+ eydz.data(),
+ reinterpret_cast(dx.data()),
+ affine, eps, num, chn, sp);
+
+ return dx;
+}
+
+// In-place leaky-ReLU backward step for half tensors.
+// For every element with z < 0: dz is multiplied by slope and z is divided by
+// slope; other elements are left untouched. Arithmetic is done in float.
+// Each thread starts at its global index and advances by the total number of
+// launched threads until it passes count.
+__global__ void leaky_relu_backward_impl_h(half *z, half *dz, float slope, int64_t count) {
+  // Index and stride are 64-bit: count is int64_t, and the 32-bit products
+  // blockIdx.x * blockDim.x / blockDim.x * gridDim.x can overflow for large tensors.
+  const int64_t stride = int64_t(blockDim.x) * int64_t(gridDim.x);
+  for (int64_t i = int64_t(blockIdx.x) * int64_t(blockDim.x) + threadIdx.x; i < count; i += stride) {
+    float _z = __half2float(z[i]);
+    if (_z < 0) {
+      dz[i] = __float2half(__half2float(dz[i]) * slope);
+      z[i] = __float2half(_z / slope);
+    }
+  }
+}
+
+void leaky_relu_backward_cuda_h(at::Tensor z, at::Tensor dz, float slope) {  // host launcher for half tensors: modifies z and dz in place
+ CHECK_CUDA_INPUT(z);  // device/contiguity checks only; dtypes are not verified here
+ CHECK_CUDA_INPUT(dz);
+ // Element count is taken from z only; dz is not checked for a matching size.
+ int64_t count = z.numel();
+ dim3 threads(getNumThreads(count));  // getNumThreads takes an int, so count is narrowed here
+ dim3 blocks = (count + threads.x - 1) / threads.x;  // ceil(count / threads.x)
+ auto stream = at::cuda::getCurrentCUDAStream();
+ leaky_relu_backward_impl_h<<>>(
+ reinterpret_cast(z.data()),
+ reinterpret_cast(dz.data()),
+ slope, count);
+}
+
diff --git a/head_segmentation/face_parsing/modules/src/utils/checks.h b/head_segmentation/face_parsing/modules/src/utils/checks.h
new file mode 100644
index 0000000..e761a6f
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/utils/checks.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include
+
+// Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT
+#ifndef AT_CHECK
+#define AT_CHECK AT_ASSERT
+#endif
+
+#define CHECK_CUDA(x) AT_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor")
+#define CHECK_CPU(x) AT_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor")
+#define CHECK_CONTIGUOUS(x) AT_CHECK((x).is_contiguous(), #x " must be contiguous")
+
+#define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
+#define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x)
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/modules/src/utils/common.h b/head_segmentation/face_parsing/modules/src/utils/common.h
new file mode 100644
index 0000000..e8403ee
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/utils/common.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include
+
+/*
+ * Functions to share code between CPU and GPU
+ */
+
+#ifdef __CUDACC__
+// CUDA versions (selected when __CUDACC__ is defined)
+
+#define HOST_DEVICE __host__ __device__
+#define INLINE_HOST_DEVICE __host__ __device__ inline
+#define FLOOR(x) floor(x)
+
+#if __CUDA_ARCH__ >= 600
+// Recent compute capabilities have block-level atomicAdd for all data types, so we use that
+#define ACCUM(x,y) atomicAdd_block(&(x),(y))
+#else
+// Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float
+// and use the known atomicCAS-based implementation for double
+template
+__device__ inline data_t atomic_add(data_t *address, data_t val) {  // generic case: forwards to the built-in atomicAdd
+ return atomicAdd(address, val);
+}
+
+template<>
+__device__ inline double atomic_add(double *address, double val) {  // double specialisation: compare-and-swap retry loop
+ unsigned long long int* address_as_ull = (unsigned long long int*)address;  // view the double's storage as a 64-bit integer
+ unsigned long long int old = *address_as_ull, assumed;
+ do {
+ assumed = old;
+ old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));  // store assumed + val only if the slot still holds assumed
+ } while (assumed != old);  // retry when the stored value changed in between
+ return __longlong_as_double(old);  // value held before this add
+}
+
+#define ACCUM(x,y) atomic_add(&(x),(y))
+#endif // #if __CUDA_ARCH__ >= 600
+
+#else
+// CPU versions: empty qualifiers and a plain, non-atomic accumulate
+
+#define HOST_DEVICE
+#define INLINE_HOST_DEVICE inline
+#define FLOOR(x) std::floor(x)
+#define ACCUM(x,y) (x) += (y)
+
+#endif // #ifdef __CUDACC__
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/modules/src/utils/cuda.cuh b/head_segmentation/face_parsing/modules/src/utils/cuda.cuh
new file mode 100644
index 0000000..60c0023
--- /dev/null
+++ b/head_segmentation/face_parsing/modules/src/utils/cuda.cuh
@@ -0,0 +1,71 @@
+#pragma once
+
+/*
+ * General settings and functions
+ */
+const int WARP_SIZE = 32;  // used by warpSum below
+const int MAX_BLOCK_SIZE = 1024;  // largest value getNumThreads returns; also sizes warpSum's fallback shared array
+// getNumThreads: smallest entry of {32, 64, 128, 256, 512, 1024} that is >= nElem, capped at MAX_BLOCK_SIZE.
+static int getNumThreads(int nElem) {
+ int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE};
+ for (int i = 0; i < 6; ++i) {
+ if (nElem <= threadSizes[i]) {  // first size that fits; nElem <= 32 (including 0 or negative) yields 32
+ return threadSizes[i];
+ }
+ }
+ return MAX_BLOCK_SIZE;  // nElem larger than every table entry
+}
+
+/*
+ * Reduction utilities
+ */
+template
+__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize,  // version-portable wrapper around the XOR warp shuffle
+ unsigned int mask = 0xffffffff) {
+#if CUDART_VERSION >= 9000  // CUDA 9+: use the _sync variant with an explicit participation mask
+ return __shfl_xor_sync(mask, value, laneMask, width);
+#else  // older toolkits: legacy intrinsic; mask is unused
+ return __shfl_xor(value, laneMask, width);
+#endif
+}
+
+__device__ __forceinline__ int getMSB(int val) { return 31 - __clz(val); }  // 31 minus the leading-zero count, i.e. index of the highest set bit (5 for WARP_SIZE = 32)
+
+template
+struct Pair {  // two values of type T that are accumulated together
+ T v1, v2;
+ __device__ Pair() {}  // leaves v1 and v2 uninitialised
+ __device__ Pair(T _v1, T _v2) : v1(_v1), v2(_v2) {}
+ __device__ Pair(T v) : v1(v), v2(v) {}  // both members set to the same value
+ __device__ Pair(int v) : v1(v), v2(v) {}  // same, from an int (e.g. a literal 0)
+ __device__ Pair &operator+=(const Pair &a) {  // member-wise addition
+ v1 += a.v1;
+ v2 += a.v2;
+ return *this;
+ }
+};
+
+template
+static __device__ __forceinline__ T warpSum(T val) {  // returns val summed across the calling thread's warp
+#if __CUDA_ARCH__ >= 300  // shuffle path
+ for (int i = 0; i < getMSB(WARP_SIZE); ++i) {  // log2(WARP_SIZE) rounds with XOR lane masks 1, 2, 4, ...
+ val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE);
+ }
+#else  // fallback path through shared memory
+ __shared__ T values[MAX_BLOCK_SIZE];  // indexed by threadIdx.x
+ values[threadIdx.x] = val;
+ __threadfence_block();
+ const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE;  // index of the first thread in this warp
+ for (int i = 1; i < WARP_SIZE; i++) {  // add the other WARP_SIZE - 1 slots of the same warp
+ val += values[base + ((i + threadIdx.x) % WARP_SIZE)];
+ }
+#endif
+ return val;
+}
+
+template
+static __device__ __forceinline__ Pair warpSum(Pair value) {  // Pair overload: warp-sums each member independently
+ value.v1 = warpSum(value.v1);
+ value.v2 = warpSum(value.v2);
+ return value;
+}
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/optimizer.py b/head_segmentation/face_parsing/optimizer.py
new file mode 100644
index 0000000..0c99e06
--- /dev/null
+++ b/head_segmentation/face_parsing/optimizer.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+
+import torch
+import logging
+
+logger = logging.getLogger()
+
+class Optimizer(object):  # wraps torch.optim.SGD with an exponential warmup followed by a polynomial decay of the learning rate
+ def __init__(self,  # extra positional/keyword arguments are accepted and ignored
+ model,  # must provide get_params() returning four parameter lists (see below)
+ lr0,  # base learning rate, reached at the end of warmup
+ momentum,
+ wd,  # weight decay passed to SGD as the default for every group
+ warmup_steps,  # must be non-zero: used as a divisor in warmup_factor
+ warmup_start_lr,  # must be non-zero: used as a divisor in warmup_factor
+ max_iter,
+ power,  # exponent of the polynomial decay
+ *args, **kwargs):
+ self.warmup_steps = warmup_steps
+ self.warmup_start_lr = warmup_start_lr
+ self.lr0 = lr0
+ self.lr = self.lr0
+ self.max_iter = float(max_iter)  # float so the ratio in get_lr is a true division
+ self.power = power
+ self.it = 0  # number of step() calls so far
+ wd_params, nowd_params, lr_mul_wd_params, lr_mul_nowd_params = model.get_params()
+ param_list = [  # groups 2 and 4 override weight decay to 0; groups 3 and 4 carry the custom 'lr_mul' flag read by step()
+ {'params': wd_params},
+ {'params': nowd_params, 'weight_decay': 0},
+ {'params': lr_mul_wd_params, 'lr_mul': True},
+ {'params': lr_mul_nowd_params, 'weight_decay': 0, 'lr_mul': True}]
+ self.optim = torch.optim.SGD(
+ param_list,
+ lr = lr0,
+ momentum = momentum,
+ weight_decay = wd)
+ self.warmup_factor = (self.lr0/self.warmup_start_lr)**(1./self.warmup_steps)  # per-step multiplier so that warmup_start_lr * factor**warmup_steps == lr0
+
+
+ def get_lr(self):  # learning rate for the current iteration self.it
+ if self.it <= self.warmup_steps:  # warmup: geometric growth from warmup_start_lr to lr0
+ lr = self.warmup_start_lr*(self.warmup_factor**self.it)
+ else:  # afterwards: lr0 * (1 - progress) ** power, with progress measured from the end of warmup
+ factor = (1-(self.it-self.warmup_steps)/(self.max_iter-self.warmup_steps))**self.power
+ lr = self.lr0 * factor
+ return lr
+
+
+ def step(self):  # refresh the learning rates, then take one SGD step
+ self.lr = self.get_lr()
+ for pg in self.optim.param_groups:
+ if pg.get('lr_mul', False):  # flagged groups train at 10x the scheduled rate
+ pg['lr'] = self.lr * 10
+ else:
+ pg['lr'] = self.lr
+ if self.optim.defaults.get('lr_mul', False):  # NOTE(review): 'lr_mul' is never put into the SGD defaults here, so this branch looks unreachable - confirm
+ self.optim.defaults['lr'] = self.lr * 10
+ else:
+ self.optim.defaults['lr'] = self.lr
+ self.it += 1
+ self.optim.step()
+ if self.it == self.warmup_steps+2:  # logged once, on the call where the counter reaches warmup_steps + 2
+ logger.info('==> warmup done, start to implement poly lr strategy')
+
+ def zero_grad(self):  # forwards to the wrapped optimizer
+ self.optim.zero_grad()
+
diff --git a/head_segmentation/face_parsing/prepropess_data.py b/head_segmentation/face_parsing/prepropess_data.py
new file mode 100644
index 0000000..ee7ed56
--- /dev/null
+++ b/head_segmentation/face_parsing/prepropess_data.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+import os.path as osp
+import os
+import cv2
+from transform import *
+from PIL import Image
+
+face_data = '/home/zll/data/CelebAMask-HQ/CelebA-HQ-img'  # hard-coded dataset locations; face_data is not used below
+face_sep_mask = '/home/zll/data/CelebAMask-HQ/CelebAMask-HQ-mask-anno'  # per-attribute masks, in sub-folders named 0..14
+mask_path = '/home/zll/data/CelebAMask-HQ/mask'  # output folder for the merged label maps
+counter = 0  # attribute files that were found on disk
+total = 0  # attribute files that were looked for
+for i in range(15):  # 15 annotation folders, 2000 image ids each
+ # Position in this list (starting at 1) is the label value written into the merged mask.
+ atts = ['skin', 'l_brow', 'r_brow', 'l_eye', 'r_eye', 'eye_g', 'l_ear', 'r_ear', 'ear_r',
+ 'nose', 'mouth', 'u_lip', 'l_lip', 'neck', 'neck_l', 'cloth', 'hair', 'hat']
+
+ for j in range(i * 2000, (i + 1) * 2000):
+ # np is not imported by name in this file; presumably it arrives via `from transform import *` - confirm
+ mask = np.zeros((512, 512))  # label 0 where no attribute mask applies
+
+ for l, att in enumerate(atts, 1):
+ total += 1
+ file_name = ''.join([str(j).rjust(5, '0'), '_', att, '.png'])  # e.g. 00012_skin.png
+ path = osp.join(face_sep_mask, str(i), file_name)
+
+ if os.path.exists(path):  # missing attribute files are skipped silently
+ counter += 1
+ sep_mask = np.array(Image.open(path).convert('P'))  # palette-mode conversion; values are palette indices
+ # print(np.unique(sep_mask))
+ # Later attributes overwrite earlier ones where masks overlap.
+ mask[sep_mask == 225] = l  # NOTE(review): 225 is presumably the palette index the foreground maps to after convert('P') - confirm
+ cv2.imwrite('{}/{}.png'.format(mask_path, j), mask)
+ print(j)
+
+print(counter, total)
\ No newline at end of file
diff --git a/head_segmentation/face_parsing/resnet.py b/head_segmentation/face_parsing/resnet.py
new file mode 100644
index 0000000..aa2bf95
--- /dev/null
+++ b/head_segmentation/face_parsing/resnet.py
@@ -0,0 +1,109 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.model_zoo as modelzoo
+
+# from modules.bn import InPlaceABNSync as BatchNorm2d
+
+resnet18_url = 'https://download.pytorch.org/models/resnet18-5c106cde.pth'
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ """3x3 convolution with padding 1 and no bias term"""
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+ padding=1, bias=False)
+
+
+class BasicBlock(nn.Module):  # residual block: two 3x3 conv + BN layers added to a shortcut, then ReLU
+ def __init__(self, in_chan, out_chan, stride=1):
+ super(BasicBlock, self).__init__()
+ self.conv1 = conv3x3(in_chan, out_chan, stride)  # only the first conv applies the stride
+ self.bn1 = nn.BatchNorm2d(out_chan)
+ self.conv2 = conv3x3(out_chan, out_chan)
+ self.bn2 = nn.BatchNorm2d(out_chan)
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = None  # identity shortcut unless the shape changes
+ if in_chan != out_chan or stride != 1:  # 1x1 conv + BN projection so the shortcut matches the residual
+ self.downsample = nn.Sequential(
+ nn.Conv2d(in_chan, out_chan,
+ kernel_size=1, stride=stride, bias=False),
+ nn.BatchNorm2d(out_chan),
+ )
+
+ def forward(self, x):
+ residual = self.conv1(x)
+ residual = F.relu(self.bn1(residual))
+ residual = self.conv2(residual)
+ residual = self.bn2(residual)  # no activation before the addition
+
+ shortcut = x
+ if self.downsample is not None:
+ shortcut = self.downsample(x)
+
+ out = shortcut + residual
+ out = self.relu(out)
+ return out
+
+
+def create_layer_basic(in_chan, out_chan, bnum, stride=1):  # nn.Sequential of bnum BasicBlocks
+ layers = [BasicBlock(in_chan, out_chan, stride=stride)]  # the first block changes channels and applies the stride
+ for i in range(bnum-1):  # remaining blocks keep out_chan channels at stride 1
+ layers.append(BasicBlock(out_chan, out_chan, stride=1))
+ return nn.Sequential(*layers)
+
+
+class Resnet18(nn.Module):  # ResNet-18 feature extractor without a classifier head; returns three feature maps
+ def __init__(self):
+ super(Resnet18, self).__init__()
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
+ bias=False)
+ self.bn1 = nn.BatchNorm2d(64)
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+ self.layer1 = create_layer_basic(64, 64, bnum=2, stride=1)
+ self.layer2 = create_layer_basic(64, 128, bnum=2, stride=2)
+ self.layer3 = create_layer_basic(128, 256, bnum=2, stride=2)
+ self.layer4 = create_layer_basic(256, 512, bnum=2, stride=2)
+ self.init_weight()  # constructing the model fetches weights from resnet18_url via model_zoo
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = F.relu(self.bn1(x))
+ x = self.maxpool(x)
+
+ x = self.layer1(x)
+ feat8 = self.layer2(x) # 1/8
+ feat16 = self.layer3(feat8) # 1/16
+ feat32 = self.layer4(feat16) # 1/32
+ return feat8, feat16, feat32
+
+ def init_weight(self):  # load the weights from resnet18_url, skipping the classifier
+ state_dict = modelzoo.load_url(resnet18_url)
+ self_state_dict = self.state_dict()
+ for k, v in state_dict.items():
+ if 'fc' in k: continue  # keys containing 'fc' are dropped; this model defines no fc layer
+ self_state_dict.update({k: v})
+ self.load_state_dict(self_state_dict)
+
+ def get_params(self):  # returns (wd_params, nowd_params): parameters with and without weight decay
+ wd_params, nowd_params = [], []
+ for name, module in self.named_modules():
+ if isinstance(module, (nn.Linear, nn.Conv2d)):  # conv/linear weights get weight decay, their biases do not
+ wd_params.append(module.weight)
+ if not module.bias is None:
+ nowd_params.append(module.bias)
+ elif isinstance(module, nn.BatchNorm2d):  # all BatchNorm parameters are exempt from weight decay
+ nowd_params += list(module.parameters())
+ return wd_params, nowd_params
+
+
+if __name__ == "__main__":  # smoke test: build the net, run a random batch, print the three feature-map sizes
+ net = Resnet18()
+ x = torch.randn(16, 3, 224, 224)
+ out = net(x)
+ print(out[0].size())
+ print(out[1].size())
+ print(out[2].size())
+ net.get_params()  # return value is discarded; only checks that the call succeeds
diff --git a/head_segmentation/face_parsing/test.py b/head_segmentation/face_parsing/test.py
new file mode 100644
index 0000000..ada286f
--- /dev/null
+++ b/head_segmentation/face_parsing/test.py
@@ -0,0 +1,93 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+from logger import setup_logger
+from model import BiSeNet
+
+import torch
+
+import os
+import os.path as osp
+import numpy as np
+from PIL import Image
+import torchvision.transforms as transforms
+import cv2
+
+def vis_parsing_maps(im, parsing_anno, stride, save_im=False, save_path='vis_results/parsing_map_on_im.jpg'):  # overlay a colour-coded label map on im; returns None
+ # Colour table indexed by label value: 24 entries, so labels up to 23 can be drawn
+ part_colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0],
+ [255, 0, 85], [255, 0, 170],
+ [0, 255, 0], [85, 255, 0], [170, 255, 0],
+ [0, 255, 85], [0, 255, 170],
+ [0, 0, 255], [85, 0, 255], [170, 0, 255],
+ [0, 85, 255], [0, 170, 255],
+ [255, 255, 0], [255, 255, 85], [255, 255, 170],
+ [255, 0, 255], [255, 85, 255], [255, 170, 255],
+ [0, 255, 255], [85, 255, 255], [170, 255, 255]]
+
+ im = np.array(im)
+ vis_im = im.copy().astype(np.uint8)
+ vis_parsing_anno = parsing_anno.copy().astype(np.uint8)
+ vis_parsing_anno = cv2.resize(vis_parsing_anno, None, fx=stride, fy=stride, interpolation=cv2.INTER_NEAREST)  # nearest-neighbour so label values are not blended
+ vis_parsing_anno_color = np.zeros((vis_parsing_anno.shape[0], vis_parsing_anno.shape[1], 3)) + 255  # start from an all-255 (white) canvas
+
+ num_of_class = np.max(vis_parsing_anno)  # highest label present in this map
+
+ for pi in range(1, num_of_class + 1):  # label 0 is not painted and stays white
+ index = np.where(vis_parsing_anno == pi)
+ vis_parsing_anno_color[index[0], index[1], :] = part_colors[pi]
+
+ vis_parsing_anno_color = vis_parsing_anno_color.astype(np.uint8)
+ # print(vis_parsing_anno_color.shape, vis_im.shape)
+ vis_im = cv2.addWeighted(cv2.cvtColor(vis_im, cv2.COLOR_RGB2BGR), 0.4, vis_parsing_anno_color, 0.6, 0)  # blend: 40% image (converted RGB -> BGR), 60% colour map
+
+ # Save result or not
+ if save_im:
+ cv2.imwrite(save_path[:-4] +'.png', vis_parsing_anno)  # raw label map; the [:-4] slice assumes a 3-character file extension
+ cv2.imwrite(save_path, vis_im, [int(cv2.IMWRITE_JPEG_QUALITY), 100])  # blended overlay at save_path
+
+ # return vis_im
+
+def evaluate(respth='./res/test_res', dspth='./data', cp='model_final_diss.pth'):  # run BiSeNet on every file in dspth and save visualisations under respth
+
+ if not os.path.exists(respth):
+ os.makedirs(respth)
+
+ n_classes = 19
+ net = BiSeNet(n_classes=n_classes)
+ net.cuda()  # requires a CUDA device
+ save_pth = osp.join('res/cp', cp)  # the checkpoint is always looked up under the relative folder res/cp
+ net.load_state_dict(torch.load(save_pth))
+ net.eval()
+
+ to_tensor = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),  # per-channel mean and std
+ ])
+ with torch.no_grad():
+ for image_path in os.listdir(dspth):  # every directory entry is opened as an image; nothing is filtered out
+ image = Image.open(osp.join(dspth, image_path))
+ # image = img.resize((512, 512), Image.BILINEAR)
+ img = to_tensor(image)
+ img = torch.unsqueeze(img, 0)  # add a leading batch dimension
+ img = img.cuda()
+ import time
+ start = time.time()
+ out = net(img)[0]  # first element of the model's output
+ print('Time: ', time.time() - start)
+ parsing = out.squeeze(0).cpu().numpy().argmax(0)  # per-pixel argmax over the first axis after dropping the batch dimension
+ # print(parsing)
+ print(np.unique(parsing))
+
+ vis_parsing_maps(image, parsing, stride=1, save_im=True, save_path=osp.join(respth, image_path))  # output file keeps the input file name
+
+
+
+
+
+
+
+if __name__ == "__main__":  # script entry point; the dataset path below is a machine-specific absolute path
+ evaluate(dspth='/home/abhinav_ayalur_gmail_com/examples/head_segmentation/face_parsing/face-parsing.PyTorch/hard', cp='79999_iter.pth')
+
+
diff --git a/head_segmentation/face_parsing/train.py b/head_segmentation/face_parsing/train.py
new file mode 100644
index 0000000..580a0ca
--- /dev/null
+++ b/head_segmentation/face_parsing/train.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+from logger import setup_logger
+from model import BiSeNet
+from face_dataset import FaceMask
+from loss import OhemCELoss
+from evaluate import evaluate
+from optimizer import Optimizer
+import cv2
+import numpy as np
+
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader
+import torch.nn.functional as F
+import torch.distributed as dist
+
+import os
+import os.path as osp
+import logging
+import time
+import datetime
+import argparse
+
+
+# Directory that receives logs and model files; created at import time.
+respth = './res'
+# This module is started once per GPU, so several processes can reach this
+# line at the same moment. exist_ok=True makes the creation race-free, where
+# a separate exists() check followed by makedirs() could raise FileExistsError.
+os.makedirs(respth, exist_ok=True)
+# Root logger; handlers are attached later by setup_logger() inside train().
+logger = logging.getLogger()
+
+
+def parse_args():
+    """Parse the command-line options of the training script.
+
+    Returns:
+        The ``argparse`` namespace; its only option is the integer
+        ``local_rank``, which defaults to -1 when ``--local_rank`` is absent.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--local_rank', dest='local_rank', type=int, default=-1)
+    parsed = parser.parse_args()
+    return parsed
+
+
+def train():
+    """Train BiSeNet in a distributed (NCCL) process group.
+
+    Reads ``--local_rank`` from the command line, binds this process to that
+    CUDA device and joins a process group whose world size is the number of
+    visible GPUs. It then runs ``max_iter`` optimisation steps, summing three
+    ``OhemCELoss`` criteria (one per network output). Every ``msg_iter``
+    steps the averaged loss, learning rate and an ETA are logged. On rank 0
+    a checkpoint is written to ``./res/cp`` every 5000 iterations, followed
+    by a call to ``evaluate``; the final weights go to
+    ``<respth>/model_final_diss.pth``.
+
+    NOTE(review): ``data_root`` and the evaluation image directory are
+    hard-coded absolute paths and must exist on the machine running this.
+    """
+    args = parse_args()
+    torch.cuda.set_device(args.local_rank)
+    dist.init_process_group(
+        backend='nccl',
+        init_method='tcp://127.0.0.1:33241',
+        world_size=torch.cuda.device_count(),
+        rank=args.local_rank,
+    )
+    setup_logger(respth)
+
+    # dataset
+    n_classes = 19
+    n_img_per_gpu = 16
+    n_workers = 8
+    cropsize = [448, 448]
+    data_root = '/home/zll/data/CelebAMask-HQ/'
+
+    ds = FaceMask(data_root, cropsize=cropsize, mode='train')
+    sampler = torch.utils.data.distributed.DistributedSampler(ds)
+    # shuffle stays False because ordering is delegated to the sampler.
+    dl = DataLoader(ds,
+                    batch_size=n_img_per_gpu,
+                    shuffle=False,
+                    sampler=sampler,
+                    num_workers=n_workers,
+                    pin_memory=True,
+                    drop_last=True)
+
+    # model
+    ignore_idx = -100
+    net = BiSeNet(n_classes=n_classes)
+    net.cuda()
+    net.train()
+    net = nn.parallel.DistributedDataParallel(net,
+                                              device_ids=[args.local_rank, ],
+                                              output_device=args.local_rank)
+    score_thres = 0.7
+    n_min = n_img_per_gpu * cropsize[0] * cropsize[1] // 16
+    LossP = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
+    Loss2 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
+    Loss3 = OhemCELoss(thresh=score_thres, n_min=n_min, ignore_lb=ignore_idx)
+
+    # optimizer
+    momentum = 0.9
+    weight_decay = 5e-4
+    lr_start = 1e-2
+    max_iter = 80000
+    power = 0.9
+    warmup_steps = 1000
+    warmup_start_lr = 1e-5
+    optim = Optimizer(
+        model=net.module,
+        lr0=lr_start,
+        momentum=momentum,
+        wd=weight_decay,
+        warmup_steps=warmup_steps,
+        warmup_start_lr=warmup_start_lr,
+        max_iter=max_iter,
+        power=power)
+
+    # train loop
+    msg_iter = 50
+    cp_iter = 5000
+    cp_dir = './res/cp'
+    loss_avg = []
+    st = glob_st = time.time()
+    diter = iter(dl)
+    epoch = 0
+    for it in range(max_iter):
+        try:
+            im, lb = next(diter)
+            if not im.size()[0] == n_img_per_gpu:
+                raise StopIteration
+        except StopIteration:
+            # Loader exhausted (or a short batch): start the next epoch and
+            # tell the sampler so it uses a new ordering.
+            epoch += 1
+            sampler.set_epoch(epoch)
+            diter = iter(dl)
+            im, lb = next(diter)
+        im = im.cuda()
+        lb = lb.cuda()
+        lb = torch.squeeze(lb, 1)
+
+        optim.zero_grad()
+        out, out16, out32 = net(im)
+        lossp = LossP(out, lb)
+        loss2 = Loss2(out16, lb)
+        loss3 = Loss3(out32, lb)
+        loss = lossp + loss2 + loss3
+        loss.backward()
+        optim.step()
+
+        loss_avg.append(loss.item())
+
+        # print training log message
+        if (it + 1) % msg_iter == 0:
+            loss_mean = sum(loss_avg) / len(loss_avg)
+            lr = optim.lr
+            ed = time.time()
+            t_intv, glob_t_intv = ed - st, ed - glob_st
+            # it + 1 iterations are finished at this point; using the
+            # completed count gives the right average step time and cannot
+            # divide by zero.
+            done = it + 1
+            eta = int((max_iter - done) * (glob_t_intv / done))
+            eta = str(datetime.timedelta(seconds=eta))
+            msg = ', '.join([
+                'it: {it}/{max_it}',
+                'lr: {lr:4f}',
+                'loss: {loss:.4f}',
+                'eta: {eta}',
+                'time: {time:.4f}',
+            ]).format(
+                it=it + 1,
+                max_it=max_iter,
+                lr=lr,
+                loss=loss_mean,
+                time=t_intv,
+                eta=eta
+            )
+            logger.info(msg)
+            loss_avg = []
+            st = ed
+
+        # Periodic checkpoint + evaluation, on rank 0 only.
+        if dist.get_rank() == 0 and (it + 1) % cp_iter == 0:
+            state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
+            # Nothing else creates the checkpoint directory, so without this
+            # the first torch.save fails on a fresh run.
+            os.makedirs(cp_dir, exist_ok=True)
+            cp_name = '{}_iter.pth'.format(it)
+            torch.save(state, osp.join(cp_dir, cp_name))
+            evaluate(dspth='/home/zll/data/CelebAMask-HQ/test-img', cp=cp_name)
+
+    # dump the final model
+    save_pth = osp.join(respth, 'model_final_diss.pth')
+    state = net.module.state_dict() if hasattr(net, 'module') else net.state_dict()
+    if dist.get_rank() == 0:
+        torch.save(state, save_pth)
+    logger.info('training done, model saved to: {}'.format(save_pth))
+
+
+# Script entry point: start training only when the file is executed directly,
+# not when it is imported.
+if __name__ == "__main__":
+ train()
diff --git a/head_segmentation/face_parsing/transform.py b/head_segmentation/face_parsing/transform.py
new file mode 100644
index 0000000..9479ae3
--- /dev/null
+++ b/head_segmentation/face_parsing/transform.py
@@ -0,0 +1,129 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+
+
+from PIL import Image
+import PIL.ImageEnhance as ImageEnhance
+import random
+import numpy as np
+
+class RandomCrop(object):
+    """Cut a random ``(W, H)`` window out of an image/label pair.
+
+    ``size`` is ``(W, H)``. The transform takes and returns a dict with the
+    keys ``'im'`` and ``'lb'``; both entries must report the same ``size``.
+    Inputs smaller than the window on either side are enlarged first
+    (bilinear for the image, nearest-neighbour for the label).
+    """
+
+    def __init__(self, size, *args, **kwargs):
+        self.size = size
+
+    def __call__(self, im_lb):
+        im = im_lb['im']
+        lb = im_lb['lb']
+        assert im.size == lb.size
+        W, H = self.size
+        w, h = im.size
+
+        if (W, H) == (w, h):
+            return dict(im=im, lb=lb)
+        if w < W or h < H:
+            # Scale by the larger of the two ratios so that BOTH sides reach
+            # the crop size. Picking the ratio from "w < h" alone could leave
+            # one side short for non-square crops; for square crops the two
+            # rules give the same factor.
+            scale = max(float(W) / w, float(H) / h)
+            # +1 keeps the resized side at or above the target after int().
+            w, h = int(scale * w + 1), int(scale * h + 1)
+            im = im.resize((w, h), Image.BILINEAR)
+            lb = lb.resize((w, h), Image.NEAREST)
+        # Random top-left corner such that the window stays inside the image.
+        sw, sh = random.random() * (w - W), random.random() * (h - H)
+        crop = int(sw), int(sh), int(sw) + W, int(sh) + H
+        return dict(
+            im=im.crop(crop),
+            lb=lb.crop(crop)
+        )
+
+
+class HorizontalFlip(object):
+    """Mirror an image/label pair left-to-right with probability ``p``.
+
+    The transform takes and returns a dict with the keys ``'im'`` and
+    ``'lb'``. When the pair is flipped, label ids that name a left/right
+    part are exchanged so they stay correct in the mirrored picture.
+    """
+
+    # atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r',
+    #         10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
+    # Label ids that have to trade places when the picture is mirrored.
+    _SWAP_PAIRS = ((2, 3), (4, 5), (7, 8))
+
+    def __init__(self, p=0.5, *args, **kwargs):
+        self.p = p
+
+    @staticmethod
+    def _swap_sides(lb):
+        """Return an array copy of ``lb`` with left/right label ids exchanged."""
+        # Masks must be computed on an array: comparing a PIL image with an
+        # int yields a plain False, which selects nothing and silently skips
+        # the swap. They are also taken from the untouched source so the two
+        # halves of a swap cannot overwrite each other.
+        src = np.array(lb)
+        dst = src.copy()
+        for left, right in HorizontalFlip._SWAP_PAIRS:
+            dst[src == left] = right
+            dst[src == right] = left
+        return dst
+
+    def __call__(self, im_lb):
+        if random.random() > self.p:
+            return im_lb
+        im = im_lb['im']
+        lb = im_lb['lb']
+        flip_lb = Image.fromarray(self._swap_sides(lb))
+        return dict(im=im.transpose(Image.FLIP_LEFT_RIGHT),
+                    lb=flip_lb.transpose(Image.FLIP_LEFT_RIGHT),
+                    )
+
+
+class RandomScale(object):
+    """Resize an image/label pair by a factor drawn from ``scales``.
+
+    The transform takes and returns a dict with the keys ``'im'`` and
+    ``'lb'``. The image is resized bilinearly, the label with
+    nearest-neighbour sampling.
+    """
+
+    def __init__(self, scales=(1, ), *args, **kwargs):
+        self.scales = scales
+
+    def __call__(self, im_lb):
+        image, label = im_lb['im'], im_lb['lb']
+        width, height = image.size
+        factor = random.choice(self.scales)
+        target = (int(width * factor), int(height * factor))
+        resized_image = image.resize(target, Image.BILINEAR)
+        resized_label = label.resize(target, Image.NEAREST)
+        return {'im': resized_image, 'lb': resized_label}
+
+
+class ColorJitter(object):
+    """Randomly perturb brightness, contrast and saturation of the image.
+
+    Each argument is a jitter amount ``a``; when it is positive the matching
+    enhancement factor is drawn uniformly from ``[max(1 - a, 0), 1 + a]``.
+    An argument that is ``None`` or not positive disables that enhancement.
+    Only the ``'im'`` entry of the input dict is changed; ``'lb'`` is passed
+    through.
+    """
+
+    def __init__(self, brightness=None, contrast=None, saturation=None, *args, **kwargs):
+        # Always define all three attributes so __call__ cannot hit an
+        # AttributeError when an argument was omitted or not positive.
+        self.brightness = self._factor_range(brightness)
+        self.contrast = self._factor_range(contrast)
+        self.saturation = self._factor_range(saturation)
+
+    @staticmethod
+    def _factor_range(amount):
+        """Return the ``[low, high]`` factor range, or None when disabled."""
+        if amount is None or not amount > 0:
+            return None
+        return [max(1 - amount, 0), 1 + amount]
+
+    def __call__(self, im_lb):
+        im = im_lb['im']
+        lb = im_lb['lb']
+        # Draw every enabled factor first, in brightness/contrast/saturation
+        # order, then apply them in that same order.
+        ranges = (self.brightness, self.contrast, self.saturation)
+        factors = [
+            None if rng is None else random.uniform(rng[0], rng[1])
+            for rng in ranges
+        ]
+        if factors[0] is not None:
+            im = ImageEnhance.Brightness(im).enhance(factors[0])
+        if factors[1] is not None:
+            im = ImageEnhance.Contrast(im).enhance(factors[1])
+        if factors[2] is not None:
+            im = ImageEnhance.Color(im).enhance(factors[2])
+        return dict(im=im,
+                    lb=lb,
+                    )
+
+
+class MultiScale(object):
+    """Produce one bilinearly resized copy of an image per ratio in ``scales``."""
+
+    def __init__(self, scales):
+        self.scales = scales
+
+    def __call__(self, img):
+        width, height = img.size
+        # Work out every target size first, then resize in the same order.
+        targets = [(int(width * ratio), int(height * ratio)) for ratio in self.scales]
+        return [img.resize(target, Image.BILINEAR) for target in targets]
+
+
+class Compose(object):
+    """Chain callables: each one receives the previous one's return value."""
+
+    def __init__(self, do_list):
+        self.do_list = do_list
+
+    def __call__(self, im_lb):
+        result = im_lb
+        for transform in self.do_list:
+            result = transform(result)
+        return result
+
+
+
+
+# Manual smoke script: builds a few transforms and opens a sample image/label
+# pair from the relative paths below. The objects are only created here;
+# nothing is applied to the images in these lines.
+if __name__ == '__main__':
+ flip = HorizontalFlip(p = 1)
+ crop = RandomCrop((321, 321))
+ rscales = RandomScale((0.75, 1.0, 1.5, 1.75, 2.0))
+ img = Image.open('data/img.jpg')
+ lb = Image.open('data/label.png')