#!/usr/bin/env python
'''
You can download the converted .pb model from https://www.dropbox.com/s/qag9vzambhhkvxr/lip_jppnet_384.pb?dl=0
or convert the model yourself.

Follow these steps if you want to convert the original model yourself:
    To get the original .meta pre-trained model, download https://drive.google.com/file/d/1BFVXgeln-bek8TCbRjN6utPAgRE0LJZg/view
    To convert the .meta model to .pb correctly, download the original repository https://github.com/Engineering-Course/LIP_JPPNet
    and modify the script evaluate_parsing_JPPNet-s2.py as follows:
    1. Remove the preprocessing that creates image_batch_origin:
        with tf.name_scope("create_inputs"):
            ...
       and add instead:
        image_batch_origin = tf.placeholder(tf.float32, shape=(2, None, None, 3), name='input')

    2. Create the input:
        image = cv2.imread(path/to/image)
        image_rev = np.flip(image, axis=1)
        input = np.stack([image, image_rev], axis=0)

    3. Hardcode the image_h and image_w shapes to fix the output shapes.
       We use the default INPUT_SIZE = (384, 384) from evaluate_parsing_JPPNet-s2.py:
        parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, INPUT_SIZE),
                                                tf.image.resize_images(parsing_out1_075, INPUT_SIZE),
                                                tf.image.resize_images(parsing_out1_125, INPUT_SIZE)]), axis=0)
       Do the same for parsing_out2 and parsing_out3.

    4. Remove the postprocessing. The last net operation is:
        raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
       Change the inference call to:
        parsing_ = sess.run(raw_output, feed_dict={'input:0': input})

    5. To save the model, add after sess.run(...):
        input_graph_def = tf.get_default_graph().as_graph_def()
        output_node = "Mean_3"
        output_graph_def = tf.graph_util.convert_variables_to_constants(sess, input_graph_def, [output_node])

        output_graph = "LIP_JPPNet.pb"
        with tf.gfile.GFile(output_graph, "wb") as f:
            f.write(output_graph_def.SerializeToString())
'''

import argparse
import os.path

import numpy as np
import cv2 as cv


backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV,
            cv.dnn.DNN_BACKEND_VKCOM, cv.dnn.DNN_BACKEND_CUDA)
targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD,
           cv.dnn.DNN_TARGET_HDDL, cv.dnn.DNN_TARGET_VULKAN, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16)
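

# Optional sanity check for a freshly converted model (a sketch: the helper name
# and the 'LIP_JPPNet.pb' path are illustrative, and the expected output shape
# assumes the hardcoded INPUT_SIZE = (384, 384) from the conversion steps above).
def check_converted_model(model_path='LIP_JPPNet.pb'):
    dummy = np.zeros((384, 384, 3), dtype=np.uint8)
    blob = cv.dnn.blobFromImages([dummy, dummy])  # (2, 3, 384, 384), as in preprocess() below
    net = cv.dnn.readNet(model_path)
    net.setInput(blob)
    out = net.forward()
    # Two batch images with 20 LIP class score maps of 384x384 each are expected
    assert out.shape == (2, 20, 384, 384), 'unexpected output shape: %s' % (out.shape,)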


def preprocess(image):
    """
    Create a 4-dimensional blob from the image and its horizontally flipped copy
    :param image: input image
    """
    image_rev = np.flip(image, axis=1)
    input_blob = cv.dnn.blobFromImages([image, image_rev], mean=(104.00698793, 116.66876762, 122.67891434))
    return input_blob


def run_net(input_blob, model_path, backend, target):
    """
    Read the network and run inference
    :param input_blob: blob with the original and flipped images
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation device
    """
    net = cv.dnn.readNet(model_path)
    net.setPreferableBackend(backend)
    net.setPreferableTarget(target)
    net.setInput(input_blob)
    out = net.forward()
    return out


def postprocess(out, input_shape):
    """
    Create a grayscale human segmentation map
    :param out: network output
    :param input_shape: input image width and height
    """
    # LIP classes
    # 0  Background
    # 1  Hat
    # 2  Hair
    # 3  Glove
    # 4  Sunglasses
    # 5  UpperClothes
    # 6  Dress
    # 7  Coat
    # 8  Socks
    # 9  Pants
    # 10 Jumpsuits
    # 11 Scarf
    # 12 Skirt
    # 13 Face
    # 14 LeftArm
    # 15 RightArm
    # 16 LeftLeg
    # 17 RightLeg
    # 18 LeftShoe
    # 19 RightShoe
    head_output, tail_output = np.split(out, indices_or_sections=[1], axis=0)
    head_output = head_output.squeeze(0)
    tail_output = tail_output.squeeze(0)

    head_output = np.stack([cv.resize(img, dsize=input_shape) for img in head_output])
    tail_output = np.stack([cv.resize(img, dsize=input_shape) for img in tail_output])

    # The second batch image was flipped, so flip its score maps back and swap
    # the left/right paired classes (arms, legs, shoes) before averaging
    tail_list = np.split(tail_output, indices_or_sections=list(range(1, 20)), axis=0)
    tail_list = [arr.squeeze(0) for arr in tail_list]
    tail_list_rev = [tail_list[i] for i in range(14)]
    tail_list_rev.extend([tail_list[15], tail_list[14], tail_list[17], tail_list[16], tail_list[19], tail_list[18]])
    tail_output_rev = np.stack(tail_list_rev, axis=0)
    tail_output_rev = np.flip(tail_output_rev, axis=2)

    raw_output_all = np.mean(np.stack([head_output, tail_output_rev], axis=0), axis=0, keepdims=True)
    raw_output_all = np.argmax(raw_output_all, axis=1)
    raw_output_all = raw_output_all.transpose(1, 2, 0)
    return raw_output_all


def decode_labels(gray_image):
    """
    Colorize the segmentation map according to the LIP labels
    :param gray_image: grayscale human segmentation result
    """
    height, width, _ = gray_image.shape
    colors = [(0, 0, 0), (128, 0, 0), (255, 0, 0), (0, 85, 0), (170, 0, 51), (255, 85, 0),
              (0, 0, 85), (0, 119, 221), (85, 85, 0), (0, 85, 85), (85, 51, 0), (52, 86, 128),
              (0, 128, 0), (0, 0, 255), (51, 170, 221), (0, 255, 255), (85, 255, 170),
              (170, 255, 85), (255, 255, 0), (255, 170, 0)]

    segm = np.stack([colors[idx] for idx in gray_image.flatten()])
    segm = segm.reshape(height, width, 3).astype(np.uint8)
    segm = cv.cvtColor(segm, cv.COLOR_BGR2RGB)
    return segm


def parse_human(image, model_path, backend=cv.dnn.DNN_BACKEND_OPENCV, target=cv.dnn.DNN_TARGET_CPU):
    """
    Prepare the input, run the net and postprocess the output to parse a human
    :param image: input image
    :param model_path: path to JPPNet model
    :param backend: computation backend
    :param target: computation target
    """
    input_blob = preprocess(image)
    input_h, input_w = input_blob.shape[2:]
    output = run_net(input_blob, model_path, backend, target)
    grayscale_out = postprocess(output, (input_w, input_h))
    segmentation = decode_labels(grayscale_out)
    return segmentation
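

# Example of calling parse_human directly (a sketch; the image and model paths
# are placeholders):
#   image = cv.imread('person.jpg')
#   segm = parse_human(image, 'lip_jppnet_384.pb')
#   cv.imwrite('segmentation.png', segm)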


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Use this script to run human parsing using JPPNet',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input', '-i', required=True, help='Path to input image.')
    parser.add_argument('--model', '-m', default='lip_jppnet_384.pb', help='Path to pb model.')
    parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
                        help="Choose one of computation backends: "
                             "%d: automatically (by default), "
                             "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
                             "%d: OpenCV implementation, "
                             "%d: VKCOM, "
                             "%d: CUDA" % backends)
    parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
                        help='Choose one of target computation devices: '
                             '%d: CPU target (by default), '
                             '%d: OpenCL, '
                             '%d: OpenCL fp16 (half-float precision), '
                             '%d: NCS2 VPU, '
                             '%d: HDDL VPU, '
                             '%d: Vulkan, '
                             '%d: CUDA, '
                             '%d: CUDA fp16 (half-float precision)' % targets)
    args, _ = parser.parse_known_args()

    if not os.path.isfile(args.model):
        raise OSError("Model file does not exist")

    image = cv.imread(args.input)
    if image is None:
        raise OSError("Failed to read the input image")

    output = parse_human(image, args.model, args.backend, args.target)
    winName = 'Deep learning human parsing in OpenCV'
    cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
    cv.imshow(winName, output)
    cv.waitKey()
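
# Example invocation (assuming this file is saved as human_parsing.py and the
# converted model lies next to it; both names are placeholders):
#   python human_parsing.py -i person.jpg -m lip_jppnet_384.pb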