# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
""" Helper classes for multiprocess captcha image generation
This module also provides script for saving captcha images to file using CLI.
"""

from __future__ import print_function
import random

import numpy as np
from captcha.image import ImageCaptcha
import cv2
from multiproc_data import MPData


class CaptchaGen(object):
    """Generates a captcha image
    """
    def __init__(self, h, w, font_paths):
        """
        Parameters
        ----------
        h: int
            Height of the generated images
        w: int
            Width of the generated images
        font_paths: list of str
            List of all fonts in ttf format
        """
        self.captcha = ImageCaptcha(fonts=font_paths)
        self.h = h
        self.w = w

    def image(self, captcha_str):
        """Generate a greyscale captcha image representing number string

        Parameters
        ----------
        captcha_str: str
            string of characters for captcha image

        Returns
        -------
        numpy.ndarray
            Generated greyscale image in np.ndarray float type with values normalized to [0, 1].
            The array has shape (self.h, self.w).
        """
        encoded = self.captcha.generate(captcha_str)
        # np.frombuffer replaces the deprecated binary mode of np.fromstring;
        # it wraps the encoded bytes without copying them.
        raw = np.frombuffer(encoded.getvalue(), dtype='uint8')
        img = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
        # cv2.resize takes dsize as (columns, rows), so the array produced here has
        # shape (self.w, self.h); the transpose below turns that into (self.h, self.w),
        # i.e. the first axis of the result runs along the image's horizontal direction.
        img = cv2.resize(img, (self.h, self.w))
        img = img.transpose(1, 0)
        # Scale 8-bit pixel values into [0, 1] floats.
        img = np.multiply(img, 1 / 255.0)
        return img


class DigitCaptcha(object):
    """Provides shape() and get() interface for digit-captcha image generation
    """
    def __init__(self, font_paths, h, w, num_digit_min, num_digit_max):
        """
        Parameters
        ----------
        font_paths: list of str
            List of path to ttf font files
        h: int
            height of the generated image
        w: int
            width of the generated image
        num_digit_min: int
            minimum number of digits in generated captcha image
        num_digit_max: int
            maximum number of digits in generated captcha image
        """
        self.num_digit_min = num_digit_min
        self.num_digit_max = num_digit_max
        self.captcha = CaptchaGen(h=h, w=w, font_paths=font_paths)

    @property
    def shape(self):
        """Returns shape of the image data generated

        Returns
        -------
        tuple(int, int)
            The (h, w) pair the underlying CaptchaGen was configured with
        """
        return self.captcha.h, self.captcha.w

    def get(self):
        """Generate a new captcha sample

        Returns
        -------
        (numpy.ndarray, str)
            Tuple of captcha image, normalized to [0, 1], and the character string
            of digits used to generate it
        """
        return self._gen_sample()

    @staticmethod
    def get_rand(num_digit_min, num_digit_max):
        """Generates a character string of digits. Number of digits are
        between num_digit_min and num_digit_max (both inclusive)

        Parameters
        ----------
        num_digit_min: int
            minimum number of digits in the generated string
        num_digit_max: int
            maximum number of digits in the generated string

        Returns
        -------
        str
        """
        num_digits = random.randint(num_digit_min, num_digit_max)
        # The length is drawn first and then one digit at a time, so the sequence of
        # calls into `random` (and therefore seeded output) is unchanged.
        return "".join(str(random.randint(0, 9)) for _ in range(num_digits))

    def _gen_sample(self):
        """Generate a random captcha image sample

        Returns
        -------
        (numpy.ndarray, str)
            Tuple of image (numpy ndarray) and character string of digits used to generate the image
        """
        num_str = self.get_rand(self.num_digit_min, self.num_digit_max)
        return self.captcha.image(num_str), num_str


class MPDigitCaptcha(DigitCaptcha):
    """Handles multi-process captcha image generation
    """
    def __init__(self, font_paths, h, w, num_digit_min, num_digit_max, num_processes, max_queue_size):
        """
        Parameters
        ----------
        font_paths: list of str
            List of path to ttf font files
        h: int
            height of the generated image
        w: int
            width of the generated image
        num_digit_min: int
            minimum number of digits in generated captcha image
        num_digit_max: int
            maximum number of digits in generated captcha image
        num_processes: int
            Number of processes to spawn
        max_queue_size: int
            Maximum images in queue before processes wait
        """
        super(MPDigitCaptcha, self).__init__(font_paths, h, w, num_digit_min, num_digit_max)
        # MPData is handed the sample generator as its worker callable.
        self.mp_data = MPData(num_processes, max_queue_size, self._gen_sample)

    def start(self):
        """Starts the processes"""
        self.mp_data.start()

    def get(self):
        """Get a sample from the queue

        Returns
        -------
        Whatever MPData.get() yields -- presumably an (image, str) tuple produced by
        _gen_sample; confirm against multiproc_data.MPData.
        """
        return self.mp_data.get()

    def reset(self):
        """Resets the generator by stopping all processes"""
        self.mp_data.reset()


if __name__ == '__main__':
    import argparse

    def main():
        """Program entry point

        Renders a single greyscale captcha image and writes it to the requested file.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("font_path", help="Path to ttf font file")
        parser.add_argument("output", help="Output filename including extension (e.g. 'sample.jpg')")
        parser.add_argument("--num", help="Up to 4 digit number [Default: random]")
        args = parser.parse_args()

        # Reject non-digit input up front: the summary printed at the end converts every
        # character with int(), which would otherwise fail only after the file is written.
        if args.num and not args.num.isdigit():
            parser.error("--num must contain only digits")

        captcha = ImageCaptcha(fonts=[args.font_path])
        captcha_str = args.num if args.num else DigitCaptcha.get_rand(3, 4)
        encoded = captcha.generate(captcha_str)
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        raw = np.frombuffer(encoded.getvalue(), dtype='uint8')
        img = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
        # cv2.imwrite signals failure through its return value; do not report success then.
        if not cv2.imwrite(args.output, img):
            raise IOError("Failed to write captcha image to {}".format(args.output))
        print("Captcha image with digits {} written to {}".format([int(c) for c in captcha_str], args.output))

    main()