1# Licensed to the Apache Software Foundation (ASF) under one
2# or more contributor license agreements.  See the NOTICE file
3# distributed with this work for additional information
4# regarding copyright ownership.  The ASF licenses this file
5# to you under the Apache License, Version 2.0 (the
6# "License"); you may not use this file except in compliance
7# with the License.  You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing,
12# software distributed under the License is distributed on an
13# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14# KIND, either express or implied.  See the License for the
15# specific language governing permissions and limitations
16# under the License.
17""" Helper classes for multiprocess captcha image generation
This module also provides a script for saving captcha images to a file using the CLI.
19"""
20
21from __future__ import print_function
22import random
23
24import numpy as np
25from captcha.image import ImageCaptcha
26import cv2
27from multiproc_data import MPData
28
29
class CaptchaGen(object):
    """Generates greyscale captcha images of a fixed size

    Renders the text with ``ImageCaptcha`` and post-processes the encoded
    result with OpenCV into a normalized numpy array.
    """
    def __init__(self, h, w, font_paths):
        """
        Parameters
        ----------
        h: int
            Height of the generated images
        w: int
            Width of the generated images
        font_paths: list of str
            List of all fonts in ttf format
        """
        self.captcha = ImageCaptcha(fonts=font_paths)
        self.h = h
        self.w = w

    def image(self, captcha_str):
        """Generate a greyscale captcha image representing number string

        Parameters
        ----------
        captcha_str: str
            string of characters for captcha image

        Returns
        -------
        numpy.ndarray
            Generated greyscale image of shape (h, w) in np.ndarray float type
            with values normalized to [0, 1]
        """
        encoded = self.captcha.generate(captcha_str)
        # np.frombuffer is the supported replacement for the deprecated
        # binary mode of np.fromstring; it views the bytes without copying.
        raw = np.frombuffer(encoded.getvalue(), dtype='uint8')
        img = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
        # cv2.resize takes its target size as (width, height), so this yields
        # an array of shape (w, h). The transpose below turns it into (h, w),
        # i.e. the horizontal axis of the rendered captcha becomes axis 0.
        img = cv2.resize(img, (self.h, self.w))
        img = img.transpose(1, 0)
        # Scale 8-bit pixel values into [0, 1]
        img = np.multiply(img, 1 / 255.0)
        return img
68
69
class DigitCaptcha(object):
    """Provides shape and get() interface for digit-captcha image generation
    """
    def __init__(self, font_paths, h, w, num_digit_min, num_digit_max):
        """
        Parameters
        ----------
        font_paths: list of str
            List of path to ttf font files
        h: int
            height of the generated image
        w: int
            width of the generated image
        num_digit_min: int
            minimum number of digits in generated captcha image
        num_digit_max: int
            maximum number of digits in generated captcha image
        """
        self.num_digit_min = num_digit_min
        self.num_digit_max = num_digit_max
        self.captcha = CaptchaGen(h=h, w=w, font_paths=font_paths)

    @property
    def shape(self):
        """Returns shape of the image data generated

        Returns
        -------
        tuple(int, int)
            (h, w) as passed to the constructor
        """
        return self.captcha.h, self.captcha.w

    def get(self):
        """Generate a new random captcha sample

        Returns
        -------
        (numpy.ndarray, str)
            Tuple of captcha image, normalized to [0, 1], and the character
            string of digits used to generate the image
        """
        return self._gen_sample()

    @staticmethod
    def get_rand(num_digit_min, num_digit_max):
        """Generates a character string of random digits. Number of digits is
        between num_digit_min and num_digit_max (both inclusive)

        Parameters
        ----------
        num_digit_min: int
            minimum number of digits in the string
        num_digit_max: int
            maximum number of digits in the string

        Returns
        -------
        str
        """
        num_digits = random.randint(num_digit_min, num_digit_max)
        # One randint call per digit, in order, so seeded runs stay reproducible
        return "".join(str(random.randint(0, 9)) for _ in range(num_digits))

    def _gen_sample(self):
        """Generate a random captcha image sample

        Returns
        -------
        (numpy.ndarray, str)
            Tuple of image (numpy ndarray) and character string of digits used to generate the image
        """
        num_str = self.get_rand(self.num_digit_min, self.num_digit_max)
        return self.captcha.image(num_str), num_str
135
136
class MPDigitCaptcha(DigitCaptcha):
    """Digit-captcha generator whose samples are produced through ``MPData``
    """
    def __init__(self, font_paths, h, w, num_digit_min, num_digit_max, num_processes, max_queue_size):
        """Parameters
        ----------
        font_paths: list of str
            Paths to the ttf font files used for rendering
        h: int
            Height of each generated image
        w: int
            Width of each generated image
        num_digit_min: int
            Smallest number of digits a captcha may contain
        num_digit_max: int
            Largest number of digits a captcha may contain
        num_processes: int
            Number of processes, forwarded to ``MPData``
        max_queue_size: int
            Maximum queue size, forwarded to ``MPData``
        """
        super(MPDigitCaptcha, self).__init__(
            font_paths=font_paths,
            h=h,
            w=w,
            num_digit_min=num_digit_min,
            num_digit_max=num_digit_max,
        )
        # MPData is handed the sample generator inherited from DigitCaptcha
        sample_fn = self._gen_sample
        self.mp_data = MPData(num_processes, max_queue_size, sample_fn)

    def start(self):
        """Start sample generation by delegating to ``MPData.start``"""
        self.mp_data.start()

    def get(self):
        """Fetch the next sample from ``MPData``

        Returns
        -------
        object
            Whatever ``MPData.get`` returns; presumably the (image, digit
            string) tuple produced by ``_gen_sample`` -- confirm against MPData
        """
        return self.mp_data.get()

    def reset(self):
        """Reset the generator by delegating to ``MPData.reset``"""
        self.mp_data.reset()
178
179
if __name__ == '__main__':
    import argparse

    def main():
        """Program entry point

        Renders a single captcha for the digits given with --num (or for a
        random string of 3 to 4 digits) using the supplied font, and writes
        it to the output file as a greyscale image.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument("font_path", help="Path to ttf font file")
        parser.add_argument("output", help="Output filename including extension (e.g. 'sample.jpg')")
        parser.add_argument("--num", help="Up to 4 digit number [Default: random]")
        args = parser.parse_args()

        # Reject non-digit input before doing any work: the summary printed at
        # the end converts every character with int() and would otherwise
        # crash only after the image had already been written.
        if args.num and not all(c in "0123456789" for c in args.num):
            parser.error("--num must consist of digits only, got {!r}".format(args.num))

        captcha = ImageCaptcha(fonts=[args.font_path])
        captcha_str = args.num if args.num else DigitCaptcha.get_rand(3, 4)
        encoded = captcha.generate(captcha_str)
        # np.frombuffer is the supported replacement for the deprecated
        # binary mode of np.fromstring.
        raw = np.frombuffer(encoded.getvalue(), dtype='uint8')
        img = cv2.imdecode(raw, cv2.IMREAD_GRAYSCALE)
        # cv2.imwrite signals failure through its return value; do not report
        # success when nothing was written.
        if not cv2.imwrite(args.output, img):
            raise SystemExit("Failed to write captcha image to {}".format(args.output))
        print("Captcha image with digits {} written to {}".format([int(c) for c in captcha_str], args.output))

    main()
200