1
2from __future__ import unicode_literals
3from __future__ import print_function
4
5import io
6import json
7import os
8import pydoc
9import re
10import sys
11
# Locate the scikit-learn metadata file relative to this script and load it.
json_file = os.path.join(os.path.dirname(__file__), '../src/sklearn-metadata.json')
# Read through a context manager so the file handle is closed deterministically
# instead of being left to the garbage collector.
with open(json_file) as json_stream:
    json_data = json_stream.read()
json_root = json.loads(json_data)
15
def split_docstring(docstring):
    """Split a docstring into sections keyed by their underlined headers.

    A header is any line that is immediately followed by a line made up only
    of '-' characters (after stripping surrounding spaces). Text that comes
    before the first header is stored under the empty-string key.

    Returns a dict that maps each header (stripped of spaces) to the list of
    raw lines belonging to it. If a header occurs more than once, the last
    occurrence wins.
    """
    def is_underline(text):
        # Non-empty once spaces are removed, and nothing left once dashes are.
        stripped = text.strip(' ')
        return len(stripped) > 0 and len(stripped.strip('-')) == 0

    sections = {}
    title = ''
    body = []
    rows = docstring.split('\n')
    total = len(rows)
    position = 0
    while position < total:
        has_next = position + 1 < total
        if has_next and is_underline(rows[position + 1]):
            # Close the running section and start a new one; skip both the
            # header line and its underline.
            sections[title] = body
            title = rows[position].strip(' ')
            body = []
            position += 2
        else:
            body.append(rows[position])
            position += 1
    sections[title] = body
    return sections
33
def update_description(schema, lines):
    """Store the given lines as ``schema['description']``.

    Nothing happens when the lines joined together contain only spaces.
    Otherwise every line has its leading spaces removed (the ``lines`` list
    itself is updated in place) and the result is joined with newlines.
    """
    combined = ''.join(lines)
    if not combined.strip(' '):
        return
    # Slice assignment keeps the caller's list object and rewrites its items.
    lines[:] = [text.lstrip(' ') for text in lines]
    schema['description'] = '\n'.join(lines)
39
def update_attribute(schema, name, description, attribute_type, option, default_value):
    """Create or refresh the attribute called ``name`` in ``schema['attributes']``.

    The attribute list is created when missing. An existing attribute with the
    same name is updated in place; otherwise a new one is appended.

    Args:
        schema: Schema dictionary to update.
        name: Attribute name used to find or create the entry.
        description: Text stored under 'description' (always overwritten).
        attribute_type: One of 'float32', 'int32', 'string', 'boolean', or a
            falsy value when the type is unknown. Stored under 'type' when set.
        option: Stored under 'option' when truthy.
        default_value: Textual default taken from the docstring. When truthy
            it is converted according to ``attribute_type`` and stored under
            'default'; when falsy any existing 'default' is left untouched.

    Raises:
        Exception: For an unrecognized boolean default or an unknown type.
        ValueError: When a numeric default cannot be converted.
    """
    if 'attributes' not in schema:
        schema['attributes'] = []
    attribute = None
    for current_attribute in schema['attributes']:
        if 'name' in current_attribute and current_attribute['name'] == name:
            attribute = current_attribute
            break
    if not attribute:
        attribute = {}
        attribute['name'] = name
        schema['attributes'].append(attribute)
    attribute['description'] = description
    if attribute_type:
        attribute['type'] = attribute_type
    if option:
        attribute['option'] = option
    if not default_value:
        return
    # The literal 'auto' may be quoted either way in a docstring; every typed
    # branch accepts both spellings so none of them fails on '"auto"'.
    auto_literals = ("'auto'", '"auto"')
    if attribute_type == 'float32':
        if default_value == 'None':
            attribute['default'] = None
        elif default_value in auto_literals:
            attribute['default'] = default_value.strip("'").strip('"')
        else:
            attribute['default'] = float(default_value)
    elif attribute_type == 'int32':
        if default_value == 'None':
            attribute['default'] = None
        elif default_value in auto_literals:
            attribute['default'] = default_value.strip("'").strip('"')
        else:
            attribute['default'] = int(default_value)
    elif attribute_type == 'string':
        attribute['default'] = default_value.strip("'").strip('"')
    elif attribute_type == 'boolean':
        if default_value == 'True':
            attribute['default'] = True
        elif default_value == 'False':
            attribute['default'] = False
        elif default_value in auto_literals:
            attribute['default'] = default_value.strip("'").strip('"')
        else:
            raise Exception("Unknown boolean default value '" + str(default_value) + "'.")
    elif attribute_type:
        raise Exception("Unknown default type '" + attribute_type + "'.")
    elif default_value == 'None':
        attribute['default'] = None
    else:
        # Untyped defaults only have single quotes removed.
        attribute['default'] = default_value.strip("'")
91
def update_attributes(schema, lines):
    """Parse the lines of a 'Parameters' docstring section into attributes.

    Each parameter begins with a ``name : specification`` line. The
    specification is scanned for a simple type (float, bool/boolean,
    str/string, int), an 'optional' marker and a default value. The lines that
    follow, as long as they are blank or start with eight spaces, form the
    parameter description. ``update_attribute`` is called once per parameter.

    Args:
        schema: Schema dictionary forwarded to ``update_attribute``.
        lines: Raw lines of the section body, indentation included.

    Raises:
        Exception: When a parameter line does not match a supported layout.
    """
    # Both lookup tables are loop-invariant, so they are built once up front.
    type_map = {'float': 'float32', 'boolean': 'boolean', 'bool': 'boolean', 'string': 'string', 'int': 'int32'}
    # Specifications too irregular to parse; they yield no type, option or default.
    skipped_specifications = {
        "'sigmoid' or 'isotonic'",
        'instance BaseEstimator',
        'callable or None (default)',
        'str or callable',
        "string {'english'}, list, or None (default)",
        'tuple (min_n, max_n)',
        "string, {'word', 'char', 'char_wb'} or callable",
        "{'word', 'char'} or callable",
        "string, {'word', 'char'} or callable",
        'int, float, None or string',
        "int, float, None or str",
        "int or None, optional (default=None)",
        "'l1', 'l2' or None, optional",
        "{'strict', 'ignore', 'replace'} (default='strict')",
        "{'ascii', 'unicode', None} (default=None)",
        "string {'english'}, list, or None (default=None)",
        "tuple (min_n, max_n) (default=(1, 1))",
        "float in range [0.0, 1.0] or int (default=1.0)",
        "float in range [0.0, 1.0] or int (default=1)",
        "'l1', 'l2' or None, optional (default='l2')",
        "{'scale', 'auto'} or float, optional (default='scale')",
        "str {'auto', 'full', 'arpack', 'randomized'}",
        "str {'filename', 'file', 'content'}",
        "str, {'word', 'char', 'char_wb'} or callable",
        "str {'english'}, list, or None (default=None)",
        "{'word', 'char', 'char_wb'} or callable, default='word'",
        "{'scale', 'auto'} or float, default='scale'",
        "{'uniform', 'distance'} or callable, default='uniform'",
        "int, RandomState instance or None (default)",
    }
    random_state_prefix = 'int, RandomState instance or None,'
    index = 0
    while index < len(lines):
        line = lines[index]
        if line.endswith('.'):
            line = line[0:-1]
        colon = line.find(':')
        if colon == -1:
            raise Exception("Expected ':' in parameter.")
        name = line[0:colon].strip(' ')
        line = line[colon + 1:].strip(' ')

        # Step 1: consume the type part, leaving the option/default remainder.
        attribute_type = None
        if line == 'str':
            line = 'string'
        if line in skipped_specifications:
            line = ''
        elif line.startswith('{'):
            # Enumerations such as "{'a', 'b'}, optional" carry no simple type.
            if line.endswith('}'):
                line = ''
            else:
                end = line.find('},')
                if end == -1:
                    raise Exception("Expected '}' in parameter.")
                line = line[end + 2:].strip(' ')
        elif line.startswith("'"):
            # Drop a leading run of quoted alternatives: 'a', 'b', ...
            while line.startswith("'"):
                end = line.find("',")
                if end == -1:
                    raise Exception("Expected \' in parameter.")
                line = line[end + 2:].strip(' ')
        elif line in type_map:
            attribute_type = line
            line = ''
        elif line.startswith(random_state_prefix):
            line = line[len(random_state_prefix):]
        elif line.find('|') != -1:
            line = ''
        else:
            space = line.find(' {')
            if space != -1 and line[0:space] in type_map and line[space:].find('}') != -1:
                # "<type> {choices}..." keeps the type and skips the choices.
                attribute_type = line[0:space]
                end = line[space:].find('}')
                line = line[space + end + 1:]
            else:
                comma = line.find(',')
                if comma == -1:
                    comma = line.find(' (')
                    if comma == -1:
                        raise Exception("Expected ',' in parameter.")
                attribute_type = line[0:comma]
                line = line[comma + 1:].strip(' ')
        # Anything that is not a known simple type is recorded as untyped.
        attribute_type = type_map.get(attribute_type)

        # Step 2: scan the remainder for 'optional' and a default value.
        option = None
        default = None
        while len(line.strip(' ')) > 0:
            line = line.strip(' ')
            if line.startswith(('optional ', 'optional,')):
                option = 'optional'
                line = line[9:]
            elif line.startswith('optional'):
                option = 'optional'
                line = ''
            elif line.startswith('('):
                # find() reports a miss as -1 so the intended error is raised;
                # index() would raise ValueError before the check could run.
                close = line.find(')')
                if close == -1:
                    raise Exception("Expected ')' in parameter.")
                line = line[1:close]
            elif line.endswith(' by default'):
                default = line[0:-11]
                line = ''
            elif line.startswith(('default =', 'default :')):
                default = line[9:].strip(' ')
                line = ''
            elif line.startswith(('default ', 'default=', 'default:')):
                default = line[8:].strip(' ')
                line = ''
            else:
                # Unrecognized token: skip to just past the next comma.
                comma = line.find(',')
                if comma == -1:
                    raise Exception("Expected ',' in parameter.")
                line = line[comma + 1:]

        # Step 3: collect the description lines that follow the parameter line.
        index = index + 1
        attribute_lines = []
        while index < len(lines) and (len(lines[index].strip(' ')) == 0 or lines[index].startswith('        ')):
            attribute_lines.append(lines[index].lstrip(' '))
            index = index + 1
        description = '\n'.join(attribute_lines)
        update_attribute(schema, name, description, attribute_type, option, default)
221
# Refresh the description and attributes of every entry that declares a
# package, using the docstring of the class the entry names.
for entry in json_root:
    name = entry['name']
    schema = entry['schema']
    if 'package' not in schema:
        continue
    class_name = '.'.join([schema['package'], name])
    class_definition = pydoc.locate(class_name)
    if not class_definition:
        raise Exception("'" + class_name + "' not found.")
    docstring = class_definition.__doc__
    if not docstring:
        raise Exception("'" + class_name + "' missing __doc__.")
    headers = split_docstring(docstring)
    # The text before the first header is the description; the 'Parameters'
    # section supplies the attributes.
    for section, handler in (('', update_description), ('Parameters', update_attributes)):
        if section in headers:
            handler(schema, headers[section])
238
# Serialize before opening the file: mode 'w' truncates on open, so a failure
# in json.dumps must not be able to leave the metadata file empty.
json_data = json.dumps(json_root, sort_keys=True, indent=2)
# newline='' disables newline translation, so '\n' is written as-is.
with io.open(json_file, 'w', newline='') as fout:
    for line in json_data.splitlines():
        # Trailing whitespace is removed from every serialized line.
        fout.write(line.rstrip())
        fout.write('\n')
244