# Refreshes the descriptions and attribute metadata stored in
# ../src/sklearn-metadata.json from the docstrings of the classes it lists.

from __future__ import unicode_literals
from __future__ import print_function

import io
import json
import os
import pydoc
import re
import sys

json_file = os.path.join(os.path.dirname(__file__), '../src/sklearn-metadata.json')

# Docstring type names that are recognised, mapped to the schema type names.
_TYPE_MAP = {
    'float': 'float32',
    'boolean': 'boolean',
    'bool': 'boolean',
    'string': 'string',
    'int': 'int32'
}

# Type specifications that are deliberately ignored as a whole: no type,
# option or default is extracted from them.
_SKIPPED_TYPE_SPECS = frozenset([
    "'sigmoid' or 'isotonic'",
    'instance BaseEstimator',
    'callable or None (default)',
    'str or callable',
    "string {'english'}, list, or None (default)",
    'tuple (min_n, max_n)',
    "string, {'word', 'char', 'char_wb'} or callable",
    "{'word', 'char'} or callable",
    "string, {'word', 'char'} or callable",
    'int, float, None or string',
    "int, float, None or str",
    "int or None, optional (default=None)",
    "'l1', 'l2' or None, optional",
    "{'strict', 'ignore', 'replace'} (default='strict')",
    "{'ascii', 'unicode', None} (default=None)",
    "string {'english'}, list, or None (default=None)",
    "tuple (min_n, max_n) (default=(1, 1))",
    "float in range [0.0, 1.0] or int (default=1.0)",
    "float in range [0.0, 1.0] or int (default=1)",
    "'l1', 'l2' or None, optional (default='l2')",
    "{'scale', 'auto'} or float, optional (default='scale')",
    "str {'auto', 'full', 'arpack', 'randomized'}",
    "str {'filename', 'file', 'content'}",
    "str, {'word', 'char', 'char_wb'} or callable",
    "str {'english'}, list, or None (default=None)",
    "{'word', 'char', 'char_wb'} or callable, default='word'",
    "{'scale', 'auto'} or float, default='scale'",
    "{'uniform', 'distance'} or callable, default='uniform'",
    "int, RandomState instance or None (default)"
])


def split_docstring(docstring):
    """Split a docstring into sections keyed by their underlined headers.

    A header is a line immediately followed by a non-blank line made up only
    of '-' characters (ignoring surrounding spaces). Neither the header line
    nor its underline is kept in any section body.

    Returns a dict mapping the header text (stripped of spaces) to the list
    of raw lines below it. Lines before the first header are stored under
    the key ''. A header that occurs twice keeps only its last section.
    """
    headers = {}
    current_header = ''
    current_lines = []
    lines = docstring.split('\n')
    index = 0
    while index < len(lines):
        underline = lines[index + 1].strip(' ') if index + 1 < len(lines) else ''
        if underline and not underline.strip('-'):
            headers[current_header] = current_lines
            current_header = lines[index].strip(' ')
            current_lines = []
            # Step over the underline as well as the header itself.
            index += 2
        else:
            current_lines.append(lines[index])
            index += 1
    headers[current_header] = current_lines
    return headers


def update_description(schema, lines):
    """Store the given lines as schema['description'].

    Leading spaces are removed from every line (the list is modified in
    place) and the lines are joined with newlines. Nothing is changed when
    the lines contain only spaces.
    """
    if ''.join(lines).strip(' '):
        lines[:] = [line.lstrip(' ') for line in lines]
        schema['description'] = '\n'.join(lines)


def _unquote(text):
    """Remove surrounding single quotes, then surrounding double quotes."""
    return text.strip("'").strip('"')


def _convert_default(attribute_type, default_value):
    """Convert the textual default of a parameter to a JSON-friendly value.

    Raises Exception for a boolean default that is not True, False or
    'auto', and for an attribute type that is not one of the schema types.
    float() and int() raise ValueError for text they cannot parse.
    """
    if attribute_type == 'float32':
        if default_value == 'None':
            return None
        if default_value != "'auto'":
            return float(default_value)
        return _unquote(default_value)
    if attribute_type == 'int32':
        if default_value == 'None':
            return None
        if default_value in ("'auto'", '"auto"'):
            return _unquote(default_value)
        return int(default_value)
    if attribute_type == 'string':
        return _unquote(default_value)
    if attribute_type == 'boolean':
        if default_value == 'True':
            return True
        if default_value == 'False':
            return False
        if default_value == "'auto'":
            return _unquote(default_value)
        raise Exception("Unknown boolean default value '" + str(default_value) + "'.")
    if attribute_type:
        raise Exception("Unknown default type '" + attribute_type + "'.")
    if default_value == 'None':
        return None
    # Untyped defaults only lose single quotes, not double quotes.
    return default_value.strip("'")


def update_attribute(schema, name, description, attribute_type, option, default_value):
    """Create or update the attribute called `name` in schema['attributes'].

    The description is always overwritten. 'type', 'option' and 'default'
    are only written when the corresponding argument is truthy, so values
    already present in the schema survive when the docstring gives none.

    Raises Exception when the default cannot be interpreted for the type
    (see _convert_default).
    """
    attributes = schema.setdefault('attributes', [])
    attribute = None
    for current_attribute in attributes:
        if 'name' in current_attribute and current_attribute['name'] == name:
            attribute = current_attribute
            break
    if not attribute:
        attribute = {'name': name}
        attributes.append(attribute)
    attribute['description'] = description
    if attribute_type:
        attribute['type'] = attribute_type
    if option:
        attribute['option'] = option
    if default_value:
        attribute['default'] = _convert_default(attribute_type, default_value)


def _split_type(spec):
    """Split the text after 'name :' into (raw type name, remaining text).

    The raw type name is None when the specification carries no simple type
    (enumerations, quoted alternatives, skipped specifications, ...).
    Raises Exception when an expected delimiter is missing.
    """
    if spec == 'str':
        spec = 'string'
    if spec in _SKIPPED_TYPE_SPECS:
        return None, ''
    if spec.startswith('{'):
        # Enumeration of allowed values: only what follows '},' is kept.
        if spec.endswith('}'):
            return None, ''
        end = spec.find('},')
        if end == -1:
            raise Exception("Expected '}' in parameter.")
        return None, spec[end + 2:].strip(' ')
    if spec.startswith("'"):
        # A list of quoted alternatives: drop every leading 'value', item.
        while spec.startswith("'"):
            end = spec.find("',")
            if end == -1:
                raise Exception("Expected \' in parameter.")
            spec = spec[end + 2:].strip(' ')
        return None, spec
    if spec in _TYPE_MAP:
        return spec, ''
    random_state_prefix = 'int, RandomState instance or None,'
    if spec.startswith(random_state_prefix):
        return None, spec[len(random_state_prefix):]
    if '|' in spec:
        return None, ''
    space = spec.find(' {')
    if space != -1 and spec[0:space] in _TYPE_MAP and spec[space:].find('}') != -1:
        # "<type> {a, b, c}": keep the type, drop the enumeration.
        end = spec[space:].find('}')
        return spec[0:space], spec[space + end + 1:]
    comma = spec.find(',')
    if comma == -1:
        comma = spec.find(' (')
        if comma == -1:
            raise Exception("Expected ',' in parameter.")
    return spec[0:comma], spec[comma + 1:].strip(' ')


def _parse_modifiers(text):
    """Extract (option, default) from the text following the type.

    Recognises 'optional', a parenthesised group, '<value> by default' and
    the 'default ...' spellings; anything else is skipped up to the next
    comma. Raises ValueError when a ')' or ',' that is needed is missing.
    """
    option = None
    default = None
    text = text.strip(' ')
    while text:
        if text.startswith(('optional ', 'optional,')):
            option = 'optional'
            text = text[9:]
        elif text.startswith('optional'):
            option = 'optional'
            text = ''
        elif text.startswith('('):
            # str.find, not str.index: index() raises its own ValueError and
            # would never reach the explicit check below.
            close = text.find(')')
            if close == -1:
                raise ValueError("Expected ')' in parameter.")
            # Only the content of the parentheses is parsed further.
            text = text[1:close]
        elif text.endswith(' by default'):
            default = text[0:-11]
            text = ''
        elif text.startswith(('default =', 'default :')):
            default = text[9:].strip(' ')
            text = ''
        elif text.startswith(('default ', 'default=', 'default:')):
            default = text[8:].strip(' ')
            text = ''
        else:
            comma = text.find(',')
            if comma == -1:
                raise ValueError("Expected ',' in parameter.")
            text = text[comma + 1:]
        text = text.strip(' ')
    return option, default


def update_attributes(schema, lines):
    """Parse the lines of a 'Parameters' section into schema attributes.

    Every parameter starts with a 'name : type, modifiers' line. The lines
    that follow it and are blank or start with a space form its description.
    Each parameter is stored through update_attribute.

    Raises Exception (ValueError for a missing ')' or ',' in the modifiers)
    when a parameter line cannot be parsed.
    """
    index = 0
    while index < len(lines):
        line = lines[index]
        if line.endswith('.'):
            line = line[0:-1]
        colon = line.find(':')
        if colon == -1:
            raise Exception("Expected ':' in parameter.")
        name = line[0:colon].strip(' ')
        raw_type, rest = _split_type(line[colon + 1:].strip(' '))
        # Type names outside _TYPE_MAP are dropped instead of being reported.
        attribute_type = _TYPE_MAP.get(raw_type)
        option, default = _parse_modifiers(rest)
        index += 1
        # NOTE(review): description lines are recognised by a leading space,
        # which assumes the parameter lines themselves start at column 0 -
        # confirm against the indentation of the docstrings being parsed.
        attribute_lines = []
        while index < len(lines) and (not lines[index].strip(' ') or lines[index].startswith(' ')):
            attribute_lines.append(lines[index].lstrip(' '))
            index += 1
        description = '\n'.join(attribute_lines)
        update_attribute(schema, name, description, attribute_type, option, default)


def main():
    """Load the metadata file, refresh it from the docstrings and rewrite it.

    Raises Exception when a listed class cannot be located or has no
    docstring.
    """
    # The context manager closes the file even when parsing fails.
    with open(json_file) as fin:
        json_root = json.loads(fin.read())

    for entry in json_root:
        name = entry['name']
        schema = entry['schema']
        if 'package' in schema:
            class_name = schema['package'] + '.' + name
            class_definition = pydoc.locate(class_name)
            if not class_definition:
                raise Exception('\'' + class_name + '\' not found.')
            docstring = class_definition.__doc__
            if not docstring:
                raise Exception('\'' + class_name + '\' missing __doc__.')
            headers = split_docstring(docstring)
            if '' in headers:
                update_description(schema, headers[''])
            if 'Parameters' in headers:
                update_attributes(schema, headers['Parameters'])

    # newline='' disables newline translation so '\n' is written verbatim.
    with io.open(json_file, 'w', newline='') as fout:
        json_data = json.dumps(json_root, sort_keys=True, indent=2)
        for line in json_data.splitlines():
            fout.write(line.rstrip())
            fout.write('\n')


if __name__ == '__main__':
    main()