# For every auxiliary predicate (보조용언) entry, work out which connective
# ending(s) it attaches to, and record them as spell-checker properties.

import glob
import sys
import yaml
import re

# Every property generated by this script carries this prefix; it is also
# used to recognise (and replace) properties written by a previous run.
_PROP_PREFIX = '보조용언:'

# Phrases in the usage note that introduce the quoted example list.
# Order matters: the longer marker is tried first, as in the original
# if/elif chain.
_CLUE_MARKERS = ('뒤에서 ', '뒤에 ')


def process_all(outdir):
    """Process every auxiliary-predicate YAML entry found under *outdir*.

    Files are matched with the glob pattern
    ``<outdir>/*/*__보조_*.yaml`` and each one is passed to
    :func:`process_file`.
    """
    for filename in glob.glob(outdir + '/*/*__보조_*.yaml'):
        process_file(filename)


def _extract_props(word, clue):
    """Return the ``보조용언:<ending>`` properties described by *clue*.

    *clue* is the usage note of the entry. The text between one of the
    markers in ``_CLUE_MARKERS`` and ``'로 쓴다.'`` is split on ``', '``
    into quoted examples; the first space-separated token of each example
    is a ``/``-separated list of endings. An ending that lacks the leading
    ``-`` gets one added.

    When the note contains no marker, the only fallback is the hard-coded
    ending for the headword ``드리다``. An empty list means nothing could
    be derived.
    """
    quoted_examples = []
    for marker in _CLUE_MARKERS:
        if marker in clue:
            quoted_examples = (
                clue.split(marker)[1].split('로 쓴다.')[0].split(', ')
            )
            break

    if not quoted_examples:
        # Special case kept from the original script.
        if word == '드리다':
            return [_PROP_PREFIX + '-어']
        return []

    props = []
    for quoted in quoted_examples:
        # Drop the surrounding quote characters.
        example = quoted[1:-1]
        for ending in example.split(' ')[0].split('/'):
            if not ending:
                # A malformed/empty fragment used to raise IndexError.
                continue
            if not ending.startswith('-'):
                ending = '-' + ending
            props.append(_PROP_PREFIX + ending)
    return props


def _merge_props(existing, new_props):
    """Return *existing* minus old ``보조용언:`` items plus *new_props*, sorted.

    *existing* may be ``None`` or empty. Properties that do not start with
    ``보조용언:`` are preserved untouched.
    """
    kept = [p for p in (existing or []) if not p.startswith(_PROP_PREFIX)]
    return sorted(kept + new_props)


def process_file(filename):
    """Update the spell-checker properties of one YAML entry in place.

    Reads ``imported/한국어기초사전`` (headword and the usage note of the
    first sense), derives the ``보조용언:`` properties and stores them in
    ``processed/맞춤법 검사/속성``, replacing any ``보조용언:`` properties
    already there. If nothing can be derived, the filename and the note are
    printed and the file is left unmodified.

    Raises:
        KeyError / IndexError: if the entry lacks the keys read above.
    """
    with open(filename, encoding='utf-8') as fp:
        # safe_load: plain yaml.load() without a Loader fails on PyYAML >= 6
        # and can construct arbitrary objects on older versions.
        # NOTE(review): assumes the entry files contain only plain YAML
        # data (no Python-specific tags) -- confirm.
        doc = yaml.safe_load(fp)

    dictionary = doc['imported']['한국어기초사전']
    word = dictionary['표제어']
    clue = dictionary['의미'][0]['의미 참고']

    props = _extract_props(word, clue)
    if not props:
        print(filename)
        print('*** UNKNOWN:' + clue)
        return

    spelling = doc['processed']['맞춤법 검사']
    spelling['속성'] = _merge_props(spelling.get('속성'), props)

    # Serialise before opening for writing so that a dump failure cannot
    # leave a truncated file behind.
    dumped = yaml.dump(doc, allow_unicode=True, default_flow_style=False,
                       indent=2)
    with open(filename, 'w', encoding='utf-8') as fp:
        fp.write(dumped)


if __name__ == '__main__':
    outdir = './entries'
    process_all(outdir)