1# 모든 보조용언에 대해, 앞에 어떤 형태로 붙여 쓰이는지 찾아낸다.
2
3import glob
4import sys
5import yaml
6import re
7
def process_all(outdir):
    """Run process_file on every matching entry file under *outdir*.

    A file matches when it sits exactly one directory level below *outdir*
    and its name fits the glob ``*__보조_*.yaml``.
    """
    pattern = outdir + '/*/*__보조_*.yaml'
    for entry_path in glob.glob(pattern):
        process_file(entry_path)
12
def process_file(filename):
    """Record, for one entry file, which endings its auxiliary word follows.

    The YAML entry at *filename* is loaded, the usage note of its first sense
    (``의미 참고``) is parsed into ``'보조용언:-…'`` attributes, and those
    attributes replace any existing ``'보조용언:'`` attributes in the entry's
    ``processed -> 맞춤법 검사 -> 속성`` list.  The list is kept sorted and the
    file is rewritten in place.

    If no attribute can be derived from the note, the filename and the note
    are printed and the file is left untouched.

    Raises:
        KeyError: if the entry lacks one of the expected keys
            (``imported``/``한국어기초사전``/``표제어``/``의미``/``의미 참고``
            or ``processed``/``맞춤법 검사``).
    """
    # safe_load: entry files are plain data, and yaml.load() without an
    # explicit Loader is rejected by current PyYAML releases.
    with open(filename, encoding='utf-8') as fp:
        entry = yaml.safe_load(fp)

    source = entry['imported']['한국어기초사전']
    word = source['표제어']
    clue = source['의미'][0]['의미 참고']

    props = _auxiliary_props(word, clue)
    if not props:
        print(filename)
        print('*** UNKNOWN:' + clue)
        return

    spelling = entry['processed']['맞춤법 검사']
    # Keep every attribute that is not ours; a missing or empty list is fine.
    kept = [p for p in (spelling.get('속성') or [])
            if not p.startswith('보조용언:')]
    spelling['속성'] = sorted(kept + props)

    # Serialize before opening for writing, so that a dump failure cannot
    # leave a truncated entry file behind.
    text = yaml.dump(entry, allow_unicode=True, default_flow_style=False,
                     indent=2)
    with open(filename, 'w', encoding='utf-8') as fp:
        fp.write(text)


def _auxiliary_props(word, clue):
    """Return the '보조용언:-…' attributes described by usage note *clue*.

    The note is expected to list examples between ``'뒤에서 '`` (or
    ``'뒤에 '``) and ``'로 쓴다.'``, separated by ``', '``.  The first and last
    character of each example are dropped (presumably surrounding quotation
    marks -- verify against the data), and the first space-separated token is
    split on ``'/'`` into the individual endings.

    *word* is only used for the hard-coded fallback for ``'드리다'``, whose
    note carries neither marker.
    """
    if '뒤에서 ' in clue:
        marker = '뒤에서 '
    elif '뒤에 ' in clue:
        marker = '뒤에 '
    else:
        # No recognizable listing; only the known special case yields a result.
        return ['보조용언:-어'] if word == '드리다' else []

    listing = clue.split(marker)[1].split('로 쓴다.')[0]
    props = []
    for quoted in listing.split(', '):
        example = quoted[1:-1]
        for ending in example.split(' ')[0].split('/'):
            if not ending:
                # Empty fragment (e.g. stray '/' or empty example): nothing
                # to record, and indexing it would fail.
                continue
            if not ending.startswith('-'):
                ending = '-' + ending
            props.append('보조용언:' + ending)
    return props
55
if __name__ == '__main__':
    # Script entry point: process every entry below the local entries folder.
    process_all('./entries')
59