1#!/usr/bin/env python3
2
3"""
Generate src/bin/termptydbl.{c,h} from the Unicode Character Database.

Intended to be run against ucd.all.flat.xml, which is distributed in
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip
7"""
8
9import argparse
10from collections import namedtuple
11import xml.etree.ElementTree as ET
12
URange = namedtuple('unicode_range', ['width', 'start', 'end'])

def get_ranges(xmlfile, emoji_as_wide):
    """Collapse the UCD repertoire into runs of identical width class.

    Each ``char`` element carrying a ``cp`` attribute is classified from its
    ``ea`` (East Asian Width) attribute: 'Na' and 'H' are folded into 'N',
    'F' is folded into 'W', and every other value is kept as is.  A missing
    or empty ``ea`` is treated as 'N'.

    Args:
        xmlfile: file name or file object accepted by ``ElementTree.parse``.
        emoji_as_wide: when true, a code point with both ``Emoji`` and
            ``ExtPict`` set to 'Y' is forced to 'W' unless its width is 'A',
            and any code point whose ``blk`` is 'Misc_Pictographs' is forced
            to 'W'.

    Returns:
        A list of ``URange(width, start, end)`` in document order.  The first
        range always starts at 0 with width 'N'.  A range's ``end`` is the
        last code point seen before the width class changed, so code points
        that are not listed between two entries of the same class end up
        inside that range.
    """
    ns = "{http://www.unicode.org/ns/2003/ucd/1.0}"
    root = ET.parse(xmlfile).getroot()
    repertoire = root.find(ns + "repertoire")
    chars = repertoire.findall(ns + "char")

    ranges = []
    r = URange('N', 0, 0)
    for c in chars:
        cp = c.get('cp')
        if not cp:
            # Entries without a single code point are ignored (presumably
            # the ones describing a span through other attributes --
            # verify against the UCD XML schema).
            continue

        # Fall back to neutral instead of failing on an absent attribute.
        ea = c.get('ea') or 'N'
        if ea in ('Na', 'H'):
            ea = 'N'
        elif ea == 'F':
            # Plain equality: "ea in ('F')" is a substring test against the
            # string 'F', not a tuple membership test.
            ea = 'W'

        if emoji_as_wide:
            is_emoji = c.get('Emoji') == 'Y' and c.get('ExtPict') == 'Y'
            if (is_emoji and ea != 'A') or c.get('blk') == 'Misc_Pictographs':
                ea = 'W'

        cp = int(cp, 16)
        if ea != r.width:
            ranges.append(r)
            r = URange(ea, cp, cp)
        else:
            r = r._replace(end=cp)

    ranges.append(r)

    return ranges
52
def merge_ranges(ranges, is_same_width):
    """Coalesce consecutive ranges that the predicate puts in the same class.

    Args:
        ranges: ordered list of range tuples exposing ``end`` and
            ``_replace``.
        is_same_width: callable invoked as ``is_same_width(candidate,
            current)``; a true result extends the current run up to
            ``candidate.end``.

    Returns:
        A new list of merged ranges.  Each merged range keeps the ``width``
        and ``start`` of the first range of its run and the ``end`` of the
        last one.  An empty input yields an empty list.
    """
    if not ranges:
        # Callers pass slices such as ranges[1:], which may be empty.
        return []

    res = []
    cur_range = ranges[0]
    for r in ranges[1:]:
        if is_same_width(r, cur_range):
            cur_range = cur_range._replace(end=r.end)
        else:
            res.append(cur_range)
            cur_range = r
    res.append(cur_range)
    return res
64
def skip_ranges(ranges, width_skipped):
    """Return the ranges whose ``width`` is not listed in *width_skipped*.

    The input order is preserved and a new list is returned.
    """
    return [kept for kept in ranges if kept.width not in width_skipped]
71
def gen_header(cur_range, file_header):
    """Write the generated C header to *file_header*.

    The header declares ``_termpty_is_wide`` and
    ``_termpty_is_ambigous_wide`` and defines the inline dispatcher
    ``_termpty_is_dblwidth_get``.  ``cur_range.end`` is emitted in upper-case
    hexadecimal as the bound below which the dispatcher returns EINA_FALSE
    without calling either lookup function.
    """
    prologue = """/* XXX: Code generated by tool unicode_dbl_width.py */
#ifndef _TERMPTY_DBL_H__
#define _TERMPTY_DBL_H__ 1

Eina_Bool _termpty_is_wide(const Eina_Unicode g);
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);

static inline Eina_Bool
_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
{
   /* optimize for latin1 non-ambiguous */
"""
    fast_path = f"   if (g <= 0x{cur_range.end:X})"
    epilogue = """
     return EINA_FALSE;
   if (!ty->termstate.cjk_ambiguous_wide)
     return _termpty_is_wide(g);
   else
     return _termpty_is_ambigous_wide(g);
}

#endif
"""
    # Emit the pieces in order; the condition line has no trailing newline
    # because the epilogue starts with one.
    for chunk in (prologue, fast_path, epilogue):
        file_header.write(chunk)
98
def gen_ambigous(ranges, file_source):
    """Write the C definition of ``_termpty_is_ambigous_wide`` to *file_source*.

    The first entry of *ranges* is left out (presumably because the
    generated header already short-circuits it -- confirm against the
    header generator).  The remaining entries are merged so that
    neighbouring 'A' and 'W' runs form one run, 'N' runs are dropped, and
    each surviving run becomes one ``case`` label of a switch that returns
    EINA_TRUE.
    """
    def mergeable(candidate, current):
        # 'N' only joins 'N'; 'A' and 'W' are interchangeable here.
        if candidate.width != 'N':
            return current.width in ('A', 'W')
        return current.width == 'N'

    file_source.write(
"""
__attribute__((const))
Eina_Bool
_termpty_is_ambigous_wide(Eina_Unicode g)
{
    switch (g)
      {
""")
    wide_runs = skip_ranges(merge_ranges(ranges[1:], mergeable), ('N',))

    last = len(wide_runs) - 1
    for pos, run in enumerate(wide_runs):
        # Every label but the final one is marked as falling through.
        marker = "" if pos == last else " EINA_FALLTHROUGH;"
        if run.start != run.end:
            label = f"0x{run.start:X} ... 0x{run.end:X}"
        else:
            label = f"0x{run.start:X}"
        file_source.write(f"       case {label}:{marker}\n")

    file_source.write(
"""
        return EINA_TRUE;
    }
   return EINA_FALSE;
}
""")
135
def gen_wide(ranges, file_source):
    """Write the C definition of ``_termpty_is_wide`` to *file_source*.

    The first entry of *ranges* is left out (presumably because the
    generated header already short-circuits it -- confirm against the
    header generator).  'N' and 'A' runs are treated as one class and
    dropped; each remaining 'W' run becomes one ``case`` label of a switch
    that returns EINA_TRUE.
    """
    def same_class(incoming, running):
        # 'N' and 'A' are both non-wide for this table.
        narrow = ('N', 'A')
        if incoming.width in narrow:
            return running.width in narrow
        return running.width == 'W'

    file_source.write(
"""
__attribute__((const))
Eina_Bool
_termpty_is_wide(Eina_Unicode g)
{
    switch (g)
      {
""")
    merged = merge_ranges(ranges[1:], same_class)
    kept = skip_ranges(merged, ('N', 'A'))

    final_idx = len(kept) - 1
    for idx, span in enumerate(kept):
        # Only the last label is followed directly by the return statement.
        tail = " EINA_FALLTHROUGH;" if idx != final_idx else ""
        bounds = f"0x{span.start:X}"
        if span.end != span.start:
            bounds += f" ... 0x{span.end:X}"
        file_source.write(f"       case {bounds}:{tail}\n")

    file_source.write(
"""
        return EINA_TRUE;
    }
   return EINA_FALSE;
}
""")
171
172
def gen_c(ranges, file_header, file_source):
    """Generate both output files from *ranges*.

    The header is written to *file_header* using the first range.  The C
    source written to *file_source* consists of the include preamble followed
    by the ambiguous-wide and the wide lookup functions, in that order.
    """
    gen_header(ranges[0], file_header)

    preamble = """/* XXX: Code generated by tool unicode_dbl_width.py */
#include "private.h"

#include <Elementary.h>
#include "termpty.h"
#include "termptydbl.h"
"""
    file_source.write(preamble)

    for emit in (gen_ambigous, gen_wide):
        emit(ranges, file_source)
185
def main(argv=None):
    """Parse the command line and generate the header and source files.

    Args:
        argv: argument list handed to ``ArgumentParser.parse_args``; ``None``
            makes argparse read ``sys.argv[1:]``.

    The three positional arguments are the UCD XML input, the header output
    and the C source output.  Emoji are not forced to wide.
    """
    parser = argparse.ArgumentParser(description='Generate code handling different widths of unicode codepoints.')
    parser.add_argument('xml', type=argparse.FileType('r'))
    parser.add_argument('header', type=argparse.FileType('w'))
    parser.add_argument('source', type=argparse.FileType('w'))

    args = parser.parse_args(argv)

    # Close the handles explicitly so the generated files are flushed as
    # soon as generation ends instead of at interpreter shutdown.
    with args.xml, args.header, args.source:
        ranges = get_ranges(args.xml, False)
        gen_c(ranges, args.header, args.source)


# Run only when executed as a script, so importing the module has no side
# effects.
if __name__ == "__main__":
    main()
195