#!/usr/bin/env python3

"""
Generate src/bin/termptydbl.{c,h} from unicode files
used with ucd.all.flat.xml from
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip
"""

import argparse
from collections import namedtuple
import xml.etree.ElementTree as ET

# A run of consecutive code points [start, end] sharing one width class:
# 'N' (narrow), 'A' (ambiguous) or 'W' (wide).
URange = namedtuple('unicode_range', ['width', 'start', 'end'])

# XML namespace prefix used by every element of the UCD XML file.
_UCD_NS = "{http://www.unicode.org/ns/2003/ucd/1.0}"

_HEADER_PROLOGUE = """/* XXX: Code generated by tool unicode_dbl_width.py */
#ifndef _TERMPTY_DBL_H__
#define _TERMPTY_DBL_H__ 1

Eina_Bool _termpty_is_wide(const Eina_Unicode g);
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);

static inline Eina_Bool
_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
{
   /* optimize for latin1 non-ambiguous */
"""

_HEADER_EPILOGUE = """
     return EINA_FALSE;
   if (!ty->termstate.cjk_ambiguous_wide)
     return _termpty_is_wide(g);
   else
     return _termpty_is_ambigous_wide(g);
}

#endif
"""

_SOURCE_PROLOGUE = """/* XXX: Code generated by tool unicode_dbl_width.py */
#include "private.h"

#include <Elementary.h>
#include "termpty.h"
#include "termptydbl.h"
"""

# Formatted with str.format(): literal C braces are doubled.
_PREDICATE_PROLOGUE = """
__attribute__((const))
Eina_Bool
{name}(Eina_Unicode g)
{{
   switch (g)
     {{
"""

_PREDICATE_EPILOGUE = """
        return EINA_TRUE;
     }
   return EINA_FALSE;
}
"""


def _collapse_width(ea):
    """Reduce an East Asian Width value to the classes used by this tool.

    'Na' and 'H' become 'N', 'F' becomes 'W'.  A missing or empty value is
    treated as 'N'.  Any other value is returned unchanged.
    """
    if not ea or ea in ('Na', 'H'):
        return 'N'
    # Equality, not `ea in ('F')`: the latter is a substring test on a str.
    if ea == 'F':
        return 'W'
    return ea


def get_ranges(xmlfile, emoji_as_wide):
    """Parse a UCD XML file into a list of width ranges.

    xmlfile: file name or file object accepted by ElementTree.parse().
    emoji_as_wide: when true, characters with Emoji == 'Y' and
        ExtPict == 'Y' are forced to 'W' unless they are ambiguous;
        when false, characters of block 'Misc_Pictographs' are forced
        to 'W' instead.

    Returns a list of URange in document order.  The first range always
    starts at code point 0 with width 'N'.  A range is extended up to the
    latest code point seen with the same width, so code points absent from
    the file are absorbed by the range that spans them.
    """
    root = ET.parse(xmlfile).getroot()
    repertoire = root.find(_UCD_NS + "repertoire")
    chars = repertoire.findall(_UCD_NS + "char")

    ranges = []
    cur = URange('N', 0, 0)
    for c in chars:
        cp = c.get('cp')
        if not cp:
            # Elements without 'cp' are skipped.  Per UAX #42 these describe
            # a span through first-cp/last-cp.
            # NOTE(review): such spans are not expanded here; confirm that
            # ignoring them is intended.
            continue

        width = _collapse_width(c.get('ea'))
        if emoji_as_wide:
            if (c.get('Emoji') == 'Y' and c.get('ExtPict') == 'Y'
                    and width != 'A'):
                width = 'W'
        elif c.get('blk') == 'Misc_Pictographs':
            width = 'W'

        cp = int(cp, 16)
        if width != cur.width:
            ranges.append(cur)
            cur = URange(width, cp, cp)
        else:
            cur = cur._replace(end=cp)

    ranges.append(cur)
    return ranges


def merge_ranges(ranges, is_same_width):
    """Merge adjacent ranges that `is_same_width` considers equivalent.

    ranges: sequence of URange, in increasing code point order.
    is_same_width: callable (candidate, current) -> bool.

    A merged range keeps the width and start of the first range of its
    group and takes the end of the last one.  Returns a new list; an empty
    input gives an empty list.
    """
    if not ranges:
        return []

    merged = []
    current = ranges[0]
    for candidate in ranges[1:]:
        if is_same_width(candidate, current):
            current = current._replace(end=candidate.end)
        else:
            merged.append(current)
            current = candidate
    merged.append(current)
    return merged


def skip_ranges(ranges, width_skipped):
    """Return the ranges whose width is not in `width_skipped`."""
    return [r for r in ranges if r.width not in width_skipped]


def gen_header(cur_range, file_header):
    """Write the C header to `file_header`.

    cur_range: the first range; every code point up to its `end` is
        answered EINA_FALSE inline, without calling the lookup functions.
    """
    file_header.write(_HEADER_PROLOGUE)
    file_header.write(f"   if (g <= 0x{cur_range.end:X})")
    file_header.write(_HEADER_EPILOGUE)


def _gen_predicate(name, ranges, is_same_width, width_skipped, file_source):
    """Write one C predicate `name` as a switch over code point ranges.

    The first range is dropped (the header's inline test covers it), the
    rest is merged with `is_same_width`, and ranges whose width is in
    `width_skipped` are left out.  Each remaining range becomes a `case`
    label; all labels fall through to a single `return EINA_TRUE;`.
    """
    kept = skip_ranges(merge_ranges(ranges[1:], is_same_width), width_skipped)

    file_source.write(_PREDICATE_PROLOGUE.format(name=name))
    last = len(kept) - 1
    for idx, r in enumerate(kept):
        # The last label is directly followed by the return statement.
        fallthrough = " EINA_FALLTHROUGH;" if idx != last else ""
        if r.start == r.end:
            file_source.write(f"      case 0x{r.start:X}:{fallthrough}\n")
        else:
            file_source.write(
                f"      case 0x{r.start:X} ... 0x{r.end:X}:{fallthrough}\n")
    file_source.write(_PREDICATE_EPILOGUE)


def gen_ambigous(ranges, file_source):
    """Write `_termpty_is_ambigous_wide()`: 'A' and 'W' both count as wide."""
    def is_same_width(r1, r2):
        if r1.width == 'N':
            return r2.width == 'N'
        return r2.width in ('A', 'W')

    _gen_predicate('_termpty_is_ambigous_wide', ranges, is_same_width,
                   ('N',), file_source)


def gen_wide(ranges, file_source):
    """Write `_termpty_is_wide()`: only 'W' counts as wide."""
    def is_same_width(r1, r2):
        if r1.width in ('N', 'A'):
            return r2.width in ('N', 'A')
        return r2.width == 'W'

    _gen_predicate('_termpty_is_wide', ranges, is_same_width,
                   ('N', 'A'), file_source)


def gen_c(ranges, file_header, file_source):
    """Write the C header and the C source from `ranges`.

    ranges: non-empty list of URange as returned by get_ranges().
    """
    gen_header(ranges[0], file_header)
    file_source.write(_SOURCE_PROLOGUE)
    gen_ambigous(ranges, file_source)
    gen_wide(ranges, file_source)


def main(argv=None):
    """Command line entry point: xml input, then header and source outputs."""
    parser = argparse.ArgumentParser(description='Generate code handling different widths of unicode codepoints.')
    parser.add_argument('xml', type=argparse.FileType('r'))
    parser.add_argument('header', type=argparse.FileType('w'))
    parser.add_argument('source', type=argparse.FileType('w'))

    args = parser.parse_args(argv)

    # Close (and therefore flush) all three files even if generation fails.
    with args.xml, args.header, args.source:
        ranges = get_ranges(args.xml, False)
        gen_c(ranges, args.header, args.source)


if __name__ == "__main__":
    main()