1#!/usr/bin/env python3
2
3import argparse
4
5from dataclasses import dataclass
6
# Command line interface: two positional arguments, the data file to parse
# ('input') and the C header to generate ('output').
parser = argparse.ArgumentParser()
# Decode the input explicitly as UTF-8 rather than with the locale's preferred
# encoding, so that parsing does not depend on the environment the script is
# run in (e.g. a build sandbox with a non-UTF-8 locale).
# NOTE(review): assumes the data file is UTF-8 encoded, as the Unicode
# Consortium's data files are - confirm against the file actually used.
parser.add_argument('input', type=argparse.FileType('r', encoding='utf-8'))
parser.add_argument('output', type=argparse.FileType('w'))
opts = parser.parse_args()
11
12
@dataclass
class Emoji:
    """A codepoint, or a run of consecutive codepoints, and its emoji flags.

    A freshly created instance describes exactly one codepoint (count is 1)
    with every property flag cleared.
    """

    # First (or only) codepoint described by this entry.
    codepoint: int

    # Number of consecutive codepoints, starting at 'codepoint', that this
    # entry covers.
    count: int = 1

    # Property flags; each one is False until explicitly set.
    emoji_presentation: bool = False
    modifier: bool = False
    modifier_base: bool = False
    component: bool = False
    pictographic: bool = False
22
23
# All codepoints carrying the 'Emoji' property, keyed by codepoint.
emojis = {}

# Secondary properties and the Emoji attribute each one switches on. These
# only ever update codepoints that are already present in 'emojis'.
flag_for_property = {
    'Emoji_Presentation': 'emoji_presentation',
    'Emoji_Modifier': 'modifier',
    'Emoji_Modifier_Base': 'modifier_base',
    'Emoji_Component': 'component',
    'Extended_Pictographic': 'pictographic',
}

# Each data line looks like "<codepoint>[..<codepoint>] ; <property> # comment"
for raw_line in opts.input:
    entry = raw_line.rstrip()

    # Blank lines and full-line comments carry no data
    if not entry or entry.startswith('#'):
        continue

    cp_field, remainder = entry.split(';', maxsplit=1)
    props = remainder.split('#', maxsplit=1)[0].strip()

    # One hex number for a single codepoint, two for an inclusive range
    bounds = tuple(int(part.strip(), 16) for part in cp_field.split('..'))
    assert len(bounds) in [1, 2]

    # A single codepoint is treated as a range of length one
    covered = range(bounds[0], bounds[-1] + 1)

    # The generated C struct only has 24 bits available for the codepoint
    for cp in covered:
        assert cp < (1 << 24), f'codepoint is outside range: 0x{cp:x}'

    if props == 'Emoji':
        # The only property that creates entries; duplicates are an error
        for cp in covered:
            assert cp not in emojis
            emojis[cp] = Emoji(codepoint=cp)
        continue

    attribute = flag_for_property.get(props)
    if attribute is None:
        # Not a property we track
        continue

    for cp in covered:
        known = emojis.get(cp)
        if known is not None:
            setattr(known, attribute, True)
86
87# for emoji in emojis.values():
88#     print(f'{emoji.codepoint:x} ({chr(emoji.codepoint)}): '
89#           f'presentation={"emoji" if emoji.emoji_presentation else "text"}, '
90#           f'component={emoji.component}, '
91#           f'pictographic={emoji.pictographic}')
92# print(f'{len(emojis)} codepoints')
93# sys.exit(0)
94
95# Compact the list, by creating ranges of consecutive codepoints with
96# identical properties
97
# Longest run a single entry may describe. The generated C struct stores the
# run length in a 'uint8_t count' member, so longer runs are split into
# several adjacent entries instead of overflowing it.
max_run_length = 255

# Walk the codepoints in ascending order and fold each one into the previous
# entry when it directly follows that entry's run and has identical flags.
# Note that this grows the 'count' of Emoji objects stored in 'emojis' in
# place; 'compacted' holds the first Emoji of each run.
compacted = []
last = None
for emoji in sorted(emojis.values(), key=lambda x: x.codepoint):
    extends_run = (
        last is not None
        and last.count < max_run_length
        and emoji.codepoint == last.codepoint + last.count
        and emoji.emoji_presentation == last.emoji_presentation
        and emoji.modifier == last.modifier
        and emoji.modifier_base == last.modifier_base
        and emoji.component == last.component
        and emoji.pictographic == last.pictographic
    )

    if extends_run:
        last.count += 1
    else:
        # First codepoint, a gap, a change in flags, or a full run: start a
        # new entry
        last = emoji
        compacted.append(last)
116
print(f'compacted from {len(emojis)} to {len(compacted)} entries')

emit = opts.output.write

# Fixed prologue: include guard, the packed 'struct emoji' definition with a
# compile-time size check, and the start of the HarfBuzz-only section.
emit(''.join([
    '#pragma once\n',
    '#include <stdint.h>\n',
    '#include <stdbool.h>\n',
    '\n',
    'struct emoji {\n',
    '    bool emoji_presentation:1;\n',
    '    bool modifier:1;\n',
    '    bool modifier_base:1;\n',
    '    bool component:1;\n',
    '    bool pictographic:1;\n',
    '    uint32_t cp:24;\n',
    '    uint8_t count;\n',
    '} __attribute__((packed));\n',
    '_Static_assert(sizeof(struct emoji) == 5, "unexpected struct size");\n',
    '\n',
    '#if defined(FCFT_HAVE_HARFBUZZ)\n',
    '\n',
]))
emit(f'static const struct emoji emojis[{len(compacted)}] = {{\n')

# Boolean members, in the order they are written for every table entry
flag_names = (
    'emoji_presentation',
    'modifier',
    'modifier_base',
    'component',
    'pictographic',
)

# One designated initializer per compacted entry
for entry in compacted:
    # 'count' is a uint8_t in the generated struct
    assert entry.count < 256

    members = [
        f'       .{name} = {"true" if getattr(entry, name) else "false"},\n'
        for name in flag_names
    ]
    members.append(f'       .cp = 0x{entry.codepoint:05x},\n')
    members.append(f'       .count = {entry.count},\n')

    emit('    {\n' + ''.join(members) + '    },\n')

emit('};\n')

# Without HarfBuzz only an empty table is declared
emit('#else  /* !FCFT_HAVE_HARFBUZZ */\n')
emit('static const struct emoji emojis[0];\n')
emit('#endif  /* !FCFT_HAVE_HARFBUZZ */\n')
155