1 /*
2 This file is part of GNU APL, a free implementation of the
3 ISO/IEC Standard 13751, "Programming Language APL, Extended"
4
5 Copyright (C) 2008-2015 Dr. Jürgen Sauermann
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <fcntl.h>
22 #include <unistd.h>
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25
26 #include "Avec.hh"
27 #include "Command.hh"
28 #include "Output.hh"
29 #include "Parser.hh"
30 #include "PrintOperator.hh"
31 #include "UTF8_string.hh"
32 #include "Value.hh"
33
34 using namespace std;
35
36 // See RFC 3629 for UTF-8
37
38 /// Various character attributes.
39 struct Character_definition
40 {
41 CHT_Index av_val; ///< Atomic vector enum of the char.
42 Unicode unicode; ///< Unicode of the char.
43 const char * char_name; ///< Name of the char.
44 int def_line; ///< Line where the char is defined.
45 TokenTag token_tag; ///< Token tag for the char.
46 CharacterFlag flags; ///< Character class.
47 int av_pos; ///< position in ⎕AV (== ⎕AF unicode)
48 };
49
50 //-----------------------------------------------------------------------------
51 #define char_def(n, _u, t, f, p) \
52 { AV_ ## n, UNI_ ## n, # n, __LINE__, TOK_ ## t, FLG_ ## f, 0x ## p },
53 #define char_df1(_n, _u, _t, _f, _p)
54 const Character_definition character_table[MAX_AV] =
55 {
56 #include "Avec.def"
57 };
58 //-----------------------------------------------------------------------------
59 void
show_error_pos(int i,int line,bool cond,int def_line)60 Avec::show_error_pos(int i, int line, bool cond, int def_line)
61 {
62 if (!cond)
63 {
64 CERR << "Error index (line " << line << ", Avec.def line "
65 << def_line << ") = " << i << " = " << HEX(i) << endl;
66 Assert(0);
67 }
68 }
69 //-----------------------------------------------------------------------------
70 #if 0
71
72 void
73 Avec::check_file(const char * filename)
74 {
75 const int fd = open(filename, O_RDONLY);
76 if (fd == -1) return;
77
78 Log(LOG_startup) CERR << "Checking " << filename << endl;
79
80 uint32_t datalen;
81 {
82 struct stat s;
83 const int res = fstat(fd, &s);
84 Assert(res == 0);
85 datalen = s.st_size;
86 }
87
88 const void * data = mmap(NULL, datalen, PROT_READ, MAP_PRIVATE, fd, 0);
89 Assert(data != (void *)-1);
90
91 const UTF8_string utf = UTF8_string((const UTF8 *)data, datalen);
92 UCS_string ucs(utf);
93
94 loop(i, ucs.size())
95 {
96 if (!is_known_char(ucs[i]))
97 CERR << "APL char " << UNI(ucs[i]) << " is missing in AV ("
98 << i << ")" << endl;
99 }
100
101 close(fd);
102 }
103 #endif
104 //-----------------------------------------------------------------------------
105 void
init()106 Avec::init()
107 {
108 check_av_table();
109 }
110 //-----------------------------------------------------------------------------
111 void
check_av_table()112 Avec::check_av_table()
113 {
114 if (MAX_AV != 256)
115 {
116 CERR << "AV has " << MAX_AV << " entries (should be 256)" << endl;
117 return;
118 }
119
120 Assert(sizeof(character_table) / sizeof(*character_table) == MAX_AV);
121
122 // check that character_table.unicode is sorted by increasing UNICODE
123 //
124 for (int i = 1; i < MAX_AV; ++i)
125 show_error_pos(i, __LINE__, character_table[i].def_line,
126 character_table[i].unicode > character_table[i - 1].unicode);
127
128 // check that ASCII chars map to themselves.
129 //
130 loop(i, 0x80) show_error_pos(i, __LINE__, character_table[i].def_line,
131 character_table[i].unicode == i);
132
133 // display holes and duplicate AV positions in character_table
134 {
135 int holes = 0;
136 loop(pos, MAX_AV)
137 {
138 int count = 0;
139 loop(i, MAX_AV)
140 {
141 if (character_table[i].av_pos == pos) ++count;
142 }
143
144 if (count == 0)
145 {
146 ++holes;
147 CERR << "AV position " << HEX(pos) << " unused" << endl;
148 }
149
150 if (count > 1)
151 CERR << "duplicate AV position " << HEX(pos) << endl;
152 }
153
154 if (holes) CERR << holes << " unused positions in ⎕AV" << endl;
155 }
156
157 // check that find_char() works
158 //
159 loop(i, MAX_AV) show_error_pos(i, __LINE__, character_table[i].def_line,
160 i == find_char(character_table[i].unicode));
161
162 Log(LOG_SHOW_AV)
163 {
164 for (int i = 0x80; i < MAX_AV; ++i)
165 {
166 CERR << character_table[i].unicode << " is AV[" << i << "] AV_"
167 << character_table[i].char_name << endl;
168 }
169 }
170
171 // check_file("../keyboard");
172 // check_file("../keyboard1");
173 }
174 //-----------------------------------------------------------------------------
175 Unicode
unicode(CHT_Index av)176 Avec::unicode(CHT_Index av)
177 {
178 if (av < 0) return UNI_AV_MAX;
179 if (av >= MAX_AV) return UNI_AV_MAX;
180 return character_table[av].unicode;
181 }
182 //-----------------------------------------------------------------------------
183 uint32_t
get_av_pos(CHT_Index av)184 Avec::get_av_pos(CHT_Index av)
185 {
186 if (av < 0) return UNI_AV_MAX;
187 if (av >= MAX_AV) return UNI_AV_MAX;
188 return character_table[av].av_pos;
189 }
190 //-----------------------------------------------------------------------------
191 Token
uni_to_token(Unicode & uni,const char * loc)192 Avec::uni_to_token(Unicode & uni, const char * loc)
193 {
194 CHT_Index idx = find_char(uni);
195 if (idx != Invalid_CHT) return Token(character_table[idx].token_tag, uni);
196
197 // not found: try alternative characters.
198 //
199 idx = map_alternative_char(uni);
200 if (idx != Invalid_CHT)
201 {
202 uni = character_table[idx].unicode;
203 return Token(character_table[idx].token_tag,
204 character_table[idx].unicode);
205 }
206
207 Log(LOG_verbose_error)
208 {
209 CERR << endl << "Avec::uni_to_token() : Char " << UNI(uni)
210 << " (" << uni << ") not found in ⎕AV! (called from "
211 << loc << ")" << endl;
212
213 Backtrace::show(__FILE__, __LINE__);
214 }
215 return Token();
216 }
217 //-----------------------------------------------------------------------------
find_av_pos(Unicode av)218 uint32_t Avec::find_av_pos(Unicode av)
219 {
220 const CHT_Index pos = find_char(av);
221
222 if (pos < 0) return MAX_AV; // not found
223
224 return character_table[pos].av_pos;
225 }
226 //-----------------------------------------------------------------------------
227 CHT_Index
find_char(Unicode av)228 Avec::find_char(Unicode av)
229 {
230 int l = 0;
231 int h = sizeof(character_table)/sizeof(*character_table) - 1;
232
233 for (;;)
234 {
235 if (l > h) return Invalid_CHT; // not in table.
236
237 const int m((h + l)/2);
238 const Unicode um = character_table[m].unicode;
239 if (av < um) h = m - 1;
240 else if (av > um) l = m + 1;
241 else return CHT_Index(m);
242 }
243 }
244 //-----------------------------------------------------------------------------
245 CHT_Index
map_alternative_char(Unicode alt_av)246 Avec::map_alternative_char(Unicode alt_av)
247 {
248 // map characters that look similar to characters used in GNU APL
249 // to the GNU APL character.
250 //
251 switch(int(alt_av))
252 {
253 case 0x005E: return AV_AND; // map ^ to ∧
254 case 0x007C: return AV_DIVIDES; // map | to ∣
255 case 0x007E: return AV_TILDE_OPERATOR; // map ~ to ∼
256 case 0x03B1: return AV_ALPHA; // map α to ⍺
257 case 0x03B5: return AV_ELEMENT; // map ε to ∈
258 case 0x03B9: return AV_IOTA; // map ι to ⍳
259 case 0x03C1: return AV_RHO; // map ρ to ⍴
260 case 0x03C9: return AV_OMEGA; // map ω to ⍵
261 case 0x2018: return AV_SINGLE_QUOTE; // map ‘ to '
262 case 0x2019: return AV_SINGLE_QUOTE; // map ’ to '
263 case 0x220A: return AV_ELEMENT; // map ∊ to ∈
264 case 0x2212: return AV_ASCII_MINUS; // map − to -
265 case 0x22BC: return AV_NAND; // map ⊼ to ⍲
266 case 0x22BD: return AV_NOR; // map ⊽ to ⍱
267 case 0x22C4: return AV_DIAMOND; // map ⋄ to ◊
268 case 0x2377: return AV_EPSILON_UBAR; // map ⍷ to ⋸
269 case 0x25AF: return AV_Quad_Quad; // map ▯ to ⎕
270 case 0x25E6: return AV_RING_OPERATOR; // map ◦ to ∘
271 case 0x2662: return AV_DIAMOND; // map ♢ to ◊
272 case 0x26AA: return AV_CIRCLE; // map ⚪ to ○
273 case 0x2A7D: return AV_LESS_OR_EQUAL; // map ⩽ to ≤
274 case 0x2A7E: return AV_MORE_OR_EQUAL; // map ⩾ to ≥
275 case 0x2B25: return AV_DIAMOND; // map ⬥ to ◊
276 case 0x2B26: return AV_DIAMOND; // map ⬦ to ◊
277 case 0x2B27: return AV_DIAMOND; // map ⬧ to ◊
278 default: break;
279 }
280
281 return Invalid_CHT;
282 }
283 //-----------------------------------------------------------------------------
284 bool
is_known_char(Unicode av)285 Avec::is_known_char(Unicode av)
286 {
287 switch(av)
288 {
289 #define char_def(_n, u, _t, _f, _p) case u:
290 #define char_df1(_n, u, _t, _f, _p) case u:
291 #include "Avec.def"
292 return true;
293
294 default: break;
295 }
296
297 return false; // not found
298 }
299 //-----------------------------------------------------------------------------
300 bool
need_UCS(Unicode uni)301 Avec::need_UCS(Unicode uni)
302 {
303 if (is_control(uni)) return true; // ASCII control
304 if (uni >= 0 && uni < UNI_ASCII_DELETE) return false; // printable ASCII
305
306 const CHT_Index idx = find_char(uni);
307 if (idx == Invalid_CHT) return true; // char not in GNU APL's ⎕AV
308 if (unicode_to_cp(uni) == 0xB0) return true; // not in IBM's ⎕AV
309
310 return false;
311 }
312 //-----------------------------------------------------------------------------
313 bool
is_symbol_char(Unicode av)314 Avec::is_symbol_char(Unicode av)
315 {
316 const int32_t idx = find_char(av);
317 if (idx == -1) return false;
318 return (character_table[idx].flags & FLG_SYMBOL) != 0;
319 }
320 //-----------------------------------------------------------------------------
321 bool
is_first_symbol_char(Unicode av)322 Avec::is_first_symbol_char(Unicode av)
323 {
324 return is_symbol_char(av) && ! is_digit(av);
325 }
326 //-----------------------------------------------------------------------------
327 bool
no_space_after(Unicode av)328 Avec::no_space_after(Unicode av)
329 {
330 const int32_t idx = find_char(av);
331 if (idx == -1) return false;
332 return (character_table[idx].flags & FLG_NO_SPACE_AFTER) != 0;
333 }
334 //-----------------------------------------------------------------------------
335 bool
no_space_before(Unicode av)336 Avec::no_space_before(Unicode av)
337 {
338 const int32_t idx = find_char(av);
339 if (idx == -1) return false;
340 return (character_table[idx].flags & FLG_NO_SPACE_BEFORE) != 0;
341 }
342 //-----------------------------------------------------------------------------
343 Unicode
subscript(uint32_t i)344 Avec::subscript(uint32_t i)
345 {
346 switch(i)
347 {
348 case 0: return Unicode(0x2080);
349 case 1: return Unicode(0x2081);
350 case 2: return Unicode(0x2082);
351 case 3: return Unicode(0x2083);
352 case 4: return Unicode(0x2084);
353 case 5: return Unicode(0x2085);
354 case 6: return Unicode(0x2086);
355 case 7: return Unicode(0x2087);
356 case 8: return Unicode(0x2088);
357 case 9: return Unicode(0x2089);
358 }
359
360 return Unicode(0x2093);
361 }
362 //-----------------------------------------------------------------------------
363 Unicode
superscript(uint32_t i)364 Avec::superscript(uint32_t i)
365 {
366 switch(i)
367 {
368 case 0: return Unicode(0x2070);
369 case 1: return Unicode(0x00B9);
370 case 2: return Unicode(0x00B2);
371 case 3: return Unicode(0x00B3);
372 case 4: return Unicode(0x2074);
373 case 5: return Unicode(0x2075);
374 case 6: return Unicode(0x2076);
375 case 7: return Unicode(0x2077);
376 case 8: return Unicode(0x2078);
377 case 9: return Unicode(0x2079);
378 }
379
380 return Unicode(0x207A);
381 }
382 //-----------------------------------------------------------------------------
383 /* the IBM APL2 character set shown in lrm figure 68 on page 470
384
385 The table is indexed with an 8-bit position in IBM's ⎕AV and returns
386 the Unicode for that position. In addition CTRL-K is mapped to ⍬ for
387 compatibility with Dyalog-APL
388 */
389 static const int ibm_av[] =
390 {
391 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
392 0x0008, 0x0009, 0x000A, 0x236C, 0x000C, 0x000D, 0x000E, 0x000F,
393 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
394 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
395 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
396 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
397 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
398 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
399 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
400 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
401 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
402 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
403 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
404 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
405 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
406 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
407 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
408 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
409 0x2395, 0x235E, 0x2339, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
410 0x22A4, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x22A5, 0x2376, 0x2336,
411 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
412 0x00BF, 0x2308, 0x00AC, 0x00BD, 0x222A, 0x00A1, 0x2355, 0x234E,
413 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x235F, 0x2206, 0x2207,
414 0x2192, 0x2563, 0x2551, 0x2557, 0x255D, 0x2190, 0x230A, 0x2510,
415 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x2191, 0x2193,
416 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2261,
417 0x2378, 0x22F8, 0x2235, 0x2337, 0x2342, 0x233B, 0x22A2, 0x22A3,
418 0x25CA, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
419 0x237A, 0x2379, 0x2282, 0x2283, 0x235D, 0x2372, 0x2374, 0x2371,
420 0x233D, 0x2296, 0x25CB, 0x2228, 0x2373, 0x2349, 0x2208, 0x2229,
421 0x233F, 0x2340, 0x2265, 0x2264, 0x2260, 0x00D7, 0x00F7, 0x2359,
422 0x2218, 0x2375, 0x236B, 0x234B, 0x2352, 0x00AF, 0x00A8, 0x00A0
423 };
424
425 const Unicode *
IBM_quad_AV()426 Avec::IBM_quad_AV()
427 {
428 return reinterpret_cast<const Unicode *>(ibm_av);
429 }
430 //-----------------------------------------------------------------------------
431 Avec::Unicode_to_IBM_codepoint Avec::inverse_ibm_av[256] =
432 {
433 { 0x0000, 0 }, { 0x0001, 1 }, { 0x0002, 2 }, { 0x0003, 3 },
434 { 0x0004, 4 }, { 0x0005, 5 }, { 0x0006, 6 }, { 0x0007, 7 },
435 { 0x0008, 8 }, { 0x0009, 9 }, { 0x000A, 10 }, { 0x000C, 12 },
436 { 0x000D, 13 }, { 0x000E, 14 }, { 0x000F, 15 }, { 0x0010, 16 },
437 { 0x0011, 17 }, { 0x0012, 18 }, { 0x0013, 19 }, { 0x0014, 20 },
438 { 0x0015, 21 }, { 0x0016, 22 }, { 0x0017, 23 }, { 0x0018, 24 },
439 { 0x0019, 25 }, { 0x001A, 26 }, { 0x001B, 27 }, { 0x001C, 28 },
440 { 0x001D, 29 }, { 0x001E, 30 }, { 0x001F, 31 }, { 0x0020, 32 },
441 { 0x0021, 33 }, { 0x0022, 34 }, { 0x0023, 35 }, { 0x0024, 36 },
442 { 0x0025, 37 }, { 0x0026, 38 }, { 0x0027, 39 }, { 0x0028, 40 },
443 { 0x0029, 41 }, { 0x002A, 42 }, { 0x002B, 43 }, { 0x002C, 44 },
444 { 0x002D, 45 }, { 0x002E, 46 }, { 0x002F, 47 }, { 0x0030, 48 },
445 { 0x0031, 49 }, { 0x0032, 50 }, { 0x0033, 51 }, { 0x0034, 52 },
446 { 0x0035, 53 }, { 0x0036, 54 }, { 0x0037, 55 }, { 0x0038, 56 },
447 { 0x0039, 57 }, { 0x003A, 58 }, { 0x003B, 59 }, { 0x003C, 60 },
448 { 0x003D, 61 }, { 0x003E, 62 }, { 0x003F, 63 }, { 0x0040, 64 },
449 { 0x0041, 65 }, { 0x0042, 66 }, { 0x0043, 67 }, { 0x0044, 68 },
450 { 0x0045, 69 }, { 0x0046, 70 }, { 0x0047, 71 }, { 0x0048, 72 },
451 { 0x0049, 73 }, { 0x004A, 74 }, { 0x004B, 75 }, { 0x004C, 76 },
452 { 0x004D, 77 }, { 0x004E, 78 }, { 0x004F, 79 }, { 0x0050, 80 },
453 { 0x0051, 81 }, { 0x0052, 82 }, { 0x0053, 83 }, { 0x0054, 84 },
454 { 0x0055, 85 }, { 0x0056, 86 }, { 0x0057, 87 }, { 0x0058, 88 },
455 { 0x0059, 89 }, { 0x005A, 90 }, { 0x005B, 91 }, { 0x005C, 92 },
456 { 0x005D, 93 }, { 0x005E, 94 }, { 0x005F, 95 }, { 0x0060, 96 },
457 { 0x0061, 97 }, { 0x0062, 98 }, { 0x0063, 99 }, { 0x0064, 100 },
458 { 0x0065, 101 }, { 0x0066, 102 }, { 0x0067, 103 }, { 0x0068, 104 },
459 { 0x0069, 105 }, { 0x006A, 106 }, { 0x006B, 107 }, { 0x006C, 108 },
460 { 0x006D, 109 }, { 0x006E, 110 }, { 0x006F, 111 }, { 0x0070, 112 },
461 { 0x0071, 113 }, { 0x0072, 114 }, { 0x0073, 115 }, { 0x0074, 116 },
462 { 0x0075, 117 }, { 0x0076, 118 }, { 0x0077, 119 }, { 0x0078, 120 },
463 { 0x0079, 121 }, { 0x007A, 122 }, { 0x007B, 123 }, { 0x007C, 124 },
464 { 0x007D, 125 }, { 0x007E, 126 }, { 0x007F, 127 }, { 0x00A0, 255 },
465 { 0x00A1, 173 }, { 0x00A3, 156 }, { 0x00A6, 221 }, { 0x00A8, 254 },
466 { 0x00AA, 166 }, { 0x00AC, 170 }, { 0x00AF, 253 }, { 0x00BA, 167 },
467 { 0x00BD, 171 }, { 0x00BF, 168 }, { 0x00C4, 142 }, { 0x00C5, 143 },
468 { 0x00C7, 128 }, { 0x00CC, 222 }, { 0x00D1, 165 }, { 0x00D6, 153 },
469 { 0x00D7, 245 }, { 0x00DC, 154 }, { 0x00E0, 133 }, { 0x00E1, 160 },
470 { 0x00E2, 131 }, { 0x00E4, 132 }, { 0x00E5, 134 }, { 0x00E7, 135 },
471 { 0x00E8, 138 }, { 0x00E9, 130 }, { 0x00EA, 136 }, { 0x00EB, 137 },
472 { 0x00EC, 141 }, { 0x00ED, 161 }, { 0x00EE, 140 }, { 0x00EF, 139 },
473 { 0x00F1, 164 }, { 0x00F2, 149 }, { 0x00F3, 162 }, { 0x00F4, 147 },
474 { 0x00F6, 148 }, { 0x00F7, 246 }, { 0x00F8, 155 }, { 0x00F9, 151 },
475 { 0x00FA, 163 }, { 0x00FB, 150 }, { 0x00FC, 129 }, { 0x2190, 189 },
476 { 0x2191, 198 }, { 0x2192, 184 }, { 0x2193, 199 }, { 0x2206, 182 },
477 { 0x2207, 183 }, { 0x2208, 238 }, { 0x2218, 248 }, { 0x2228, 235 },
478 { 0x2229, 239 }, { 0x222A, 172 }, { 0x2235, 210 }, { 0x2260, 244 },
479 { 0x2261, 207 }, { 0x2264, 243 }, { 0x2265, 242 }, { 0x2282, 226 },
480 { 0x2283, 227 }, { 0x2296, 233 }, { 0x22A2, 214 }, { 0x22A3, 215 },
481 { 0x22A4, 152 }, { 0x22A5, 157 }, { 0x22F8, 209 }, { 0x2308, 169 },
482 { 0x230A, 190 }, { 0x2336, 159 }, { 0x2337, 211 }, { 0x2339, 146 },
483 { 0x233B, 213 }, { 0x233D, 232 }, { 0x233F, 240 }, { 0x2340, 241 },
484 { 0x2342, 212 }, { 0x2349, 237 }, { 0x234B, 251 }, { 0x234E, 175 },
485 { 0x2352, 252 }, { 0x2355, 174 }, { 0x2359, 247 }, { 0x235D, 228 },
486 { 0x235E, 145 }, { 0x235F, 181 }, { 0x236B, 250 }, { 0x236C, 11 },
487 { 0x2371, 231 }, { 0x2372, 229 }, { 0x2373, 236 }, { 0x2374, 230 },
488 { 0x2375, 249 }, { 0x2376, 158 }, { 0x2378, 208 }, { 0x2379, 225 },
489 { 0x237A, 224 }, { 0x2395, 144 }, { 0x2500, 196 }, { 0x2502, 179 },
490 { 0x250C, 218 }, { 0x2510, 191 }, { 0x2514, 192 }, { 0x2518, 217 },
491 { 0x251C, 195 }, { 0x2524, 180 }, { 0x252C, 194 }, { 0x2534, 193 },
492 { 0x253C, 197 }, { 0x2550, 205 }, { 0x2551, 186 }, { 0x2554, 201 },
493 { 0x2557, 187 }, { 0x255A, 200 }, { 0x255D, 188 }, { 0x2560, 204 },
494 { 0x2563, 185 }, { 0x2566, 203 }, { 0x2569, 202 }, { 0x256C, 206 },
495 { 0x2580, 223 }, { 0x2584, 220 }, { 0x2588, 219 }, { 0x2591, 176 },
496 { 0x2592, 177 }, { 0x2593, 178 }, { 0x25CA, 216 }, { 0x25CB, 234 }
497 };
498
499 void
print_inverse_IBM_quad_AV()500 Avec::print_inverse_IBM_quad_AV()
501 {
502 // a helper function that sorts ibm_av by Unicode and prints it on CERR
503 //
504 // To use it change #if 0 to #if 1 in Command.cc:1707
505 // recompile and start apl, and then )IN <file> or so.
506 //
507 loop(c, 256)
508 {
509 inverse_ibm_av[c].uni = -1;
510 inverse_ibm_av[c].cp = -1;
511 }
512
513 int current_max = -1;
514 Unicode_to_IBM_codepoint * map = inverse_ibm_av;
515 loop(c, 256)
516 {
517 // find next Unicode after current_max
518 //
519 int next_idx = -1;
520 loop(n, 256)
521 {
522 const int uni = ibm_av[n];
523 if (uni <= current_max) continue; // already done
524 if (next_idx == -1) next_idx = n;
525 else if (uni < ibm_av[next_idx]) next_idx = n;
526 }
527 current_max = map->uni = ibm_av[next_idx];
528 map->cp = next_idx;
529 ++map;
530 }
531
532 loop(row, 64)
533 {
534 CERR << " ";
535 loop(col, 4)
536 {
537 const int pos = col + 4*row;
538 CERR << " { " << HEX4(inverse_ibm_av[pos].uni) << ", "
539 << setw(3) << inverse_ibm_av[pos].cp << " }";
540 if (pos < 255) CERR << ",";
541 }
542 CERR << endl;
543 }
544 }
545 //-----------------------------------------------------------------------------
546 /// compare the unicodes of two chars in the IBM ⎕AV
547 int
compare_uni(const void * key,const void * entry)548 Avec::compare_uni(const void * key, const void * entry)
549 {
550 return *reinterpret_cast<const Unicode *>(key) -
551 reinterpret_cast<const Unicode_to_IBM_codepoint *>(entry)->uni;
552 }
553 //-----------------------------------------------------------------------------
554 unsigned char
unicode_to_cp(Unicode uni)555 Avec::unicode_to_cp(Unicode uni)
556 {
557 if (uni <= 0x80) return uni;
558 if (uni == UNI_STAR_OPERATOR) return '*'; // ⋆ → *
559 if (uni == UNI_AND) return '^'; // ∧ → ^
560 if (uni == UNI_TILDE_OPERATOR) return 126; // ∼ → ~
561
562 // search in uni_to_cp_map table
563 //
564 const void * where = bsearch(&uni, inverse_ibm_av, 256,
565 sizeof(Unicode_to_IBM_codepoint), compare_uni);
566
567 if (where == 0)
568 {
569 // the workspace being )OUT'ed can contain characters that are not
570 // in IBM's APL character set. We replace such characters by 0xB0
571 //
572 return 0xB0;
573 }
574
575 Assert(where);
576 return reinterpret_cast<const Unicode_to_IBM_codepoint *>(where)->cp;
577 }
578 //-----------------------------------------------------------------------------
579
580