1 /*
2     This file is part of GNU APL, a free implementation of the
3     ISO/IEC Standard 13751, "Programming Language APL, Extended"
4 
5     Copyright (C) 2008-2015  Dr. Jürgen Sauermann
6 
7     This program is free software: you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation, either version 3 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #include <fcntl.h>
22 #include <unistd.h>
23 #include <sys/mman.h>
24 #include <sys/stat.h>
25 
26 #include "Avec.hh"
27 #include "Command.hh"
28 #include "Output.hh"
29 #include "Parser.hh"
30 #include "PrintOperator.hh"
31 #include "UTF8_string.hh"
32 #include "Value.hh"
33 
34 using namespace std;
35 
36 // See RFC 3629 for UTF-8
37 
38 /// Various character attributes.
39 struct Character_definition
40 {
41    CHT_Index     av_val;      ///< Atomic vector enum of the char.
42    Unicode       unicode;     ///< Unicode of the char.
43    const char *  char_name;   ///< Name of the char.
44    int           def_line;    ///< Line where the char is defined.
45    TokenTag      token_tag;   ///< Token tag for the char.
46    CharacterFlag flags;       ///< Character class.
47    int           av_pos;      ///< position in ⎕AV (== ⎕AF unicode)
48 };
49 
50 //-----------------------------------------------------------------------------
51 #define char_def(n, _u, t, f, p) \
52    { AV_ ## n, UNI_ ## n, # n, __LINE__, TOK_ ## t, FLG_ ## f, 0x ## p },
53 #define char_df1(_n, _u, _t, _f, _p)
54 const Character_definition character_table[MAX_AV] =
55 {
56 #include "Avec.def"
57 };
58 //-----------------------------------------------------------------------------
59 void
show_error_pos(int i,int line,bool cond,int def_line)60 Avec::show_error_pos(int i, int line, bool cond, int def_line)
61 {
62    if (!cond)
63       {
64         CERR << "Error index (line " << line << ", Avec.def line "
65              << def_line << ") = " << i << " = " << HEX(i) << endl;
66         Assert(0);
67       }
68 }
69 //-----------------------------------------------------------------------------
70 #if 0
71 
72 void
73 Avec::check_file(const char * filename)
74 {
75 const int fd = open(filename, O_RDONLY);
76    if (fd == -1)   return;
77 
78    Log(LOG_startup)   CERR << "Checking " << filename << endl;
79 
80 uint32_t datalen;
81    {
82      struct stat s;
83      const int res = fstat(fd, &s);
84      Assert(res == 0);
85      datalen = s.st_size;
86    }
87 
88 const void * data = mmap(NULL, datalen, PROT_READ, MAP_PRIVATE, fd, 0);
89    Assert(data != (void *)-1);
90 
91 const UTF8_string utf = UTF8_string((const UTF8 *)data, datalen);
92 UCS_string ucs(utf);
93 
94    loop(i, ucs.size())
95        {
96          if (!is_known_char(ucs[i]))
97             CERR << "APL char " << UNI(ucs[i]) << " is missing in AV ("
98                  << i << ")" << endl;
99        }
100 
101    close(fd);
102 }
103 #endif
104 //-----------------------------------------------------------------------------
105 void
init()106 Avec::init()
107 {
108    check_av_table();
109 }
110 //-----------------------------------------------------------------------------
111 void
check_av_table()112 Avec::check_av_table()
113 {
114    if (MAX_AV != 256)
115       {
116         CERR << "AV has " << MAX_AV << " entries (should be 256)" << endl;
117         return;
118       }
119 
120    Assert(sizeof(character_table) / sizeof(*character_table) == MAX_AV);
121 
122    // check that character_table.unicode is sorted by increasing UNICODE
123    //
124    for (int i = 1; i < MAX_AV; ++i)
125        show_error_pos(i, __LINE__, character_table[i].def_line,
126           character_table[i].unicode > character_table[i - 1].unicode);
127 
128    // check that ASCII chars map to themselves.
129    //
130    loop(i, 0x80)   show_error_pos(i, __LINE__, character_table[i].def_line,
131                                   character_table[i].unicode == i);
132 
133    // display holes and duplicate AV positions in character_table
134    {
135      int holes = 0;
136      loop(pos, MAX_AV)
137         {
138           int count = 0;
139           loop(i, MAX_AV)
140               {
141                 if (character_table[i].av_pos == pos)   ++count;
142               }
143 
144           if (count == 0)
145              {
146                ++holes;
147                CERR << "AV position " << HEX(pos) << " unused" << endl;
148              }
149 
150           if (count > 1)
151              CERR << "duplicate AV position " << HEX(pos) << endl;
152         }
153 
154      if (holes)   CERR << holes << " unused positions in ⎕AV" << endl;
155    }
156 
157    // check that find_char() works
158    //
159    loop(i, MAX_AV)   show_error_pos(i, __LINE__, character_table[i].def_line,
160                                     i == find_char(character_table[i].unicode));
161 
162    Log(LOG_SHOW_AV)
163       {
164         for (int i = 0x80; i < MAX_AV; ++i)
165             {
166               CERR << character_table[i].unicode << " is AV[" << i << "] AV_"
167                    << character_table[i].char_name << endl;
168             }
169       }
170 
171 // check_file("../keyboard");
172 // check_file("../keyboard1");
173 }
174 //-----------------------------------------------------------------------------
175 Unicode
unicode(CHT_Index av)176 Avec::unicode(CHT_Index av)
177 {
178    if (av < 0)         return UNI_AV_MAX;
179    if (av >= MAX_AV)   return UNI_AV_MAX;
180    return character_table[av].unicode;
181 }
182 //-----------------------------------------------------------------------------
183 uint32_t
get_av_pos(CHT_Index av)184 Avec::get_av_pos(CHT_Index av)
185 {
186    if (av < 0)         return UNI_AV_MAX;
187    if (av >= MAX_AV)   return UNI_AV_MAX;
188    return character_table[av].av_pos;
189 }
190 //-----------------------------------------------------------------------------
191 Token
uni_to_token(Unicode & uni,const char * loc)192 Avec::uni_to_token(Unicode & uni, const char * loc)
193 {
194 CHT_Index idx = find_char(uni);
195    if (idx != Invalid_CHT)   return Token(character_table[idx].token_tag, uni);
196 
197    // not found: try alternative characters.
198    //
199    idx = map_alternative_char(uni);
200    if (idx != Invalid_CHT)
201       {
202         uni = character_table[idx].unicode;
203         return Token(character_table[idx].token_tag,
204                      character_table[idx].unicode);
205       }
206 
207    Log(LOG_verbose_error)
208       {
209         CERR << endl << "Avec::uni_to_token() : Char " << UNI(uni)
210              << " (" << uni << ") not found in ⎕AV! (called from "
211              << loc << ")" << endl;
212 
213          Backtrace::show(__FILE__, __LINE__);
214       }
215    return Token();
216 }
217 //-----------------------------------------------------------------------------
find_av_pos(Unicode av)218 uint32_t Avec::find_av_pos(Unicode av)
219 {
220 const CHT_Index pos = find_char(av);
221 
222    if (pos < 0)   return MAX_AV;   // not found
223 
224    return character_table[pos].av_pos;
225 }
226 //-----------------------------------------------------------------------------
227 CHT_Index
find_char(Unicode av)228 Avec::find_char(Unicode av)
229 {
230 int l = 0;
231 int h = sizeof(character_table)/sizeof(*character_table) - 1;
232 
233    for (;;)
234        {
235          if (l > h)   return Invalid_CHT;   // not in table.
236 
237          const int m((h + l)/2);
238          const Unicode um = character_table[m].unicode;
239          if      (av < um)   h = m - 1;
240          else if (av > um)   l = m + 1;
241          else                return CHT_Index(m);
242        }
243 }
244 //-----------------------------------------------------------------------------
245 CHT_Index
map_alternative_char(Unicode alt_av)246 Avec::map_alternative_char(Unicode alt_av)
247 {
248    // map characters that look similar to characters used in GNU APL
249    // to the GNU APL character.
250    //
251    switch(int(alt_av))
252       {
253         case 0x005E: return AV_AND;              //  map ^ to ∧
254         case 0x007C: return AV_DIVIDES;          //  map | to ∣
255         case 0x007E: return AV_TILDE_OPERATOR;   //  map ~ to ∼
256         case 0x03B1: return AV_ALPHA;            //  map α to ⍺
257         case 0x03B5: return AV_ELEMENT;          //  map ε to ∈
258         case 0x03B9: return AV_IOTA;             //  map ι to ⍳
259         case 0x03C1: return AV_RHO;              //  map ρ to ⍴
260         case 0x03C9: return AV_OMEGA;            //  map ω to ⍵
261         case 0x2018: return AV_SINGLE_QUOTE;     //  map ‘ to '
262         case 0x2019: return AV_SINGLE_QUOTE;     //  map ’ to '
263         case 0x220A: return AV_ELEMENT;          //  map ∊ to ∈
264         case 0x2212: return AV_ASCII_MINUS;      //  map − to -
265         case 0x22BC: return AV_NAND;             //  map ⊼ to ⍲
266         case 0x22BD: return AV_NOR;              //  map ⊽ to ⍱
267         case 0x22C4: return AV_DIAMOND;          //  map ⋄ to ◊
268         case 0x2377: return AV_EPSILON_UBAR;     //  map ⍷ to ⋸
269         case 0x25AF: return AV_Quad_Quad;        //  map ▯ to ⎕
270         case 0x25E6: return AV_RING_OPERATOR;    //  map ◦ to ∘
271         case 0x2662: return AV_DIAMOND;          //  map ♢ to ◊
272         case 0x26AA: return AV_CIRCLE;           //  map ⚪ to ○
273         case 0x2A7D: return AV_LESS_OR_EQUAL;    //  map ⩽ to ≤
274         case 0x2A7E: return AV_MORE_OR_EQUAL;    //  map ⩾ to ≥
275         case 0x2B25: return AV_DIAMOND;          //  map ⬥ to ◊
276         case 0x2B26: return AV_DIAMOND;          //  map ⬦ to ◊
277         case 0x2B27: return AV_DIAMOND;          //  map ⬧ to ◊
278         default:     break;
279       }
280 
281    return Invalid_CHT;
282 }
283 //-----------------------------------------------------------------------------
284 bool
is_known_char(Unicode av)285 Avec::is_known_char(Unicode av)
286 {
287    switch(av)
288       {
289 #define char_def(_n, u, _t, _f, _p)  case u:
290 #define char_df1(_n, u, _t, _f, _p)  case u:
291 #include "Avec.def"
292           return true;
293 
294         default: break;
295       }
296 
297    return false;   // not found
298 }
299 //-----------------------------------------------------------------------------
300 bool
need_UCS(Unicode uni)301 Avec::need_UCS(Unicode uni)
302 {
303    if (is_control(uni))   return true;                      // ASCII control
304    if (uni >= 0 && uni < UNI_ASCII_DELETE)  return false;   // printable ASCII
305 
306 const CHT_Index idx = find_char(uni);
307    if (idx == Invalid_CHT)   return true;           // char not in GNU APL's ⎕AV
308    if (unicode_to_cp(uni) == 0xB0)   return true;   // not in IBM's ⎕AV
309 
310    return false;
311 }
312 //-----------------------------------------------------------------------------
313 bool
is_symbol_char(Unicode av)314 Avec::is_symbol_char(Unicode av)
315 {
316 const int32_t idx = find_char(av);
317    if (idx == -1)   return false;
318    return (character_table[idx].flags & FLG_SYMBOL) != 0;
319 }
320 //-----------------------------------------------------------------------------
321 bool
is_first_symbol_char(Unicode av)322 Avec::is_first_symbol_char(Unicode av)
323 {
324    return is_symbol_char(av) && ! is_digit(av);
325 }
326 //-----------------------------------------------------------------------------
327 bool
no_space_after(Unicode av)328 Avec::no_space_after(Unicode av)
329 {
330 const int32_t idx = find_char(av);
331    if (idx == -1)   return false;
332    return (character_table[idx].flags & FLG_NO_SPACE_AFTER) != 0;
333 }
334 //-----------------------------------------------------------------------------
335 bool
no_space_before(Unicode av)336 Avec::no_space_before(Unicode av)
337 {
338 const int32_t idx = find_char(av);
339    if (idx == -1)   return false;
340    return (character_table[idx].flags & FLG_NO_SPACE_BEFORE) != 0;
341 }
342 //-----------------------------------------------------------------------------
343 Unicode
subscript(uint32_t i)344 Avec::subscript(uint32_t i)
345 {
346    switch(i)
347       {
348         case 0: return Unicode(0x2080);
349         case 1: return Unicode(0x2081);
350         case 2: return Unicode(0x2082);
351         case 3: return Unicode(0x2083);
352         case 4: return Unicode(0x2084);
353         case 5: return Unicode(0x2085);
354         case 6: return Unicode(0x2086);
355         case 7: return Unicode(0x2087);
356         case 8: return Unicode(0x2088);
357         case 9: return Unicode(0x2089);
358       }
359 
360    return Unicode(0x2093);
361 }
362 //-----------------------------------------------------------------------------
363 Unicode
superscript(uint32_t i)364 Avec::superscript(uint32_t i)
365 {
366    switch(i)
367       {
368         case 0: return Unicode(0x2070);
369         case 1: return Unicode(0x00B9);
370         case 2: return Unicode(0x00B2);
371         case 3: return Unicode(0x00B3);
372         case 4: return Unicode(0x2074);
373         case 5: return Unicode(0x2075);
374         case 6: return Unicode(0x2076);
375         case 7: return Unicode(0x2077);
376         case 8: return Unicode(0x2078);
377         case 9: return Unicode(0x2079);
378       }
379 
380    return Unicode(0x207A);
381 }
382 //-----------------------------------------------------------------------------
383 /* the IBM APL2 character set shown in lrm figure 68 on page 470
384 
385    The table is indexed with an 8-bit position in IBM's ⎕AV and returns
386    the Unicode for that position. In addition CTRL-K is mapped to ⍬ for
387    compatibility with Dyalog-APL
388  */
389 static const int ibm_av[] =
390 {
391   0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
392   0x0008, 0x0009, 0x000A, 0x236C, 0x000C, 0x000D, 0x000E, 0x000F,
393   0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
394   0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
395   0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
396   0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
397   0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
398   0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
399   0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
400   0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
401   0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
402   0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
403   0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
404   0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
405   0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
406   0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
407   0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
408   0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
409   0x2395, 0x235E, 0x2339, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
410   0x22A4, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x22A5, 0x2376, 0x2336,
411   0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
412   0x00BF, 0x2308, 0x00AC, 0x00BD, 0x222A, 0x00A1, 0x2355, 0x234E,
413   0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x235F, 0x2206, 0x2207,
414   0x2192, 0x2563, 0x2551, 0x2557, 0x255D, 0x2190, 0x230A, 0x2510,
415   0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x2191, 0x2193,
416   0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2261,
417   0x2378, 0x22F8, 0x2235, 0x2337, 0x2342, 0x233B, 0x22A2, 0x22A3,
418   0x25CA, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
419   0x237A, 0x2379, 0x2282, 0x2283, 0x235D, 0x2372, 0x2374, 0x2371,
420   0x233D, 0x2296, 0x25CB, 0x2228, 0x2373, 0x2349, 0x2208, 0x2229,
421   0x233F, 0x2340, 0x2265, 0x2264, 0x2260, 0x00D7, 0x00F7, 0x2359,
422   0x2218, 0x2375, 0x236B, 0x234B, 0x2352, 0x00AF, 0x00A8, 0x00A0
423 };
424 
425 const Unicode *
IBM_quad_AV()426 Avec::IBM_quad_AV()
427 {
428    return reinterpret_cast<const Unicode *>(ibm_av);
429 }
430 //-----------------------------------------------------------------------------
431 Avec::Unicode_to_IBM_codepoint Avec::inverse_ibm_av[256] =
432 {
433   { 0x0000,   0 }, { 0x0001,   1 }, { 0x0002,   2 }, { 0x0003,   3 },
434   { 0x0004,   4 }, { 0x0005,   5 }, { 0x0006,   6 }, { 0x0007,   7 },
435   { 0x0008,   8 }, { 0x0009,   9 }, { 0x000A,  10 }, { 0x000C,  12 },
436   { 0x000D,  13 }, { 0x000E,  14 }, { 0x000F,  15 }, { 0x0010,  16 },
437   { 0x0011,  17 }, { 0x0012,  18 }, { 0x0013,  19 }, { 0x0014,  20 },
438   { 0x0015,  21 }, { 0x0016,  22 }, { 0x0017,  23 }, { 0x0018,  24 },
439   { 0x0019,  25 }, { 0x001A,  26 }, { 0x001B,  27 }, { 0x001C,  28 },
440   { 0x001D,  29 }, { 0x001E,  30 }, { 0x001F,  31 }, { 0x0020,  32 },
441   { 0x0021,  33 }, { 0x0022,  34 }, { 0x0023,  35 }, { 0x0024,  36 },
442   { 0x0025,  37 }, { 0x0026,  38 }, { 0x0027,  39 }, { 0x0028,  40 },
443   { 0x0029,  41 }, { 0x002A,  42 }, { 0x002B,  43 }, { 0x002C,  44 },
444   { 0x002D,  45 }, { 0x002E,  46 }, { 0x002F,  47 }, { 0x0030,  48 },
445   { 0x0031,  49 }, { 0x0032,  50 }, { 0x0033,  51 }, { 0x0034,  52 },
446   { 0x0035,  53 }, { 0x0036,  54 }, { 0x0037,  55 }, { 0x0038,  56 },
447   { 0x0039,  57 }, { 0x003A,  58 }, { 0x003B,  59 }, { 0x003C,  60 },
448   { 0x003D,  61 }, { 0x003E,  62 }, { 0x003F,  63 }, { 0x0040,  64 },
449   { 0x0041,  65 }, { 0x0042,  66 }, { 0x0043,  67 }, { 0x0044,  68 },
450   { 0x0045,  69 }, { 0x0046,  70 }, { 0x0047,  71 }, { 0x0048,  72 },
451   { 0x0049,  73 }, { 0x004A,  74 }, { 0x004B,  75 }, { 0x004C,  76 },
452   { 0x004D,  77 }, { 0x004E,  78 }, { 0x004F,  79 }, { 0x0050,  80 },
453   { 0x0051,  81 }, { 0x0052,  82 }, { 0x0053,  83 }, { 0x0054,  84 },
454   { 0x0055,  85 }, { 0x0056,  86 }, { 0x0057,  87 }, { 0x0058,  88 },
455   { 0x0059,  89 }, { 0x005A,  90 }, { 0x005B,  91 }, { 0x005C,  92 },
456   { 0x005D,  93 }, { 0x005E,  94 }, { 0x005F,  95 }, { 0x0060,  96 },
457   { 0x0061,  97 }, { 0x0062,  98 }, { 0x0063,  99 }, { 0x0064, 100 },
458   { 0x0065, 101 }, { 0x0066, 102 }, { 0x0067, 103 }, { 0x0068, 104 },
459   { 0x0069, 105 }, { 0x006A, 106 }, { 0x006B, 107 }, { 0x006C, 108 },
460   { 0x006D, 109 }, { 0x006E, 110 }, { 0x006F, 111 }, { 0x0070, 112 },
461   { 0x0071, 113 }, { 0x0072, 114 }, { 0x0073, 115 }, { 0x0074, 116 },
462   { 0x0075, 117 }, { 0x0076, 118 }, { 0x0077, 119 }, { 0x0078, 120 },
463   { 0x0079, 121 }, { 0x007A, 122 }, { 0x007B, 123 }, { 0x007C, 124 },
464   { 0x007D, 125 }, { 0x007E, 126 }, { 0x007F, 127 }, { 0x00A0, 255 },
465   { 0x00A1, 173 }, { 0x00A3, 156 }, { 0x00A6, 221 }, { 0x00A8, 254 },
466   { 0x00AA, 166 }, { 0x00AC, 170 }, { 0x00AF, 253 }, { 0x00BA, 167 },
467   { 0x00BD, 171 }, { 0x00BF, 168 }, { 0x00C4, 142 }, { 0x00C5, 143 },
468   { 0x00C7, 128 }, { 0x00CC, 222 }, { 0x00D1, 165 }, { 0x00D6, 153 },
469   { 0x00D7, 245 }, { 0x00DC, 154 }, { 0x00E0, 133 }, { 0x00E1, 160 },
470   { 0x00E2, 131 }, { 0x00E4, 132 }, { 0x00E5, 134 }, { 0x00E7, 135 },
471   { 0x00E8, 138 }, { 0x00E9, 130 }, { 0x00EA, 136 }, { 0x00EB, 137 },
472   { 0x00EC, 141 }, { 0x00ED, 161 }, { 0x00EE, 140 }, { 0x00EF, 139 },
473   { 0x00F1, 164 }, { 0x00F2, 149 }, { 0x00F3, 162 }, { 0x00F4, 147 },
474   { 0x00F6, 148 }, { 0x00F7, 246 }, { 0x00F8, 155 }, { 0x00F9, 151 },
475   { 0x00FA, 163 }, { 0x00FB, 150 }, { 0x00FC, 129 }, { 0x2190, 189 },
476   { 0x2191, 198 }, { 0x2192, 184 }, { 0x2193, 199 }, { 0x2206, 182 },
477   { 0x2207, 183 }, { 0x2208, 238 }, { 0x2218, 248 }, { 0x2228, 235 },
478   { 0x2229, 239 }, { 0x222A, 172 }, { 0x2235, 210 }, { 0x2260, 244 },
479   { 0x2261, 207 }, { 0x2264, 243 }, { 0x2265, 242 }, { 0x2282, 226 },
480   { 0x2283, 227 }, { 0x2296, 233 }, { 0x22A2, 214 }, { 0x22A3, 215 },
481   { 0x22A4, 152 }, { 0x22A5, 157 }, { 0x22F8, 209 }, { 0x2308, 169 },
482   { 0x230A, 190 }, { 0x2336, 159 }, { 0x2337, 211 }, { 0x2339, 146 },
483   { 0x233B, 213 }, { 0x233D, 232 }, { 0x233F, 240 }, { 0x2340, 241 },
484   { 0x2342, 212 }, { 0x2349, 237 }, { 0x234B, 251 }, { 0x234E, 175 },
485   { 0x2352, 252 }, { 0x2355, 174 }, { 0x2359, 247 }, { 0x235D, 228 },
486   { 0x235E, 145 }, { 0x235F, 181 }, { 0x236B, 250 }, { 0x236C,  11 },
487   { 0x2371, 231 }, { 0x2372, 229 }, { 0x2373, 236 }, { 0x2374, 230 },
488   { 0x2375, 249 }, { 0x2376, 158 }, { 0x2378, 208 }, { 0x2379, 225 },
489   { 0x237A, 224 }, { 0x2395, 144 }, { 0x2500, 196 }, { 0x2502, 179 },
490   { 0x250C, 218 }, { 0x2510, 191 }, { 0x2514, 192 }, { 0x2518, 217 },
491   { 0x251C, 195 }, { 0x2524, 180 }, { 0x252C, 194 }, { 0x2534, 193 },
492   { 0x253C, 197 }, { 0x2550, 205 }, { 0x2551, 186 }, { 0x2554, 201 },
493   { 0x2557, 187 }, { 0x255A, 200 }, { 0x255D, 188 }, { 0x2560, 204 },
494   { 0x2563, 185 }, { 0x2566, 203 }, { 0x2569, 202 }, { 0x256C, 206 },
495   { 0x2580, 223 }, { 0x2584, 220 }, { 0x2588, 219 }, { 0x2591, 176 },
496   { 0x2592, 177 }, { 0x2593, 178 }, { 0x25CA, 216 }, { 0x25CB, 234 }
497 };
498 
499 void
print_inverse_IBM_quad_AV()500 Avec::print_inverse_IBM_quad_AV()
501 {
502    // a helper function that sorts ibm_av by Unicode and prints it on CERR
503    //
504    // To use it change #if 0 to #if 1 in Command.cc:1707
505    // recompile and start apl, and then )IN <file> or so.
506    //
507    loop(c, 256)
508       {
509         inverse_ibm_av[c].uni = -1;
510         inverse_ibm_av[c].cp  = -1;
511       }
512 
513 int current_max = -1;
514 Unicode_to_IBM_codepoint * map = inverse_ibm_av;
515    loop(c, 256)
516       {
517         // find next Unicode after current_max
518         //
519         int next_idx = -1;
520         loop(n, 256)
521            {
522              const int uni = ibm_av[n];
523              if (uni <= current_max)   continue;   // already done
524              if (next_idx == -1)               next_idx = n;
525              else if (uni < ibm_av[next_idx])   next_idx = n;
526            }
527         current_max = map->uni = ibm_av[next_idx];
528         map->cp  = next_idx;
529         ++map;
530       }
531 
532    loop(row, 64)
533       {
534         CERR << " ";
535         loop(col, 4)
536            {
537              const int pos = col + 4*row;
538              CERR << " { " << HEX4(inverse_ibm_av[pos].uni) << ", "
539                   << setw(3) << inverse_ibm_av[pos].cp << " }";
540              if (pos < 255)   CERR << ",";
541            }
542         CERR << endl;
543       }
544 }
545 //-----------------------------------------------------------------------------
546 /// compare the unicodes of two chars in the IBM ⎕AV
547 int
compare_uni(const void * key,const void * entry)548 Avec::compare_uni(const void * key, const void * entry)
549 {
550    return *reinterpret_cast<const Unicode *>(key) -
551            reinterpret_cast<const Unicode_to_IBM_codepoint *>(entry)->uni;
552 }
553 //-----------------------------------------------------------------------------
554 unsigned char
unicode_to_cp(Unicode uni)555 Avec::unicode_to_cp(Unicode uni)
556 {
557    if (uni <= 0x80)                return uni;
558    if (uni == UNI_STAR_OPERATOR)   return '*';   // ⋆ → *
559    if (uni == UNI_AND)             return '^';   // ∧ → ^
560    if (uni == UNI_TILDE_OPERATOR)  return 126;   // ∼ → ~
561 
562    // search in uni_to_cp_map table
563    //
564 const void * where = bsearch(&uni, inverse_ibm_av, 256,
565                              sizeof(Unicode_to_IBM_codepoint), compare_uni);
566 
567    if (where == 0)
568       {
569         // the workspace being )OUT'ed can contain characters that are not
570         // in IBM's APL character set. We replace such characters by 0xB0
571         //
572         return 0xB0;
573       }
574 
575    Assert(where);
576    return reinterpret_cast<const Unicode_to_IBM_codepoint *>(where)->cp;
577 }
578 //-----------------------------------------------------------------------------
579 
580