/*    utfebcdic.h
 *
 *    Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009,
 *    2010, 2011 by Larry Wall, Nick Ing-Simmons, and others
 *
 *    You may distribute under the terms of either the GNU General Public
 *    License or the Artistic License, as specified in the README file.
 *
 * Macros to implement UTF-EBCDIC as perl's internal encoding
 * Taken from version 7.1 of Unicode Technical Report #16:
 *  http://www.unicode.org/unicode/reports/tr16
 *
 * To summarize, the way it works is:
 * To convert an EBCDIC character to UTF-EBCDIC:
 *  1)  convert to Unicode.  The table in this file that does this for
 *      EBCDIC bytes is PL_e2a (with inverse PL_a2e).  The 'a' stands for
 *      ASCIIish, meaning latin1.
 *  2)  convert that to a utf8-like string called I8 (I stands for
 *      intermediate) with variant characters occupying multiple bytes.  This
 *      step is similar to the utf8-creating step from Unicode, but the details
 *      are different.  This transformation is called UTF8-Mod.  There is a
 *      chart about the bit patterns in a comment later in this file.  But
 *      essentially here are the differences:
 *                          UTF8                I8
 *      invariant byte      starts with 0       starts with 0 or 100
 *      continuation byte   starts with 10      starts with 101
 *      start byte          same in both: if the code point requires N bytes,
 *                          then the leading N bits are 1, followed by a 0.  (No
 *                          trailing 0 for the very largest possible allocation
 *                          in I8, far beyond the current Unicode standard's
 *                          max, as shown in the comment later in this file.)
 *  3)  Use the table published in tr16 to convert each byte from step 2 into
 *      final UTF-EBCDIC.  That table is reproduced in this file as PL_utf2e,
 *      and its inverse is PL_e2utf.  They are constructed so that all EBCDIC
 *      invariants remain invariant, but no others do.  For example, the
 *      ordinal value of 'A' is 193 in EBCDIC, and also is 193 in UTF-EBCDIC.
 *      Step 1) converts it to 65, Step 2 leaves it at 65, and Step 3 converts
 *      it back to 193.  As an example of how a variant character works, take
 *      LATIN SMALL LETTER Y WITH DIAERESIS, which is typically 0xDF in
 *      EBCDIC.  Step 1 converts it to the Unicode value, 0xFF.  Step 2
 *      converts that to two bytes = 11000111 10111111 = C7 BF, and Step 3
 *      converts those to 0x8B 0x73.  The table is constructed so that the
 *      first byte of the final form of a variant will always have its upper
 *      bit set (at least in the encodings that Perl recognizes, and probably
 *      all).  But note that the upper bit of some invariants is also 1.
 *
 * If you're starting from Unicode, skip step 1.  For UTF-EBCDIC to straight
 * EBCDIC, reverse the steps.
 *
 * The EBCDIC invariants have been chosen to be those characters whose Unicode
 * equivalents have ordinal numbers less than 160, that is the same characters
 * that are expressible in ASCII, plus the C1 controls.  So there are 160
 * invariants instead of the 128 in UTF-8.  (My guess is that this is because
 * the C1 control NEL (and maybe others) is important in IBM.)
 *
 * The purpose of Step 3 is to make the encoding be invariant for the chosen
 * characters.  This messes up the convenient patterns found in step 2, so
 * generally, one has to undo step 3 into a temporary to use them.  However,
 * a "shadow", or parallel table, PL_utf8skip, has been constructed so that for
 * each byte, it says how long the sequence is if that byte were to begin it.
 *
 * There are actually 3 slightly different UTF-EBCDIC encodings in this file,
 * one for each of the code pages recognized by Perl.  That means that there
 * are actually three different sets of tables, one for each code page.  (If
 * Perl is compiled on platforms using another EBCDIC code page, it may not
 * compile, or Perl may silently mistake it for one of the three.)
67 * 68 * EBCDIC characters above 0xFF are the same as Unicode in Perl's 69 * implementation of all 3 encodings, so for those Step 1 is trivial. 70 * 71 * (Note that the entries for invariant characters are necessarily the same in 72 * PL_e2a and PLe2f, and the same for their inverses.) 73 * 74 * UTF-EBCDIC strings are the same length or longer than UTF-8 representations 75 * of the same string. The maximum code point representable as 2 bytes in 76 * UTF-EBCDIC is 0x3FFF, instead of 0x7FFF in UTF-8. 77 */ 78 79 START_EXTERN_C 80 81 #ifdef DOINIT 82 /* Indexed by encoded byte this table gives the length of the sequence. 83 Adapted from the shadow flags table in tr16. 84 The entries marked 9 in tr16 are continuation bytes and are marked 85 as length 1 here so that we can recover. 86 */ 87 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */ 88 EXTCONST unsigned char PL_utf8skip[] = { 89 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 90 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 91 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 92 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 93 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 94 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 95 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 96 1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1, 97 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 98 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 99 2,1,1,1,1,1,1,1,1,1,2,2,2,1,2,2, 100 2,2,2,2,2,2,2,3,3,3,3,3,3,1,3,3, 101 1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3, 102 1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4, 103 1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5, 104 1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1 105 }; 106 #endif 107 108 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */ 109 unsigned char PL_utf8skip[] = { 110 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 111 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 112 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 113 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 114 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 115 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 116 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 117 1,1,1,1,2,2,2,2,2,3,1,1,1,1,1,1, 118 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 119 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 120 
2,3,1,1,1,1,1,1,1,1,2,2,2,3,2,2, 121 1,2,2,2,2,2,2,3,3,3,2,1,1,1,3,3, 122 4,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3, 123 1,1,1,1,1,1,1,1,1,1,3,3,4,6,4,4, 124 7,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5, 125 1,1,1,1,1,1,1,1,1,1,5,1,6,1,7,1 126 }; 127 #endif 128 129 #if '^' == 176 /* if defined(??) (OS/400?) 037 */ 130 unsigned char PL_utf8skip[] = { 131 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 132 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 133 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 134 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 135 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 136 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2, 137 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 138 1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1, 139 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 140 2,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, 141 2,1,1,1,1,1,1,1,1,1,2,2,2,3,2,2, 142 1,2,2,2,2,2,2,3,3,3,1,1,3,3,3,3, 143 1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3, 144 1,1,1,1,1,1,1,1,1,1,3,3,4,4,4,4, 145 1,4,1,1,1,1,1,1,1,1,4,4,4,5,5,5, 146 1,1,1,1,1,1,1,1,1,1,5,6,6,7,7,1 147 }; 148 #endif 149 150 /* Transform tables from tr16 applied after encoding to render encoding EBCDIC 151 * like, meaning that all the invariants are actually invariant, eg, that 'A' 152 * remains 'A' */ 153 154 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 
1047 */ 155 EXTCONST unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-1047) */ 156 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 157 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F, 158 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 159 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 160 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 161 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 162 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 163 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, 164 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B, 165 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF, 166 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 167 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73, 168 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C, 169 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 170 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB, 171 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE 172 }; 173 174 EXTCONST unsigned char PL_e2utf[] = { /* UTFEBCDIC (IBM-1047) to I8 */ 175 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 176 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 177 0x80, 0x81, 0x82, 0x83, 
0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, 178 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, 179 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, 180 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E, 181 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, 182 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, 183 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 184 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 185 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0x5B, 0xD7, 0xD8, 186 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0x5D, 0xE6, 0xE7, 187 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 188 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 189 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 190 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F 191 }; 192 #endif /* 1047 */ 193 194 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */ 195 unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (POSIX-BC) */ 196 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 197 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F, 198 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 199 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 200 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 
0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 201 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D, 202 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 203 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07, 204 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B, 205 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F, 206 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xB0, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 207 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xD0, 0x70, 0x71, 0x72, 0x73, 208 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C, 209 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0xBA, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 210 0xB7, 0xB8, 0xB9, 0xAD, 0x79, 0xA1, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB, 211 0xDC, 0xC0, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xDD, 0xFC, 0xE0, 0xFE 212 }; 213 214 unsigned char PL_e2utf[] = { /* UTFEBCDIC (POSIX-BC) to I8 */ 215 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 216 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 217 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, 218 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, 219 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, 220 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F, 221 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, 222 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xE4, 0x3A, 0x23, 0x40, 0x27, 0x3D, 
0x22, 223 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 224 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 225 0xD3, 0xE5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8, 226 0xA9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0xD9, 0x5B, 0x5C, 0x5D, 0xE6, 0xE7, 227 0xF1, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 228 0xBB, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xEE, 0xEF, 0xF0, 0xFC, 0xF2, 0xF3, 229 0xFE, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 230 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0x7B, 0xFD, 0x7D, 0xFF, 0x7E 231 }; 232 #endif /* POSIX-BC */ 233 234 #if '^' == 176 /* if defined(??) (OS/400?) 037 */ 235 unsigned char PL_utf2e[] = { /* I8 to UTFEBCDIC (IBM-037) */ 236 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 237 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F, 238 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 239 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 240 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 241 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D, 242 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 243 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, 244 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B, 245 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF, 246 0x41, 0x42, 0x43, 0x44, 0x45, 
0x46, 0x47, 0x48, 0x49, 0x4A, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 247 0x57, 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x70, 0x71, 0x72, 0x73, 248 0x74, 0x75, 0x76, 0x77, 0x78, 0x80, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, 0x90, 0x9A, 0x9B, 0x9C, 249 0x9D, 0x9E, 0x9F, 0xA0, 0xAA, 0xAB, 0xAC, 0xAE, 0xAF, 0x5F, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 250 0xB7, 0xB8, 0xB9, 0xAD, 0xBD, 0xBC, 0xBE, 0xBF, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB, 251 0xDC, 0xDD, 0xDE, 0xDF, 0xE1, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE 252 }; 253 254 unsigned char PL_e2utf[] = { /* UTFEBCDIC (IBM-037) to I8 */ 255 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 256 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 257 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, 258 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, 259 0x20, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, 260 0x26, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xB2, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xD9, 261 0x2D, 0x2F, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, 262 0xBC, 0xBD, 0xBE, 0xBF, 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, 263 0xC5, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 264 0xCC, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xCD, 0xCE, 0xCF, 0xD0, 0xD1, 0xD2, 265 0xD3, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xD4, 0xD5, 0xD6, 0xE3, 0xD7, 0xD8, 266 0x5E, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 0xE0, 0xE1, 0xE2, 0x5B, 0x5D, 0xE5, 0xE4, 0xE6, 0xE7, 267 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 268 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 
0x52, 0xEE, 0xEF, 0xF0, 0xF1, 0xF2, 0xF3, 269 0x5C, 0xF4, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 270 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, 0x9F 271 }; 272 #endif /* 037 */ 273 274 /* These tables moved from perl.h and converted to hex. 275 They map platform code page from/to bottom 256 codes of Unicode (i.e. iso-8859-1). 276 */ 277 278 #if '^' == 95 /* if defined(__MVS__) || defined(??) (VM/ESA?) 1047 */ 279 EXTCONST unsigned char PL_a2e[] = { /* ASCII (iso-8859-1) to EBCDIC (IBM-1047) */ 280 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 281 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F, 282 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 283 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 284 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 285 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D, 286 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 287 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, 288 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B, 289 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF, 290 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC, 291 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB, 292 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77, 293 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59, 294 
0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, 295 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF 296 }; 297 298 EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (IBM-1047) to ASCII (iso-8859-1) */ 299 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 300 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 301 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, 302 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, 303 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, 304 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E, 305 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, 306 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, 307 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1, 308 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4, 309 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0x5B, 0xDE, 0xAE, 310 0xAC, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xDD, 0xA8, 0xAF, 0x5D, 0xB4, 0xD7, 311 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5, 312 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF, 313 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5, 314 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F 315 }; 316 317 EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' => 318 'a'; 'a' 
=> 'A' */ 319 0, 1, 2, 3, 4, 5, 6, 7, 320 8, 9, 10, 11, 12, 13, 14, 15, 321 16, 17, 18, 19, 20, 21, 22, 23, 322 24, 25, 26, 27, 28, 29, 30, 31, 323 32, 33, 34, 35, 36, 37, 38, 39, 324 40, 41, 42, 43, 44, 45, 46, 47, 325 48, 49, 50, 51, 52, 53, 54, 55, 326 56, 57, 58, 59, 60, 61, 62, 63, 327 64, 65, 98, 99, 100, 101, 102, 103, 328 104, 105, 74, 75, 76, 77, 78, 79, 329 80, 113, 114, 115, 116, 117, 118, 119, 330 120, 89, 90, 91, 92, 93, 94, 95, 331 96, 97, 66, 67, 68, 69, 70, 71, 332 72, 73, 106, 107, 108, 109, 110, 111, 333 128, 81, 82, 83, 84, 85, 86, 87, 334 88, 121, 122, 123, 124, 125, 126, 127, 335 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 336 'H', 'I', 138, 139, 172, 186, 174, 143, 337 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P', 338 'Q', 'R', 154, 155, 158, 157, 156, 159, 339 160, 161, 'S', 'T', 'U', 'V', 'W', 'X', 340 'Y', 'Z', 170, 171, 140, 173, 142, 175, 341 176, 177, 178, 179, 180, 181, 182, 183, 342 184, 185, 141, 187, 188, 189, 190, 191, 343 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 344 'h', 'i', 202, 235, 236, 237, 238, 239, 345 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 346 'q', 'r', 218, 251, 252, 253, 254, 223, 347 224, 225, 's', 't', 'u', 'v', 'w', 'x', 348 'y', 'z', 234, 203, 204, 205, 206, 207, 349 240, 241, 242, 243, 244, 245, 246, 247, 350 248, 249, 250, 219, 220, 221, 222, 255 351 }; 352 #endif /* 1047 */ 353 354 #if '^' == 106 /* if defined(_OSD_POSIX) POSIX-BC */ 355 EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (POSIX-BC) */ 356 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 357 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F, 358 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 359 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 360 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 361 0xD7, 0xD8, 
0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBB, 0xBC, 0xBD, 0x6A, 0x6D, 362 0x4A, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 363 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xFB, 0x4F, 0xFD, 0xFF, 0x07, 364 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B, 365 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0x5F, 366 0x41, 0xAA, 0xB0, 0xB1, 0x9F, 0xB2, 0xD0, 0xB5, 0x79, 0xB4, 0x9A, 0x8A, 0xBA, 0xCA, 0xAF, 0xA1, 367 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB, 368 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77, 369 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xE0, 0xFE, 0xDD, 0xFC, 0xAD, 0xAE, 0x59, 370 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, 371 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xC0, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF 372 }; 373 374 EXTCONST unsigned char PL_e2a[] = { /* EBCDIC (POSIX-BC) to ASCII (ISO8859-1) */ 375 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 376 0x10, 0x11, 0x12, 0x13, 0x9D, 0x0A, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 377 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, 378 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, 379 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0x60, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, 380 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x9F, 381 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0x5E, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, 382 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0xA8, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, 383 0xD8, 0x61, 
0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1, 384 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4, 385 0xB5, 0xAF, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE, 386 0xA2, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0xAC, 0x5B, 0x5C, 0x5D, 0xB4, 0xD7, 387 0xF9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5, 388 0xA6, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xDB, 0xFA, 0xFF, 389 0xD9, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5, 390 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0x7B, 0xDC, 0x7D, 0xDA, 0x7E 391 }; 392 393 EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' => 394 'a'; 'a' => 'A' */ 395 0, 1, 2, 3, 4, 5, 6, 7, 396 8, 9, 10, 11, 12, 13, 14, 15, 397 16, 17, 18, 19, 20, 21, 22, 23, 398 24, 25, 26, 27, 28, 29, 30, 31, 399 32, 33, 34, 35, 36, 37, 38, 39, 400 40, 41, 42, 43, 44, 45, 46, 47, 401 48, 49, 50, 51, 52, 53, 54, 55, 402 56, 57, 58, 59, 60, 61, 62, 63, 403 64, 65, 98, 99, 100, 101, 102, 103, 404 104, 105, 74, 75, 76, 77, 78, 79, 405 80, 113, 114, 115, 116, 117, 118, 119, 406 120, 89, 90, 91, 92, 93, 94, 95, 407 96, 97, 66, 67, 68, 69, 70, 71, 408 72, 73, 106, 107, 108, 109, 110, 111, 409 128, 81, 82, 83, 84, 85, 86, 87, 410 88, 121, 122, 123, 124, 125, 126, 127, 411 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 412 'H', 'I', 138, 139, 172, 173, 174, 143, 413 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P', 414 'Q', 'R', 154, 155, 158, 157, 156, 159, 415 160, 161, 'S', 'T', 'U', 'V', 'W', 'X', 416 'Y', 'Z', 170, 171, 140, 141, 142, 175, 417 176, 177, 178, 179, 180, 181, 182, 183, 418 184, 185, 186, 187, 188, 189, 190, 191, 419 224, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 420 'h', 'i', 202, 235, 236, 237, 238, 239, 421 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 422 'q', 'r', 218, 
221, 252, 219, 254, 223, 423 192, 225, 's', 't', 'u', 'v', 'w', 'x', 424 'y', 'z', 234, 203, 204, 205, 206, 207, 425 240, 241, 242, 243, 244, 245, 246, 247, 426 248, 249, 250, 251, 220, 253, 222, 255 427 }; 428 #endif /* POSIX-BC */ 429 430 #if '^' == 176 /* if defined(??) (OS/400?) 037 */ 431 EXTCONST unsigned char PL_a2e[] = { /* ASCII (ISO8859-1) to EBCDIC (IBM-037) */ 432 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x25, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 433 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F, 434 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, 435 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, 436 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 437 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D, 438 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 439 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, 440 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B, 441 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF, 442 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBD, 0xB4, 0x9A, 0x8A, 0x5F, 0xCA, 0xAF, 0xBC, 443 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB, 444 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77, 445 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xAD, 0xAE, 0x59, 446 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57, 447 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF 448 }; 449 450 EXTCONST 
unsigned char PL_e2a[] = { /* EBCDIC (IBM-037) to ASCII (ISO8859-1) */ 451 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F, 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 452 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F, 453 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07, 454 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A, 455 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5, 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, 456 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF, 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC, 457 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5, 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, 458 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF, 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, 459 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1, 460 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4, 461 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE, 462 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC, 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7, 463 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5, 464 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF, 465 0x5C, 0xF7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5, 466 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F 467 }; 468 469 EXTCONST unsigned char PL_fold[] = { /* fast EBCDIC case folding table, 'A' => 470 'a'; 'a' => 'A' */ 471 0, 1, 2, 3, 4, 5, 6, 7, 472 8, 9, 10, 11, 12, 13, 14, 15, 473 16, 17, 18, 19, 20, 21, 22, 23, 474 24, 25, 26, 27, 28, 29, 30, 31, 475 32, 33, 34, 35, 36, 37, 38, 39, 476 40, 41, 42, 43, 44, 45, 46, 47, 477 
48, 49, 50, 51, 52, 53, 54, 55, 478 56, 57, 58, 59, 60, 61, 62, 63, 479 64, 65, 98, 99, 100, 101, 102, 103, 480 104, 105, 74, 75, 76, 77, 78, 79, 481 80, 113, 114, 115, 116, 117, 118, 119, 482 120, 89, 90, 91, 92, 93, 94, 95, 483 96, 97, 66, 67, 68, 69, 70, 71, 484 72, 73, 106, 107, 108, 109, 110, 111, 485 128, 81, 82, 83, 84, 85, 86, 87, 486 88, 121, 122, 123, 124, 125, 126, 127, 487 112, 'A', 'B', 'C', 'D', 'E', 'F', 'G', 488 'H', 'I', 138, 139, 172, 173, 174, 143, 489 144, 'J', 'K', 'L', 'M', 'N', 'O', 'P', 490 'Q', 'R', 154, 155, 158, 157, 156, 159, 491 160, 161, 'S', 'T', 'U', 'V', 'W', 'X', 492 'Y', 'Z', 170, 171, 140, 141, 142, 175, 493 176, 177, 178, 179, 180, 181, 182, 183, 494 184, 185, 186, 187, 188, 189, 190, 191, 495 192, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 496 'h', 'i', 202, 235, 236, 237, 238, 239, 497 208, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 498 'q', 'r', 218, 251, 252, 253, 254, 223, 499 224, 225, 's', 't', 'u', 'v', 'w', 'x', 500 'y', 'z', 234, 203, 204, 205, 206, 207, 501 240, 241, 242, 243, 244, 245, 246, 247, 502 248, 249, 250, 219, 220, 221, 222, 255 503 }; 504 #endif /* 037 */ 505 506 /* Since the EBCDIC code pages are isomorphic to Latin1, that table is merely a 507 * duplicate */ 508 EXTCONST unsigned char * PL_fold_latin1 = PL_fold; 509 510 #else 511 EXTCONST unsigned char PL_utf8skip[]; 512 EXTCONST unsigned char PL_e2utf[]; 513 EXTCONST unsigned char PL_utf2e[]; 514 EXTCONST unsigned char PL_e2a[]; 515 EXTCONST unsigned char PL_a2e[]; 516 EXTCONST unsigned char PL_fold[]; 517 EXTCONST unsigned char * PL_fold_latin1; 518 #endif 519 520 END_EXTERN_C 521 522 /* EBCDIC-happy ways of converting native code to UTF-8 */ 523 524 /* Native to iso-8859-1 */ 525 #define NATIVE_TO_ASCII(ch) PL_e2a[(U8)(ch)] 526 #define ASCII_TO_NATIVE(ch) PL_a2e[(U8)(ch)] 527 /* Transform after encoding, essentially converts to/from I8 */ 528 #define NATIVE_TO_UTF(ch) PL_e2utf[(U8)(ch)] /* to I8 */ 529 #define NATIVE_TO_I8(ch) NATIVE_TO_UTF(ch) /* synonym */ 530 
#define UTF_TO_NATIVE(ch)        PL_utf2e[(U8)(ch)]     /* from I8 */
#define I8_TO_NATIVE(ch)         UTF_TO_NATIVE(ch)      /* synonym */

/* Transform in wide UV char space.  Values above 255 pass through unchanged;
 * anything else goes through the byte-wide translation table.  Note that
 * 'ch' is evaluated more than once, so it must not have side effects. */
#define NATIVE_TO_UNI(ch)        (((ch) > 255) ? (ch) : NATIVE_TO_ASCII(ch))
#define UNI_TO_NATIVE(ch)        (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))

/* Transform in invariant..byte space.  'enc' selects the encoded form: when
 * it is true the result is run through UTF_TO_NATIVE, otherwise the plain
 * native byte is produced. */
#define NATIVE_TO_NEED(enc,ch)   ((enc) ? UTF_TO_NATIVE(NATIVE_TO_ASCII(ch)) : (ch))
#define ASCII_TO_NEED(enc,ch)    ((enc) ? UTF_TO_NATIVE(ch) : ASCII_TO_NATIVE(ch))

/*
  The following table is adapted from tr16, it shows I8 encoding of Unicode code points.

  Unicode                 Bit pattern                       1st Byte 2nd Byte 3rd Byte 4th Byte 5th Byte 6th Byte 7th Byte
  U+0000..U+007F          000000000xxxxxxx                  0xxxxxxx
  U+0080..U+009F          00000000100xxxxx                  100xxxxx
  U+00A0..U+03FF          000000yyyyyxxxxx                  110yyyyy 101xxxxx
  U+0400..U+3FFF          00zzzzyyyyyxxxxx                  1110zzzz 101yyyyy 101xxxxx
  U+4000..U+3FFFF         0wwwzzzzzyyyyyxxxxx               11110www 101zzzzz 101yyyyy 101xxxxx
  U+40000..U+3FFFFF       0vvwwwwwzzzzzyyyyyxxxxx           111110vv 101wwwww 101zzzzz 101yyyyy 101xxxxx
  U+400000..U+3FFFFFF     0uvvvvvwwwwwzzzzzyyyyyxxxxx       1111110u 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx
  U+4000000..U+7FFFFFFF   0tuuuuuvvvvvwwwwwzzzzzyyyyyxxxxx  1111111t 101uuuuu 101vvvvv 101wwwww 101zzzzz 101yyyyy 101xxxxx

  Note: The I8 transformation is valid for UCS-4 values X'0' to
  X'7FFFFFFF' (the full extent of ISO/IEC 10646 coding space).

 */

/* Number of I8 bytes needed to encode code point 'uv'; the thresholds are the
 * range boundaries of the table above.  'uv' is evaluated several times. */
#define UNISKIP(uv) ( (uv) < 0xA0        ? 1 : \
                      (uv) < 0x400       ? 2 : \
                      (uv) < 0x4000      ? 3 : \
                      (uv) < 0x40000     ? 4 : \
                      (uv) < 0x400000    ? 5 : \
                      (uv) < 0x4000000   ? 6 : 7 )

/* True if code point 'c' is encoded as a single byte (first two table rows) */
#define UNI_IS_INVARIANT(c)             ((c) < 0xA0)

/* UTF-EBCDIC semantic macros - transform back into I8 and then compare
 * Comments as to the meaning of each are given at their corresponding utf8.h
 * definitions */

#define UTF8_IS_START(c)                (NATIVE_TO_UTF(c) >= 0xC5 && NATIVE_TO_UTF(c) != 0xE0)
#define UTF8_IS_CONTINUATION(c)         ((NATIVE_TO_UTF(c) & 0xE0) == 0xA0)
#define UTF8_IS_CONTINUED(c)            (NATIVE_TO_UTF(c) >= 0xA0)
#define UTF8_IS_DOWNGRADEABLE_START(c)  (NATIVE_TO_UTF(c) >= 0xC5 && NATIVE_TO_UTF(c) <= 0xC7)
#define UTF8_IS_ABOVE_LATIN1(c)         (NATIVE_TO_I8(c) >= 0xC8)

/* Leading marker bits of the start byte of a 'len'-byte sequence: 'len' one
 * bits followed by a zero, except that the longest form has no room for the
 * trailing zero. */
#define UTF_START_MARK(len)             (((len) >  7) ? 0xFF : ((U8)(0xFE << (7-(len)))))

/* Mask selecting the payload bits of the start byte of a 'len'-byte sequence.
 * A one-byte character carries no marker bits, so for len < 2 the whole byte
 * is payload; answering 0xFF there also keeps the shift below from being
 * evaluated with a negative count, which would be undefined behaviour.
 * Lengths of 2 and up yield exactly the same values as before. */
#define UTF_START_MASK(len)             (((len) <  2) ? 0xFF : \
                                         ((len) >= 6) ? 0x01 : (0x1F >> ((len)-2)))

/* Continuation bytes look like 101xxxxx: a fixed 3-bit marker plus 5 payload
 * bits, hence the accumulation shift of 5. */
#define UTF_CONTINUATION_MARK           0xA0
#define UTF_CONTINUATION_MASK           ((U8)0x1f)
#define UTF_ACCUMULATION_SHIFT          5

/* How wide can a single UTF-8 encoded character become in bytes. */
/* NOTE: Strictly speaking Perl's UTF-8 should not be called UTF-8 since UTF-8
 * is an encoding of Unicode, and Unicode's upper limit, 0x10FFFF, can be
 * expressed with 5 bytes.  However, Perl thinks of UTF-8 as a way to encode
 * non-negative integers in a binary format, even those above Unicode */
#define UTF8_MAXBYTES 7

/* The maximum number of UTF-8 bytes a single Unicode character can
 * uppercase/lowercase/fold into.  Unicode guarantees that the maximum
 * expansion is 3 characters.  On EBCDIC platforms, the highest Unicode
 * character occupies 5 bytes, therefore this number is 15 */
#define UTF8_MAXBYTES_CASE 15

/*
 * Local variables:
 * c-indentation-style: bsd
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 *
 * ex: set ts=8 sts=4 sw=4 et:
 */