#pragma once

#include <cstdint>

// Warning: decodes/encodes JIS, not Unicode.
// Use a table to map.
//
// Walks a NUL-terminated Shift-JIS byte string one character at a time.
// Single-byte characters are returned as their byte value; double-byte
// characters are returned as ((row + 0x20) << 8) | (cell + 0x20).
struct ShiftJIS {
	// Returned by next() when the bytes do not form a valid character.
	static const uint32_t INVALID = (uint32_t) -1;

	// Starts decoding at the first byte of c.  The pointer is stored, not
	// copied, so the string must stay alive while the decoder is in use.
	ShiftJIS(const char *c) : c_(c), index_(0) {}

	// Decodes the character at the current position and advances past it.
	//
	// Returns the single byte value, the packed double-byte code, or INVALID
	// for a bad lead byte (0x80) or a bad/missing second byte.
	//
	// Callers are expected to check end() first: calling this while sitting
	// on the terminator returns 0 and still advances the index.
	uint32_t next() {
		uint32_t j = (uint8_t)c_[index_++];

		// 0x80 falls inside the 0x8x lead range but is rejected outright.
		if (j == 0x80) {
			return INVALID;
		}

		int row;
		bool emojiAdjust = false;
		switch (j >> 4) {
		case 0x8:
		case 0x9:
		case 0xE:
			row = ((j & 0x3F) << 1) - 0x01;
			break;

		case 0xF:
			emojiAdjust = true;
			if (j < 0xF4) {
				row = ((j & 0x7F) << 1) - 0x59;
			} else if (j < 0xFD) {
				row = ((j & 0x7F) << 1) - 0x1B;
			} else {
				// 0xFD-0xFF are passed through as single bytes.
				return j;
			}
			break;

		// Anything else (i.e. <= 0x7x, 0xAx, 0xBx, 0xCx, and 0xDx) is JIS X 0201, return directly.
		default:
			return j;
		}

		// Okay, if we didn't return, it's time for the second byte (the cell.)
		// Peek before consuming: if the string stops right after a lead byte,
		// the index must stay on the terminator so end() becomes true instead
		// of the decoder running off the end of the buffer.
		const uint32_t second = (uint8_t)c_[index_];
		if (second == 0) {
			return INVALID;
		}
		++index_;
		j = second;

		// Not a valid second byte.
		if (j < 0x40 || j == 0x7F || j >= 0xFD) {
			return INVALID;
		}

		if (j >= 0x9F) {
			// This range means the row was even.
			++row;
			j -= 0x7E;
		} else {
			if (j >= 0x80) {
				j -= 0x20;
			} else {
				// Yuck.  They wrapped around 0x7F, so we subtract one less.
				j -= 0x20 - 1;
			}

			if (emojiAdjust) {
				// These are shoved in where they'll fit.
				if (row == 0x87) {
					// First byte was 0xF0.
					row = 0x81;
				} else if (row == 0x8B) {
					// First byte was 0xF2.
					row = 0x85;
				} else if (row == 0xCD) {
					// First byte was 0xF4.
					row = 0x8F;
				}
			}
		}

		// j is already the cell + 0x20.
		return ((row + 0x20) << 8) | j;
	}

	// True when the current position is the NUL terminator.
	bool end() const {
		return c_[index_] == 0;
	}

	// Number of next() calls needed to reach the terminator from the start
	// of the string (invalid sequences count as one each).  Does not disturb
	// this decoder's position.
	int length() const {
		int len = 0;
		for (ShiftJIS dec(c_); !dec.end(); dec.next())
			++len;
		return len;
	}

	// Byte offset of the current position from the start of the string.
	int byteIndex() const {
		return index_;
	}

	// Writes the Shift-JIS bytes for code j (in the format next() returns)
	// to dest and returns how many bytes were written: 1 when j fits in a
	// byte, otherwise 2.  The count always matches encodeUnits(j).  No
	// terminator is written.
	//
	// Only rows 0x01-0x5E with cells 0x21-0x7E can be encoded.  Any other
	// double-byte code (including INVALID and the 0xFx rows next() can
	// produce) is written as the geta mark, JIS 0x222E / bytes 0x81 0xAC,
	// so that both output bytes are always defined.
	static int encode(char *dest, uint32_t j) {
		// JIS X 0201.
		if ((j & ~0xFF) == 0) {
			*dest = (char)j;
			return 1;
		}

		int row = (int)(j >> 8) - 0x20;
		int offsetCell = j & 0xFF;

		const bool rowOk = row >= 0x01 && row <= 0x5E;
		const bool cellOk = offsetCell >= 0x21 && offsetCell <= 0x7E;
		if (!rowOk || !cellOk) {
			// TODO: encoding for the rows decoded from 0xF0-0xFC lead bytes.
			// Until then, substitute rather than emit a partial sequence.
			row = 0x02;
			offsetCell = 0x2E;
		}

		if (row < 0x3F) {
			*dest++ = (char)(0x80 + ((row + 1) >> 1));
		} else {
			// Reduce by 0x40 to account for the above range.
			*dest++ = (char)(0xE0 + ((row - 0x40 + 1) >> 1));
		}

		if (row & 1) {
			if (offsetCell < 0x60) {
				// Subtract one to shift around 0x7F.
				*dest++ = (char)(offsetCell + 0x20 - 1);
			} else {
				*dest++ = (char)(offsetCell + 0x20);
			}
		} else {
			*dest++ = (char)(offsetCell + 0x7E);
		}

		return 2;
	}

	// Number of bytes encode() writes for code j.
	static int encodeUnits(uint32_t j) {
		if ((j & ~0xFF) == 0) {
			return 1;
		}
		return 2;
	}

private:
	const char *c_;
	int index_;
};