1 /* 2 * Copyright (c) 2014 Philip Guenther <guenther@openbsd.org> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17 /* 18 * A mostly exhaustive test of UTF-8 decoder and encoder 19 */ 20 21 #include <stdio.h> 22 #include <string.h> 23 #include <err.h> 24 25 #include <openssl/asn1.h> 26 #include "asn1_locl.h" /* peek into the internals */ 27 28 #define UNCHANGED 0xfedcba98 29 30 #define ASSERT(x) \ 31 do { \ 32 if (!(x)) \ 33 errx(1, "test failed at line %d: %s", \ 34 __LINE__, #x); \ 35 } while (0) 36 37 int 38 main(void) 39 { 40 unsigned char testbuf[] = "012345"; 41 const unsigned char zerobuf[sizeof testbuf] = { 0 }; 42 unsigned long value; 43 unsigned int i, j, k, l; 44 int ret; 45 46 /* 47 * First, verify UTF8_getc() 48 */ 49 value = UNCHANGED; 50 ret = UTF8_getc(testbuf, 0, &value); 51 ASSERT(ret == 0); 52 ASSERT(value == UNCHANGED); 53 54 /* check all valid single-byte chars */ 55 for (i = 0; i < 0x80; i++) { 56 testbuf[0] = i; 57 ret = UTF8_getc(testbuf, 1, &value); 58 ASSERT(ret == 1); 59 ASSERT(value == i); 60 61 ret = UTF8_getc(testbuf, 2, &value); 62 ASSERT(ret == 1); 63 ASSERT(value == i); 64 } 65 66 /* 67 * Verify failure on all invalid initial bytes: 68 * 0x80 - 0xBF following bytes only 69 * 0xC0 - 0xC1 used to be in non-shortest forms 70 * 0xF5 - 0xFD used to be initial for 5 and 6 byte sequences 71 * 0xFE - 0xFF have never been valid in utf-8 72 */ 73 for (i = 0x80; i < 0xC2; i++) { 74 value = UNCHANGED; 75 testbuf[0] = i; 76 ret = UTF8_getc(testbuf, 1, &value); 77 ASSERT(ret == -2); 78 ASSERT(value == UNCHANGED); 79 } 80 for (i = 0xF5; i < 0x100; i++) { 81 value = UNCHANGED; 82 testbuf[0] = i; 83 ret = UTF8_getc(testbuf, 1, &value); 84 ASSERT(ret == -2); 85 ASSERT(value == UNCHANGED); 86 } 87 88 /* 89 * Verify handling of all two-byte sequences 90 */ 91 for (i = 0xC2; i < 0xE0; i++) { 92 testbuf[0] = i; 93 94 for (j = 0; j < 0x100; j++) { 95 testbuf[1] = j; 96 97 value = UNCHANGED; 98 ret = UTF8_getc(testbuf, 1, &value); 99 ASSERT(ret == -1); 100 ASSERT(value == UNCHANGED); 101 102 ret = UTF8_getc(testbuf, 2, &value); 103 104 /* outside range of trailing bytes */ 105 if (j < 0x80 || j > 0xBF) { 106 ASSERT(ret == -3); 107 ASSERT(value == UNCHANGED); 108 continue; 109 } 110 111 /* valid */ 112 ASSERT(ret == 2); 113 ASSERT((value & 0x3F) == (j & 0x3F)); 114 ASSERT(value >> 6 == (i & 0x1F)); 115 } 116 } 117 118 /* 119 * Verify handling of all three-byte sequences 120 */ 121 for (i = 0xE0; i < 0xF0; i++) { 122 testbuf[0] = i; 123 124 for (j = 0; j < 0x100; j++) { 125 testbuf[1] = j; 126 127 for (k = 0; k < 0x100; k++) { 128 testbuf[2] = k; 129 130 value = UNCHANGED; 131 ret = UTF8_getc(testbuf, 2, &value); 132 ASSERT(ret == -1); 133 ASSERT(value == UNCHANGED); 134 135 ret = UTF8_getc(testbuf, 3, &value); 136 137 /* outside range of trailing bytes */ 138 if (j < 0x80 || j > 0xBF || 139 k < 0x80 || k > 0xBF) { 140 ASSERT(ret == -3); 141 ASSERT(value == UNCHANGED); 142 continue; 143 } 144 145 /* non-shortest form */ 146 if (i == 0xE0 && j < 0xA0) { 147 ASSERT(ret == -4); 148 ASSERT(value == UNCHANGED); 149 continue; 150 } 151 152 /* surrogate pair code point */ 153 if (i == 0xED && j > 0x9F) { 154 ASSERT(ret == -2); 155 ASSERT(value == UNCHANGED); 156 continue; 157 } 158 159 ASSERT(ret == 3); 160 ASSERT((value & 0x3F) == (k & 0x3F)); 161 ASSERT(((value >> 6) & 0x3F) == (j & 0x3F)); 162 ASSERT(value >> 12 == (i & 0x0F)); 163 } 164 } 165 } 166 167 /* 168 * Verify handling of all four-byte sequences 169 */ 170 for (i = 0xF0; i < 0xF5; i++) { 171 testbuf[0] = i; 172 173 for (j = 0; j < 0x100; j++) { 174 testbuf[1] = j; 175 176 for (k = 0; k < 0x100; k++) { 177 testbuf[2] = k; 178 179 for (l = 0; l < 0x100; l++) { 180 testbuf[3] = l; 181 182 value = UNCHANGED; 183 ret = UTF8_getc(testbuf, 3, &value); 184 ASSERT(ret == -1); 185 ASSERT(value == UNCHANGED); 186 187 ret = UTF8_getc(testbuf, 4, &value); 188 189 /* outside range of trailing bytes */ 190 if (j < 0x80 || j > 0xBF || 191 k < 0x80 || k > 0xBF || 192 l < 0x80 || l > 0xBF) { 193 ASSERT(ret == -3); 194 ASSERT(value == UNCHANGED); 195 continue; 196 } 197 198 /* non-shortest form */ 199 if (i == 0xF0 && j < 0x90) { 200 ASSERT(ret == -4); 201 ASSERT(value == UNCHANGED); 202 continue; 203 } 204 205 /* beyond end of UCS range */ 206 if (i == 0xF4 && j > 0x8F) { 207 ASSERT(ret == -2); 208 ASSERT(value == UNCHANGED); 209 continue; 210 } 211 212 ASSERT(ret == 4); 213 ASSERT((value & 0x3F) == (l & 0x3F)); 214 ASSERT(((value >> 6) & 0x3F) == 215 (k & 0x3F)); 216 ASSERT(((value >> 12) & 0x3F) == 217 (j & 0x3F)); 218 ASSERT(value >> 18 == (i & 0x07)); 219 } 220 } 221 } 222 } 223 224 225 /* 226 * Next, verify UTF8_putc() 227 */ 228 memset(testbuf, 0, sizeof testbuf); 229 230 /* single-byte sequences */ 231 for (i = 0; i < 0x80; i++) { 232 ret = UTF8_putc(NULL, 0, i); 233 ASSERT(ret == 1); 234 235 testbuf[0] = 0; 236 ret = UTF8_putc(testbuf, 0, i); 237 ASSERT(ret == -1); 238 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); 239 240 ret = UTF8_putc(testbuf, 1, i); 241 ASSERT(ret == 1); 242 ASSERT(testbuf[0] == i); 243 ASSERT(memcmp(testbuf+1, zerobuf, sizeof(testbuf)-1) == 0); 244 } 245 246 /* two-byte sequences */ 247 for (i = 0x80; i < 0x800; i++) { 248 ret = UTF8_putc(NULL, 0, i); 249 ASSERT(ret == 2); 250 251 testbuf[0] = testbuf[1] = 0; 252 ret = UTF8_putc(testbuf, 1, i); 253 ASSERT(ret == -1); 254 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); 255 256 ret = UTF8_putc(testbuf, 2, i); 257 ASSERT(ret == 2); 258 ASSERT(memcmp(testbuf+2, zerobuf, sizeof(testbuf)-2) == 0); 259 ret = UTF8_getc(testbuf, 2, &value); 260 ASSERT(ret == 2); 261 ASSERT(value == i); 262 } 263 264 /* three-byte sequences */ 265 for (i = 0x800; i < 0x10000; i++) { 266 if (i >= 0xD800 && i < 0xE000) { 267 /* surrogates aren't valid */ 268 ret = UTF8_putc(NULL, 0, i); 269 ASSERT(ret == -2); 270 continue; 271 } 272 273 ret = UTF8_putc(NULL, 0, i); 274 ASSERT(ret == 3); 275 276 testbuf[0] = testbuf[1] = testbuf[2] = 0; 277 ret = UTF8_putc(testbuf, 2, i); 278 ASSERT(ret == -1); 279 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); 280 281 ret = UTF8_putc(testbuf, 3, i); 282 ASSERT(ret == 3); 283 ASSERT(memcmp(testbuf+3, zerobuf, sizeof(testbuf)-3) == 0); 284 ret = UTF8_getc(testbuf, 3, &value); 285 ASSERT(ret == 3); 286 ASSERT(value == i); 287 } 288 289 /* four-byte sequences */ 290 for (i = 0x10000; i < 0x110000; i++) { 291 ret = UTF8_putc(NULL, 0, i); 292 ASSERT(ret == 4); 293 294 testbuf[0] = testbuf[1] = testbuf[2] = testbuf[3] = 0; 295 ret = UTF8_putc(testbuf, 3, i); 296 ASSERT(ret == -1); 297 ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0); 298 299 ret = UTF8_putc(testbuf, 4, i); 300 ASSERT(ret == 4); 301 ASSERT(memcmp(testbuf+4, zerobuf, sizeof(testbuf)-4) == 0); 302 ret = UTF8_getc(testbuf, 4, &value); 303 ASSERT(ret == 4); 304 ASSERT(value == i); 305 } 306 307 /* spot check some larger values to confirm error return */ 308 for (i = 0x110000; i < 0x110100; i++) { 309 ret = UTF8_putc(NULL, 0, i); 310 ASSERT(ret == -2); 311 } 312 for (value = (unsigned long)-1; value > (unsigned long)-256; value--) { 313 ret = UTF8_putc(NULL, 0, value); 314 ASSERT(ret == -2); 315 } 316 317 return 0; 318 } 319