1 /* Look at first character in UTF-8 string, returning an error code. 2 Copyright (C) 1999-2002, 2006-2007, 2009-2018 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2001. 4 5 This program is free software: you can redistribute it and/or modify it 6 under the terms of the GNU General Public License as published 7 by the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 17 18 #include <config.h> 19 20 /* Specification. */ 21 #include "unistr.h" 22 23 int 24 u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) 25 { 26 uint8_t c = *s; 27 28 if (c < 0x80) 29 { 30 *puc = c; 31 return 1; 32 } 33 else if (c >= 0xc2) 34 { 35 if (c < 0xe0) 36 { 37 if (n >= 2) 38 { 39 if ((s[1] ^ 0x80) < 0x40) 40 { 41 *puc = ((unsigned int) (c & 0x1f) << 6) 42 | (unsigned int) (s[1] ^ 0x80); 43 return 2; 44 } 45 /* invalid multibyte character */ 46 } 47 else 48 { 49 /* incomplete multibyte character */ 50 *puc = 0xfffd; 51 return -2; 52 } 53 } 54 else if (c < 0xf0) 55 { 56 if (n >= 2) 57 { 58 if ((s[1] ^ 0x80) < 0x40 59 && (c >= 0xe1 || s[1] >= 0xa0) 60 && (c != 0xed || s[1] < 0xa0)) 61 { 62 if (n >= 3) 63 { 64 if ((s[2] ^ 0x80) < 0x40) 65 { 66 *puc = ((unsigned int) (c & 0x0f) << 12) 67 | ((unsigned int) (s[1] ^ 0x80) << 6) 68 | (unsigned int) (s[2] ^ 0x80); 69 return 3; 70 } 71 /* invalid multibyte character */ 72 } 73 else 74 { 75 /* incomplete multibyte character */ 76 *puc = 0xfffd; 77 return -2; 78 } 79 } 80 /* invalid multibyte character */ 81 } 82 else 83 { 84 /* incomplete multibyte character */ 85 *puc = 0xfffd; 86 return -2; 87 } 88 } 89 else if (c < 0xf8) 90 { 91 if (n >= 2) 92 { 93 if ((s[1] ^ 0x80) < 0x40 94 && (c >= 0xf1 || s[1] >= 0x90) 95 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))) 96 { 97 if (n >= 3) 98 { 99 if ((s[2] ^ 0x80) < 0x40) 100 { 101 if (n >= 4) 102 { 103 if ((s[3] ^ 0x80) < 0x40) 104 { 105 *puc = ((unsigned int) (c & 0x07) << 18) 106 | ((unsigned int) (s[1] ^ 0x80) << 12) 107 | ((unsigned int) (s[2] ^ 0x80) << 6) 108 | (unsigned int) (s[3] ^ 0x80); 109 return 4; 110 } 111 /* invalid multibyte character */ 112 } 113 else 114 { 115 /* incomplete multibyte character */ 116 *puc = 0xfffd; 117 return -2; 118 } 119 } 120 /* invalid multibyte character */ 121 } 122 else 123 { 124 /* incomplete multibyte character */ 125 *puc = 0xfffd; 126 return -2; 127 } 128 } 129 /* invalid multibyte character */ 130 } 131 else 132 { 133 /* incomplete multibyte character */ 134 *puc = 0xfffd; 135 return -2; 136 } 137 } 138 } 139 /* invalid multibyte character */ 140 *puc = 0xfffd; 141 return -1; 142 } 143