1 /* -*- buffer-read-only: t -*- vi: set ro: */ 2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */ 3 /* Look at first character in UTF-8 string, returning an error code. 4 Copyright (C) 1999-2002, 2006-2007, 2009-2011 Free Software Foundation, Inc. 5 Written by Bruno Haible <bruno@clisp.org>, 2001. 6 7 This program is free software: you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published 9 by the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 19 20 #include <config.h> 21 22 /* Specification. */ 23 #include "unistr.h" 24 25 int 26 u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n) 27 { 28 uint8_t c = *s; 29 30 if (c < 0x80) 31 { 32 *puc = c; 33 return 1; 34 } 35 else if (c >= 0xc2) 36 { 37 if (c < 0xe0) 38 { 39 if (n >= 2) 40 { 41 if ((s[1] ^ 0x80) < 0x40) 42 { 43 *puc = ((unsigned int) (c & 0x1f) << 6) 44 | (unsigned int) (s[1] ^ 0x80); 45 return 2; 46 } 47 /* invalid multibyte character */ 48 } 49 else 50 { 51 /* incomplete multibyte character */ 52 *puc = 0xfffd; 53 return -2; 54 } 55 } 56 else if (c < 0xf0) 57 { 58 if (n >= 2) 59 { 60 if ((s[1] ^ 0x80) < 0x40 61 && (c >= 0xe1 || s[1] >= 0xa0) 62 && (c != 0xed || s[1] < 0xa0)) 63 { 64 if (n >= 3) 65 { 66 if ((s[2] ^ 0x80) < 0x40) 67 { 68 *puc = ((unsigned int) (c & 0x0f) << 12) 69 | ((unsigned int) (s[1] ^ 0x80) << 6) 70 | (unsigned int) (s[2] ^ 0x80); 71 return 3; 72 } 73 /* invalid multibyte character */ 74 } 75 else 76 { 77 /* incomplete multibyte character */ 78 *puc = 0xfffd; 79 return -2; 80 } 81 } 82 /* invalid multibyte character */ 83 } 84 else 85 { 86 /* incomplete multibyte character */ 87 *puc = 0xfffd; 88 return -2; 89 } 90 } 91 else if (c < 0xf8) 92 { 93 if (n >= 2) 94 { 95 if ((s[1] ^ 0x80) < 0x40 96 && (c >= 0xf1 || s[1] >= 0x90) 97 #if 1 98 && (c < 0xf4 || (c == 0xf4 && s[1] < 0x90)) 99 #endif 100 ) 101 { 102 if (n >= 3) 103 { 104 if ((s[2] ^ 0x80) < 0x40) 105 { 106 if (n >= 4) 107 { 108 if ((s[3] ^ 0x80) < 0x40) 109 { 110 *puc = ((unsigned int) (c & 0x07) << 18) 111 | ((unsigned int) (s[1] ^ 0x80) << 12) 112 | ((unsigned int) (s[2] ^ 0x80) << 6) 113 | (unsigned int) (s[3] ^ 0x80); 114 return 4; 115 } 116 /* invalid multibyte character */ 117 } 118 else 119 { 120 /* incomplete multibyte character */ 121 *puc = 0xfffd; 122 return -2; 123 } 124 } 125 /* invalid multibyte character */ 126 } 127 else 128 { 129 /* incomplete multibyte character */ 130 *puc = 0xfffd; 131 return -2; 132 } 133 } 134 /* invalid multibyte character */ 135 } 136 else 137 { 138 /* incomplete multibyte character */ 139 *puc = 0xfffd; 140 return -2; 141 } 142 } 143 #if 0 144 else if (c < 0xfc) 145 { 146 if (n >= 2) 147 { 148 if ((s[1] ^ 0x80) < 0x40 149 && (c >= 0xf9 || s[1] >= 0x88)) 150 { 151 if (n >= 3) 152 { 153 if ((s[2] ^ 0x80) < 0x40) 154 { 155 if (n >= 4) 156 { 157 if ((s[3] ^ 0x80) < 0x40) 158 { 159 if (n >= 5) 160 { 161 if ((s[4] ^ 0x80) < 0x40) 162 { 163 *puc = ((unsigned int) (c & 0x03) << 24) 164 | ((unsigned int) (s[1] ^ 0x80) << 18) 165 | ((unsigned int) (s[2] ^ 0x80) << 12) 166 | ((unsigned int) (s[3] ^ 0x80) << 6) 167 | (unsigned int) (s[4] ^ 0x80); 168 return 5; 169 } 170 /* invalid multibyte character */ 171 } 172 else 173 { 174 /* incomplete multibyte character */ 175 *puc = 0xfffd; 176 return -2; 177 } 178 } 179 /* invalid multibyte character */ 180 } 181 else 182 { 183 /* incomplete multibyte character */ 184 *puc = 0xfffd; 185 return -2; 186 } 187 } 188 /* invalid multibyte character */ 189 } 190 else 191 { 192 /* incomplete multibyte character */ 193 *puc = 0xfffd; 194 return -2; 195 } 196 } 197 /* invalid multibyte character */ 198 } 199 else 200 { 201 /* incomplete multibyte character */ 202 *puc = 0xfffd; 203 return -2; 204 } 205 } 206 else if (c < 0xfe) 207 { 208 if (n >= 2) 209 { 210 if ((s[1] ^ 0x80) < 0x40 211 && (c >= 0xfd || s[1] >= 0x84)) 212 { 213 if (n >= 3) 214 { 215 if ((s[2] ^ 0x80) < 0x40) 216 { 217 if (n >= 4) 218 { 219 if ((s[3] ^ 0x80) < 0x40) 220 { 221 if (n >= 5) 222 { 223 if ((s[4] ^ 0x80) < 0x40) 224 { 225 if (n >= 6) 226 { 227 if ((s[5] ^ 0x80) < 0x40) 228 { 229 *puc = ((unsigned int) (c & 0x01) << 30) 230 | ((unsigned int) (s[1] ^ 0x80) << 24) 231 | ((unsigned int) (s[2] ^ 0x80) << 18) 232 | ((unsigned int) (s[3] ^ 0x80) << 12) 233 | ((unsigned int) (s[4] ^ 0x80) << 6) 234 | (unsigned int) (s[5] ^ 0x80); 235 return 6; 236 } 237 /* invalid multibyte character */ 238 } 239 else 240 { 241 /* incomplete multibyte character */ 242 *puc = 0xfffd; 243 return -2; 244 } 245 } 246 /* invalid multibyte character */ 247 } 248 else 249 { 250 /* incomplete multibyte character */ 251 *puc = 0xfffd; 252 return -2; 253 } 254 } 255 /* invalid multibyte character */ 256 } 257 else 258 { 259 /* incomplete multibyte character */ 260 *puc = 0xfffd; 261 return -2; 262 } 263 } 264 /* invalid multibyte character */ 265 } 266 else 267 { 268 /* incomplete multibyte character */ 269 *puc = 0xfffd; 270 return -2; 271 } 272 } 273 /* invalid multibyte character */ 274 } 275 else 276 { 277 /* incomplete multibyte character */ 278 *puc = 0xfffd; 279 return -2; 280 } 281 } 282 #endif 283 } 284 /* invalid multibyte character */ 285 *puc = 0xfffd; 286 return -1; 287 } 288