1 /* $OpenBSD: chartype.c,v 1.16 2019/01/29 09:47:00 yasuoka Exp $ */ 2 /* $NetBSD: chartype.c,v 1.6 2011/07/28 00:48:21 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 2009 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 * POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * chartype.c: character classification and meta information 32 */ 33 #include "config.h" 34 35 #include <ctype.h> 36 #include <stdlib.h> 37 #include <string.h> 38 39 #include "el.h" 40 41 #define CT_BUFSIZ 1024 42 43 static void ct_conv_buff_resize(ct_buffer_t *, size_t, size_t); 44 45 static void 46 ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize) 47 { 48 void *p; 49 if (mincsize > conv->csize) { 50 conv->csize = mincsize; 51 p = reallocarray(conv->cbuff, conv->csize, sizeof(char)); 52 if (p == NULL) { 53 conv->csize = 0; 54 free(conv->cbuff); 55 conv->cbuff = NULL; 56 } else 57 conv->cbuff = p; 58 } 59 60 if (minwsize > conv->wsize) { 61 conv->wsize = minwsize; 62 p = reallocarray(conv->wbuff, conv->wsize, sizeof(wchar_t)); 63 if (p == NULL) { 64 conv->wsize = 0; 65 free(conv->wbuff); 66 conv->wbuff = NULL; 67 } else 68 conv->wbuff = p; 69 } 70 } 71 72 73 char * 74 ct_encode_string(const wchar_t *s, ct_buffer_t *conv) 75 { 76 char *dst; 77 ssize_t used = 0; 78 79 if (!s) 80 return NULL; 81 if (!conv->cbuff) 82 ct_conv_buff_resize(conv, CT_BUFSIZ, 0); 83 if (!conv->cbuff) 84 return NULL; 85 86 dst = conv->cbuff; 87 while (*s) { 88 used = conv->csize - (dst - conv->cbuff); 89 if (used < 5) { 90 used = dst - conv->cbuff; 91 ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0); 92 if (!conv->cbuff) 93 return NULL; 94 dst = conv->cbuff + used; 95 } 96 used = ct_encode_char(dst, 5, *s); 97 if (used == -1) /* failed to encode, need more buffer space */ 98 abort(); 99 ++s; 100 dst += used; 101 } 102 *dst = '\0'; 103 return conv->cbuff; 104 } 105 106 wchar_t * 107 ct_decode_string(const char *s, ct_buffer_t *conv) 108 { 109 size_t len = 0; 110 111 if (!s) 112 return NULL; 113 if (!conv->wbuff) 114 ct_conv_buff_resize(conv, 0, CT_BUFSIZ); 115 if (!conv->wbuff) 116 return NULL; 117 118 len = mbstowcs(NULL, s, 0); 119 if (len == (size_t)-1) 120 return NULL; 121 if (len > conv->wsize) 122 ct_conv_buff_resize(conv, 0, len + 1); 123 if (!conv->wbuff) 124 return NULL; 125 126 mbstowcs(conv->wbuff, s, conv->wsize); 127 return conv->wbuff; 128 } 129 130 131 protected wchar_t ** 132 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) 133 { 134 size_t bufspace; 135 int i; 136 wchar_t *p; 137 wchar_t **wargv; 138 size_t wlen; 139 140 /* Make sure we have enough space in the conversion buffer to store all 141 * the argv strings. */ 142 for (i = 0, bufspace = 0; i < argc; ++i) 143 bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; 144 ct_conv_buff_resize(conv, 0, bufspace * sizeof(*p)); 145 if (!conv->wsize) 146 return NULL; 147 148 wargv = reallocarray(NULL, argc + 1, sizeof(*wargv)); 149 150 for (i = 0, p = conv->wbuff; i < argc; ++i) { 151 if (!argv[i]) { /* don't pass null pointers to mbstowcs */ 152 wargv[i] = NULL; 153 continue; 154 } else { 155 wargv[i] = p; 156 wlen = mbstowcs(p, argv[i], bufspace); 157 } 158 if (wlen == (size_t)-1 || wlen == bufspace) { 159 /* Encoding error or not enough room for NUL. */ 160 free(wargv); 161 return NULL; 162 } else 163 wlen++; /* include NUL in the count */ 164 bufspace -= wlen; 165 p += wlen; 166 } 167 wargv[i] = NULL; 168 169 return wargv; 170 } 171 172 173 protected size_t 174 ct_enc_width(wchar_t c) 175 { 176 /* UTF-8 encoding specific values */ 177 if (c < 0x80) 178 return 1; 179 else if (c < 0x0800) 180 return 2; 181 else if (c < 0x10000) 182 return 3; 183 else if (c < 0x110000) 184 return 4; 185 else 186 return 0; /* not a valid codepoint */ 187 } 188 189 protected ssize_t 190 ct_encode_char(char *dst, size_t len, wchar_t c) 191 { 192 ssize_t l = 0; 193 if (len < ct_enc_width(c)) 194 return -1; 195 l = wctomb(dst, c); 196 197 if (l < 0) { 198 wctomb(NULL, L'\0'); 199 l = 0; 200 } 201 return l; 202 } 203 204 protected const wchar_t * 205 ct_visual_string(const wchar_t *s) 206 { 207 static wchar_t *buff = NULL; 208 static size_t buffsize = 0; 209 void *p; 210 wchar_t *dst; 211 ssize_t used = 0; 212 213 if (!s) 214 return NULL; 215 if (!buff) { 216 buffsize = CT_BUFSIZ; 217 buff = reallocarray(NULL, buffsize, sizeof(*buff)); 218 } 219 dst = buff; 220 while (*s) { 221 used = ct_visual_char(dst, buffsize - (dst - buff), *s); 222 if (used == -1) { /* failed to encode, need more buffer space */ 223 used = dst - buff; 224 buffsize += CT_BUFSIZ; 225 p = reallocarray(buff, buffsize, sizeof(*buff)); 226 if (p == NULL) 227 goto out; 228 buff = p; 229 dst = buff + used; 230 /* don't increment s here - we want to retry it! */ 231 } 232 else 233 ++s; 234 dst += used; 235 } 236 if (dst >= (buff + buffsize)) { /* sigh */ 237 buffsize += 1; 238 p = reallocarray(buff, buffsize, sizeof(*buff)); 239 if (p == NULL) 240 goto out; 241 buff = p; 242 dst = buff + buffsize - 1; 243 } 244 *dst = 0; 245 return buff; 246 out: 247 free(buff); 248 buffsize = 0; 249 return NULL; 250 } 251 252 253 254 protected int 255 ct_visual_width(wchar_t c) 256 { 257 int t = ct_chr_class(c); 258 int w; 259 switch (t) { 260 case CHTYPE_ASCIICTL: 261 return 2; /* ^@ ^? etc. */ 262 case CHTYPE_TAB: 263 return 1; /* Hmm, this really need to be handled outside! */ 264 case CHTYPE_NL: 265 return 0; /* Should this be 1 instead? */ 266 case CHTYPE_PRINT: 267 w = wcwidth(c); 268 return (w == -1 ? 0 : w); 269 case CHTYPE_NONPRINT: 270 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 271 return 8; /* \U+12345 */ 272 else 273 return 7; /* \U+1234 */ 274 default: 275 return 0; /* should not happen */ 276 } 277 } 278 279 280 protected ssize_t 281 ct_visual_char(wchar_t *dst, size_t len, wchar_t c) 282 { 283 int t = ct_chr_class(c); 284 switch (t) { 285 case CHTYPE_TAB: 286 case CHTYPE_NL: 287 case CHTYPE_ASCIICTL: 288 if (len < 2) 289 return -1; /* insufficient space */ 290 *dst++ = '^'; 291 if (c == '\177') 292 *dst = '?'; /* DEL -> ^? */ 293 else 294 *dst = c | 0100; /* uncontrolify it */ 295 return 2; 296 case CHTYPE_PRINT: 297 if (len < 1) 298 return -1; /* insufficient space */ 299 *dst = c; 300 return 1; 301 case CHTYPE_NONPRINT: 302 /* we only use single-width glyphs for display, 303 * so this is right */ 304 if ((ssize_t)len < ct_visual_width(c)) 305 return -1; /* insufficient space */ 306 *dst++ = '\\'; 307 *dst++ = 'U'; 308 *dst++ = '+'; 309 #define tohexdigit(v) "0123456789ABCDEF"[v] 310 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 311 *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); 312 *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); 313 *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); 314 *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); 315 *dst = tohexdigit(((unsigned int) c ) & 0xf); 316 return (c > 0xffff) ? 8 : 7; 317 /*FALLTHROUGH*/ 318 /* these two should be handled outside this function */ 319 default: /* we should never hit the default */ 320 return 0; 321 } 322 } 323 324 325 326 327 protected int 328 ct_chr_class(wchar_t c) 329 { 330 if (c == '\t') 331 return CHTYPE_TAB; 332 else if (c == '\n') 333 return CHTYPE_NL; 334 else if (c < 0x100 && iswcntrl(c)) 335 return CHTYPE_ASCIICTL; 336 else if (iswprint(c)) 337 return CHTYPE_PRINT; 338 else 339 return CHTYPE_NONPRINT; 340 } 341