xref: /freebsd/contrib/tcsh/tc.str.c (revision d93a896e)
1 /* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $ */
2 /*
3  * tc.str.c: Short string package
4  * 	     This has been a lesson of how to write buggy code!
5  */
6 /*-
7  * Copyright (c) 1980, 1991 The Regents of the University of California.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 #include "sh.h"
35 
36 #include <assert.h>
37 #include <limits.h>
38 
39 RCSID("$tcsh: tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $")
40 
/*
 * MALLOC_INCR is the step by which the static output buffers of the
 * short2str()/short2qstr() converters in this file are sized and grown.
 * MALLOC_SURPLUS is extra room allocated past that size so that a single
 * converted character may run over the nominal end before the buffer is
 * enlarged.
 */
#define MALLOC_INCR	128
#ifdef WIDE_STRINGS
#define MALLOC_SURPLUS	MB_LEN_MAX /* Space for one multibyte character */
#else
#define MALLOC_SURPLUS	0
#endif
47 
48 #ifdef WIDE_STRINGS
49 size_t
50 one_mbtowc(Char *pwc, const char *s, size_t n)
51 {
52     int len;
53 
54     len = rt_mbtowc(pwc, s, n);
55     if (len == -1) {
56         reset_mbtowc();
57 	*pwc = (unsigned char)*s | INVALID_BYTE;
58     }
59     if (len <= 0)
60 	len = 1;
61     return len;
62 }
63 
/*
 * Store the multibyte representation of wchar at s and return the number
 * of bytes written.  The result is never less than 1: a conversion result
 * of 0 or -1 is reported as a single byte.
 *
 * A Char carrying the INVALID_BYTE marker is written back as the single
 * low-order byte it holds.  Anything else is masked and handed to wctomb()
 * (or, with UTF16_STRINGS and a value >= 0x10000, converted through a
 * surrogate pair with wcstombs()).
 *
 * NOTE(review): the caller must supply enough room at s for one multibyte
 * character; nothing here checks the destination size.
 */
size_t
one_wctomb(char *s, Char wchar)
{
    int len;

#if INVALID_BYTE != 0
    if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
	/* invalid char
	 * example:
	 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
	*s = (char)wchar;
	len = 1;
#else
    if (wchar & (CHAR & INVALID_BYTE)) {
	s[0] = wchar & (CHAR & 0xFF);
	len = 1;
#endif
    } else {
	/* Strip any bits above the character value before converting. */
#if INVALID_BYTE != 0
	wchar &= MAX_UTF32;
#else
	wchar &= CHAR;
#endif
#ifdef UTF16_STRINGS
	if (wchar >= 0x10000) {
	    /* UTF-16 systems can't handle these values directly in calls to
	       wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
	       convert the "string" to the correct multibyte representation,
	       if any. */
	    wchar_t ws[3];
	    wchar -= 0x10000;
	    ws[0] = 0xd800 | (wchar >> 10);
	    ws[1] = 0xdc00 | (wchar & 0x3ff);
	    ws[2] = 0;
	    /* The return value of wcstombs excludes the trailing 0, so len is
	       the correct number of multibytes for the Unicode char. */
	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
	} else
#endif
	len = wctomb(s, (wchar_t) wchar);
	/* Conversion failed: fall back to storing the value as one byte. */
	if (len == -1)
	    s[0] = wchar;
	if (len <= 0)
	    len = 1;
    }
    return len;
}
111 
/*
 * "Round trip" mbtowc: convert the multibyte character at s (at most n
 * bytes) into *pwc, then verify that converting the result back with
 * wctomb() reproduces exactly the same bytes.
 *
 * Returns the number of bytes consumed when positive, 0 after storing
 * '\0' in *pwc, and -1 on any failure.  An incomplete sequence (-2 from
 * the underlying conversion) and a failed round-trip check are both
 * reported as -1.
 *
 * With UTF16_STRINGS and HAVE_MBRTOWC, a leading surrogate (0xd800-0xdbff)
 * causes a second mbrtowc() call and the pair is combined into one value;
 * the round-trip check is skipped for that case.
 */
int
rt_mbtowc(Char *pwc, const char *s, size_t n)
{
    int ret;
    char back[MB_LEN_MAX];
    wchar_t tmp;
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
# if defined(AUTOSET_KANJI)
    static mbstate_t mb_zero, mb;
    /*
     * Work around the Shift-JIS encoding that translates unshifted 7 bit ASCII!
     */
    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
	!memcmp(&mb, &mb_zero, sizeof(mb)))
    {
	*pwc = *s;
	return 1;
    }
# else
    mbstate_t mb;
# endif

    memset (&mb, 0, sizeof mb);
    ret = mbrtowc(&tmp, s, n, &mb);
#else
    ret = mbtowc(&tmp, s, n);
#endif
    if (ret > 0) {
	*pwc = tmp;
#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
	if (tmp >= 0xd800 && tmp <= 0xdbff) {
	    /* UTF-16 surrogate pair.  Fetch second half and compute
	       UTF-32 value.  Dispense with the inverse test in this case. */
	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
		ret = -1;
	    else {
		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
		ret += n2;
	    }
	} else
#endif
	/* Inverse test: the bytes must survive a conversion back. */
	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
	    ret = -1;

    } else if (ret == -2)
	ret = -1;
    else if (ret == 0)
	*pwc = '\0';

    return ret;
}
164 #endif
165 
166 #ifdef SHORT_STRINGS
167 Char  **
168 blk2short(char **src)
169 {
170     size_t     n;
171     Char **sdst, **dst;
172 
173     /*
174      * Count
175      */
176     for (n = 0; src[n] != NULL; n++)
177 	continue;
178     sdst = dst = xmalloc((n + 1) * sizeof(Char *));
179 
180     for (; *src != NULL; src++)
181 	*dst++ = SAVE(*src);
182     *dst = NULL;
183     return (sdst);
184 }
185 
186 char  **
187 short2blk(Char **src)
188 {
189     size_t     n;
190     char **sdst, **dst;
191 
192     /*
193      * Count
194      */
195     for (n = 0; src[n] != NULL; n++)
196 	continue;
197     sdst = dst = xmalloc((n + 1) * sizeof(char *));
198 
199     for (; *src != NULL; src++)
200 	*dst++ = strsave(short2str(*src));
201     *dst = NULL;
202     return (sdst);
203 }
204 
205 Char   *
206 str2short(const char *src)
207 {
208     static struct Strbuf buf; /* = Strbuf_INIT; */
209 
210     if (src == NULL)
211 	return (NULL);
212 
213     buf.len = 0;
214     while (*src) {
215 	Char wc;
216 
217 	src += one_mbtowc(&wc, src, MB_LEN_MAX);
218 	Strbuf_append1(&buf, wc);
219     }
220     Strbuf_terminate(&buf);
221     return buf.s;
222 }
223 
224 char   *
225 short2str(const Char *src)
226 {
227     static char *sdst = NULL;
228     static size_t dstsize = 0;
229     char *dst, *edst;
230 
231     if (src == NULL)
232 	return (NULL);
233 
234     if (sdst == NULL) {
235 	dstsize = MALLOC_INCR;
236 	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
237     }
238     dst = sdst;
239     edst = &dst[dstsize];
240     while (*src) {
241 	dst += one_wctomb(dst, *src);
242 	src++;
243 	if (dst >= edst) {
244 	    char *wdst = dst;
245 	    char *wedst = edst;
246 
247 	    dstsize += MALLOC_INCR;
248 	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
249 	    edst = &sdst[dstsize];
250 	    dst = &edst[-MALLOC_INCR];
251 	    while (wdst > wedst) {
252 		dst++;
253 		wdst--;
254 	    }
255 	}
256     }
257     *dst = 0;
258     return (sdst);
259 }
260 
261 #if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
262 Char   *
263 s_strcpy(Char *dst, const Char *src)
264 {
265     Char *sdst;
266 
267     sdst = dst;
268     while ((*dst++ = *src++) != '\0')
269 	continue;
270     return (sdst);
271 }
272 
273 Char   *
274 s_strncpy(Char *dst, const Char *src, size_t n)
275 {
276     Char *sdst;
277 
278     if (n == 0)
279 	return(dst);
280 
281     sdst = dst;
282     do
283 	if ((*dst++ = *src++) == '\0') {
284 	    while (--n != 0)
285 		*dst++ = '\0';
286 	    return(sdst);
287 	}
288     while (--n != 0);
289     return (sdst);
290 }
291 
292 Char   *
293 s_strcat(Char *dst, const Char *src)
294 {
295     Strcpy(Strend(dst), src);
296     return dst;
297 }
298 
299 #ifdef NOTUSED
300 Char   *
301 s_strncat(Char *dst, const Char *src, size_t n)
302 {
303     Char *sdst;
304 
305     if (n == 0)
306 	return (dst);
307 
308     sdst = dst;
309 
310     while (*dst)
311 	dst++;
312 
313     do
314 	if ((*dst++ = *src++) == '\0')
315 	    return(sdst);
316     while (--n != 0)
317 	continue;
318 
319     *dst = '\0';
320     return (sdst);
321 }
322 
323 #endif
324 
325 Char   *
326 s_strchr(const Char *str, int ch)
327 {
328     do
329 	if (*str == ch)
330 	    return ((Char *)(intptr_t)str);
331     while (*str++);
332     return (NULL);
333 }
334 
335 Char   *
336 s_strrchr(const Char *str, int ch)
337 {
338     const Char *rstr;
339 
340     rstr = NULL;
341     do
342 	if (*str == ch)
343 	    rstr = str;
344     while (*str++);
345     return ((Char *)(intptr_t)rstr);
346 }
347 
348 size_t
349 s_strlen(const Char *str)
350 {
351     size_t n;
352 
353     for (n = 0; *str++; n++)
354 	continue;
355     return (n);
356 }
357 
358 int
359 s_strcmp(const Char *str1, const Char *str2)
360 {
361     for (; *str1 && *str1 == *str2; str1++, str2++)
362 	continue;
363     /*
364      * The following case analysis is necessary so that characters which look
365      * negative collate low against normal characters but high against the
366      * end-of-string NUL.
367      */
368     if (*str1 == '\0' && *str2 == '\0')
369 	return (0);
370     else if (*str1 == '\0')
371 	return (-1);
372     else if (*str2 == '\0')
373 	return (1);
374     else
375 	return (*str1 - *str2);
376 }
377 
378 int
379 s_strncmp(const Char *str1, const Char *str2, size_t n)
380 {
381     if (n == 0)
382 	return (0);
383     do {
384 	if (*str1 != *str2) {
385 	    /*
386 	     * The following case analysis is necessary so that characters
387 	     * which look negative collate low against normal characters
388 	     * but high against the end-of-string NUL.
389 	     */
390 	    if (*str1 == '\0')
391 		return (-1);
392 	    else if (*str2 == '\0')
393 		return (1);
394 	    else
395 		return (*str1 - *str2);
396 	}
397         if (*str1 == '\0')
398 	    return(0);
399 	str1++, str2++;
400     } while (--n != 0);
401     return(0);
402 }
403 #endif /* not WIDE_STRINGS */
404 
405 int
406 s_strcasecmp(const Char *str1, const Char *str2)
407 {
408 #ifdef WIDE_STRINGS
409     wint_t l1 = 0, l2 = 0;
410     for (; *str1; str1++, str2++)
411 	if (*str1 == *str2)
412 	    l1 = l2 = 0;
413 	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
414 	    break;
415 #else
416     unsigned char l1 = 0, l2 = 0;
417     for (; *str1; str1++, str2++)
418 	if (*str1 == *str2)
419 		l1 = l2 = 0;
420 	else if ((l1 = tolower((unsigned char)*str1)) !=
421 	    (l2 = tolower((unsigned char)*str2)))
422 	    break;
423 #endif
424     /*
425      * The following case analysis is necessary so that characters which look
426      * negative collate low against normal characters but high against the
427      * end-of-string NUL.
428      */
429     if (*str1 == '\0' && *str2 == '\0')
430 	return (0);
431     else if (*str1 == '\0')
432 	return (-1);
433     else if (*str2 == '\0')
434 	return (1);
435     else if (l1 == l2)	/* They are zero when they are equal */
436 	return (*str1 - *str2);
437     else
438 	return (l1 - l2);
439 }
440 
441 Char   *
442 s_strnsave(const Char *s, size_t len)
443 {
444     Char *n;
445 
446     n = xmalloc((len + 1) * sizeof (*n));
447     memcpy(n, s, len * sizeof (*n));
448     n[len] = '\0';
449     return n;
450 }
451 
452 Char   *
453 s_strsave(const Char *s)
454 {
455     Char   *n;
456     size_t size;
457 
458     if (s == NULL)
459 	s = STRNULL;
460     size = (Strlen(s) + 1) * sizeof(*n);
461     n = xmalloc(size);
462     memcpy(n, s, size);
463     return (n);
464 }
465 
466 Char   *
467 s_strspl(const Char *cp, const Char *dp)
468 {
469     Char *res, *ep;
470     const Char *p, *q;
471 
472     if (!cp)
473 	cp = STRNULL;
474     if (!dp)
475 	dp = STRNULL;
476     for (p = cp; *p++;)
477 	continue;
478     for (q = dp; *q++;)
479 	continue;
480     res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
481     for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
482 	continue;
483     for (ep--, q = dp; (*ep++ = *q++) != '\0';)
484 	continue;
485     return (res);
486 }
487 
488 Char   *
489 s_strend(const Char *cp)
490 {
491     if (!cp)
492 	return ((Char *)(intptr_t) cp);
493     while (*cp)
494 	cp++;
495     return ((Char *)(intptr_t) cp);
496 }
497 
498 Char   *
499 s_strstr(const Char *s, const Char *t)
500 {
501     do {
502 	const Char *ss = s;
503 	const Char *tt = t;
504 
505 	do
506 	    if (*tt == '\0')
507 		return ((Char *)(intptr_t) s);
508 	while (*ss++ == *tt++);
509     } while (*s++ != '\0');
510     return (NULL);
511 }
512 
513 #else /* !SHORT_STRINGS */
/*
 * Copy s into a function-static buffer, run strip() on the copy and
 * return it.  Returns NULL for a NULL argument.  The buffer is reused and
 * only ever grown, so each call overwrites the previous result.
 */
char *
caching_strip(const char *s)
{
    static char *cache = NULL;
    static size_t cache_size = 0;
    size_t need;

    if (s == NULL)
      return NULL;

    need = strlen(s) + 1;
    if (need > cache_size) {
	cache = xrealloc(cache, need);
	cache_size = need;
    }
    memcpy(cache, s, need);
    strip(cache);
    return cache;
}
532 #endif
533 
534 char   *
535 short2qstr(const Char *src)
536 {
537     static char *sdst = NULL;
538     static size_t dstsize = 0;
539     char *dst, *edst;
540 
541     if (src == NULL)
542 	return (NULL);
543 
544     if (sdst == NULL) {
545 	dstsize = MALLOC_INCR;
546 	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
547     }
548     dst = sdst;
549     edst = &dst[dstsize];
550     while (*src) {
551 	if (*src & QUOTE) {
552 	    *dst++ = '\\';
553 	    if (dst == edst) {
554 		dstsize += MALLOC_INCR;
555 		sdst = xrealloc(sdst,
556 				(dstsize + MALLOC_SURPLUS) * sizeof(char));
557 		edst = &sdst[dstsize];
558 		dst = &edst[-MALLOC_INCR];
559 	    }
560 	}
561 	dst += one_wctomb(dst, *src);
562 	src++;
563 	if (dst >= edst) {
564 	    ptrdiff_t i = dst - edst;
565 	    dstsize += MALLOC_INCR;
566 	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
567 	    edst = &sdst[dstsize];
568 	    dst = &edst[-MALLOC_INCR + i];
569 	}
570     }
571     *dst = 0;
572     return (sdst);
573 }
574 
575 struct blk_buf *
576 bb_alloc(void)
577 {
578     return xcalloc(1, sizeof(struct blk_buf));
579 }
580 
581 static void
582 bb_store(struct blk_buf *bb, Char *str)
583 {
584     if (bb->len == bb->size) { /* Keep space for terminating NULL */
585 	if (bb->size == 0)
586 	    bb->size = 16; /* Arbitrary */
587 	else
588 	    bb->size *= 2;
589 	bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
590     }
591     bb->vec[bb->len] = str;
592 }
593 
594 void
595 bb_append(struct blk_buf *bb, Char *str)
596 {
597     bb_store(bb, str);
598     bb->len++;
599 }
600 
601 void
602 bb_cleanup(void *xbb)
603 {
604     struct blk_buf *bb;
605     size_t i;
606 
607     bb = (struct blk_buf *)xbb;
608     if (bb->vec) {
609 	for (i = 0; i < bb->len; i++)
610 	    xfree(bb->vec[i]);
611 	xfree(bb->vec);
612     }
613     bb->vec = NULL;
614     bb->len = 0;
615 }
616 
/*
 * Release the contents of the blk_buf bb with bb_cleanup(), then free the
 * blk_buf itself.
 */
void
bb_free(void *bb)
{
    struct blk_buf *owned = bb;

    bb_cleanup(owned);
    xfree(owned);
}
623 
624 Char **
625 bb_finish(struct blk_buf *bb)
626 {
627     bb_store(bb, NULL);
628     return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
629 }
630 
/*
 * DO_STRBUF(STRBUF, CHAR, STRLEN) expands to a complete growable string
 * buffer implementation for "struct STRBUF", whose elements have type CHAR
 * and whose C-string length function is STRLEN.  It defines:
 *
 *   STRBUF_alloc()      allocate a zero-filled buffer with xcalloc()
 *   STRBUF_store1()     (static) store one element at index len, growing
 *                       the storage first when full (64 elements
 *                       initially, doubled after that); len is unchanged
 *   STRBUF_terminate()  store a '\0' at index len without advancing len
 *   STRBUF_append1()    store one element and advance len
 *   STRBUF_appendn()    append len elements from s; no terminator is added
 *   STRBUF_append()     append the whole STRLEN(s)-long string s
 *   STRBUF_finish()     append a 0 element and return the storage, resized
 *                       with xrealloc() to exactly len elements
 *   STRBUF_cleanup()    free the storage (buf->s) only
 *   STRBUF_free()       STRBUF_cleanup() plus freeing the struct itself
 *   STRBUF_init         a const struct STRBUF object with no explicit
 *                       initializer
 *
 * The macro body deliberately ends without a semicolon; the ';' after each
 * DO_STRBUF(...) use below completes the STRBUF_init declaration.
 */
#define DO_STRBUF(STRBUF, CHAR, STRLEN)				\
								\
struct STRBUF *							\
STRBUF##_alloc(void)						\
{								\
    return xcalloc(1, sizeof(struct STRBUF));			\
}								\
								\
static void							\
STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
{								\
    if (buf->size == buf->len) {				\
	if (buf->size == 0)					\
	    buf->size = 64; /* Arbitrary */			\
	else							\
	    buf->size *= 2;					\
	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
    }								\
    assert(buf->s);						\
    buf->s[buf->len] = c;					\
}								\
								\
/* Like strbuf_append1(buf, '\0'), but don't advance len */	\
void								\
STRBUF##_terminate(struct STRBUF *buf)				\
{								\
    STRBUF##_store1(buf, '\0');					\
}								\
								\
void								\
STRBUF##_append1(struct STRBUF *buf, CHAR c)			\
{								\
    STRBUF##_store1(buf, c);					\
    buf->len++;							\
}								\
								\
void								\
STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)	\
{								\
    if (buf->size < buf->len + len) {				\
	if (buf->size == 0)					\
	    buf->size = 64; /* Arbitrary */			\
	while (buf->size < buf->len + len)			\
	    buf->size *= 2;					\
	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
    }								\
    memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));	\
    buf->len += len;						\
}								\
								\
void								\
STRBUF##_append(struct STRBUF *buf, const CHAR *s)		\
{								\
    STRBUF##_appendn(buf, s, STRLEN(s));			\
}								\
								\
CHAR *								\
STRBUF##_finish(struct STRBUF *buf)				\
{								\
    STRBUF##_append1(buf, 0);					\
    return xrealloc(buf->s, buf->len * sizeof(*buf->s));	\
}								\
								\
void								\
STRBUF##_cleanup(void *xbuf)					\
{								\
    struct STRBUF *buf;						\
								\
    buf = xbuf;							\
    xfree(buf->s);						\
}								\
								\
void								\
STRBUF##_free(void *xbuf)					\
{								\
    STRBUF##_cleanup(xbuf);					\
    xfree(xbuf);						\
}								\
								\
const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */

/* Instantiate the buffer for plain char strings and for Char strings. */
DO_STRBUF(strbuf, char, strlen);
DO_STRBUF(Strbuf, Char, Strlen);
714