1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2  *@ String support routines.
3  *
4  * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5  * Copyright (c) 2012 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6  * SPDX-License-Identifier: BSD-3-Clause
7  */
8 /*
9  * Copyright (c) 1980, 1993
10  *      The Regents of the University of California.  All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 #undef su_FILE
37 #define su_FILE strings
38 #define mx_SOURCE
39 
40 #ifndef mx_HAVE_AMALGAMATION
41 # include "mx/nail.h"
42 #endif
43 
#ifdef mx_HAVE_C90AMEND1
# include <limits.h>
# include <wctype.h>
#endif
47 
48 #include <su/cs.h>
49 #include <su/mem.h>
50 
51 /* TODO fake */
52 #include "su/code-in.h"
53 
54 FL char *
55 (savestr)(char const *str  su_DBG_LOC_ARGS_DECL)
56 {
57    uz size;
58    char *news;
59    NYD_IN;
60 
61    size = su_cs_len(str);
62    news = su_MEM_BAG_SELF_AUTO_ALLOC_LOCOR(size +1,  su_DBG_LOC_ARGS_ORUSE);
63    if(size > 0)
64       su_mem_copy(news, str, size);
65    news[size] = '\0';
66    NYD_OU;
67    return news;
68 }
69 
70 FL char *
71 (savestrbuf)(char const *sbuf, uz sbuf_len  su_DBG_LOC_ARGS_DECL)
72 {
73    char *news;
74    NYD_IN;
75 
76    news = su_MEM_BAG_SELF_AUTO_ALLOC_LOCOR(sbuf_len +1, su_DBG_LOC_ARGS_ORUSE);
77    if(sbuf_len > 0)
78       su_mem_copy(news, sbuf, sbuf_len);
79    news[sbuf_len] = 0;
80    NYD_OU;
81    return news;
82 }
83 
84 FL char *
85 (savecatsep)(char const *s1, char sep, char const *s2  su_DBG_LOC_ARGS_DECL)
86 {
87    uz l1, l2;
88    char *news;
89    NYD_IN;
90 
91    l1 = (s1 != NULL) ? su_cs_len(s1) : 0;
92    l2 = su_cs_len(s2);
93    news = su_MEM_BAG_SELF_AUTO_ALLOC_LOCOR(l1 + (sep != '\0') + l2 +1,
94          su_DBG_LOC_ARGS_ORUSE);
95    if (l1 > 0) {
96       su_mem_copy(news + 0, s1, l1);
97       if (sep != '\0')
98          news[l1++] = sep;
99    }
100    if(l2 > 0)
101       su_mem_copy(news + l1, s2, l2);
102    news[l1 + l2] = '\0';
103    NYD_OU;
104    return news;
105 }
106 
107 /*
108  * Support routines, auto-reclaimed storage
109  */
110 
111 FL struct str *
str_concat_csvl(struct str * self,...)112 str_concat_csvl(struct str *self, ...) /* XXX onepass maybe better here */
113 {
114    va_list vl;
115    uz l;
116    char const *cs;
117    NYD_IN;
118 
119    va_start(vl, self);
120    for (l = 0; (cs = va_arg(vl, char const*)) != NULL;)
121       l += su_cs_len(cs);
122    va_end(vl);
123 
124    self->l = l;
125    self->s = n_autorec_alloc(l +1);
126 
127    va_start(vl, self);
128    for (l = 0; (cs = va_arg(vl, char const*)) != NULL;) {
129       uz i;
130 
131       i = su_cs_len(cs);
132       if(i > 0){
133          su_mem_copy(self->s + l, cs, i);
134          l += i;
135       }
136    }
137    self->s[l] = '\0';
138    va_end(vl);
139    NYD_OU;
140    return self;
141 }
142 
143 FL struct str *
144 (str_concat_cpa)(struct str *self, char const * const *cpa,
145    char const *sep_o_null  su_DBG_LOC_ARGS_DECL)
146 {
147    uz sonl, l;
148    char const * const *xcpa;
149    NYD_IN;
150 
151    sonl = (sep_o_null != NULL) ? su_cs_len(sep_o_null) : 0;
152 
153    for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa)
154       l += su_cs_len(*xcpa) + sonl;
155 
156    self->l = l;
157    self->s = su_MEM_BAG_SELF_AUTO_ALLOC_LOCOR(l +1, su_DBG_LOC_ARGS_ORUSE);
158 
159    for (l = 0, xcpa = cpa; *xcpa != NULL; ++xcpa) {
160       uz i;
161 
162       i = su_cs_len(*xcpa);
163       if(i > 0){
164          su_mem_copy(self->s + l, *xcpa, i);
165          l += i;
166       }
167       if (sonl > 0) {
168          su_mem_copy(self->s + l, sep_o_null, sonl);
169          l += sonl;
170       }
171    }
172    self->s[l] = '\0';
173    NYD_OU;
174    return self;
175 }
176 
177 /*
178  * Routines that are not related to auto-reclaimed storage follow.
179  */
180 
181 FL boole
n_is_maybe_regex_buf(char const * buf,uz len)182 n_is_maybe_regex_buf(char const *buf, uz len){
183    boole rv;
184    NYD2_IN;
185 
186    rv = (su_cs_first_of_cbuf_cbuf(buf, len, "^[*+?|$", su_UZ_MAX
187          ) != su_UZ_MAX);
188    NYD2_OU;
189    return rv;
190 }
191 
192 FL void
makelow(char * cp)193 makelow(char *cp) /* TODO isn't that crap? --> */
194 {
195       NYD_IN;
196 #ifdef mx_HAVE_C90AMEND1
197    if (n_mb_cur_max > 1) {
198       char *tp = cp;
199       wchar_t wc;
200       int len;
201 
202       while (*cp != '\0') {
203          len = mbtowc(&wc, cp, n_mb_cur_max);
204          if (len < 0)
205             *tp++ = *cp++;
206          else {
207             wc = towlower(wc);
208             if (wctomb(tp, wc) == len)
209                tp += len, cp += len;
210             else
211                *tp++ = *cp++; /* <-- at least here */
212          }
213       }
214    } else
215 #endif
216           for(;; ++cp){
217       char c;
218 
219       *cp = su_cs_to_lower(c = *cp);
220       if(c == '\0')
221          break;
222    }
223    NYD_OU;
224 }
225 
226 FL boole
substr(char const * str,char const * sub)227 substr(char const *str, char const *sub)
228 {
229    char const *cp, *backup;
230    NYD_IN;
231 
232    cp = sub;
233    backup = str;
234    while (*str != '\0' && *cp != '\0') {
235 #ifdef mx_HAVE_C90AMEND1
236       if (n_mb_cur_max > 1) {
237          wchar_t c, c2;
238          int i;
239 
240          if ((i = mbtowc(&c, cp, n_mb_cur_max)) == -1)
241             goto Jsinglebyte;
242          cp += i;
243          if ((i = mbtowc(&c2, str, n_mb_cur_max)) == -1)
244             goto Jsinglebyte;
245          str += i;
246          c = towupper(c);
247          c2 = towupper(c2);
248          if (c != c2) {
249             if ((i = mbtowc(&c, backup, n_mb_cur_max)) > 0) {
250                backup += i;
251                str = backup;
252             } else
253                str = ++backup;
254             cp = sub;
255          }
256       } else
257 Jsinglebyte:
258 #endif
259       {
260          int c, c2;
261 
262          c = *cp++ & 0377;
263          c = su_cs_to_upper(c);
264          c2 = *str++ & 0377;
265          c2 = su_cs_to_upper(c2);
266          if (c != c2) {
267             str = ++backup;
268             cp = sub;
269          }
270       }
271    }
272    NYD_OU;
273    return (*cp == '\0');
274 }
275 
276 FL struct str *
277 (n_str_assign_buf)(struct str *self, char const *buf, uz buflen
278       su_DBG_LOC_ARGS_DECL){
279    NYD_IN;
280    if(buflen == UZ_MAX)
281       buflen = (buf == NULL) ? 0 : su_cs_len(buf);
282 
283    ASSERT(buflen == 0 || buf != NULL);
284 
285    if(LIKELY(buflen > 0)){
286       self->s = su_MEM_REALLOC_LOCOR(self->s, (self->l = buflen) +1,
287             su_DBG_LOC_ARGS_ORUSE);
288       su_mem_copy(self->s, buf, buflen);
289       self->s[buflen] = '\0';
290    }else
291       self->l = 0;
292    NYD_OU;
293    return self;
294 }
295 
296 FL struct str *
297 (n_str_add_buf)(struct str *self, char const *buf, uz buflen
298       su_DBG_LOC_ARGS_DECL){
299    NYD_IN;
300    if(buflen == UZ_MAX)
301       buflen = (buf == NULL) ? 0 : su_cs_len(buf);
302 
303    ASSERT(buflen == 0 || buf != NULL);
304 
305    if(buflen > 0) {
306       uz osl = self->l, nsl = osl + buflen;
307 
308       self->s = su_MEM_REALLOC_LOCOR(self->s, (self->l = nsl) +1,
309             su_DBG_LOC_ARGS_ORUSE);
310       su_mem_copy(self->s + osl, buf, buflen);
311       self->s[nsl] = '\0';
312    }
313    NYD_OU;
314    return self;
315 }
316 
317 FL struct str *
n_str_trim(struct str * self,enum n_str_trim_flags stf)318 n_str_trim(struct str *self, enum n_str_trim_flags stf){
319    uz l;
320    char const *cp;
321    NYD2_IN;
322 
323    cp = self->s;
324 
325    if((l = self->l) > 0 && (stf & n_STR_TRIM_FRONT)){
326       while(su_cs_is_space(*cp)){
327          ++cp;
328          if(--l == 0)
329             break;
330       }
331       self->s = n_UNCONST(cp);
332    }
333 
334    if(l > 0 && (stf & n_STR_TRIM_END)){
335       for(cp += l -1; su_cs_is_space(*cp); --cp)
336          if(--l == 0)
337             break;
338    }
339    self->l = l;
340 
341    NYD2_OU;
342    return self;
343 }
344 
345 FL struct str *
n_str_trim_ifs(struct str * self,boole dodefaults)346 n_str_trim_ifs(struct str *self, boole dodefaults){
347    char s, t, n, c;
348    char const *ifs, *cp;
349    uz l, i;
350    NYD2_IN;
351 
352    if((l = self->l) == 0)
353       goto jleave;
354 
355    ifs = ok_vlook(ifs_ws);
356    cp = self->s;
357    s = t = n = '\0';
358 
359    /* Check whether we can go fast(er) path */
360    for(i = 0; (c = ifs[i]) != '\0'; ++i){
361       switch(c){
362       case ' ': s = c; break;
363       case '\t': t = c; break;
364       case '\n': n = c; break;
365       default:
366          /* Need to go the slow path */
367          while(su_cs_find_c(ifs, *cp) != NULL){
368             ++cp;
369             if(--l == 0)
370                break;
371          }
372          self->s = n_UNCONST(cp);
373 
374          if(l > 0){
375             for(cp += l -1; su_cs_find_c(ifs, *cp) != NULL;){
376                if(--l == 0)
377                   break;
378                /* An uneven number of reverse solidus escapes last WS! */
379                else if(*--cp == '\\'){
380                   sz j;
381 
382                   for(j = 1; l - (uz)j > 0 && cp[-j] == '\\'; ++j)
383                      ;
384                   if(j & 1){
385                      ++l;
386                      break;
387                   }
388                }
389             }
390          }
391          self->l = l;
392 
393          if(!dodefaults)
394             goto jleave;
395          cp = self->s;
396          ++i;
397          break;
398       }
399    }
400 
401    /* No ifs-ws?  No more data?  No trimming */
402    if(l == 0 || (i == 0 && !dodefaults))
403       goto jleave;
404 
405    if(dodefaults){
406       s = ' ';
407       t = '\t';
408       n = '\n';
409    }
410 
411    if(l > 0){
412       while((c = *cp) != '\0' && (c == s || c == t || c == n)){
413          ++cp;
414          if(--l == 0)
415             break;
416       }
417       self->s = n_UNCONST(cp);
418    }
419 
420    if(l > 0){
421       for(cp += l -1; (c = *cp) != '\0' && (c == s || c == t || c == n);){
422          if(--l == 0)
423             break;
424          /* An uneven number of reverse solidus escapes last WS! */
425          else if(*--cp == '\\'){
426             sz j;
427 
428             for(j = 1; l - (uz)j > 0 && cp[-j] == '\\'; ++j)
429                ;
430             if(j & 1){
431                ++l;
432                break;
433             }
434          }
435       }
436    }
437    self->l = l;
438 jleave:
439    NYD2_OU;
440    return self;
441 }
442 
443 /*
444  * struct n_string TODO extend, optimize
445  */
446 
447 FL struct n_string *
n__string_clear(struct n_string * self)448 n__string_clear(struct n_string *self){
449    NYD_IN;
450    ASSERT(self != NIL);
451    ASSERT(self->s_dat != NIL);
452 
453    if(!self->s_auto)
454       su_FREE(self->s_dat);
455    self->s_dat = NIL;
456    self->s_len = self->s_size = 0;
457 
458    NYD_OU;
459    return self;
460 }
461 
462 FL struct n_string *
463 (n_string_reserve)(struct n_string *self, uz noof  su_DBG_LOC_ARGS_DECL){
464    u32 i, l, s;
465    NYD_IN;
466    ASSERT(self != NULL);
467 
468    s = self->s_size;
469    l = self->s_len;
470    if((uz)S32_MAX - Z_ALIGN(1) - l <= noof)
471       n_panic(_("Memory allocation too large"));
472 
473    if((i = s - l) <= ++noof){
474       i += l + (u32)noof;
475       i = Z_ALIGN(i);
476       self->s_size = i -1;
477 
478       if(!self->s_auto)
479          self->s_dat = su_MEM_REALLOC_LOCOR(self->s_dat, i,
480                su_DBG_LOC_ARGS_ORUSE);
481       else{
482          char *ndat;
483 
484          ndat = su_MEM_BAG_SELF_AUTO_ALLOC_LOCOR(i, su_DBG_LOC_ARGS_ORUSE);
485          if(l > 0)
486             su_mem_copy(ndat, self->s_dat, l);
487          self->s_dat = ndat;
488       }
489    }
490    NYD_OU;
491    return self;
492 }
493 
494 FL struct n_string *
495 (n_string_resize)(struct n_string *self, uz nlen  su_DBG_LOC_ARGS_DECL){
496    NYD_IN;
497    ASSERT(self != NULL);
498 
499    if(UCMP(z, S32_MAX, <=, nlen))
500       n_panic(_("Memory allocation too large"));
501 
502    if(self->s_len < nlen)
503       self = (n_string_reserve)(self, nlen  su_DBG_LOC_ARGS_USE);
504    self->s_len = (u32)nlen;
505    NYD_OU;
506    return self;
507 }
508 
509 FL struct n_string *
510 (n_string_push_buf)(struct n_string *self, char const *buf, uz buflen
511       su_DBG_LOC_ARGS_DECL){
512    NYD_IN;
513 
514    ASSERT(self != NULL);
515    ASSERT(buflen == 0 || buf != NULL);
516 
517    if(buflen == UZ_MAX)
518       buflen = (buf == NULL) ? 0 : su_cs_len(buf);
519 
520    if(buflen > 0){
521       u32 i;
522 
523       self = (n_string_reserve)(self, buflen  su_DBG_LOC_ARGS_USE);
524       su_mem_copy(&self->s_dat[i = self->s_len], buf, buflen);
525       self->s_len = (i += (u32)buflen);
526    }
527    NYD_OU;
528    return self;
529 }
530 
531 FL struct n_string *
532 (n_string_push_c)(struct n_string *self, char c  su_DBG_LOC_ARGS_DECL){
533    NYD_IN;
534 
535    ASSERT(self != NULL);
536 
537    if(self->s_len + 1 >= self->s_size)
538       self = (n_string_reserve)(self, 1  su_DBG_LOC_ARGS_USE);
539    self->s_dat[self->s_len++] = c;
540    NYD_OU;
541    return self;
542 }
543 
544 FL struct n_string *
545 (n_string_unshift_buf)(struct n_string *self, char const *buf, uz buflen
546       su_DBG_LOC_ARGS_DECL){
547    NYD_IN;
548 
549    ASSERT(self != NULL);
550    ASSERT(buflen == 0 || buf != NULL);
551 
552    if(buflen == UZ_MAX)
553       buflen = (buf == NULL) ? 0 : su_cs_len(buf);
554 
555    if(buflen > 0){
556       self = (n_string_reserve)(self, buflen  su_DBG_LOC_ARGS_USE);
557       if(self->s_len > 0)
558          su_mem_move(&self->s_dat[buflen], self->s_dat, self->s_len);
559       su_mem_copy(self->s_dat, buf, buflen);
560       self->s_len += (u32)buflen;
561    }
562    NYD_OU;
563    return self;
564 }
565 
566 FL struct n_string *
567 (n_string_unshift_c)(struct n_string *self, char c  su_DBG_LOC_ARGS_DECL){
568    NYD_IN;
569 
570    ASSERT(self != NULL);
571 
572    if(self->s_len + 1 >= self->s_size)
573       self = (n_string_reserve)(self, 1  su_DBG_LOC_ARGS_USE);
574    if(self->s_len > 0)
575       su_mem_move(&self->s_dat[1], self->s_dat, self->s_len);
576    self->s_dat[0] = c;
577    ++self->s_len;
578    NYD_OU;
579    return self;
580 }
581 
582 FL struct n_string *
583 (n_string_insert_buf)(struct n_string *self, uz idx,
584       char const *buf, uz buflen  su_DBG_LOC_ARGS_DECL){
585    NYD_IN;
586 
587    ASSERT(self != NULL);
588    ASSERT(buflen == 0 || buf != NULL);
589    ASSERT(idx <= self->s_len);
590 
591    if(buflen == UZ_MAX)
592       buflen = (buf == NULL) ? 0 : su_cs_len(buf);
593 
594    if(buflen > 0){
595       self = (n_string_reserve)(self, buflen  su_DBG_LOC_ARGS_USE);
596       if(self->s_len > 0)
597          su_mem_move(&self->s_dat[idx + buflen], &self->s_dat[idx],
598             self->s_len - idx);
599       su_mem_copy(&self->s_dat[idx], buf, buflen);
600       self->s_len += (u32)buflen;
601    }
602    NYD_OU;
603    return self;
604 }
605 
606 FL struct n_string *
607 (n_string_insert_c)(struct n_string *self, uz idx,
608       char c  su_DBG_LOC_ARGS_DECL){
609    NYD_IN;
610 
611    ASSERT(self != NULL);
612    ASSERT(idx <= self->s_len);
613 
614    if(self->s_len + 1 >= self->s_size)
615       self = (n_string_reserve)(self, 1  su_DBG_LOC_ARGS_USE);
616    if(self->s_len > 0)
617       su_mem_move(&self->s_dat[idx + 1], &self->s_dat[idx], self->s_len - idx);
618    self->s_dat[idx] = c;
619    ++self->s_len;
620    NYD_OU;
621    return self;
622 }
623 
624 FL struct n_string *
n_string_cut(struct n_string * self,uz idx,uz len)625 n_string_cut(struct n_string *self, uz idx, uz len){
626    NYD_IN;
627 
628    ASSERT(self != NULL);
629    ASSERT(UZ_MAX - idx > len);
630    ASSERT(S32_MAX >= idx + len);
631    ASSERT(idx + len <= self->s_len);
632 
633    if(len > 0)
634       su_mem_move(&self->s_dat[idx], &self->s_dat[idx + len],
635          (self->s_len -= len) - idx);
636    NYD_OU;
637    return self;
638 }
639 
640 FL char *
641 (n_string_cp)(struct n_string *self  su_DBG_LOC_ARGS_DECL){
642    char *rv;
643    NYD2_IN;
644 
645    ASSERT(self != NULL);
646 
647    if(self->s_size == 0)
648       self = (n_string_reserve)(self, 1  su_DBG_LOC_ARGS_USE);
649 
650    (rv = self->s_dat)[self->s_len] = '\0';
651    NYD2_OU;
652    return rv;
653 }
654 
655 FL char const *
n_string_cp_const(struct n_string const * self)656 n_string_cp_const(struct n_string const *self){
657    char const *rv;
658    NYD2_IN;
659 
660    ASSERT(self != NULL);
661 
662    if(self->s_size != 0){
663       ((struct n_string*)n_UNCONST(self))->s_dat[self->s_len] = '\0';
664       rv = self->s_dat;
665    }else
666       rv = n_empty;
667    NYD2_OU;
668    return rv;
669 }
670 
671 #include "su/code-ou.h"
672 /* s-it-mode */
673