1 /*------------------------------------------------------------------
2  * String functions.
3  *
4  *------------------------------------------------------------------
5  * A collection of string related functions and utilities:
6  *
7  * xstrncpy(): a safer strncpy().
8  * trim_all_spaces(): used in efun parse_command().
9  *
10  * strbuf_t: an extendable string buffer, useful for incremental
11  *   construction of a string.
12  * TODO: I am afraid the handling of length in _grow() itself and
13  * TODO:: its calls is a bit too far on the conservative side.
14  * TODO: Rewrite the strbuf_t to use a scatter-gather storing
15  * TODO:: of data, to avoid allocations of large buffers (this happens
16  * TODO:: when doing save/restore on large objects).
17  *
18  * --- Efuns and Operators ---
19  *
20  * f_convert_charset(): Convert charsets using iconv().
21  * intersect_strings(): Implements '&' and '-' on strings
22  * x_filter_string(): Filter a string through a callback or mapping.
23  * x_map_string(): Map a string through a callback or mapping.
24  *
25  *------------------------------------------------------------------
26  */
27 
28 /*--------------------------------------------------------------------*/
29 
30 #include "driver.h"
31 #include "typedefs.h"
32 
33 #include "my-alloca.h"
34 #include <stdarg.h>
35 #include <stddef.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #ifdef HAS_ICONV
41 #include <iconv.h>
42 #endif
43 
44 #include "strfuns.h"
45 
46 #include "comm.h"
47 #include "interpret.h"
48 #include "main.h"
49 #include "mapping.h"
50 #include "mstrings.h"
51 #include "object.h"
52 #include "simulate.h"
53 #include "stdstrings.h"
54 #include "svalue.h"
55 #include "xalloc.h"
56 
57 /*--------------------------------------------------------------------*/
58 void
strbuf_zero(strbuf_t * buf)59 strbuf_zero (strbuf_t *buf)
60 
61 /* Initialise the given string buffer <buf>.
62  */
63 
64 {
65     buf->alloc_len = 0;
66     buf->length = 0;
67     buf->buf = NULL;
68 }
69 
70 /*--------------------------------------------------------------------*/
71 void
strbuf_free(strbuf_t * buf)72 strbuf_free (strbuf_t *buf)
73 
74 /* Free the given string buffer <buf>.
75  * TODO: Not necessary once all strings are counted and with length?
76  */
77 
78 {
79     if (buf->buf)
80         xfree(buf->buf);
81     strbuf_zero(buf);
82 }
83 
84 /*--------------------------------------------------------------------*/
85 static INLINE size_t
strbuf_grow(strbuf_t * buf,size_t len)86 strbuf_grow (strbuf_t *buf, size_t len)
87 
88 /* Extend the stringbuffer <buf> to hold at least <len> more
89  * bytes (ie. enough for a string of length <len>-1).
90  *
91  * Return <len> if all memory could be allocated, or a lower number
92  * of only part of the required memory is available.
93  *
94  * N.B.: be careful with overflows when doing the checks.
95  */
96 
97 {
98     size_t new_len;
99 
100     /* Catch some simple situations. */
101     if (buf->alloc_len >= MAX_STRBUF_LEN)
102         return 0;  /* Truncated */
103 
104     if (buf->alloc_len - buf->length > len)
105         return len;
106 
107     /* Allocate more than we need in anticipation of further adds,
108      * but not more than we can manage
109      */
110     if (MAX_STRBUF_LEN - buf->length < len * 3)
111     {
112         new_len = MAX_STRBUF_LEN;
113         if (new_len - buf->length < len)
114             len = new_len - buf->length;
115     }
116     else
117         new_len = buf->length + len * 3;
118 
119 
120     /* Is this the first allocation? */
121     if (!buf->buf)
122     {
123         memsafe(buf->buf = xalloc(new_len), new_len, "new strbuf");
124         buf->alloc_len = (u_long)new_len;
125         buf->length = 0;
126         *(buf->buf) = '\0';
127         return len;
128     }
129 
130     /* Extension of the existing buffer */
131 
132     memsafe(buf->buf = rexalloc(buf->buf, new_len), new_len, "larger strbuf");
133     buf->alloc_len = (u_long)new_len;
134     return len;
135 } /* strbuf_grow() */
136 
137 /*--------------------------------------------------------------------*/
138 void
strbuf_add(strbuf_t * buf,const char * text)139 strbuf_add (strbuf_t *buf, const char * text)
140 
141 /* Add the <text> to string buffer <buf>.
142  */
143 
144 {
145     size_t len;
146 
147     len = strlen(text) + 1;
148     if (!len)
149         return;
150 
151     if (len + buf->length > buf->alloc_len)
152         len = strbuf_grow(buf, len);
153     if (len)
154     {
155         memcpy(buf->buf+buf->length, text, len);
156         buf->length += len-1;
157         buf->buf[buf->length] = '\0';
158     }
159 }  /* strbuf_add() */
160 
161 /*--------------------------------------------------------------------*/
162 void
strbuf_addn(strbuf_t * buf,const char * text,size_t len)163 strbuf_addn (strbuf_t *buf, const char * text, size_t len)
164 
165 /* Add the <len> characters starting at <text> to string buffer <buf>.
166  */
167 
168 {
169     if (!len)
170         return;
171 
172     len += 1;
173     if (len + buf->length > buf->alloc_len)
174         len = strbuf_grow(buf, len);
175     if (len)
176     {
177         len--;
178         memcpy(buf->buf+buf->length, text, len);
179         buf->length += len;
180         buf->buf[buf->length] = '\0';
181     }
182 }  /* strbuf_addn() */
183 
184 /*--------------------------------------------------------------------*/
185 void
strbuf_addc(strbuf_t * buf,char ch)186 strbuf_addc (strbuf_t *buf, char ch)
187 
188 /* Add the <ch>aracter to string buffer <buf>.
189  */
190 
191 {
192     size_t len;
193 
194     len = 2;
195     if (2 + buf->length > buf->alloc_len)
196         len = strbuf_grow(buf, 2);
197     if (len)
198     {
199         buf->buf[buf->length] = ch;
200         buf->length++;
201         buf->buf[buf->length] = '\0';
202     }
203 }  /* strbuf_addc() */
204 
205 /*--------------------------------------------------------------------*/
206 void
strbuf_addf(strbuf_t * buf,const char * format,...)207 strbuf_addf (strbuf_t *buf, const char * format, ...)
208 
209 /* Create a string from <format> and the following arguments using
210  * sprintf() rules, and add the result to the string buffer <buf>.
211  */
212 
213 {
214     char tmpbuf[4096];
215     va_list vargs;
216 
217     tmpbuf[sizeof tmpbuf - 1] = '\0';
218 
219     va_start(vargs, format);
220     vsprintf(tmpbuf, format, vargs);
221     va_end(vargs);
222 
223     if (tmpbuf[sizeof tmpbuf - 1])
224         fatal("strbuf_addf: Internal buffer overflow.\n");
225 
226     strbuf_add(buf, tmpbuf);
227 }  /* strbuf_addf() */
228 
229 /*--------------------------------------------------------------------*/
230 void
strbuf_send(strbuf_t * buf)231 strbuf_send (strbuf_t *buf)
232 
233 /* Send the string collected in <buf> out to the current user with
234  * add_message(), and clear <buf>.
235  */
236 
237 {
238     if (buf->buf && buf->length)
239     {
240         add_message("%s", buf->buf);
241     }
242 
243     /* Empty the string buffer */
244     if (buf->buf)
245         xfree(buf->buf);
246     buf->buf = NULL;
247     buf->length = 0;
248     buf->alloc_len = 0;
249 } /* strbuf_send() */
250 
251 /*--------------------------------------------------------------------*/
252 void
strbuf_store(strbuf_t * buf,svalue_t * svp)253 strbuf_store (strbuf_t *buf, svalue_t *svp)
254 
255 /* Store the string collected in <buf>, which may be the null string "",
256  * into the empty svalue *<svp>, then clear <buf>.
257  */
258 
259 {
260     svp->type = T_STRING;
261     if (buf->buf && buf->length)
262     {
263         svp->u.str = new_n_mstring(buf->buf, buf->length);
264     }
265     else
266     {
267         svp->u.str = ref_mstring(STR_EMPTY);
268     }
269 
270     /* Empty the string buffer */
271     if (buf->buf)
272         xfree(buf->buf);
273     buf->buf = NULL;
274     buf->length = 0;
275     buf->alloc_len = 0;
276 } /* strbuf_store() */
277 
278 /*--------------------------------------------------------------------*/
279 void
strbuf_copy(strbuf_t * buf,char * cbuf)280 strbuf_copy (strbuf_t *buf, char *cbuf)
281 
282 /* Copy the string collected in <buf>, which may be the null string "",
283  * into the buffer <cbuf> which must have been allocated by the caller
284  * to a suitable size. The copied string will be terminated with a '\0'.
285  */
286 
287 {
288     if (buf->buf && buf->length)
289         memcpy(cbuf, buf->buf, buf->length);
290     cbuf[buf->length] = '\0';
291 } /* strbuf_copy() */
292 
293 /*--------------------------------------------------------------------*/
294 char *
xstrncpy(char * dest,const char * src,size_t num)295 xstrncpy (char * dest, const char * src, size_t num)
296 
297 /* Copy string <src> at address <dest> up to and including the terminating
298  * 0 or up to size <num>, whichever comes first. Result is <dest>.
299  *
300  * In contrast to strncpy(), the copying terminates if a terminating 0
301  * is found (and copied) in <src> - strncpy() would add additional 0s
302  * until a total of <num> characters has been written to <dest>.
303  */
304 
305 {
306     char * p = dest;
307 
308     while (num-- != 0 && (*p++ = *src++) != '\0') NOOP;
309     return dest;
310 } /* xstrncpy() */
311 
312 /*-------------------------------------------------------------------------*/
313 string_t *
trim_all_spaces(const string_t * txt)314 trim_all_spaces (const string_t * txt)
315 
316 /* Trim the input string <txt> by removing all leading and trailing
317  * space, and by folding embedded space runs into just one each.
318  * Return the new string with one ref; the refcount of <txt> is not changed.
319  *
320  * Throw an error when out of memory.
321  */
322 
323 {
324     char * dest;
325     const char * src;
326     size_t dest_ix, src_ix, srclen;
327     string_t * rc;
328 
329     dest = alloca(mstrsize(txt));
330     if (dest == NULL)
331         errorf("Stack overflow (%zu bytes)\n", mstrsize(txt));
332 
333     src = get_txt((string_t *const)txt);
334     srclen = mstrsize(txt);
335     src_ix = 0;
336     dest_ix = 0;
337 
338     /* Blank out trailing spaces */
339     while (srclen > 0 && src[srclen-1] == ' ')
340         srclen--;
341 
342     /* Skip leading spaces */
343     while (src_ix < srclen && *src == ' ')
344         src_ix++, src++;
345 
346     /* Copy characters, but fold embedded spaces. */
347     for ( ; src_ix < srclen; src_ix++, src++, dest_ix++)
348     {
349         dest[dest_ix] = *src;
350 
351         /* If this and the next character is a space, forward
352          * src until the last space in this run.
353          */
354         if (' ' == *src)
355         {
356             while (src_ix+1 < srclen && ' ' == src[1])
357                 src_ix++, src++;
358         }
359     }
360 
361     memsafe(rc = new_n_mstring(dest, dest_ix), dest_ix, "trimmed result");
362     return rc;
363 } /* trim_all_spaces() */
364 
365 /*====================================================================*/
366 
367 /*                          EFUNS                                     */
368 
369 /*--------------------------------------------------------------------*/
370 #ifdef HAS_ICONV
371 
372 svalue_t *
f_convert_charset(svalue_t * sp)373 f_convert_charset (svalue_t *sp)
374 
375 /* EFUN convert_charset()
376  *
377  *   string convert_charset(string str, string from_cs, string to_cs)
378  *
379  * Convert the string <str> from charset <from_cs> to charset <to_cs>
380  * and return the converted string.
381  *
382  * The efun is only available on systems with libiconv.
383  */
384 
385 {
386     iconv_t context;
387 
388     string_t *from_cs, *to_cs, *in_str, *out_str;
389 
390 #if HAS_ICONV_NONCONST_IN
391 #   define ICONV_IN_CAST (char**)
392 #else
393 #   define ICONV_IN_CAST
394 #endif
395 
396     const char *pIn; /* Input string pointer */
397     size_t in_len;   /* Input length */
398     size_t in_left;  /* Input length left */
399 
400     char * out_buf;  /* Output buffer */
401     size_t out_size; /* Size of the output buffer */
402     size_t out_left; /* Size left in output buffer */
403     char  *pOut;     /* Output string pointer */
404 
405     in_str = sp[-2].u.str;
406     from_cs = sp[-1].u.str;
407     to_cs = sp->u.str;
408 
409     pIn = get_txt(in_str);
410     in_len = mstrsize(in_str);
411     in_left = in_len;
412 
413     /* If the input string is empty, we can return immediately
414      * (and in fact must since the allocator will balk at allocating 0 bytes)
415      */
416     if (!in_len)
417     {
418         // just free the 2nd and 3rd argument, return the first one unchanged.
419         free_string_svalue(sp--);
420         free_string_svalue(sp--);
421         return sp;
422     }
423 
424     /* Allocate a temporary output string */
425     out_size = in_len > 65536 ? (in_len + 33) : (2 * in_len);
426     out_left = out_size;
427 
428     xallocate(out_buf, out_size, "iconv buffer");
429     pOut = out_buf;
430 
431     /* Open the iconv context */
432     context = iconv_open(get_txt(to_cs), get_txt(from_cs));
433     if (context == (iconv_t) -1)
434     {
435         xfree(out_buf);
436 
437         if (errno == EINVAL)
438             errorf("convert_charset(): Conversion '%s' -> '%s' not supported.\n"
439                  , get_txt(from_cs), get_txt(to_cs)
440                 );
441         else
442             errorf("convert_charset(): Error %d.\n", errno);
443         /* NOTREACHED */
444         return sp;
445     }
446 
447     /* Convert the string, reallocating the output buffer where necessary */
448     while (in_left)
449     {
450         size_t rc;
451 
452         rc = iconv(context, ICONV_IN_CAST &pIn, &in_left, &pOut, &out_left);
453         if (rc == (size_t)-1)
454         {
455             if (errno == E2BIG)
456             {
457                 /* Reallocate output buffer */
458                 size_t newsize;
459                 char * tmp;
460 
461                 newsize = out_size + (in_len > 128 ? in_len : 128);
462                 tmp = rexalloc(out_buf, newsize);
463                 if (!tmp)
464                 {
465                     iconv_close(context);
466                     xfree(out_buf);
467                     outofmem(newsize, "iconv buffer");
468                     /* NOTREACHED */
469                     return sp;
470                 }
471                 out_buf = tmp;
472                 pOut = out_buf + out_size;
473                 out_left = newsize - out_size;
474                 out_size = newsize;
475 
476                 continue;
477             }
478 
479             /* Other error: clean up */
480             iconv_close(context);
481             xfree(out_buf);
482 
483             if (errno == EILSEQ)
484             {
485                 errorf("convert_charset(): Invalid character sequence at "
486                        "index %td\n",
487                        (ptrdiff_t)(pIn - get_txt(in_str)));
488                 /* NOTREACHED */
489                 return sp;
490             }
491 
492             if (errno == EINVAL)
493             {
494                 errorf("convert_charset(): Incomplete character sequence at "
495                        "index %td\n", (ptrdiff_t)(pIn - get_txt(in_str)));
496                 /* NOTREACHED */
497                 return sp;
498             }
499 
500             errorf("convert_charset(): Error %d at index %td\n"
501                  , errno, (ptrdiff_t)(pIn - get_txt(in_str))
502                  );
503             /* NOTREACHED */
504             return sp;
505         } /* if (rc < 0) */
506     } /* while (in_left) */
507 
508     /* While the actual conversion is complete, the output stream may now
509      * be in a non-base state. Add the necessary epilogue to get back
510      * to the base state.
511      */
512     while(1)
513     {
514         size_t rc;
515         rc = iconv(context, NULL, NULL, &pOut, &out_left);
516         if (rc == (size_t)-1)
517         {
518             if (errno == E2BIG)
519             {
520                 /* Reallocate output buffer */
521                 size_t newsize;
522                 char * tmp;
523 
524                 newsize = out_size + (in_len > 128 ? in_len : 128);
525                 tmp = rexalloc(out_buf, newsize);
526                 if (!tmp)
527                 {
528                     iconv_close(context);
529                     xfree(out_buf);
530                     outofmem(newsize, "iconv buffer");
531                     /* NOTREACHED */
532                     return sp;
533                 }
534                 out_buf = tmp;
535                 pOut = out_buf + out_size;
536                 out_left = newsize - out_size;
537                 out_size = newsize;
538 
539                 continue;
540             }
541 
542             /* Other error: clean up */
543             iconv_close(context);
544             xfree(out_buf);
545 
546             if (errno == EILSEQ)
547             {
548                 errorf("convert_charset(): Invalid character sequence at "
549                        "index %td\n", (ptrdiff_t)(pIn - get_txt(in_str)));
550                 /* NOTREACHED */
551                 return sp;
552             }
553 
554             if (errno == EINVAL)
555             {
556                 errorf("convert_charset(): Incomplete character sequence at "
557                        "index %td\n", (ptrdiff_t)(pIn - get_txt(in_str)));
558                 /* NOTREACHED */
559                 return sp;
560             }
561 
562             errorf("convert_charset(): Error %d at index %td\n"
563                  , errno, (ptrdiff_t)(pIn - get_txt(in_str))
564                  );
565             /* NOTREACHED */
566             return sp;
567         } /* if (rc < 0) */
568 
569         /* At this point, the iconv() succeeded: we're done */
570         break;
571     } /* while(1) */
572 
573     iconv_close(context);
574 
575     /* Get the return string and prepare the return arguments */
576     out_str = new_n_mstring(out_buf, out_size - out_left);
577     xfree(out_buf);
578     if (!out_str)
579     {
580         outofmem(out_size - out_left, "convert_charset() result");
581         /* NOTREACHED */
582         return sp;
583     }
584 
585     free_string_svalue(sp--);
586     free_string_svalue(sp--);
587     free_string_svalue(sp);
588     put_string(sp, out_str);
589 
590     return sp;
591 } /* f_convert_charset() */
592 
593 #endif /* HAS_ICONV */
594 
595 /*--------------------------------------------------------------------*/
596 static char *
sort_string(const string_t * p_in,size_t len,long ** pos)597 sort_string (const string_t * p_in, size_t len, long ** pos)
598 
599 /* Sort the characters of string <in> (with length <len>) by their numeric
600  * values and return a newly allocated memory block with the sorted string.
601  * If <pos> is not NULL, it will be set to a newly allocated memory block
602  * giving the original positions of the characters in the sorted string.
603  *
604  * We use Mergesort to sort the strings.
605  * TODO: Use Quicksort instead of Mergesort?
606  */
607 
608 {
609     const char * in;  /* Input string */
610     char   * out;     /* Result string */
611     long   * outpos;  /* Result position array */
612     char   * tmp;     /* Temporary string */
613     long   * tmppos;  /* Temporary position array */
614     size_t   step;
615     size_t   i, j;
616 
617     in = get_txt((string_t *const)p_in);
618     out = xalloc(len+1);
619     tmp = xalloc(len+1);
620     if (!out || !tmp)
621     {
622         if (out)
623             xfree(out);
624         if (tmp)
625             xfree(tmp);
626         errorf("(sort_string) Out of memory (2 * %zu bytes) for temporaries.\n"
627              , len+1);
628     }
629     out[len] = '\0';
630     tmp[len] = '\0';
631 
632     if (pos)
633     {
634         outpos = xalloc(len * sizeof(*outpos) + 1);
635         tmppos = xalloc(len * sizeof(*outpos) + 1);
636           /* +1 so that smalloc won't complain when given an empty string */
637         if (!outpos || !tmppos)
638         {
639             if (out)
640                 xfree(out);
641             if (tmp)
642                 xfree(tmp);
643             if (outpos)
644                 xfree(outpos);
645             if (tmppos)
646                 xfree(tmppos);
647             errorf("(sort_string) Out of memory (2 * %zu bytes) for positions.\n"
648                  , len*sizeof(*outpos)+1);
649         }
650     }
651     else
652     {
653         outpos = NULL;
654         tmppos = NULL;
655     }
656 
657     /* First Mergesort pass: comparison of adjacent characters
658      * and initialisation of the out arrays.
659      */
660     for (i = 0; i < len; i += 2)
661     {
662         if (i == len-1)
663         {
664             out[i] = in[i];
665             if (outpos)
666                 outpos[i] = i;
667         }
668         else if (in[i] <= in[i+1])
669         {
670             out[i] = in[i];
671             out[i+1] = in[i+1];
672             if (outpos)
673             {
674                 outpos[i] = i;
675                 outpos[i+1] = i+1;
676             }
677         }
678         else /* (in[i] > in[i+1]) */
679         {
680             out[i] = in[i+1];
681             out[i+1] = in[i];
682             if (outpos)
683             {
684                 outpos[i] = i+1;
685                 outpos[i+1] = i;
686             }
687         }
688     } /* for(initial pass) */
689 
690     /* Mergesort loop: perform the mergesort passes with increasing steps.
691      * Invariant: out is the (semi-sorted) data, tmp is the scratchspace.
692      */
693     for (step = 2; step < len; step *= 2)
694     {
695         size_t start, dest, left;
696 
697         /* Exchange out and tmp */
698         {
699             char *tmp2;
700             long *tmp2pos;
701 
702             tmp2 = out; out = tmp; tmp = tmp2;
703             if (outpos)
704             {
705                 tmp2pos = outpos; outpos = tmppos; tmppos = tmp2pos;
706             }
707         }
708 
709         for (start = 0, dest = 0; start <= len; start += 2*step)
710         {
711             for ( i = start, j = start+step, left = 2 * step
712                 ; left && dest < len
713                 ; left--, dest++
714                 )
715             {
716                 if (i >= start+step
717                  || i >= len)
718                 {
719                     if (j < len)
720                     {
721                         out[dest] = tmp[j];
722                         if (outpos)
723                             outpos[dest] = tmppos[j];
724                         j++;
725                     }
726                 }
727                 else if (j >= start+2*step
728                       || j >= len)
729                 {
730                     if (i < len)
731                     {
732                         out[dest] = tmp[i];
733                         if (outpos)
734                             outpos[dest] = tmppos[i];
735                         i++;
736                     }
737                 }
738                 else if (tmp[i] <= tmp[j])
739                 {
740                     out[dest] = tmp[i];
741                     if (outpos)
742                         outpos[dest] = tmppos[i];
743                     i++;
744                 }
745                 else /* (tmp[i] > tmp[i+step]) */
746                 {
747                     out[dest] = tmp[j];
748                     if (outpos)
749                         outpos[dest] = tmppos[j];
750                     j++;
751                 }
752             } /* for (sort run) */
753         } /* for (start) */
754     } /* for(step) */
755 
756     /* Free the temporary data */
757     if (tmppos)
758         xfree(tmppos);
759     xfree(tmp);
760 
761     /* Return the result */
762     if (pos)
763         *pos = outpos;
764 
765     return out;
766 } /* sort_string() */
767 
768 /*--------------------------------------------------------------------*/
769 string_t *
intersect_strings(const string_t * p_left,const string_t * p_right,Bool bSubtract)770 intersect_strings (const string_t * p_left, const string_t * p_right, Bool bSubtract)
771 
772 /* !bSubtract: Intersect string <left> with string <right> and return
773  *   a newly allocated string with all those characters which are in
774  *   both strings.
775  * bSubtract:  Subtract string <right> from string <left> and return
776  *   a newly allocated string with all those characters which are in
777  *   <left> but not in <right>.
778  * The order of the characters returned is their order of appearance
779  * in <left>.
780  */
781 
782 {
783     size_t   len_left, len_right, len_out;
784     size_t   ix_left, ix_right;
785     long   * pos;
786     CBool  * matches;
787     const char * left_txt;
788     char * left, * right, * result_txt;
789     string_t *result;
790 
791     len_left = mstrsize(p_left);
792     len_right = mstrsize(p_right);
793 
794     xallocate(matches, len_left+1, "intersection matches");
795       /* +1 so that smalloc won't complain when given an empty left string */
796 
797     for (ix_left = 0; ix_left < len_left; ix_left++)
798         matches[ix_left] = bSubtract ? MY_TRUE : MY_FALSE;
799 
800     /* Sort the two strings */
801     left = sort_string(p_left, len_left, &pos);
802     right = sort_string(p_right, len_right, NULL);
803 
804     /* Intersect the two strings by mutual comparison.
805      * Each non-matched character in left gets is pos[] set to -1.
806      */
807     len_out = bSubtract ? len_left : 0;
808     for ( ix_left = 0, ix_right = 0
809         ; ix_left < len_left && ix_right < len_right
810         ; )
811     {
812         if (left[ix_left] < right[ix_right])
813             ix_left++;
814         else if (left[ix_left] > right[ix_right])
815             ix_right++;
816         else /* left[ix_left] == right[ix_right]) */
817         {
818             if (!bSubtract)
819             {
820                 matches[pos[ix_left]] = MY_TRUE;
821                 len_out++;
822             }
823             else
824             {
825                 matches[pos[ix_left]] = MY_FALSE;
826                 len_out--;
827             }
828             ix_left++;
829         }
830     }
831 
832     /* Create the result: copy all flagged characters */
833     memsafe(result = alloc_mstring(len_out), len_out, "intersection result");
834     left_txt = get_txt((string_t *const)p_left);
835     result_txt = get_txt(result);
836     for (ix_left = 0, ix_right = 0; ix_left < len_left; ix_left++)
837         if (matches[ix_left])
838             result_txt[ix_right++] = left_txt[ix_left];
839 
840     /* Free intermediate results */
841     xfree(pos);
842     xfree(matches);
843     xfree(left);
844     xfree(right);
845 
846     return result;
847 } /* intersect_strings() */
848 
849 /*-------------------------------------------------------------------------*/
850 svalue_t *
x_filter_string(svalue_t * sp,int num_arg)851 x_filter_string (svalue_t *sp, int num_arg)
852 
853 /* EFUN: filter() for strings.
854  *
855  *   string filter(string arr, string fun, string|object obj, mixed extra, ...)
856  *   string filter(string arr, closure cl, mixed extra, ...)
857  *   string filter(string arr, mapping map)
858  *
859  * Filter the elements of <arr> through a filter defined by the other
860  * arguments, and return an array of those elements, for which the
861  * filter yields non-zero.
862  *
863  * The filter can be a function call:
864  *
865  *    <obj>-><fun>(elem, <extra>...)
866  *
867  * or a mapping query:
868  *
869  *    <map>[elem]
870  *
871  * <obj> can both be an object reference or a filename. If omitted,
872  * this_object() is used (this also works if the third argument is
873  * neither a string nor an object).
874  */
875 
876 {
877     string_t *rc;     /* Result string */
878     string_t *str;    /* Argument string  */
879     svalue_t *arg;    /* First argument the vm stack */
880     mp_int    slen;   /* Argument string length */
881     char     *src, *dest; /* String text work pointers */
882 
883     char     *flags;  /* Flag array, one flag for each element of <str>
884                        * (in reverse order). */
885     mp_int    res;    /* Number of surviving elements */
886 
887     res = 0;
888 
889     /* Locate the args on the stack, extract the string to filter
890      * and allocate the flags vector.
891      */
892     arg = sp - num_arg + 1;
893 
894     str = arg->u.str;
895     slen = (mp_int)mstrsize(str);
896 
897     /* Every element in flags is associated by index number with an
898      * element in the vector to filter. The filter function is evaluated
899      * for every string character, and the associated flag is set to 0
900      * or 1 according to the result.
901      * At the end, all 1-flagged elements are gathered and copied
902      * into the result string.
903      */
904 
905     if (arg[1].type == T_MAPPING)
906     {
907         mp_int cnt;
908 
909         /* --- Filter by mapping query --- */
910         mapping_t *m;
911 
912         if (num_arg > 2) {
913             errorf("Too many arguments to filter(array)\n");
914         }
915         /* Allocate memory for the flag array. Simultaneously an error
916          * handler is pushed onto the stack (after the arguments) for freeing
917          * the buffer in case of runtime errors. */
918         flags = xalloc_with_error_handler((size_t)slen + 1);
919         if (!flags)
920         {
921           errorf("Out of memory (%zu bytes) for temporary buffer in filter().\n",
922                  (size_t)slen + 1);
923         }
924         sp = inter_sp;
925 
926         m = arg[1].u.map;
927 
928         for (src = get_txt(str), cnt = slen; --cnt >= 0; src++)
929         {
930             svalue_t key;
931 
932             put_number(&key,  *src);
933             if (get_map_value(m, &key) == &const0)
934             {
935                 flags[cnt] = 0;
936                 continue;
937             }
938             flags[cnt] = 1;
939             res++;
940         }
941 
942     } else {
943 
944         /* --- Filter by function call --- */
945 
946         int         error_index;
947         callback_t  cb;
948         mp_int cnt;
949 
950         assign_eval_cost();
951 
952         /* setup_efun_callback() will adopt and therefore remove the
953          * arguments from arg+1 on to arg+num_arg from the stack and update
954          * inter_sp. New top-of-stack will be arg. */
955         error_index = setup_efun_callback(&cb, arg+1, num_arg-1);
956         if (error_index >= 0)
957         {
958             vefun_bad_arg(error_index+2, arg);
959             /* NOTREACHED */
960             return arg;
961         }
962         /* push the callback structure onto the stack. */
963         sp = arg + 1;
964         put_callback(sp, &cb);
965 
966         /* Allocate memory for the flag array. Simultaneously an error
967          * handler is pushed onto the stack (after the arguments) for freeing
968          * the buffer in case of runtime errors. */
969         inter_sp = sp;
970         flags = xalloc_with_error_handler((size_t)slen + 1);
971         if (!flags)
972         {
973             errorf("Out of memory (%"PRIdMPINT" bytes) for temporary buffer "
974                 "in filter().\n", slen + 1);
975         }
976         sp = inter_sp;
977 
978         /* Loop over all elements in p and call the filter.
979          * w is the current element filtered.
980          */
981         for (src = get_txt(str), cnt = slen; --cnt >= 0; src++)
982         {
983             svalue_t *v;
984 
985             flags[cnt] = 0;
986 
987             if (current_object->flags & O_DESTRUCTED)
988                 continue;
989                 /* Don't call the filter anymore, but fill the
990                  * flags array with 0es.
991                  */
992 
993             if (!callback_object(&cb))
994             {
995                 inter_sp = sp;
996                 errorf("object used by filter(array) destructed");
997             }
998 
999             push_number(inter_sp, *src);
1000 
1001             v = apply_callback(&cb, 1);
1002             if (!v || (v->type == T_NUMBER && !v->u.number) )
1003                 continue;
1004 
1005             flags[cnt] = 1;
1006             res++;
1007         }
1008     }
1009 
1010     /* flags[] holds the filter results, res is the number of
1011      * elements to keep. Now create the result vector.
1012      */
1013     rc = alloc_mstring(res);
1014     if (!rc)
1015     {
1016         errorf("Out of memory (%"PRIdMPINT" bytes) for result in filter().\n",
1017             slen+1);
1018     }
1019 
1020     for (src = get_txt(str), dest = get_txt(rc), flags = &flags[slen]
1021        ; res > 0 ; src++)
1022     {
1023         if (*--flags)
1024         {
1025             *dest++ = *src;
1026             res--;
1027         }
1028     }
1029 
1030     /* Cleanup. Arguments for the closure have already been removed. On the
1031      * stack are now the string, the mapping or callback structure and the
1032      * error handler. (Not using pop_n_elems() for 2 elements for saving loop
1033      * and function call overhead.) */
1034     free_svalue(sp--);  /* errorhandler, buffer and flags are freed by this. */
1035     free_svalue(sp--);  /* mapping or callback structure. */
1036     free_mstring(str);  /* string, at arg == sp */
1037     sp->u.str = rc;     /* put result here */
1038 
1039     return sp;
1040 } /* x_filter_string() */
1041 
1042 /*-------------------------------------------------------------------------*/
1043 svalue_t *
x_map_string(svalue_t * sp,int num_arg)1044 x_map_string (svalue_t *sp, int num_arg)
1045 
1046 /* EFUN map() for strings
1047  *
1048  *   string map(string arg, string func, string|object ob, mixed extra...)
1049  *   string map(string arg, closure cl, mixed extra...)
1050  *   string map(string arg, mapping m [, int idx])
1051  *
1052  * Call the function <ob>-><func>() resp. the closure <cl> for
1053  * every element of the array/struct/mapping/string <arg>, and return a result
1054  * made up from the returned values.
1055  *
1056  * It is also possible to map every entry through a lookup <m>[element[,idx]].
1057  * If the mapping entry doesn't exist, the original value is kept, otherwise
1058  * the result of the mapping lookup.
1059  * [Note: argument type and range checking for idx is done in v_map()]
1060  *
1061  * Since <arg> is a string, only integer return values are allowed, of which
1062  * only the lower 8 bits are considered.
1063  *
1064  * If <ob> is omitted, or neither an object nor a string, then
1065  * this_object() is used.
1066  */
1067 
1068 {
1069     string_t *res;
1070     string_t *str;
1071     svalue_t *arg;
1072     mp_int    len;
1073     char     *src, *dest;
1074 
1075     inter_sp = sp;
1076 
1077     arg = sp - num_arg + 1;
1078 
1079     str = arg->u.str;
1080     len = mstrsize(str);
1081 
1082     if (arg[1].type == T_MAPPING)
1083     {
1084         /* --- Map through mapping --- */
1085 
1086         mapping_t *m;
1087         p_int column = 0; /* mapping column to use */
1088 
1089         m = arg[1].u.map;
1090 
1091         if (num_arg > 2)
1092             column = arg[2].u.number;
1093 
1094         res = alloc_mstring(len);
1095         if (!res)
1096             errorf("(map_string) Out of memory: string[%"PRIdMPINT
1097                    "] for result\n", len);
1098 
1099         push_string(inter_sp, res); /* In case of errors */
1100 
1101         for (src = get_txt(str), dest = get_txt(res); --len >= 0; src++, dest++)
1102         {
1103             svalue_t key, *v;
1104 
1105             put_number(&key, *src);
1106             v = get_map_value(m, &key);
1107             if (v == &const0)
1108                 *dest = *src;
1109             else
1110             {
1111                 if (v[column].type != T_NUMBER)
1112                 {
1113                     errorf("(map_string) Illegal value type: %s, expected int\n"
1114                          , typename(v[column].type)
1115                          );
1116                 }
1117                 *dest = (v[column].u.number & 0xFF);
1118             }
1119         }
1120 
1121         if (num_arg > 2)
1122             free_svalue(arg+2);
1123         free_svalue(arg+1); /* the mapping */
1124         sp = arg;
1125     }
1126     else
1127     {
1128         /* --- Map through function call --- */
1129 
1130         callback_t  cb;
1131         int         error_index;
1132 
1133         error_index = setup_efun_callback(&cb, arg+1, num_arg-1);
1134         if (error_index >= 0)
1135         {
1136             vefun_bad_arg(error_index+2, arg);
1137             /* NOTREACHED */
1138             return arg;
1139         }
1140         inter_sp = sp = arg+1;
1141         put_callback(sp, &cb);
1142         num_arg = 2;
1143 
1144         res = alloc_mstring(len);
1145         if (!res)
1146             errorf("(map_string) Out of memory: string[%"PRIdMPINT
1147                    "] for result\n", len);
1148 
1149         push_string(inter_sp, res); /* In case of errors */
1150 
1151         for (src = get_txt(str), dest = get_txt(res); --len >= 0; src++, dest++)
1152         {
1153             svalue_t *v;
1154 
1155             if (current_object->flags & O_DESTRUCTED)
1156                 continue;
1157 
1158             if (!callback_object(&cb))
1159                 errorf("object used by map(string) destructed");
1160 
1161             push_number(inter_sp, *src);
1162 
1163             v = apply_callback(&cb, 1);
1164 
1165             if (v)
1166             {
1167                 if (v->type != T_NUMBER)
1168                 {
1169                     errorf("(map_string) Illegal value: %s, expected string\n"
1170                          , typename(v->type)
1171                          );
1172                 }
1173                 *dest = (v->u.number & 0xFF);
1174             }
1175         }
1176 
1177         free_callback(&cb);
1178     }
1179 
1180     /* The arguments have been removed already, now just replace
1181      * the arr on the stack with the result.
1182      */
1183     free_mstring(str);
1184     arg->u.str = res; /* Keep svalue type: T_POINTER */
1185 
1186     return arg;
1187 } /* x_map_string () */
1188 
1189 /*====================================================================*/
1190 
1191