1 /*------------------------------------------------------------------
2 * String functions.
3 *
4 *------------------------------------------------------------------
5 * A collection of string related functions and utilities:
6 *
7 * xstrncpy(): a safer strncpy().
8 * trim_all_spaces(): used in efun parse_command().
9 *
10 * strbuf_t: an extendable string buffer, useful for incremental
11 * construction of a string.
12 * TODO: I am afraid the handling of length in _grow() itself and
13 * TODO:: its calls is a bit too far on the conservative side.
14 * TODO: Rewrite the strbuf_t to use a scatter-gather storing
15 * TODO:: of data, to avoid allocations of large buffers (this happens
16 * TODO:: when doing save/restore on large objects).
17 *
18 * --- Efuns and Operators ---
19 *
20 * f_convert_charset(): Convert charsets using iconv().
21 * intersect_strings(): Implements '&' and '-' on strings
22 * x_filter_string(): Filter a string through a callback or mapping.
23 * x_map_string(): Map a string through a callback or mapping.
24 *
25 *------------------------------------------------------------------
26 */
27
28 /*--------------------------------------------------------------------*/
29
30 #include "driver.h"
31 #include "typedefs.h"
32
33 #include "my-alloca.h"
34 #include <stdarg.h>
35 #include <stddef.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <sys/types.h>
40 #ifdef HAS_ICONV
41 #include <iconv.h>
42 #endif
43
44 #include "strfuns.h"
45
46 #include "comm.h"
47 #include "interpret.h"
48 #include "main.h"
49 #include "mapping.h"
50 #include "mstrings.h"
51 #include "object.h"
52 #include "simulate.h"
53 #include "stdstrings.h"
54 #include "svalue.h"
55 #include "xalloc.h"
56
57 /*--------------------------------------------------------------------*/
58 void
strbuf_zero(strbuf_t * buf)59 strbuf_zero (strbuf_t *buf)
60
61 /* Initialise the given string buffer <buf>.
62 */
63
64 {
65 buf->alloc_len = 0;
66 buf->length = 0;
67 buf->buf = NULL;
68 }
69
70 /*--------------------------------------------------------------------*/
71 void
strbuf_free(strbuf_t * buf)72 strbuf_free (strbuf_t *buf)
73
74 /* Free the given string buffer <buf>.
75 * TODO: Not necessary once all strings are counted and with length?
76 */
77
78 {
79 if (buf->buf)
80 xfree(buf->buf);
81 strbuf_zero(buf);
82 }
83
84 /*--------------------------------------------------------------------*/
85 static INLINE size_t
strbuf_grow(strbuf_t * buf,size_t len)86 strbuf_grow (strbuf_t *buf, size_t len)
87
88 /* Extend the stringbuffer <buf> to hold at least <len> more
89 * bytes (ie. enough for a string of length <len>-1).
90 *
91 * Return <len> if all memory could be allocated, or a lower number
92 * of only part of the required memory is available.
93 *
94 * N.B.: be careful with overflows when doing the checks.
95 */
96
97 {
98 size_t new_len;
99
100 /* Catch some simple situations. */
101 if (buf->alloc_len >= MAX_STRBUF_LEN)
102 return 0; /* Truncated */
103
104 if (buf->alloc_len - buf->length > len)
105 return len;
106
107 /* Allocate more than we need in anticipation of further adds,
108 * but not more than we can manage
109 */
110 if (MAX_STRBUF_LEN - buf->length < len * 3)
111 {
112 new_len = MAX_STRBUF_LEN;
113 if (new_len - buf->length < len)
114 len = new_len - buf->length;
115 }
116 else
117 new_len = buf->length + len * 3;
118
119
120 /* Is this the first allocation? */
121 if (!buf->buf)
122 {
123 memsafe(buf->buf = xalloc(new_len), new_len, "new strbuf");
124 buf->alloc_len = (u_long)new_len;
125 buf->length = 0;
126 *(buf->buf) = '\0';
127 return len;
128 }
129
130 /* Extension of the existing buffer */
131
132 memsafe(buf->buf = rexalloc(buf->buf, new_len), new_len, "larger strbuf");
133 buf->alloc_len = (u_long)new_len;
134 return len;
135 } /* strbuf_grow() */
136
137 /*--------------------------------------------------------------------*/
138 void
strbuf_add(strbuf_t * buf,const char * text)139 strbuf_add (strbuf_t *buf, const char * text)
140
141 /* Add the <text> to string buffer <buf>.
142 */
143
144 {
145 size_t len;
146
147 len = strlen(text) + 1;
148 if (!len)
149 return;
150
151 if (len + buf->length > buf->alloc_len)
152 len = strbuf_grow(buf, len);
153 if (len)
154 {
155 memcpy(buf->buf+buf->length, text, len);
156 buf->length += len-1;
157 buf->buf[buf->length] = '\0';
158 }
159 } /* strbuf_add() */
160
161 /*--------------------------------------------------------------------*/
162 void
strbuf_addn(strbuf_t * buf,const char * text,size_t len)163 strbuf_addn (strbuf_t *buf, const char * text, size_t len)
164
165 /* Add the <len> characters starting at <text> to string buffer <buf>.
166 */
167
168 {
169 if (!len)
170 return;
171
172 len += 1;
173 if (len + buf->length > buf->alloc_len)
174 len = strbuf_grow(buf, len);
175 if (len)
176 {
177 len--;
178 memcpy(buf->buf+buf->length, text, len);
179 buf->length += len;
180 buf->buf[buf->length] = '\0';
181 }
182 } /* strbuf_addn() */
183
184 /*--------------------------------------------------------------------*/
185 void
strbuf_addc(strbuf_t * buf,char ch)186 strbuf_addc (strbuf_t *buf, char ch)
187
188 /* Add the <ch>aracter to string buffer <buf>.
189 */
190
191 {
192 size_t len;
193
194 len = 2;
195 if (2 + buf->length > buf->alloc_len)
196 len = strbuf_grow(buf, 2);
197 if (len)
198 {
199 buf->buf[buf->length] = ch;
200 buf->length++;
201 buf->buf[buf->length] = '\0';
202 }
203 } /* strbuf_addc() */
204
205 /*--------------------------------------------------------------------*/
206 void
strbuf_addf(strbuf_t * buf,const char * format,...)207 strbuf_addf (strbuf_t *buf, const char * format, ...)
208
209 /* Create a string from <format> and the following arguments using
210 * sprintf() rules, and add the result to the string buffer <buf>.
211 */
212
213 {
214 char tmpbuf[4096];
215 va_list vargs;
216
217 tmpbuf[sizeof tmpbuf - 1] = '\0';
218
219 va_start(vargs, format);
220 vsprintf(tmpbuf, format, vargs);
221 va_end(vargs);
222
223 if (tmpbuf[sizeof tmpbuf - 1])
224 fatal("strbuf_addf: Internal buffer overflow.\n");
225
226 strbuf_add(buf, tmpbuf);
227 } /* strbuf_addf() */
228
229 /*--------------------------------------------------------------------*/
230 void
strbuf_send(strbuf_t * buf)231 strbuf_send (strbuf_t *buf)
232
233 /* Send the string collected in <buf> out to the current user with
234 * add_message(), and clear <buf>.
235 */
236
237 {
238 if (buf->buf && buf->length)
239 {
240 add_message("%s", buf->buf);
241 }
242
243 /* Empty the string buffer */
244 if (buf->buf)
245 xfree(buf->buf);
246 buf->buf = NULL;
247 buf->length = 0;
248 buf->alloc_len = 0;
249 } /* strbuf_send() */
250
251 /*--------------------------------------------------------------------*/
252 void
strbuf_store(strbuf_t * buf,svalue_t * svp)253 strbuf_store (strbuf_t *buf, svalue_t *svp)
254
255 /* Store the string collected in <buf>, which may be the null string "",
256 * into the empty svalue *<svp>, then clear <buf>.
257 */
258
259 {
260 svp->type = T_STRING;
261 if (buf->buf && buf->length)
262 {
263 svp->u.str = new_n_mstring(buf->buf, buf->length);
264 }
265 else
266 {
267 svp->u.str = ref_mstring(STR_EMPTY);
268 }
269
270 /* Empty the string buffer */
271 if (buf->buf)
272 xfree(buf->buf);
273 buf->buf = NULL;
274 buf->length = 0;
275 buf->alloc_len = 0;
276 } /* strbuf_store() */
277
278 /*--------------------------------------------------------------------*/
279 void
strbuf_copy(strbuf_t * buf,char * cbuf)280 strbuf_copy (strbuf_t *buf, char *cbuf)
281
282 /* Copy the string collected in <buf>, which may be the null string "",
283 * into the buffer <cbuf> which must have been allocated by the caller
284 * to a suitable size. The copied string will be terminated with a '\0'.
285 */
286
287 {
288 if (buf->buf && buf->length)
289 memcpy(cbuf, buf->buf, buf->length);
290 cbuf[buf->length] = '\0';
291 } /* strbuf_copy() */
292
293 /*--------------------------------------------------------------------*/
294 char *
xstrncpy(char * dest,const char * src,size_t num)295 xstrncpy (char * dest, const char * src, size_t num)
296
297 /* Copy string <src> at address <dest> up to and including the terminating
298 * 0 or up to size <num>, whichever comes first. Result is <dest>.
299 *
300 * In contrast to strncpy(), the copying terminates if a terminating 0
301 * is found (and copied) in <src> - strncpy() would add additional 0s
302 * until a total of <num> characters has been written to <dest>.
303 */
304
305 {
306 char * p = dest;
307
308 while (num-- != 0 && (*p++ = *src++) != '\0') NOOP;
309 return dest;
310 } /* xstrncpy() */
311
312 /*-------------------------------------------------------------------------*/
313 string_t *
trim_all_spaces(const string_t * txt)314 trim_all_spaces (const string_t * txt)
315
316 /* Trim the input string <txt> by removing all leading and trailing
317 * space, and by folding embedded space runs into just one each.
318 * Return the new string with one ref; the refcount of <txt> is not changed.
319 *
320 * Throw an error when out of memory.
321 */
322
323 {
324 char * dest;
325 const char * src;
326 size_t dest_ix, src_ix, srclen;
327 string_t * rc;
328
329 dest = alloca(mstrsize(txt));
330 if (dest == NULL)
331 errorf("Stack overflow (%zu bytes)\n", mstrsize(txt));
332
333 src = get_txt((string_t *const)txt);
334 srclen = mstrsize(txt);
335 src_ix = 0;
336 dest_ix = 0;
337
338 /* Blank out trailing spaces */
339 while (srclen > 0 && src[srclen-1] == ' ')
340 srclen--;
341
342 /* Skip leading spaces */
343 while (src_ix < srclen && *src == ' ')
344 src_ix++, src++;
345
346 /* Copy characters, but fold embedded spaces. */
347 for ( ; src_ix < srclen; src_ix++, src++, dest_ix++)
348 {
349 dest[dest_ix] = *src;
350
351 /* If this and the next character is a space, forward
352 * src until the last space in this run.
353 */
354 if (' ' == *src)
355 {
356 while (src_ix+1 < srclen && ' ' == src[1])
357 src_ix++, src++;
358 }
359 }
360
361 memsafe(rc = new_n_mstring(dest, dest_ix), dest_ix, "trimmed result");
362 return rc;
363 } /* trim_all_spaces() */
364
365 /*====================================================================*/
366
367 /* EFUNS */
368
369 /*--------------------------------------------------------------------*/
370 #ifdef HAS_ICONV
371
372 svalue_t *
f_convert_charset(svalue_t * sp)373 f_convert_charset (svalue_t *sp)
374
375 /* EFUN convert_charset()
376 *
377 * string convert_charset(string str, string from_cs, string to_cs)
378 *
379 * Convert the string <str> from charset <from_cs> to charset <to_cs>
380 * and return the converted string.
381 *
382 * The efun is only available on systems with libiconv.
383 */
384
385 {
386 iconv_t context;
387
388 string_t *from_cs, *to_cs, *in_str, *out_str;
389
390 #if HAS_ICONV_NONCONST_IN
391 # define ICONV_IN_CAST (char**)
392 #else
393 # define ICONV_IN_CAST
394 #endif
395
396 const char *pIn; /* Input string pointer */
397 size_t in_len; /* Input length */
398 size_t in_left; /* Input length left */
399
400 char * out_buf; /* Output buffer */
401 size_t out_size; /* Size of the output buffer */
402 size_t out_left; /* Size left in output buffer */
403 char *pOut; /* Output string pointer */
404
405 in_str = sp[-2].u.str;
406 from_cs = sp[-1].u.str;
407 to_cs = sp->u.str;
408
409 pIn = get_txt(in_str);
410 in_len = mstrsize(in_str);
411 in_left = in_len;
412
413 /* If the input string is empty, we can return immediately
414 * (and in fact must since the allocator will balk at allocating 0 bytes)
415 */
416 if (!in_len)
417 {
418 // just free the 2nd and 3rd argument, return the first one unchanged.
419 free_string_svalue(sp--);
420 free_string_svalue(sp--);
421 return sp;
422 }
423
424 /* Allocate a temporary output string */
425 out_size = in_len > 65536 ? (in_len + 33) : (2 * in_len);
426 out_left = out_size;
427
428 xallocate(out_buf, out_size, "iconv buffer");
429 pOut = out_buf;
430
431 /* Open the iconv context */
432 context = iconv_open(get_txt(to_cs), get_txt(from_cs));
433 if (context == (iconv_t) -1)
434 {
435 xfree(out_buf);
436
437 if (errno == EINVAL)
438 errorf("convert_charset(): Conversion '%s' -> '%s' not supported.\n"
439 , get_txt(from_cs), get_txt(to_cs)
440 );
441 else
442 errorf("convert_charset(): Error %d.\n", errno);
443 /* NOTREACHED */
444 return sp;
445 }
446
447 /* Convert the string, reallocating the output buffer where necessary */
448 while (in_left)
449 {
450 size_t rc;
451
452 rc = iconv(context, ICONV_IN_CAST &pIn, &in_left, &pOut, &out_left);
453 if (rc == (size_t)-1)
454 {
455 if (errno == E2BIG)
456 {
457 /* Reallocate output buffer */
458 size_t newsize;
459 char * tmp;
460
461 newsize = out_size + (in_len > 128 ? in_len : 128);
462 tmp = rexalloc(out_buf, newsize);
463 if (!tmp)
464 {
465 iconv_close(context);
466 xfree(out_buf);
467 outofmem(newsize, "iconv buffer");
468 /* NOTREACHED */
469 return sp;
470 }
471 out_buf = tmp;
472 pOut = out_buf + out_size;
473 out_left = newsize - out_size;
474 out_size = newsize;
475
476 continue;
477 }
478
479 /* Other error: clean up */
480 iconv_close(context);
481 xfree(out_buf);
482
483 if (errno == EILSEQ)
484 {
485 errorf("convert_charset(): Invalid character sequence at "
486 "index %td\n",
487 (ptrdiff_t)(pIn - get_txt(in_str)));
488 /* NOTREACHED */
489 return sp;
490 }
491
492 if (errno == EINVAL)
493 {
494 errorf("convert_charset(): Incomplete character sequence at "
495 "index %td\n", (ptrdiff_t)(pIn - get_txt(in_str)));
496 /* NOTREACHED */
497 return sp;
498 }
499
500 errorf("convert_charset(): Error %d at index %td\n"
501 , errno, (ptrdiff_t)(pIn - get_txt(in_str))
502 );
503 /* NOTREACHED */
504 return sp;
505 } /* if (rc < 0) */
506 } /* while (in_left) */
507
508 /* While the actual conversion is complete, the output stream may now
509 * be in a non-base state. Add the necessary epilogue to get back
510 * to the base state.
511 */
512 while(1)
513 {
514 size_t rc;
515 rc = iconv(context, NULL, NULL, &pOut, &out_left);
516 if (rc == (size_t)-1)
517 {
518 if (errno == E2BIG)
519 {
520 /* Reallocate output buffer */
521 size_t newsize;
522 char * tmp;
523
524 newsize = out_size + (in_len > 128 ? in_len : 128);
525 tmp = rexalloc(out_buf, newsize);
526 if (!tmp)
527 {
528 iconv_close(context);
529 xfree(out_buf);
530 outofmem(newsize, "iconv buffer");
531 /* NOTREACHED */
532 return sp;
533 }
534 out_buf = tmp;
535 pOut = out_buf + out_size;
536 out_left = newsize - out_size;
537 out_size = newsize;
538
539 continue;
540 }
541
542 /* Other error: clean up */
543 iconv_close(context);
544 xfree(out_buf);
545
546 if (errno == EILSEQ)
547 {
548 errorf("convert_charset(): Invalid character sequence at "
549 "index %td\n", (ptrdiff_t)(pIn - get_txt(in_str)));
550 /* NOTREACHED */
551 return sp;
552 }
553
554 if (errno == EINVAL)
555 {
556 errorf("convert_charset(): Incomplete character sequence at "
557 "index %td\n", (ptrdiff_t)(pIn - get_txt(in_str)));
558 /* NOTREACHED */
559 return sp;
560 }
561
562 errorf("convert_charset(): Error %d at index %td\n"
563 , errno, (ptrdiff_t)(pIn - get_txt(in_str))
564 );
565 /* NOTREACHED */
566 return sp;
567 } /* if (rc < 0) */
568
569 /* At this point, the iconv() succeeded: we're done */
570 break;
571 } /* while(1) */
572
573 iconv_close(context);
574
575 /* Get the return string and prepare the return arguments */
576 out_str = new_n_mstring(out_buf, out_size - out_left);
577 xfree(out_buf);
578 if (!out_str)
579 {
580 outofmem(out_size - out_left, "convert_charset() result");
581 /* NOTREACHED */
582 return sp;
583 }
584
585 free_string_svalue(sp--);
586 free_string_svalue(sp--);
587 free_string_svalue(sp);
588 put_string(sp, out_str);
589
590 return sp;
591 } /* f_convert_charset() */
592
593 #endif /* HAS_ICONV */
594
595 /*--------------------------------------------------------------------*/
596 static char *
sort_string(const string_t * p_in,size_t len,long ** pos)597 sort_string (const string_t * p_in, size_t len, long ** pos)
598
599 /* Sort the characters of string <in> (with length <len>) by their numeric
600 * values and return a newly allocated memory block with the sorted string.
601 * If <pos> is not NULL, it will be set to a newly allocated memory block
602 * giving the original positions of the characters in the sorted string.
603 *
604 * We use Mergesort to sort the strings.
605 * TODO: Use Quicksort instead of Mergesort?
606 */
607
608 {
609 const char * in; /* Input string */
610 char * out; /* Result string */
611 long * outpos; /* Result position array */
612 char * tmp; /* Temporary string */
613 long * tmppos; /* Temporary position array */
614 size_t step;
615 size_t i, j;
616
617 in = get_txt((string_t *const)p_in);
618 out = xalloc(len+1);
619 tmp = xalloc(len+1);
620 if (!out || !tmp)
621 {
622 if (out)
623 xfree(out);
624 if (tmp)
625 xfree(tmp);
626 errorf("(sort_string) Out of memory (2 * %zu bytes) for temporaries.\n"
627 , len+1);
628 }
629 out[len] = '\0';
630 tmp[len] = '\0';
631
632 if (pos)
633 {
634 outpos = xalloc(len * sizeof(*outpos) + 1);
635 tmppos = xalloc(len * sizeof(*outpos) + 1);
636 /* +1 so that smalloc won't complain when given an empty string */
637 if (!outpos || !tmppos)
638 {
639 if (out)
640 xfree(out);
641 if (tmp)
642 xfree(tmp);
643 if (outpos)
644 xfree(outpos);
645 if (tmppos)
646 xfree(tmppos);
647 errorf("(sort_string) Out of memory (2 * %zu bytes) for positions.\n"
648 , len*sizeof(*outpos)+1);
649 }
650 }
651 else
652 {
653 outpos = NULL;
654 tmppos = NULL;
655 }
656
657 /* First Mergesort pass: comparison of adjacent characters
658 * and initialisation of the out arrays.
659 */
660 for (i = 0; i < len; i += 2)
661 {
662 if (i == len-1)
663 {
664 out[i] = in[i];
665 if (outpos)
666 outpos[i] = i;
667 }
668 else if (in[i] <= in[i+1])
669 {
670 out[i] = in[i];
671 out[i+1] = in[i+1];
672 if (outpos)
673 {
674 outpos[i] = i;
675 outpos[i+1] = i+1;
676 }
677 }
678 else /* (in[i] > in[i+1]) */
679 {
680 out[i] = in[i+1];
681 out[i+1] = in[i];
682 if (outpos)
683 {
684 outpos[i] = i+1;
685 outpos[i+1] = i;
686 }
687 }
688 } /* for(initial pass) */
689
690 /* Mergesort loop: perform the mergesort passes with increasing steps.
691 * Invariant: out is the (semi-sorted) data, tmp is the scratchspace.
692 */
693 for (step = 2; step < len; step *= 2)
694 {
695 size_t start, dest, left;
696
697 /* Exchange out and tmp */
698 {
699 char *tmp2;
700 long *tmp2pos;
701
702 tmp2 = out; out = tmp; tmp = tmp2;
703 if (outpos)
704 {
705 tmp2pos = outpos; outpos = tmppos; tmppos = tmp2pos;
706 }
707 }
708
709 for (start = 0, dest = 0; start <= len; start += 2*step)
710 {
711 for ( i = start, j = start+step, left = 2 * step
712 ; left && dest < len
713 ; left--, dest++
714 )
715 {
716 if (i >= start+step
717 || i >= len)
718 {
719 if (j < len)
720 {
721 out[dest] = tmp[j];
722 if (outpos)
723 outpos[dest] = tmppos[j];
724 j++;
725 }
726 }
727 else if (j >= start+2*step
728 || j >= len)
729 {
730 if (i < len)
731 {
732 out[dest] = tmp[i];
733 if (outpos)
734 outpos[dest] = tmppos[i];
735 i++;
736 }
737 }
738 else if (tmp[i] <= tmp[j])
739 {
740 out[dest] = tmp[i];
741 if (outpos)
742 outpos[dest] = tmppos[i];
743 i++;
744 }
745 else /* (tmp[i] > tmp[i+step]) */
746 {
747 out[dest] = tmp[j];
748 if (outpos)
749 outpos[dest] = tmppos[j];
750 j++;
751 }
752 } /* for (sort run) */
753 } /* for (start) */
754 } /* for(step) */
755
756 /* Free the temporary data */
757 if (tmppos)
758 xfree(tmppos);
759 xfree(tmp);
760
761 /* Return the result */
762 if (pos)
763 *pos = outpos;
764
765 return out;
766 } /* sort_string() */
767
768 /*--------------------------------------------------------------------*/
769 string_t *
intersect_strings(const string_t * p_left,const string_t * p_right,Bool bSubtract)770 intersect_strings (const string_t * p_left, const string_t * p_right, Bool bSubtract)
771
772 /* !bSubtract: Intersect string <left> with string <right> and return
773 * a newly allocated string with all those characters which are in
774 * both strings.
775 * bSubtract: Subtract string <right> from string <left> and return
776 * a newly allocated string with all those characters which are in
777 * <left> but not in <right>.
778 * The order of the characters returned is their order of appearance
779 * in <left>.
780 */
781
782 {
783 size_t len_left, len_right, len_out;
784 size_t ix_left, ix_right;
785 long * pos;
786 CBool * matches;
787 const char * left_txt;
788 char * left, * right, * result_txt;
789 string_t *result;
790
791 len_left = mstrsize(p_left);
792 len_right = mstrsize(p_right);
793
794 xallocate(matches, len_left+1, "intersection matches");
795 /* +1 so that smalloc won't complain when given an empty left string */
796
797 for (ix_left = 0; ix_left < len_left; ix_left++)
798 matches[ix_left] = bSubtract ? MY_TRUE : MY_FALSE;
799
800 /* Sort the two strings */
801 left = sort_string(p_left, len_left, &pos);
802 right = sort_string(p_right, len_right, NULL);
803
804 /* Intersect the two strings by mutual comparison.
805 * Each non-matched character in left gets is pos[] set to -1.
806 */
807 len_out = bSubtract ? len_left : 0;
808 for ( ix_left = 0, ix_right = 0
809 ; ix_left < len_left && ix_right < len_right
810 ; )
811 {
812 if (left[ix_left] < right[ix_right])
813 ix_left++;
814 else if (left[ix_left] > right[ix_right])
815 ix_right++;
816 else /* left[ix_left] == right[ix_right]) */
817 {
818 if (!bSubtract)
819 {
820 matches[pos[ix_left]] = MY_TRUE;
821 len_out++;
822 }
823 else
824 {
825 matches[pos[ix_left]] = MY_FALSE;
826 len_out--;
827 }
828 ix_left++;
829 }
830 }
831
832 /* Create the result: copy all flagged characters */
833 memsafe(result = alloc_mstring(len_out), len_out, "intersection result");
834 left_txt = get_txt((string_t *const)p_left);
835 result_txt = get_txt(result);
836 for (ix_left = 0, ix_right = 0; ix_left < len_left; ix_left++)
837 if (matches[ix_left])
838 result_txt[ix_right++] = left_txt[ix_left];
839
840 /* Free intermediate results */
841 xfree(pos);
842 xfree(matches);
843 xfree(left);
844 xfree(right);
845
846 return result;
847 } /* intersect_strings() */
848
849 /*-------------------------------------------------------------------------*/
850 svalue_t *
x_filter_string(svalue_t * sp,int num_arg)851 x_filter_string (svalue_t *sp, int num_arg)
852
853 /* EFUN: filter() for strings.
854 *
855 * string filter(string arr, string fun, string|object obj, mixed extra, ...)
856 * string filter(string arr, closure cl, mixed extra, ...)
857 * string filter(string arr, mapping map)
858 *
859 * Filter the elements of <arr> through a filter defined by the other
860 * arguments, and return an array of those elements, for which the
861 * filter yields non-zero.
862 *
863 * The filter can be a function call:
864 *
865 * <obj>-><fun>(elem, <extra>...)
866 *
867 * or a mapping query:
868 *
869 * <map>[elem]
870 *
871 * <obj> can both be an object reference or a filename. If omitted,
872 * this_object() is used (this also works if the third argument is
873 * neither a string nor an object).
874 */
875
876 {
877 string_t *rc; /* Result string */
878 string_t *str; /* Argument string */
879 svalue_t *arg; /* First argument the vm stack */
880 mp_int slen; /* Argument string length */
881 char *src, *dest; /* String text work pointers */
882
883 char *flags; /* Flag array, one flag for each element of <str>
884 * (in reverse order). */
885 mp_int res; /* Number of surviving elements */
886
887 res = 0;
888
889 /* Locate the args on the stack, extract the string to filter
890 * and allocate the flags vector.
891 */
892 arg = sp - num_arg + 1;
893
894 str = arg->u.str;
895 slen = (mp_int)mstrsize(str);
896
897 /* Every element in flags is associated by index number with an
898 * element in the vector to filter. The filter function is evaluated
899 * for every string character, and the associated flag is set to 0
900 * or 1 according to the result.
901 * At the end, all 1-flagged elements are gathered and copied
902 * into the result string.
903 */
904
905 if (arg[1].type == T_MAPPING)
906 {
907 mp_int cnt;
908
909 /* --- Filter by mapping query --- */
910 mapping_t *m;
911
912 if (num_arg > 2) {
913 errorf("Too many arguments to filter(array)\n");
914 }
915 /* Allocate memory for the flag array. Simultaneously an error
916 * handler is pushed onto the stack (after the arguments) for freeing
917 * the buffer in case of runtime errors. */
918 flags = xalloc_with_error_handler((size_t)slen + 1);
919 if (!flags)
920 {
921 errorf("Out of memory (%zu bytes) for temporary buffer in filter().\n",
922 (size_t)slen + 1);
923 }
924 sp = inter_sp;
925
926 m = arg[1].u.map;
927
928 for (src = get_txt(str), cnt = slen; --cnt >= 0; src++)
929 {
930 svalue_t key;
931
932 put_number(&key, *src);
933 if (get_map_value(m, &key) == &const0)
934 {
935 flags[cnt] = 0;
936 continue;
937 }
938 flags[cnt] = 1;
939 res++;
940 }
941
942 } else {
943
944 /* --- Filter by function call --- */
945
946 int error_index;
947 callback_t cb;
948 mp_int cnt;
949
950 assign_eval_cost();
951
952 /* setup_efun_callback() will adopt and therefore remove the
953 * arguments from arg+1 on to arg+num_arg from the stack and update
954 * inter_sp. New top-of-stack will be arg. */
955 error_index = setup_efun_callback(&cb, arg+1, num_arg-1);
956 if (error_index >= 0)
957 {
958 vefun_bad_arg(error_index+2, arg);
959 /* NOTREACHED */
960 return arg;
961 }
962 /* push the callback structure onto the stack. */
963 sp = arg + 1;
964 put_callback(sp, &cb);
965
966 /* Allocate memory for the flag array. Simultaneously an error
967 * handler is pushed onto the stack (after the arguments) for freeing
968 * the buffer in case of runtime errors. */
969 inter_sp = sp;
970 flags = xalloc_with_error_handler((size_t)slen + 1);
971 if (!flags)
972 {
973 errorf("Out of memory (%"PRIdMPINT" bytes) for temporary buffer "
974 "in filter().\n", slen + 1);
975 }
976 sp = inter_sp;
977
978 /* Loop over all elements in p and call the filter.
979 * w is the current element filtered.
980 */
981 for (src = get_txt(str), cnt = slen; --cnt >= 0; src++)
982 {
983 svalue_t *v;
984
985 flags[cnt] = 0;
986
987 if (current_object->flags & O_DESTRUCTED)
988 continue;
989 /* Don't call the filter anymore, but fill the
990 * flags array with 0es.
991 */
992
993 if (!callback_object(&cb))
994 {
995 inter_sp = sp;
996 errorf("object used by filter(array) destructed");
997 }
998
999 push_number(inter_sp, *src);
1000
1001 v = apply_callback(&cb, 1);
1002 if (!v || (v->type == T_NUMBER && !v->u.number) )
1003 continue;
1004
1005 flags[cnt] = 1;
1006 res++;
1007 }
1008 }
1009
1010 /* flags[] holds the filter results, res is the number of
1011 * elements to keep. Now create the result vector.
1012 */
1013 rc = alloc_mstring(res);
1014 if (!rc)
1015 {
1016 errorf("Out of memory (%"PRIdMPINT" bytes) for result in filter().\n",
1017 slen+1);
1018 }
1019
1020 for (src = get_txt(str), dest = get_txt(rc), flags = &flags[slen]
1021 ; res > 0 ; src++)
1022 {
1023 if (*--flags)
1024 {
1025 *dest++ = *src;
1026 res--;
1027 }
1028 }
1029
1030 /* Cleanup. Arguments for the closure have already been removed. On the
1031 * stack are now the string, the mapping or callback structure and the
1032 * error handler. (Not using pop_n_elems() for 2 elements for saving loop
1033 * and function call overhead.) */
1034 free_svalue(sp--); /* errorhandler, buffer and flags are freed by this. */
1035 free_svalue(sp--); /* mapping or callback structure. */
1036 free_mstring(str); /* string, at arg == sp */
1037 sp->u.str = rc; /* put result here */
1038
1039 return sp;
1040 } /* x_filter_string() */
1041
1042 /*-------------------------------------------------------------------------*/
1043 svalue_t *
x_map_string(svalue_t * sp,int num_arg)1044 x_map_string (svalue_t *sp, int num_arg)
1045
1046 /* EFUN map() for strings
1047 *
1048 * string map(string arg, string func, string|object ob, mixed extra...)
1049 * string map(string arg, closure cl, mixed extra...)
1050 * string map(string arg, mapping m [, int idx])
1051 *
1052 * Call the function <ob>-><func>() resp. the closure <cl> for
1053 * every element of the array/struct/mapping/string <arg>, and return a result
1054 * made up from the returned values.
1055 *
1056 * It is also possible to map every entry through a lookup <m>[element[,idx]].
1057 * If the mapping entry doesn't exist, the original value is kept, otherwise
1058 * the result of the mapping lookup.
1059 * [Note: argument type and range checking for idx is done in v_map()]
1060 *
1061 * Since <arg> is a string, only integer return values are allowed, of which
1062 * only the lower 8 bits are considered.
1063 *
1064 * If <ob> is omitted, or neither an object nor a string, then
1065 * this_object() is used.
1066 */
1067
1068 {
1069 string_t *res;
1070 string_t *str;
1071 svalue_t *arg;
1072 mp_int len;
1073 char *src, *dest;
1074
1075 inter_sp = sp;
1076
1077 arg = sp - num_arg + 1;
1078
1079 str = arg->u.str;
1080 len = mstrsize(str);
1081
1082 if (arg[1].type == T_MAPPING)
1083 {
1084 /* --- Map through mapping --- */
1085
1086 mapping_t *m;
1087 p_int column = 0; /* mapping column to use */
1088
1089 m = arg[1].u.map;
1090
1091 if (num_arg > 2)
1092 column = arg[2].u.number;
1093
1094 res = alloc_mstring(len);
1095 if (!res)
1096 errorf("(map_string) Out of memory: string[%"PRIdMPINT
1097 "] for result\n", len);
1098
1099 push_string(inter_sp, res); /* In case of errors */
1100
1101 for (src = get_txt(str), dest = get_txt(res); --len >= 0; src++, dest++)
1102 {
1103 svalue_t key, *v;
1104
1105 put_number(&key, *src);
1106 v = get_map_value(m, &key);
1107 if (v == &const0)
1108 *dest = *src;
1109 else
1110 {
1111 if (v[column].type != T_NUMBER)
1112 {
1113 errorf("(map_string) Illegal value type: %s, expected int\n"
1114 , typename(v[column].type)
1115 );
1116 }
1117 *dest = (v[column].u.number & 0xFF);
1118 }
1119 }
1120
1121 if (num_arg > 2)
1122 free_svalue(arg+2);
1123 free_svalue(arg+1); /* the mapping */
1124 sp = arg;
1125 }
1126 else
1127 {
1128 /* --- Map through function call --- */
1129
1130 callback_t cb;
1131 int error_index;
1132
1133 error_index = setup_efun_callback(&cb, arg+1, num_arg-1);
1134 if (error_index >= 0)
1135 {
1136 vefun_bad_arg(error_index+2, arg);
1137 /* NOTREACHED */
1138 return arg;
1139 }
1140 inter_sp = sp = arg+1;
1141 put_callback(sp, &cb);
1142 num_arg = 2;
1143
1144 res = alloc_mstring(len);
1145 if (!res)
1146 errorf("(map_string) Out of memory: string[%"PRIdMPINT
1147 "] for result\n", len);
1148
1149 push_string(inter_sp, res); /* In case of errors */
1150
1151 for (src = get_txt(str), dest = get_txt(res); --len >= 0; src++, dest++)
1152 {
1153 svalue_t *v;
1154
1155 if (current_object->flags & O_DESTRUCTED)
1156 continue;
1157
1158 if (!callback_object(&cb))
1159 errorf("object used by map(string) destructed");
1160
1161 push_number(inter_sp, *src);
1162
1163 v = apply_callback(&cb, 1);
1164
1165 if (v)
1166 {
1167 if (v->type != T_NUMBER)
1168 {
1169 errorf("(map_string) Illegal value: %s, expected string\n"
1170 , typename(v->type)
1171 );
1172 }
1173 *dest = (v->u.number & 0xFF);
1174 }
1175 }
1176
1177 free_callback(&cb);
1178 }
1179
1180 /* The arguments have been removed already, now just replace
1181 * the arr on the stack with the result.
1182 */
1183 free_mstring(str);
1184 arg->u.str = res; /* Keep svalue type: T_POINTER */
1185
1186 return arg;
1187 } /* x_map_string () */
1188
1189 /*====================================================================*/
1190
1191