1 
2 /**
3  * @file expString.c
4  *
5  *  This module implements expression functions that
6  *  manipulate string values.
7  *
8  * @addtogroup autogen
9  * @{
10  */
11 /*
12  *  This file is part of AutoGen.
13  *  AutoGen Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
14  *
15  * AutoGen is free software: you can redistribute it and/or modify it
16  * under the terms of the GNU General Public License as published by the
17  * Free Software Foundation, either version 3 of the License, or
18  * (at your option) any later version.
19  *
20  * AutoGen is distributed in the hope that it will be useful, but
21  * WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
23  * See the GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License along
26  * with this program.  If not, see <http://www.gnu.org/licenses/>.
27  */
28 
29 static size_t
stringify_for_sh(char * pzNew,uint_t qt,char const * pzDta)30 stringify_for_sh(char * pzNew, uint_t qt, char const * pzDta)
31 {
32     char * pz = pzNew;
33     *(pz++) = (char)qt;
34 
35     for (;;) {
36         char c = *(pz++) = *(pzDta++);
37         switch (c) {
38         case NUL:
39             pz[-1]  = (char)qt;
40             *pz     = NUL;
41 
42             return (size_t)(pz - pzNew);
43 
44         case '\\':
45             /*
46              *  If someone went to the trouble to escape a backquote or a
47              *  dollar sign, then we should not neutralize it.  Note that
48              *  we handle a following backslash as a normal character.
49              *
50              *  i.e.  \\ --> \\\\ *BUT* \\$ --> \\\$
51              */
52             c = *pzDta;
53             switch (*pzDta) {
54             case '$':
55                 break;
56 
57             case '"':
58             case '`':
59                 /*
60                  *  IF the ensuing quote character does *NOT* match the
61                  *  quote character for the string, then we will preserve
62                  *  the single copy of the backslash.  If it does match,
63                  *  then we will double the backslash and a third backslash
64                  *  will be inserted when we emit the quote character.
65                  */
66                 if ((unsigned)c != qt)
67                     break;
68                 /* FALLTHROUGH */
69 
70             default:
71                 *(pz++) = '\\';   /* \   -->  \\    */
72             }
73             break;
74 
75         case '"': case '`':
76             if ((unsigned)c == qt) {
77                 /*
78                  *  This routine does both `xx` and "xx" strings, we have
79                  *  to worry about this stuff differently.  I.e., in ""
80                  *  strings, add a single \ in front of ", and in ``
81                  *  preserve a add \ in front of `.
82                  */
83                 pz[-1]  = '\\';       /* "   -->   \"   */
84                 *(pz++) = c;
85             }
86         }
87     }
88 }
89 
90 static SCM
shell_stringify(SCM obj,uint_t qt)91 shell_stringify(SCM obj, uint_t qt)
92 {
93     char * pzNew;
94     size_t dtaSize = 3;
95     char * pzDta   = ag_scm2zchars(obj, "AG Object");
96     char * pz      = pzDta;
97 
98     for (;;) {
99         char c = *(pz++);
100 
101         switch (c) {
102         case NUL:
103             goto loopDone1;
104 
105         case '"': case '`': case '\\':
106             dtaSize += 2;
107             break;
108 
109         default:
110             dtaSize++;
111         }
112     } loopDone1:;
113 
114     pzNew = AGALOC(dtaSize, "shell string");
115     dtaSize = stringify_for_sh(pzNew, qt, pzDta);
116 
117     {
118         SCM res = scm_from_latin1_stringn(pzNew, dtaSize);
119         AGFREE(pzNew);
120         return res;
121     }
122 }
123 
124 static int
sub_count(char const * haystack,char const * needle)125 sub_count(char const * haystack, char const * needle)
126 {
127     int repCt = 0;
128     size_t needle_len = strlen(needle);
129 
130     for (;;) {
131         haystack = strstr(haystack, needle);
132         if (haystack == NULL) break;
133         repCt++;
134         haystack += needle_len;
135     }
136     return repCt;
137 }
138 
139 /**
140  *  Replace marker text.
141  *
142  *  Replace all occurrances of the marker text with the substitution text.
143  *  The result is stored in an automatically freed temporary buffer.
144  *
145  *  @param src_str  The source string
146  *  @param str_len  The length of the string
147  *  @param match    the SCM-ized marker string
148  *  @param repl     the SCM-ized replacement string
149  *  @param ppz_res  pointer to the result pointer
150  *  @param res_len  pointer to result length
151  */
152 static void
do_substitution(char const * src_str,ssize_t str_len,SCM match,SCM repl,char ** ppz_res,ssize_t * res_len)153 do_substitution(
154     char const * src_str,
155     ssize_t      str_len,
156     SCM          match,
157     SCM          repl,
158     char **      ppz_res,
159     ssize_t *    res_len)
160 {
161     char * pzMatch  = ag_scm2zchars(match, "match text");
162     char * rep_str  = ag_scm2zchars(repl,  "repl text");
163     int    mark_len = (int)scm_c_string_length(match);
164     int    repl_len = (int)scm_c_string_length(repl);
165 
166     {
167         int ct = sub_count(src_str, pzMatch);
168         if (ct == 0)
169             return; /* No substitutions -- no work. */
170 
171         str_len += (repl_len - mark_len) * ct;
172     }
173 
174     {
175         char * dest = scribble_get(str_len + 1);
176         *ppz_res = dest;
177         *res_len = str_len;
178 
179         for (;;) {
180             char const * next = strstr(src_str, pzMatch);
181             size_t len;
182 
183             if (next == NULL)
184                 break;
185             len = (size_t)(next - src_str);
186             if (len != 0) {
187                 memcpy(dest, src_str, len);
188                 dest += len;
189             }
190             memcpy(dest, rep_str, (size_t)repl_len);
191             dest   += repl_len;
192             src_str = next + mark_len;
193         }
194 
195         strcpy(dest, src_str);
196     }
197 }
198 
199 /**
200  *  Recursive routine.  It calls itself for list values and calls
201  *  "do_substitution" for string values.  Each substitution will
202  *  be done in the order found in the tree walk of list values.
203  *  The "match" and "repl" trees *must* be identical in structure.
204  */
205 static void
do_multi_subs(char ** ppzStr,ssize_t * pStrLen,SCM match,SCM repl)206 do_multi_subs(char ** ppzStr, ssize_t * pStrLen, SCM match, SCM repl)
207 {
208     char * pzStr = *ppzStr;
209     char * pzNxt = pzStr;
210 
211     /*
212      *  Loop for as long as our list has more entries
213      */
214     while (! scm_is_null(match)) {
215         /*
216          *  "CAR" is the current value, "CDR" is rest of list
217          */
218         SCM  matchCar  = SCM_CAR(match);
219         SCM  replCar   = SCM_CAR(repl);
220 
221         match = SCM_CDR(match);
222         repl  = SCM_CDR(repl);
223 
224         if (scm_is_string(matchCar)) {
225             do_substitution(pzStr, *pStrLen, matchCar, replCar,
226                             &pzNxt, pStrLen);
227 
228             // coverity[use_after_free] -- invalid alias analysis
229             pzStr = pzNxt;
230         }
231 
232         else if (AG_SCM_LIST_P(matchCar))
233             do_multi_subs(&pzStr, pStrLen, matchCar, replCar);
234 
235         else
236             /*
237              *  Whatever it is it is not part of what we would expect.  Bail.
238              */
239             break;
240     }
241 
242     *ppzStr = pzStr;
243 }
244 
245 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
246  *  EXPRESSION EVALUATION ROUTINES
247  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
248 
249 /*=gfunc mk_gettextable
250  *
251  * what:   print a string in a gettext-able format
252  * exparg: string, a multi-paragraph string
253  *
254  * doc: Returns SCM_UNDEFINED.  The input text string is printed
255  *      to the current output as one puts() call per paragraph.
256 =*/
257 SCM
ag_scm_mk_gettextable(SCM txt)258 ag_scm_mk_gettextable(SCM txt)
259 {
260     if (scm_is_string(txt)) {
261         char const * pz = ag_scm2zchars(txt, "txt");
262         optionPrintParagraphs(pz, false, cur_fpstack->stk_fp);
263     }
264     return SCM_UNDEFINED;
265 }
266 
267 /*=gfunc in_p
268  *
269  * what:   test for string in list
270  * general_use:
271  * exparg: test-string, string to look for
272  * exparg: string-list, list of strings to check,, list
273  *
274  * doc:  Return SCM_BOOL_T if the first argument string is found
275  *      in one of the entries in the second (list-of-strings) argument.
276 =*/
277 SCM
ag_scm_in_p(SCM obj,SCM list)278 ag_scm_in_p(SCM obj, SCM list)
279 {
280     int     len;
281     size_t  lenz;
282     SCM     car;
283     char const * pz1;
284 
285     if (! scm_is_string(obj))
286         return SCM_UNDEFINED;
287 
288     pz1  = scm_i_string_chars(obj);
289     lenz = scm_c_string_length(obj);
290 
291     /*
292      *  If the second argument is a string somehow, then treat
293      *  this as a straight out string comparison
294      */
295     if (scm_is_string(list)) {
296         if (  (scm_c_string_length(list) == lenz)
297            && (strncmp(pz1, scm_i_string_chars(list), lenz) == 0))
298             return SCM_BOOL_T;
299         return SCM_BOOL_F;
300     }
301 
302     len = (int)scm_ilength(list);
303     if (len == 0)
304         return SCM_BOOL_F;
305 
306     /*
307      *  Search all the lists and sub-lists passed in
308      */
309     while (len-- > 0) {
310         car  = SCM_CAR(list);
311         list = SCM_CDR(list);
312 
313         /*
314          *  This routine is listed as getting a list as the second
315          *  argument.  That means that if someone builds a list and
316          *  hands it to us, it magically becomes a nested list.
317          *  This unravels that.
318          */
319         if (! scm_is_string(car)) {
320             if (ag_scm_in_p(obj, car) == SCM_BOOL_T)
321                 return SCM_BOOL_T;
322             continue;
323         }
324 
325         if (  (scm_c_string_length(car) == lenz)
326            && (strncmp(pz1, scm_i_string_chars(car), lenz) == 0) )
327             return SCM_BOOL_T;
328     }
329 
330     return SCM_BOOL_F;
331 }
332 
333 
334 /*=gfunc join
335  *
336  * what:   join string list with separator
337  * general_use:
338  * exparg: separator, string to insert between entries
339  * exparg: list, list of strings to join,, list
340  *
341  * doc:  With the first argument as the separator string,
342  *       joins together an a-list of strings into one long string.
343  *       The list may contain nested lists, partly because you
344  *       cannot always control that.
345 =*/
346 SCM
ag_scm_join(SCM sep,SCM list)347 ag_scm_join(SCM sep, SCM list)
348 {
349     int      l_len, sv_l_len;
350     SCM      car;
351     SCM      alist = list;
352     size_t   sep_len;
353     size_t   str_len;
354     char *   pzRes;
355     char const * pzSep;
356     char *   pzScan;
357 
358     if (! scm_is_string(sep))
359         return SCM_UNDEFINED;
360 
361     sv_l_len = l_len = (int)scm_ilength(list);
362     if (l_len == 0)
363         return scm_from_latin1_string(zNil);
364 
365     pzSep   = scm_i_string_chars(sep);
366     sep_len = scm_c_string_length(sep);
367     str_len = 0;
368 
369     /*
370      *  Count up the lengths of all the strings to be joined.
371      */
372     for (;;) {
373         car  = SCM_CAR(list);
374         list = SCM_CDR(list);
375 
376         /*
377          *  This routine is listed as getting a list as the second
378          *  argument.  That means that if someone builds a list and
379          *  hands it to us, it magically becomes a nested list.
380          *  This unravels that.
381          */
382         if (! scm_is_string(car)) {
383             if (car != SCM_UNDEFINED)
384                 car = ag_scm_join(sep, car);
385             if (! scm_is_string(car))
386                 return SCM_UNDEFINED;
387         }
388 
389         str_len += scm_c_string_length(car);
390 
391         if (--l_len <= 0)
392             break;
393 
394         str_len += sep_len;
395     }
396 
397     l_len = sv_l_len;
398     pzRes = pzScan = scribble_get((ssize_t)str_len);
399 
400     /*
401      *  Now, copy each one into the output
402      */
403     for (;;) {
404         size_t cpy_len;
405 
406         car   = SCM_CAR(alist);
407         alist = SCM_CDR(alist);
408 
409         /*
410          *  This unravels nested lists.
411          */
412         if (! scm_is_string(car))
413             car = ag_scm_join(sep, car);
414 
415         cpy_len = scm_c_string_length(car);
416         memcpy(VOIDP(pzScan), scm_i_string_chars(car), cpy_len);
417         pzScan += cpy_len;
418 
419         /*
420          *  IF we reach zero, then do not insert a separation and bail out
421          */
422         if (--l_len <= 0)
423             break;
424         memcpy(VOIDP(pzScan), VOIDP(pzSep), sep_len);
425         pzScan += sep_len;
426     }
427 
428     return scm_from_latin1_stringn(pzRes, str_len);
429 }
430 
431 
432 /*=gfunc prefix
433  *
434  * what:  prefix lines with a string
435  * general_use:
436  *
437  * exparg: prefix, string to insert at start of each line
438  * exparg: text, multi-line block of text
439  *
440  * doc:
441  *  Prefix every line in the second string with the first string.
442  *  This includes empty lines.  Trailing white space will be removed
443  *  so if the prefix is all horizontal white space, then it will be
444  *  removed from otherwise blank lines.  Also, if the last character
445  *  is a newline, then *two* prefixes will be inserted into the result
446  *  text.
447  *
448  *  For example, if the first string is "# " and the second contains:
449  *  @example
450  *  "two\nlines\n"
451  *  @end example
452  *  @noindent
453  *  The result string will contain:
454  *  @example
455  *  # two
456  *  # lines
457  *  #
458  *  @end example
459  *
460  *  The last line will be incomplete:  no newline and no space after the
461  *  hash character, either.
462 =*/
463 SCM
ag_scm_prefix(SCM prefx,SCM txt)464 ag_scm_prefix(SCM prefx, SCM txt)
465 {
466     char *   prefix   = ag_scm2zchars(prefx, "pfx");
467     char *   text     = ag_scm2zchars(txt,   "txt");
468     char *   scan     = text;
469     size_t   pfx_size = strlen(prefix);
470     char *   r_str;   /* result string */
471 
472     {
473         size_t out_size = pfx_size + 1; // NUL or NL byte adjustment
474         for (;;) {
475             switch (*(scan++)) {
476             case NUL:
477                 out_size += scan - text;
478                 goto exit_count;
479             case NL:
480                 out_size += pfx_size;
481             }
482         } exit_count:;
483 
484         r_str = scan = scribble_get((ssize_t)out_size);
485     }
486 
487     /*
488      * If the text starts with a newline, then do not apply
489      */
490     memcpy(scan, prefix, pfx_size);
491     scan += pfx_size;
492     pfx_size++;
493 
494     for (;;) {
495         char ch = *(text++);
496         switch (ch) {
497         case NUL:
498             /*
499              * Trim trailing white space on the final line.
500              */
501             scan = SPN_HORIZ_WHITE_BACK(r_str, scan);
502             return scm_from_latin1_stringn(r_str, scan - r_str);
503 
504         case NL:
505             /*
506              * Trim trailing white space on previous line first.
507              */
508             scan  = SPN_HORIZ_WHITE_BACK(r_str, scan);
509             *scan = NL;
510             memcpy(scan+1, prefix, pfx_size - 1);
511             scan += pfx_size;  // prefix length plus 1 for new line
512             break;
513 
514         default:
515             *(scan++) = ch;
516             break;
517         }
518     }
519 }
520 
521 /*=gfunc raw_shell_str
522  *
523  * what:  single quote shell string
524  * general_use:
525  *
526  * exparg: string, string to transform
527  *
528  * doc:
529  *  Convert the text of the string into a singly quoted string
530  *  that a normal shell will process into the original string.
531  *  (It will not do macro expansion later, either.)
532  *  Contained single quotes become tripled, with the middle quote
533  *  escaped with a backslash.  Normal shells will reconstitute the
534  *  original string.
535  *
536  *  @strong{Notice}:  some shells will not correctly handle unusual
537  *  non-printing characters.  This routine works for most reasonably
538  *  conventional ASCII strings.
539 =*/
540 SCM
ag_scm_raw_shell_str(SCM obj)541 ag_scm_raw_shell_str(SCM obj)
542 {
543     char * data;
544     char * pz;
545     char * pzFree;
546 
547     data = ag_scm2zchars(obj, "AG Object");
548 
549     {
550         size_t dtaSize = scm_c_string_length(obj) + 3; /* NUL + 2 quotes */
551         pz = data-1;
552         for (;;) {
553             pz = strchr(pz+1, '\'');
554             if (pz == NULL)
555                 break;
556             dtaSize += 3; /* '\'' -> 3 additional chars */
557         }
558 
559         pzFree = pz = AGALOC(dtaSize + 2, "raw string");
560     }
561 
562     /*
563      *  Handle leading single quotes before starting the first quote.
564      */
565     while (*data == '\'') {
566         *(pz++) = '\\';
567         *(pz++) = '\'';
568 
569         /*
570          *  IF pure single quotes, then we're done.
571          */
572         if (*++data == NUL) {
573             *pz = NUL;
574             goto returnString;
575         }
576     }
577 
578     /*
579      *  Start quoting.  If the string is empty, we wind up with two quotes.
580      */
581     *(pz++) = '\'';
582 
583     for (;;) {
584         switch (*(pz++) = *(data++)) {
585         case NUL:
586             goto loopDone;
587 
588         case '\'':
589             /*
590              *  We've inserted a single quote, which ends the quoting session.
591              *  Now, insert escaped quotes for every quote char we find, then
592              *  restart the quoting.
593              */
594             data--;
595             do {
596                 *(pz++) = '\\';
597                 *(pz++) = '\'';
598             } while (*++data == '\'');
599             if (*data == NUL) {
600                 *pz = NUL;
601                 goto returnString;
602             }
603             *(pz++) = '\'';
604         }
605     } loopDone:;
606     pz[-1] = '\'';
607     *pz    = NUL;
608 
609  returnString:
610     {
611         SCM res = scm_from_latin1_string(pzFree);
612         AGFREE(pzFree);
613         return res;
614     }
615 }
616 
617 
618 /*=gfunc shell_str
619  *
620  * what:  double quote shell string
621  * general_use:
622  *
623  * exparg: string, string to transform
624  *
625  * doc:
626  *
627  *  Convert the text of the string into a double quoted string that a normal
628  *  shell will process into the original string, almost.  It will add the
629  *  escape character @code{\\} before two special characters to
630  *  accomplish this: the backslash @code{\\} and double quote @code{"}.
631  *
632  *  @strong{Notice}: some shells will not correctly handle unusual
633  *  non-printing characters.  This routine works for most reasonably
634  *  conventional ASCII strings.
635  *
636  *  @strong{WARNING}:
637  *@*
638  *  This function omits the extra backslash in front of a backslash, however,
639  *  if it is followed by either a backquote or a dollar sign.  It must do this
640  *  because otherwise it would be impossible to protect the dollar sign or
641  *  backquote from shell evaluation.  Consequently, it is not possible to
642  *  render the strings "\\$" or "\\`".  The lesser of two evils.
643  *
644  *  All others characters are copied directly into the output.
645  *
646  *  The @code{sub-shell-str} variation of this routine behaves identically,
647  *  except that the extra backslash is omitted in front of @code{"} instead
648  *  of @code{`}.  You have to think about it.  I'm open to suggestions.
649  *
650  *  Meanwhile, the best way to document is with a detailed output example.
651  *  If the backslashes make it through the text processing correctly,
652  *  below you will see what happens with three example strings.  The first
653  *  example string contains a list of quoted @code{foo}s, the second is
654  *  the same with a single backslash before the quote characters and the
655  *  last is with two backslash escapes.  Below each is the result of the
656  *  @code{raw-shell-str}, @code{shell-str} and @code{sub-shell-str} functions.
657  *
658  *  @example
659  *  foo[0]           ''foo'' 'foo' "foo" `foo` $foo
660  *  raw-shell-str -> \'\''foo'\'\'' '\''foo'\'' "foo" `foo` $foo'
661  *  shell-str     -> "''foo'' 'foo' \"foo\" `foo` $foo"
662  *  sub-shell-str -> `''foo'' 'foo' "foo" \`foo\` $foo`
663  *
664  *  foo[1]           \'bar\' \"bar\" \`bar\` \$bar
665  *  raw-shell-str -> '\'\''bar\'\'' \"bar\" \`bar\` \$bar'
666  *  shell-str     -> "\\'bar\\' \\\"bar\\\" \`bar\` \$bar"
667  *  sub-shell-str -> `\\'bar\\' \"bar\" \\\`bar\\\` \$bar`
668  *
669  *  foo[2]           \\'BAZ\\' \\"BAZ\\" \\`BAZ\\` \\$BAZ
670  *  raw-shell-str -> '\\'\''BAZ\\'\'' \\"BAZ\\" \\`BAZ\\` \\$BAZ'
671  *  shell-str     -> "\\\\'BAZ\\\\' \\\\\"BAZ\\\\\" \\\`BAZ\\\` \\\$BAZ"
672  *  sub-shell-str -> `\\\\'BAZ\\\\' \\\"BAZ\\\" \\\\\`BAZ\\\\\` \\\$BAZ`
673  *  @end example
674  *
675  *  There should be four, three, five and three backslashes for the four
676  *  examples on the last line, respectively.  The next to last line should
677  *  have four, five, three and three backslashes.  If this was not accurately
678  *  reproduced, take a look at the agen5/test/shell.test test.  Notice the
679  *  backslashes in front of the dollar signs.  It goes from zero to one to
680  *  three for the "cooked" string examples.
681 =*/
682 SCM
ag_scm_shell_str(SCM obj)683 ag_scm_shell_str(SCM obj)
684 {
685     return shell_stringify(obj, (unsigned char)'"');
686 }
687 
688 /*=gfunc sub_shell_str
689  *
690  * what:  back quoted (sub-)shell string
691  * general_use:
692  *
693  * exparg: string, string to transform
694  *
695  * doc:
696  *   This function is substantially identical to @code{shell-str}, except
697  *   that the quoting character is @code{`} and the "leave the escape alone"
698  *   character is @code{"}.
699 =*/
700 SCM
ag_scm_sub_shell_str(SCM obj)701 ag_scm_sub_shell_str(SCM obj)
702 {
703     return shell_stringify(obj, (unsigned char)'`');
704 }
705 
706 
707 /*=gfunc stack
708  *
709  * what:  make list of AutoGen values
710  *
711  * exparg: ag-name, AutoGen value name
712  *
713  * doc:  Create a scheme list of all the strings that are associated
714  *       with a name.  They must all be text values or we choke.
715 =*/
716 SCM
ag_scm_stack(SCM obj)717 ag_scm_stack(SCM obj)
718 {
719     SCM          res;
720     SCM *        pos = &res;
721     def_ent_t ** ppDE;
722     def_ent_t *  pDE;
723     SCM          str;
724 
725     res = SCM_EOL;
726 
727     ppDE = find_def_ent_list(ag_scm2zchars(obj, "AG Object"));
728     if (ppDE == NULL)
729         return SCM_EOL;
730 
731     for (;;) {
732         pDE = *(ppDE++);
733 
734         if (pDE == NULL)
735             break;
736 
737         if (pDE->de_type != VALTYP_TEXT)
738             return SCM_UNDEFINED;
739 
740         str  = scm_from_latin1_string(pDE->de_val.dvu_text);
741         *pos = scm_cons(str, SCM_EOL);
742         pos  = SCM_CDRLOC(*pos);
743     }
744 
745     return res;
746 }
747 
748 
749 /*=gfunc kr_string
750  *
751  * what:  emit string for K&R C
752  * general_use:
753  *
754  * exparg: string, string to reformat
755  *
756  *  doc:
757  *  Reform a string so that, when printed, a K&R C compiler will be able
758  *  to compile the data and construct a string that contains exactly
759  *  what the current string contains.  Many non-printing characters are
760  *  replaced with escape sequences.  New-lines are replaced with a
761  *  backslash-n-backslash and newline sequence,
762 =*/
763 SCM
ag_scm_kr_string(SCM str)764 ag_scm_kr_string(SCM str)
765 {
766     char const * pz = ag_scm2zchars(str, "krstr");
767     SCM res;
768     pz  = optionQuoteString(pz, KR_STRING_NEWLINE);
769     res = scm_from_latin1_string(pz);
770     AGFREE(pz);
771     return res;
772 }
773 
774 
775 /*=gfunc c_string
776  *
777  * what:  emit string for ANSI C
778  * general_use:
779  *
780  * exparg: string, string to reformat
781  *
782  * doc:
783  *  Reform a string so that, when printed, the C compiler will be able to
784  *  compile the data and construct a string that contains exactly what the
785  *  current string contains.  Many non-printing characters are replaced with
786  *  escape sequences.  Newlines are replaced with a backslash, an @code{n}, a
787  *  closing quote, a newline, seven spaces and another re-opening quote.  The
788  *  compiler will implicitly concatenate them.  The reader will see line
789  *  breaks.
790  *
791  *  A K&R compiler will choke.  Use @code{kr-string} for that compiler.
792  *
793 =*/
794 SCM
ag_scm_c_string(SCM str)795 ag_scm_c_string(SCM str)
796 {
797     char const * pz = ag_scm2zchars(str, "cstr");
798     SCM res;
799     pz  = optionQuoteString(pz, C_STRING_NEWLINE);
800     res = scm_from_latin1_string(pz);
801     AGFREE(pz);
802     return res;
803 }
804 
805 /**
806  * Map a character range for ag_scm_string_tr_x()
807  */
808 static inline void
tr_char_range(unsigned char * ch_map,unsigned char * from,unsigned char * to)809 tr_char_range(unsigned char * ch_map, unsigned char * from, unsigned char * to)
810 {
811     unsigned char fs = (unsigned char)from[-2]; // "from" start char
812     unsigned char fe = (unsigned char)from[0];  // "from" end char
813     unsigned char ts = (unsigned char)to[-2];   // "to" start char
814     unsigned char te = (unsigned char)to[0];    // "to" end char
815 
816     while (fs < fe) {
817         ch_map[ fs++ ] = ts;
818         if (ts < te)
819             ts++;
820     }
821 }
822 
823 /*=gfunc string_tr_x
824  *
825  * what:  convert characters
826  * general_use:
827  *
828  *  exparg:  source, string to transform
829  *  exparg:  match,  characters to be converted
830  *  exparg:  translation, conversion list
831  *
832  * doc: This is the same as the @code{tr(1)} program, except the
833  *      string to transform is the first argument.  The second and
834  *      third arguments are used to construct mapping arrays for the
835  *      transformation of the first argument.
836  *
837  *      It is too bad this little program has so many different
838  *      and incompatible implementations!
839 =*/
840 SCM
ag_scm_string_tr_x(SCM str,SCM from_xform,SCM to_xform)841 ag_scm_string_tr_x(SCM str, SCM from_xform, SCM to_xform)
842 {
843     unsigned char ch_map[ 1 << 8 /* bits-per-byte */ ];
844     int    i    = sizeof(ch_map) - 1;
845     char * from = ag_scm2zchars(from_xform, "str");
846     char * to   = ag_scm2zchars(to_xform, "str");
847 
848     do  {
849         ch_map[i] = (unsigned char)i;
850     } while (--i > 0);
851 
852     for (; i <= (int)sizeof(ch_map) - 1; i++) {
853         unsigned char fch = (unsigned char)*(from++);
854         unsigned char tch = (unsigned char)*(to++);
855 
856         if (tch == NUL) {
857             to--;
858             tch = (unsigned char)to[-1];
859         }
860 
861         switch (fch) {
862         case NUL:
863             goto map_done;
864 
865         case '-':
866             /*
867              * "from" char is a hyphen.
868              * IF we are beyond the first character AND
869              *    the "to" character is a hyphen AND
870              *    there is a "from" character after the hyphen
871              *    there is a "to" character after the hyphen,
872              * THEN map a character range
873              */
874             if (  (i > 0)
875                && (tch     == '-')
876                && (from[0] != NUL)
877                && (to[0]   != NUL)) {
878                 tr_char_range(ch_map, (unsigned char *)from,
879                               (unsigned char *)to);
880                 break;
881             }
882 
883         default:
884             ch_map[ fch ] = tch;
885         }
886     } map_done:;
887 
888     to = C(char *, scm_i_string_chars(str));
889     i    = (int)scm_c_string_length(str);
890     while (i-- > 0) {
891         *to = (char)ch_map[ (int)*to ];
892         to++;
893     }
894     return str;
895 }
896 
897 /*=gfunc string_tr
898  *
899  * what:  convert characters with new result
900  * general_use:
901  *
902  *  exparg:  source, string to transform
903  *  exparg:  match,  characters to be converted
904  *  exparg:  translation, conversion list
905  *
906  * doc: This is identical to @code{string-tr!}, except that it does not
907  *      over-write the previous value.
908 =*/
909 SCM
ag_scm_string_tr(SCM Str,SCM From,SCM To)910 ag_scm_string_tr(SCM Str, SCM From, SCM To)
911 {
912     size_t lenz  = scm_c_string_length(Str);
913     SCM    res   = scm_from_latin1_stringn(scm_i_string_chars(Str), lenz);
914     return ag_scm_string_tr_x(res, From, To);
915 }
916 
917 /*=gfunc string_substitute
918  *
919  * what:  multiple global replacements
920  * general_use:
921  *
922  *  exparg:  source, string to transform
923  *  exparg:  match,  substring or substring list to be replaced
924  *  exparg:  repl,   replacement strings or substrings
925  *
926  * doc: @code{match} and  @code{repl} may be either a single string or
927  *      a list of strings.  Either way, they must have the same structure
928  *      and number of elements.  For example, to replace all amphersands,
929  *      less than and greater than characters, do something like this:
930  *
931  * @example
932  *      (string-substitute source
933  *          (list "&"     "<"    ">")
934  *          (list "&amp;" "&lt;" "&gt;"))
935  * @end example
936 =*/
937 SCM
ag_scm_string_substitute(SCM str,SCM Match,SCM Repl)938 ag_scm_string_substitute(SCM str, SCM Match, SCM Repl)
939 {
940     char const *  text;
941     ssize_t len;
942     SCM     res;
943 
944     if (! scm_is_string(str))
945         return SCM_UNDEFINED;
946 
947     text = scm_i_string_chars(str);
948     len   = (ssize_t)scm_c_string_length(str);
949 
950     if (scm_is_string(Match))
951         do_substitution(text, len, Match, Repl, (char **)&text, &len);
952     else
953         do_multi_subs((char **)&text, &len, Match, Repl);
954 
955     res = scm_from_latin1_stringn(text, (size_t)len);
956     return res;
957 }
958 
959 /*=gfunc time_string_to_number
960  *
961  * what:   duration string to seconds
962  * general_use:
963  * exparg: time_spec, string to parse
964  *
965  * doc:    Convert the argument string to a time period in seconds.
966  *         The string may use multiple parts consisting of days, hours
967  *         minutes and seconds.  These are indicated with a suffix of
968  *         @code{d}, @code{h}, @code{m} and @code{s} respectively.
969  *         Hours, minutes and seconds may also be represented with
970  *         @code{HH:MM:SS} or, without hours, as @code{MM:SS}.
971 =*/
972 SCM
ag_scm_time_string_to_number(SCM time_spec)973 ag_scm_time_string_to_number(SCM time_spec)
974 {
975     extern time_t parse_duration(char const * in_pz);
976 
977     char const * pz;
978     time_t  time_period;
979 
980     if (! scm_is_string(time_spec))
981         return SCM_UNDEFINED;
982 
983     pz = scm_i_string_chars(time_spec);
984     time_period = parse_duration(pz);
985 
986     return scm_from_int((int)time_period);
987 }
988 
989 /**
990  * @}
991  *
992  * Local Variables:
993  * mode: C
994  * c-file-style: "stroustrup"
995  * indent-tabs-mode: nil
996  * End:
997  * end of agen5/expString.c */
998