1
2 /**
3 * @file expString.c
4 *
5 * This module implements expression functions that
6 * manipulate string values.
7 *
8 * @addtogroup autogen
9 * @{
10 */
11 /*
12 * This file is part of AutoGen.
13 * AutoGen Copyright (C) 1992-2018 by Bruce Korb - all rights reserved
14 *
15 * AutoGen is free software: you can redistribute it and/or modify it
16 * under the terms of the GNU General Public License as published by the
17 * Free Software Foundation, either version 3 of the License, or
18 * (at your option) any later version.
19 *
20 * AutoGen is distributed in the hope that it will be useful, but
21 * WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
23 * See the GNU General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License along
26 * with this program. If not, see <http://www.gnu.org/licenses/>.
27 */
28
29 static size_t
stringify_for_sh(char * pzNew,uint_t qt,char const * pzDta)30 stringify_for_sh(char * pzNew, uint_t qt, char const * pzDta)
31 {
32 char * pz = pzNew;
33 *(pz++) = (char)qt;
34
35 for (;;) {
36 char c = *(pz++) = *(pzDta++);
37 switch (c) {
38 case NUL:
39 pz[-1] = (char)qt;
40 *pz = NUL;
41
42 return (size_t)(pz - pzNew);
43
44 case '\\':
45 /*
46 * If someone went to the trouble to escape a backquote or a
47 * dollar sign, then we should not neutralize it. Note that
48 * we handle a following backslash as a normal character.
49 *
50 * i.e. \\ --> \\\\ *BUT* \\$ --> \\\$
51 */
52 c = *pzDta;
53 switch (*pzDta) {
54 case '$':
55 break;
56
57 case '"':
58 case '`':
59 /*
60 * IF the ensuing quote character does *NOT* match the
61 * quote character for the string, then we will preserve
62 * the single copy of the backslash. If it does match,
63 * then we will double the backslash and a third backslash
64 * will be inserted when we emit the quote character.
65 */
66 if ((unsigned)c != qt)
67 break;
68 /* FALLTHROUGH */
69
70 default:
71 *(pz++) = '\\'; /* \ --> \\ */
72 }
73 break;
74
75 case '"': case '`':
76 if ((unsigned)c == qt) {
77 /*
78 * This routine does both `xx` and "xx" strings, we have
79 * to worry about this stuff differently. I.e., in ""
80 * strings, add a single \ in front of ", and in ``
81 * preserve a add \ in front of `.
82 */
83 pz[-1] = '\\'; /* " --> \" */
84 *(pz++) = c;
85 }
86 }
87 }
88 }
89
90 static SCM
shell_stringify(SCM obj,uint_t qt)91 shell_stringify(SCM obj, uint_t qt)
92 {
93 char * pzNew;
94 size_t dtaSize = 3;
95 char * pzDta = ag_scm2zchars(obj, "AG Object");
96 char * pz = pzDta;
97
98 for (;;) {
99 char c = *(pz++);
100
101 switch (c) {
102 case NUL:
103 goto loopDone1;
104
105 case '"': case '`': case '\\':
106 dtaSize += 2;
107 break;
108
109 default:
110 dtaSize++;
111 }
112 } loopDone1:;
113
114 pzNew = AGALOC(dtaSize, "shell string");
115 dtaSize = stringify_for_sh(pzNew, qt, pzDta);
116
117 {
118 SCM res = scm_from_latin1_stringn(pzNew, dtaSize);
119 AGFREE(pzNew);
120 return res;
121 }
122 }
123
124 static int
sub_count(char const * haystack,char const * needle)125 sub_count(char const * haystack, char const * needle)
126 {
127 int repCt = 0;
128 size_t needle_len = strlen(needle);
129
130 for (;;) {
131 haystack = strstr(haystack, needle);
132 if (haystack == NULL) break;
133 repCt++;
134 haystack += needle_len;
135 }
136 return repCt;
137 }
138
139 /**
140 * Replace marker text.
141 *
142 * Replace all occurrances of the marker text with the substitution text.
143 * The result is stored in an automatically freed temporary buffer.
144 *
145 * @param src_str The source string
146 * @param str_len The length of the string
147 * @param match the SCM-ized marker string
148 * @param repl the SCM-ized replacement string
149 * @param ppz_res pointer to the result pointer
150 * @param res_len pointer to result length
151 */
152 static void
do_substitution(char const * src_str,ssize_t str_len,SCM match,SCM repl,char ** ppz_res,ssize_t * res_len)153 do_substitution(
154 char const * src_str,
155 ssize_t str_len,
156 SCM match,
157 SCM repl,
158 char ** ppz_res,
159 ssize_t * res_len)
160 {
161 char * pzMatch = ag_scm2zchars(match, "match text");
162 char * rep_str = ag_scm2zchars(repl, "repl text");
163 int mark_len = (int)scm_c_string_length(match);
164 int repl_len = (int)scm_c_string_length(repl);
165
166 {
167 int ct = sub_count(src_str, pzMatch);
168 if (ct == 0)
169 return; /* No substitutions -- no work. */
170
171 str_len += (repl_len - mark_len) * ct;
172 }
173
174 {
175 char * dest = scribble_get(str_len + 1);
176 *ppz_res = dest;
177 *res_len = str_len;
178
179 for (;;) {
180 char const * next = strstr(src_str, pzMatch);
181 size_t len;
182
183 if (next == NULL)
184 break;
185 len = (size_t)(next - src_str);
186 if (len != 0) {
187 memcpy(dest, src_str, len);
188 dest += len;
189 }
190 memcpy(dest, rep_str, (size_t)repl_len);
191 dest += repl_len;
192 src_str = next + mark_len;
193 }
194
195 strcpy(dest, src_str);
196 }
197 }
198
199 /**
200 * Recursive routine. It calls itself for list values and calls
201 * "do_substitution" for string values. Each substitution will
202 * be done in the order found in the tree walk of list values.
203 * The "match" and "repl" trees *must* be identical in structure.
204 */
205 static void
do_multi_subs(char ** ppzStr,ssize_t * pStrLen,SCM match,SCM repl)206 do_multi_subs(char ** ppzStr, ssize_t * pStrLen, SCM match, SCM repl)
207 {
208 char * pzStr = *ppzStr;
209 char * pzNxt = pzStr;
210
211 /*
212 * Loop for as long as our list has more entries
213 */
214 while (! scm_is_null(match)) {
215 /*
216 * "CAR" is the current value, "CDR" is rest of list
217 */
218 SCM matchCar = SCM_CAR(match);
219 SCM replCar = SCM_CAR(repl);
220
221 match = SCM_CDR(match);
222 repl = SCM_CDR(repl);
223
224 if (scm_is_string(matchCar)) {
225 do_substitution(pzStr, *pStrLen, matchCar, replCar,
226 &pzNxt, pStrLen);
227
228 // coverity[use_after_free] -- invalid alias analysis
229 pzStr = pzNxt;
230 }
231
232 else if (AG_SCM_LIST_P(matchCar))
233 do_multi_subs(&pzStr, pStrLen, matchCar, replCar);
234
235 else
236 /*
237 * Whatever it is it is not part of what we would expect. Bail.
238 */
239 break;
240 }
241
242 *ppzStr = pzStr;
243 }
244
245 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
246 * EXPRESSION EVALUATION ROUTINES
247 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
248
249 /*=gfunc mk_gettextable
250 *
251 * what: print a string in a gettext-able format
252 * exparg: string, a multi-paragraph string
253 *
254 * doc: Returns SCM_UNDEFINED. The input text string is printed
255 * to the current output as one puts() call per paragraph.
256 =*/
257 SCM
ag_scm_mk_gettextable(SCM txt)258 ag_scm_mk_gettextable(SCM txt)
259 {
260 if (scm_is_string(txt)) {
261 char const * pz = ag_scm2zchars(txt, "txt");
262 optionPrintParagraphs(pz, false, cur_fpstack->stk_fp);
263 }
264 return SCM_UNDEFINED;
265 }
266
267 /*=gfunc in_p
268 *
269 * what: test for string in list
270 * general_use:
271 * exparg: test-string, string to look for
272 * exparg: string-list, list of strings to check,, list
273 *
274 * doc: Return SCM_BOOL_T if the first argument string is found
275 * in one of the entries in the second (list-of-strings) argument.
276 =*/
277 SCM
ag_scm_in_p(SCM obj,SCM list)278 ag_scm_in_p(SCM obj, SCM list)
279 {
280 int len;
281 size_t lenz;
282 SCM car;
283 char const * pz1;
284
285 if (! scm_is_string(obj))
286 return SCM_UNDEFINED;
287
288 pz1 = scm_i_string_chars(obj);
289 lenz = scm_c_string_length(obj);
290
291 /*
292 * If the second argument is a string somehow, then treat
293 * this as a straight out string comparison
294 */
295 if (scm_is_string(list)) {
296 if ( (scm_c_string_length(list) == lenz)
297 && (strncmp(pz1, scm_i_string_chars(list), lenz) == 0))
298 return SCM_BOOL_T;
299 return SCM_BOOL_F;
300 }
301
302 len = (int)scm_ilength(list);
303 if (len == 0)
304 return SCM_BOOL_F;
305
306 /*
307 * Search all the lists and sub-lists passed in
308 */
309 while (len-- > 0) {
310 car = SCM_CAR(list);
311 list = SCM_CDR(list);
312
313 /*
314 * This routine is listed as getting a list as the second
315 * argument. That means that if someone builds a list and
316 * hands it to us, it magically becomes a nested list.
317 * This unravels that.
318 */
319 if (! scm_is_string(car)) {
320 if (ag_scm_in_p(obj, car) == SCM_BOOL_T)
321 return SCM_BOOL_T;
322 continue;
323 }
324
325 if ( (scm_c_string_length(car) == lenz)
326 && (strncmp(pz1, scm_i_string_chars(car), lenz) == 0) )
327 return SCM_BOOL_T;
328 }
329
330 return SCM_BOOL_F;
331 }
332
333
334 /*=gfunc join
335 *
336 * what: join string list with separator
337 * general_use:
338 * exparg: separator, string to insert between entries
339 * exparg: list, list of strings to join,, list
340 *
341 * doc: With the first argument as the separator string,
342 * joins together an a-list of strings into one long string.
343 * The list may contain nested lists, partly because you
344 * cannot always control that.
345 =*/
346 SCM
ag_scm_join(SCM sep,SCM list)347 ag_scm_join(SCM sep, SCM list)
348 {
349 int l_len, sv_l_len;
350 SCM car;
351 SCM alist = list;
352 size_t sep_len;
353 size_t str_len;
354 char * pzRes;
355 char const * pzSep;
356 char * pzScan;
357
358 if (! scm_is_string(sep))
359 return SCM_UNDEFINED;
360
361 sv_l_len = l_len = (int)scm_ilength(list);
362 if (l_len == 0)
363 return scm_from_latin1_string(zNil);
364
365 pzSep = scm_i_string_chars(sep);
366 sep_len = scm_c_string_length(sep);
367 str_len = 0;
368
369 /*
370 * Count up the lengths of all the strings to be joined.
371 */
372 for (;;) {
373 car = SCM_CAR(list);
374 list = SCM_CDR(list);
375
376 /*
377 * This routine is listed as getting a list as the second
378 * argument. That means that if someone builds a list and
379 * hands it to us, it magically becomes a nested list.
380 * This unravels that.
381 */
382 if (! scm_is_string(car)) {
383 if (car != SCM_UNDEFINED)
384 car = ag_scm_join(sep, car);
385 if (! scm_is_string(car))
386 return SCM_UNDEFINED;
387 }
388
389 str_len += scm_c_string_length(car);
390
391 if (--l_len <= 0)
392 break;
393
394 str_len += sep_len;
395 }
396
397 l_len = sv_l_len;
398 pzRes = pzScan = scribble_get((ssize_t)str_len);
399
400 /*
401 * Now, copy each one into the output
402 */
403 for (;;) {
404 size_t cpy_len;
405
406 car = SCM_CAR(alist);
407 alist = SCM_CDR(alist);
408
409 /*
410 * This unravels nested lists.
411 */
412 if (! scm_is_string(car))
413 car = ag_scm_join(sep, car);
414
415 cpy_len = scm_c_string_length(car);
416 memcpy(VOIDP(pzScan), scm_i_string_chars(car), cpy_len);
417 pzScan += cpy_len;
418
419 /*
420 * IF we reach zero, then do not insert a separation and bail out
421 */
422 if (--l_len <= 0)
423 break;
424 memcpy(VOIDP(pzScan), VOIDP(pzSep), sep_len);
425 pzScan += sep_len;
426 }
427
428 return scm_from_latin1_stringn(pzRes, str_len);
429 }
430
431
432 /*=gfunc prefix
433 *
434 * what: prefix lines with a string
435 * general_use:
436 *
437 * exparg: prefix, string to insert at start of each line
438 * exparg: text, multi-line block of text
439 *
440 * doc:
441 * Prefix every line in the second string with the first string.
442 * This includes empty lines. Trailing white space will be removed
443 * so if the prefix is all horizontal white space, then it will be
444 * removed from otherwise blank lines. Also, if the last character
445 * is a newline, then *two* prefixes will be inserted into the result
446 * text.
447 *
448 * For example, if the first string is "# " and the second contains:
449 * @example
450 * "two\nlines\n"
451 * @end example
452 * @noindent
453 * The result string will contain:
454 * @example
455 * # two
456 * # lines
457 * #
458 * @end example
459 *
460 * The last line will be incomplete: no newline and no space after the
461 * hash character, either.
462 =*/
463 SCM
ag_scm_prefix(SCM prefx,SCM txt)464 ag_scm_prefix(SCM prefx, SCM txt)
465 {
466 char * prefix = ag_scm2zchars(prefx, "pfx");
467 char * text = ag_scm2zchars(txt, "txt");
468 char * scan = text;
469 size_t pfx_size = strlen(prefix);
470 char * r_str; /* result string */
471
472 {
473 size_t out_size = pfx_size + 1; // NUL or NL byte adjustment
474 for (;;) {
475 switch (*(scan++)) {
476 case NUL:
477 out_size += scan - text;
478 goto exit_count;
479 case NL:
480 out_size += pfx_size;
481 }
482 } exit_count:;
483
484 r_str = scan = scribble_get((ssize_t)out_size);
485 }
486
487 /*
488 * If the text starts with a newline, then do not apply
489 */
490 memcpy(scan, prefix, pfx_size);
491 scan += pfx_size;
492 pfx_size++;
493
494 for (;;) {
495 char ch = *(text++);
496 switch (ch) {
497 case NUL:
498 /*
499 * Trim trailing white space on the final line.
500 */
501 scan = SPN_HORIZ_WHITE_BACK(r_str, scan);
502 return scm_from_latin1_stringn(r_str, scan - r_str);
503
504 case NL:
505 /*
506 * Trim trailing white space on previous line first.
507 */
508 scan = SPN_HORIZ_WHITE_BACK(r_str, scan);
509 *scan = NL;
510 memcpy(scan+1, prefix, pfx_size - 1);
511 scan += pfx_size; // prefix length plus 1 for new line
512 break;
513
514 default:
515 *(scan++) = ch;
516 break;
517 }
518 }
519 }
520
521 /*=gfunc raw_shell_str
522 *
523 * what: single quote shell string
524 * general_use:
525 *
526 * exparg: string, string to transform
527 *
528 * doc:
529 * Convert the text of the string into a singly quoted string
530 * that a normal shell will process into the original string.
531 * (It will not do macro expansion later, either.)
532 * Contained single quotes become tripled, with the middle quote
533 * escaped with a backslash. Normal shells will reconstitute the
534 * original string.
535 *
536 * @strong{Notice}: some shells will not correctly handle unusual
537 * non-printing characters. This routine works for most reasonably
538 * conventional ASCII strings.
539 =*/
540 SCM
ag_scm_raw_shell_str(SCM obj)541 ag_scm_raw_shell_str(SCM obj)
542 {
543 char * data;
544 char * pz;
545 char * pzFree;
546
547 data = ag_scm2zchars(obj, "AG Object");
548
549 {
550 size_t dtaSize = scm_c_string_length(obj) + 3; /* NUL + 2 quotes */
551 pz = data-1;
552 for (;;) {
553 pz = strchr(pz+1, '\'');
554 if (pz == NULL)
555 break;
556 dtaSize += 3; /* '\'' -> 3 additional chars */
557 }
558
559 pzFree = pz = AGALOC(dtaSize + 2, "raw string");
560 }
561
562 /*
563 * Handle leading single quotes before starting the first quote.
564 */
565 while (*data == '\'') {
566 *(pz++) = '\\';
567 *(pz++) = '\'';
568
569 /*
570 * IF pure single quotes, then we're done.
571 */
572 if (*++data == NUL) {
573 *pz = NUL;
574 goto returnString;
575 }
576 }
577
578 /*
579 * Start quoting. If the string is empty, we wind up with two quotes.
580 */
581 *(pz++) = '\'';
582
583 for (;;) {
584 switch (*(pz++) = *(data++)) {
585 case NUL:
586 goto loopDone;
587
588 case '\'':
589 /*
590 * We've inserted a single quote, which ends the quoting session.
591 * Now, insert escaped quotes for every quote char we find, then
592 * restart the quoting.
593 */
594 data--;
595 do {
596 *(pz++) = '\\';
597 *(pz++) = '\'';
598 } while (*++data == '\'');
599 if (*data == NUL) {
600 *pz = NUL;
601 goto returnString;
602 }
603 *(pz++) = '\'';
604 }
605 } loopDone:;
606 pz[-1] = '\'';
607 *pz = NUL;
608
609 returnString:
610 {
611 SCM res = scm_from_latin1_string(pzFree);
612 AGFREE(pzFree);
613 return res;
614 }
615 }
616
617
618 /*=gfunc shell_str
619 *
620 * what: double quote shell string
621 * general_use:
622 *
623 * exparg: string, string to transform
624 *
625 * doc:
626 *
627 * Convert the text of the string into a double quoted string that a normal
628 * shell will process into the original string, almost. It will add the
629 * escape character @code{\\} before two special characters to
630 * accomplish this: the backslash @code{\\} and double quote @code{"}.
631 *
632 * @strong{Notice}: some shells will not correctly handle unusual
633 * non-printing characters. This routine works for most reasonably
634 * conventional ASCII strings.
635 *
636 * @strong{WARNING}:
637 *@*
638 * This function omits the extra backslash in front of a backslash, however,
639 * if it is followed by either a backquote or a dollar sign. It must do this
640 * because otherwise it would be impossible to protect the dollar sign or
641 * backquote from shell evaluation. Consequently, it is not possible to
642 * render the strings "\\$" or "\\`". The lesser of two evils.
643 *
644 * All others characters are copied directly into the output.
645 *
646 * The @code{sub-shell-str} variation of this routine behaves identically,
647 * except that the extra backslash is omitted in front of @code{"} instead
648 * of @code{`}. You have to think about it. I'm open to suggestions.
649 *
650 * Meanwhile, the best way to document is with a detailed output example.
651 * If the backslashes make it through the text processing correctly,
652 * below you will see what happens with three example strings. The first
653 * example string contains a list of quoted @code{foo}s, the second is
654 * the same with a single backslash before the quote characters and the
655 * last is with two backslash escapes. Below each is the result of the
656 * @code{raw-shell-str}, @code{shell-str} and @code{sub-shell-str} functions.
657 *
658 * @example
659 * foo[0] ''foo'' 'foo' "foo" `foo` $foo
660 * raw-shell-str -> \'\''foo'\'\'' '\''foo'\'' "foo" `foo` $foo'
661 * shell-str -> "''foo'' 'foo' \"foo\" `foo` $foo"
662 * sub-shell-str -> `''foo'' 'foo' "foo" \`foo\` $foo`
663 *
664 * foo[1] \'bar\' \"bar\" \`bar\` \$bar
665 * raw-shell-str -> '\'\''bar\'\'' \"bar\" \`bar\` \$bar'
666 * shell-str -> "\\'bar\\' \\\"bar\\\" \`bar\` \$bar"
667 * sub-shell-str -> `\\'bar\\' \"bar\" \\\`bar\\\` \$bar`
668 *
669 * foo[2] \\'BAZ\\' \\"BAZ\\" \\`BAZ\\` \\$BAZ
670 * raw-shell-str -> '\\'\''BAZ\\'\'' \\"BAZ\\" \\`BAZ\\` \\$BAZ'
671 * shell-str -> "\\\\'BAZ\\\\' \\\\\"BAZ\\\\\" \\\`BAZ\\\` \\\$BAZ"
672 * sub-shell-str -> `\\\\'BAZ\\\\' \\\"BAZ\\\" \\\\\`BAZ\\\\\` \\\$BAZ`
673 * @end example
674 *
675 * There should be four, three, five and three backslashes for the four
676 * examples on the last line, respectively. The next to last line should
677 * have four, five, three and three backslashes. If this was not accurately
678 * reproduced, take a look at the agen5/test/shell.test test. Notice the
679 * backslashes in front of the dollar signs. It goes from zero to one to
680 * three for the "cooked" string examples.
681 =*/
682 SCM
ag_scm_shell_str(SCM obj)683 ag_scm_shell_str(SCM obj)
684 {
685 return shell_stringify(obj, (unsigned char)'"');
686 }
687
688 /*=gfunc sub_shell_str
689 *
690 * what: back quoted (sub-)shell string
691 * general_use:
692 *
693 * exparg: string, string to transform
694 *
695 * doc:
696 * This function is substantially identical to @code{shell-str}, except
697 * that the quoting character is @code{`} and the "leave the escape alone"
698 * character is @code{"}.
699 =*/
700 SCM
ag_scm_sub_shell_str(SCM obj)701 ag_scm_sub_shell_str(SCM obj)
702 {
703 return shell_stringify(obj, (unsigned char)'`');
704 }
705
706
707 /*=gfunc stack
708 *
709 * what: make list of AutoGen values
710 *
711 * exparg: ag-name, AutoGen value name
712 *
713 * doc: Create a scheme list of all the strings that are associated
714 * with a name. They must all be text values or we choke.
715 =*/
716 SCM
ag_scm_stack(SCM obj)717 ag_scm_stack(SCM obj)
718 {
719 SCM res;
720 SCM * pos = &res;
721 def_ent_t ** ppDE;
722 def_ent_t * pDE;
723 SCM str;
724
725 res = SCM_EOL;
726
727 ppDE = find_def_ent_list(ag_scm2zchars(obj, "AG Object"));
728 if (ppDE == NULL)
729 return SCM_EOL;
730
731 for (;;) {
732 pDE = *(ppDE++);
733
734 if (pDE == NULL)
735 break;
736
737 if (pDE->de_type != VALTYP_TEXT)
738 return SCM_UNDEFINED;
739
740 str = scm_from_latin1_string(pDE->de_val.dvu_text);
741 *pos = scm_cons(str, SCM_EOL);
742 pos = SCM_CDRLOC(*pos);
743 }
744
745 return res;
746 }
747
748
749 /*=gfunc kr_string
750 *
751 * what: emit string for K&R C
752 * general_use:
753 *
754 * exparg: string, string to reformat
755 *
756 * doc:
757 * Reform a string so that, when printed, a K&R C compiler will be able
758 * to compile the data and construct a string that contains exactly
759 * what the current string contains. Many non-printing characters are
760 * replaced with escape sequences. New-lines are replaced with a
761 * backslash-n-backslash and newline sequence,
762 =*/
763 SCM
ag_scm_kr_string(SCM str)764 ag_scm_kr_string(SCM str)
765 {
766 char const * pz = ag_scm2zchars(str, "krstr");
767 SCM res;
768 pz = optionQuoteString(pz, KR_STRING_NEWLINE);
769 res = scm_from_latin1_string(pz);
770 AGFREE(pz);
771 return res;
772 }
773
774
775 /*=gfunc c_string
776 *
777 * what: emit string for ANSI C
778 * general_use:
779 *
780 * exparg: string, string to reformat
781 *
782 * doc:
783 * Reform a string so that, when printed, the C compiler will be able to
784 * compile the data and construct a string that contains exactly what the
785 * current string contains. Many non-printing characters are replaced with
786 * escape sequences. Newlines are replaced with a backslash, an @code{n}, a
787 * closing quote, a newline, seven spaces and another re-opening quote. The
788 * compiler will implicitly concatenate them. The reader will see line
789 * breaks.
790 *
791 * A K&R compiler will choke. Use @code{kr-string} for that compiler.
792 *
793 =*/
794 SCM
ag_scm_c_string(SCM str)795 ag_scm_c_string(SCM str)
796 {
797 char const * pz = ag_scm2zchars(str, "cstr");
798 SCM res;
799 pz = optionQuoteString(pz, C_STRING_NEWLINE);
800 res = scm_from_latin1_string(pz);
801 AGFREE(pz);
802 return res;
803 }
804
805 /**
806 * Map a character range for ag_scm_string_tr_x()
807 */
808 static inline void
tr_char_range(unsigned char * ch_map,unsigned char * from,unsigned char * to)809 tr_char_range(unsigned char * ch_map, unsigned char * from, unsigned char * to)
810 {
811 unsigned char fs = (unsigned char)from[-2]; // "from" start char
812 unsigned char fe = (unsigned char)from[0]; // "from" end char
813 unsigned char ts = (unsigned char)to[-2]; // "to" start char
814 unsigned char te = (unsigned char)to[0]; // "to" end char
815
816 while (fs < fe) {
817 ch_map[ fs++ ] = ts;
818 if (ts < te)
819 ts++;
820 }
821 }
822
823 /*=gfunc string_tr_x
824 *
825 * what: convert characters
826 * general_use:
827 *
828 * exparg: source, string to transform
829 * exparg: match, characters to be converted
830 * exparg: translation, conversion list
831 *
832 * doc: This is the same as the @code{tr(1)} program, except the
833 * string to transform is the first argument. The second and
834 * third arguments are used to construct mapping arrays for the
835 * transformation of the first argument.
836 *
837 * It is too bad this little program has so many different
838 * and incompatible implementations!
839 =*/
840 SCM
ag_scm_string_tr_x(SCM str,SCM from_xform,SCM to_xform)841 ag_scm_string_tr_x(SCM str, SCM from_xform, SCM to_xform)
842 {
843 unsigned char ch_map[ 1 << 8 /* bits-per-byte */ ];
844 int i = sizeof(ch_map) - 1;
845 char * from = ag_scm2zchars(from_xform, "str");
846 char * to = ag_scm2zchars(to_xform, "str");
847
848 do {
849 ch_map[i] = (unsigned char)i;
850 } while (--i > 0);
851
852 for (; i <= (int)sizeof(ch_map) - 1; i++) {
853 unsigned char fch = (unsigned char)*(from++);
854 unsigned char tch = (unsigned char)*(to++);
855
856 if (tch == NUL) {
857 to--;
858 tch = (unsigned char)to[-1];
859 }
860
861 switch (fch) {
862 case NUL:
863 goto map_done;
864
865 case '-':
866 /*
867 * "from" char is a hyphen.
868 * IF we are beyond the first character AND
869 * the "to" character is a hyphen AND
870 * there is a "from" character after the hyphen
871 * there is a "to" character after the hyphen,
872 * THEN map a character range
873 */
874 if ( (i > 0)
875 && (tch == '-')
876 && (from[0] != NUL)
877 && (to[0] != NUL)) {
878 tr_char_range(ch_map, (unsigned char *)from,
879 (unsigned char *)to);
880 break;
881 }
882
883 default:
884 ch_map[ fch ] = tch;
885 }
886 } map_done:;
887
888 to = C(char *, scm_i_string_chars(str));
889 i = (int)scm_c_string_length(str);
890 while (i-- > 0) {
891 *to = (char)ch_map[ (int)*to ];
892 to++;
893 }
894 return str;
895 }
896
897 /*=gfunc string_tr
898 *
899 * what: convert characters with new result
900 * general_use:
901 *
902 * exparg: source, string to transform
903 * exparg: match, characters to be converted
904 * exparg: translation, conversion list
905 *
906 * doc: This is identical to @code{string-tr!}, except that it does not
907 * over-write the previous value.
908 =*/
909 SCM
ag_scm_string_tr(SCM Str,SCM From,SCM To)910 ag_scm_string_tr(SCM Str, SCM From, SCM To)
911 {
912 size_t lenz = scm_c_string_length(Str);
913 SCM res = scm_from_latin1_stringn(scm_i_string_chars(Str), lenz);
914 return ag_scm_string_tr_x(res, From, To);
915 }
916
917 /*=gfunc string_substitute
918 *
919 * what: multiple global replacements
920 * general_use:
921 *
922 * exparg: source, string to transform
923 * exparg: match, substring or substring list to be replaced
924 * exparg: repl, replacement strings or substrings
925 *
926 * doc: @code{match} and @code{repl} may be either a single string or
927 * a list of strings. Either way, they must have the same structure
928 * and number of elements. For example, to replace all amphersands,
929 * less than and greater than characters, do something like this:
930 *
931 * @example
932 * (string-substitute source
933 * (list "&" "<" ">")
934 * (list "&" "<" ">"))
935 * @end example
936 =*/
937 SCM
ag_scm_string_substitute(SCM str,SCM Match,SCM Repl)938 ag_scm_string_substitute(SCM str, SCM Match, SCM Repl)
939 {
940 char const * text;
941 ssize_t len;
942 SCM res;
943
944 if (! scm_is_string(str))
945 return SCM_UNDEFINED;
946
947 text = scm_i_string_chars(str);
948 len = (ssize_t)scm_c_string_length(str);
949
950 if (scm_is_string(Match))
951 do_substitution(text, len, Match, Repl, (char **)&text, &len);
952 else
953 do_multi_subs((char **)&text, &len, Match, Repl);
954
955 res = scm_from_latin1_stringn(text, (size_t)len);
956 return res;
957 }
958
959 /*=gfunc time_string_to_number
960 *
961 * what: duration string to seconds
962 * general_use:
963 * exparg: time_spec, string to parse
964 *
965 * doc: Convert the argument string to a time period in seconds.
966 * The string may use multiple parts consisting of days, hours
967 * minutes and seconds. These are indicated with a suffix of
968 * @code{d}, @code{h}, @code{m} and @code{s} respectively.
969 * Hours, minutes and seconds may also be represented with
970 * @code{HH:MM:SS} or, without hours, as @code{MM:SS}.
971 =*/
972 SCM
ag_scm_time_string_to_number(SCM time_spec)973 ag_scm_time_string_to_number(SCM time_spec)
974 {
975 extern time_t parse_duration(char const * in_pz);
976
977 char const * pz;
978 time_t time_period;
979
980 if (! scm_is_string(time_spec))
981 return SCM_UNDEFINED;
982
983 pz = scm_i_string_chars(time_spec);
984 time_period = parse_duration(pz);
985
986 return scm_from_int((int)time_period);
987 }
988
989 /**
990 * @}
991 *
992 * Local Variables:
993 * mode: C
994 * c-file-style: "stroustrup"
995 * indent-tabs-mode: nil
996 * End:
997 * end of agen5/expString.c */
998