1 /* rlwrap - a readline wrapper
2    (C) 2000-2007 Hans Lub
3 
4    This program is free software; you can redistribute it and/or
5    modify it under the terms of the GNU General Public License
6    as published by the Free Software Foundation; either version 2
7    of the License, or (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
17 
18 
19    **************************************************************
20 
21    string_utils.c: rlwrap uses a fair number of custom string-handling
22    functions. A few of those are replacements for (or default to)
   GNU or POSIX standard functions (with names like mydirname,
24    mystrldup). Others are special purpose string manglers for
25    debugging, removing colour codes and the construction of history
26    entries)
27 
28    All of these functions work on basic types as char * and int, and
29    none of them refer to any of rlwraps global variables (except debug)
30 */
31 
32 
#include "rlwrap.h"
#include <limits.h>             /* INT_MIN, INT_MAX */
34 
35 
36 
37 
38 /* mystrlcpy and mystrlcat: wrappers around strlcat and strlcpy, if
39    available, otherwise emulations of them. Both versions *assure*
40    0-termination, but don't check for truncation: return type is
41    void */
42 
/* mystrlcpy(dst, src, size): copy src into dst (a buffer of size bytes),
   truncating if necessary and always '\0'-terminating the result.
   A size of 0 leaves no room even for the terminator, so dst is then
   left untouched. */
void
mystrlcpy(char *dst, const char *src, size_t size)
{
  if (size == 0)
    return;                     /* keeps size - 1 below from wrapping around */
#ifdef HAVE_STRLCPY
  strlcpy(dst, src, size);
#else
  strncpy(dst, src, size - 1);
  dst[size - 1] = '\0';         /* strncpy doesn't terminate when it truncates */
#endif
}
53 
/* mystrlcat(dst, src, size): append src to the string in dst (a buffer of
   size bytes), truncating if necessary and always '\0'-terminating within
   the buffer. A size of 0 leaves dst untouched. */
void
mystrlcat(char *dst, const char *src, size_t size)
{
  if (size == 0)
    return;                     /* no room at all, not even for a terminator */
#ifdef HAVE_STRLCAT
  strlcat(dst, src, size);
  dst[size - 1] = '\0';         /* we don't check for truncation, just assure '\0'-termination. */
#else
  {
    const char *end = memchr(dst, '\0', size);

    /* Only append when dst is properly terminated inside the buffer:
       otherwise the room left would be computed as size - size - 1, which
       wraps around and lets strncat write far beyond the buffer */
    if (end != NULL)
      strncat(dst, src, size - (size_t) (end - dst) - 1);
    dst[size - 1] = '\0';
  }
#endif
}
65 
66 
67 
/* mystrndup: strndup work-alike that obtains its memory from mymalloc
   instead of malloc. Returns a heap buffer of len + 1 bytes holding at
   most len characters of string, '\0'-terminated */

static char *
mystrndup(const char *string, int len)
{
  char *copy;
  int bufsize = len + 1;        /* room for the terminating '\0' */

  assert(string != NULL);
  copy = (char *) mymalloc(bufsize);
  mystrlcpy(copy, string, bufsize);
  return copy;
}
81 
82 
/* strings_are_equal(s1, s2): true only when both arguments are non-NULL
   and strcmp() finds them identical */
bool
strings_are_equal(const char *s1, const char *s2)
{
  bool both_present = (s1 != NULL) && (s2 != NULL);

  return both_present && strcmp(s1, s2) == 0;
}
88 
/* mysavestring(string): return a duplicate of string (which must not be
   NULL) on the heap */
char *
mysavestring(const char *string)
{
  size_t length;

  assert(string != NULL);
  length = strlen(string);
  return mystrndup(string, length);
}
96 
/* strifnull(string): substitute the literal "<NULL>" for a NULL pointer;
   any other argument is handed back unchanged */
char *
strifnull(char *string)
{
  if (string == NULL)
    return "<NULL>";
  return string;
}
103 
104 
105 
/* add3strings(str1, str2, str3): concatenate the three (non-NULL)
   arguments, in that order, into a heap buffer that is exactly large
   enough, and return that buffer */

char *
add3strings(const char *str1, const char *str2, const char *str3)
{
  char *joined;
  int total;

  assert(str1 != NULL);
  assert(str2 != NULL);
  assert(str3 != NULL);

  total = strlen(str1) + strlen(str2) + strlen(str3) + 1;   /* + 1 for the final 0 byte */
  joined = (char *) mymalloc(total);

  mystrlcpy(joined, str1, total);
  mystrlcat(joined, str2, total);
  mystrlcat(joined, str3, total);
  return joined;
}
126 
127 
/* append_and_free_old(str1, str2): return add2strings(str1, str2) and
   free str1.
   append_and_free_old(NULL, str2) just returns a copy of str2
*/

char *
append_and_free_old(char *str1, const char *str2)
{
  char *combined;

  if (str1 == NULL)             /* no old str1, hence nothing to free */
    return mysavestring(str2);

  combined = add2strings(str1, str2);
  free(str1);
  return combined;
}
143 
144 
145 /* mybasename and mydirname: wrappers around basename and dirname, if
146    available, otherwise emulations of them */
147 
148 
/* mybasename(filename): return the last path component of filename.

   When GNU basename() is used, the result is a fresh heap copy.
   Otherwise the result points into filename itself, at the part after
   the last '/' that is not filename's final character (a trailing '/' is
   therefore not treated as a separator); nothing is allocated then. */
char *
mybasename(const char *filename)
{                               /* determine basename of "filename" */
#if defined(HAVE_BASENAME) && defined(_GNU_SOURCE) /* we want only the GNU version  - but this doesn't guarantee that */
  char *filename_copy = mysavestring(filename);
  char *result = mysavestring(basename(filename_copy));
  free(filename_copy);
  return result; /* basename on HP-UX is toxic: the result will be overwritten by subsequent invocations! */
#else
  size_t length = strlen(filename);
  const char *p = filename;

  /* An empty filename has no last character to start the scan from:
     filename + length - 1 would point *before* the string */
  if (length > 0) {
    /* find last '/' in name (if any) */
    for (p = filename + length - 1; p > filename; p--)
      if (*(p - 1) == '/')
        break;
  }
  return (char *) p;            /* the interface promises a plain char * */
#endif
}
167 
/* mydirname(filename): return the directory part of filename. The result
   is always a fresh heap string, so the caller can free() it whichever
   implementation was compiled in. */
char *
mydirname(const char *filename)
{                               /* determine directory component of "name" */
#ifdef HAVE_DIRNAME
  /* dirname() may modify its argument, and may return a pointer into it
     or into static storage: work on a scratch copy, and duplicate the
     result before releasing that scratch copy */
  char *filename_copy = mysavestring(filename);
  char *result = mysavestring(dirname(filename_copy));
  free(filename_copy);
  return result;
#else
  size_t length = strlen(filename);
  const char *p = filename;

  /* An empty filename has no last character to start the scan from */
  if (length > 0) {
    /* find last '/' in name (if any) */
    for (p = filename + length - 1; p > filename; p--)
      if (*(p - 1) == '/')
        break;
  }
  /* "." is copied to the heap as well: a string literal must never reach free() */
  return (p == filename ? mysavestring(".") : mystrndup(filename, p - filename));
#endif
}
185 
186 
187 
188 /* Better atoi() with error checking */
189 int
my_atoi(const char * nptr)190 my_atoi(const char *nptr)
191 {
192   int result;
193   char *endptr;
194 
195   errno = 0;
196   result = (int) strtol(nptr, &endptr, 10);
197   if (errno || endptr == nptr || *endptr)
198     myerror(FATAL|USE_ERRNO, "Could not make sense of <%s> as an integer", mangle_string_for_debug_log(nptr, 20));
199   return result;
200 }
201 
202 /* TODO: clean up the following mess. strtok() is cute, but madness. Write one function
203    char *tokenize(const char *string, const char *delimiters, bool allow_empty_strings), and make
204    both split_with functions a special case of it. Drop mystrtok, count_str_occurrences and count_char_occurrences */
205 
206 
207   /* mystrtok: saner version of strtok that doesn't overwrite its first argument */
208   /* Scary strtok: "The  strtok()  function breaks a string into a sequence of zero or more nonempty tokens.
209     On the first call to strtok(), the string to be parsed should be specified in str.
210     In each subsequent call that should parse the same string, str must be NULL.
211   */
212 
213 
/* Same calling convention as strtok() (pass s on the first call and NULL
   on the following ones), but the tokens are cut out of a private heap
   copy of s. That copy is kept in a static variable and is replaced
   whenever mystrtok is called with a non-NULL s, so returned tokens point
   into memory owned by mystrtok */
char *
mystrtok(const char *s, const char *delimiters)
{
  static char *scratchpad = NULL;

  if (s == NULL)                /* go on with the copy made earlier */
    return strtok(NULL, delimiters);

  free(scratchpad);             /* old news (harmless when still NULL) */
  scratchpad = mysavestring(s);
  return strtok(scratchpad, delimiters);
}
224 
225 
/* Count the non-overlapping occurrences of needle (which must not be
   empty) in haystack, scanning from left to right */
static int
count_str_occurrences(const char *haystack, const char *needle)
{
  int step = strlen(needle);
  int hits = 0;
  const char *cursor;

  assert(step > 0);
  for (cursor = strstr(haystack, needle);
       cursor != NULL;
       cursor = strstr(cursor + step, needle))   /* resume just past the previous hit */
    hits++;
  return hits;
}
239 
/* Count how many times the character c occurs in haystack. No memory is
   allocated. Asking for c == '\0' yields 0: the terminator is not part
   of the string's contents */
static int
count_char_occurrences(const char *haystack, char c)
{
  int count = 0;
  const char *p;

  if (c == '\0')
    return 0;                   /* strchr() would "find" the terminator itself */
  for (p = strchr(haystack, c); p != NULL; p = strchr(p + 1, c))
    count++;
  return count;
}
250 
251 
/* Debugging aid: print on stdout how often needle occurs in haystack,
   as counted by count_str_occurrences() */
void
test_haystack(const char *haystack, const char *needle)
{
  int occurrences = count_str_occurrences(haystack, needle);

  printf("<%s> contains <%s> %d times\n", haystack, needle, occurrences);
}
255 
256 
257 
/* split_with("a bee    cee"," ") returns a pointer to an array {"a", "bee",  "cee", NULL} on the heap.
   Every element is a heap copy of its own */

char **
split_with(const char *string, const char *delimiters)
{
  /* tokens are never empty, so one slot per input character plus one for the NULL marker always suffices */
  char **list = mymalloc((1 + strlen(string)) * sizeof(char **));
  char **next_slot = list;
  char *token = mystrtok(string, delimiters);

  while (token != NULL) {
    *next_slot++ = mysavestring(token);
    token = mystrtok(NULL, delimiters);
  }
  *next_slot = NULL;
  return list;
}
271 
/* unsplit_with(3, ["a", "bee", "cee"], "; ") returns a pointer to "a; bee; cee" on the heap */
/* takes n elements from strings (or all of them, up to the NULL marker, if n < 0) */
char *
unsplit_with(int n, char **strings, const char *delim)
{
  int i;
  char *result;

  /* Nothing to join. With n < 0 an empty list has to be caught right here:
     the loop below starts by inspecting strings[1], which lies beyond the
     NULL marker of an empty list */
  if (n == 0 || (n < 0 && strings[0] == NULL))
    return mysavestring("");

  result = mysavestring(strings[0] ? strings[0] : "");
  for (i = 1; (n < 0 && strings[i]) || i < n; i++) {
    result = append_and_free_old(result, delim);
    result = append_and_free_old(result, strings[i]);
  }
  return result;
}
284 
285 /* split_with("a\t\tbla", '\t') returns {"a" "bla", NULL}, but we want {"a", "", "bla", NULL} for filter completion.
286    We write a special version (can be freed with free_splitlist), that optionally checks the number of components (if expected_count > 0) */
split_on_single_char(const char * string,char c,int expected_count)287 char **split_on_single_char(const char *string, char c, int expected_count) {
288   /* the 1st +1 for the last element ("bla"), the 2nd +1 for the marker element (NULL) */
289   char **list = mymalloc((count_char_occurrences(string,c) + 1 + 1) * sizeof(char **));
290   char *stringcopy = mysavestring(string);
291   char *p, **pword, *current_word;
292 
293   for (pword = list, p = current_word = stringcopy;
294        *p; p++) {
295     if (*p == c) {
296       *p = '\0';
297       *pword++ = mysavestring(current_word);
298       current_word = p+1;
299     }
300   }
301   *pword++ = mysavestring(current_word);
302   if (expected_count  && pword-list != expected_count)
303     myerror(FATAL|NOERRNO, "splitting <%s> on single %s yields %d components, expected %d",
304             mangle_string_for_debug_log(string, 50), mangle_char_for_debug_log(c, 1), pword -  list, expected_count);
305   *pword = NULL;
306   free(stringcopy);
307   return list;
308 }
309 
/* free_splitlist(list) frees list's components and then list itself. list must be NULL-terminated.
   A NULL list is accepted and ignored, just like free(NULL) */
void
free_splitlist(char **list)
{
  char **p;

  if (list == NULL)
    return;
  for (p = list; *p != NULL; p++)
    free(*p);
  free(list);
}
317 
318 
319 /* search_and_replace() is a utilty for handling multi-line input
320    (-m option), keeping track of the cursor position on rlwraps prompt
321    in order to put the cursor on the very same spot in the external
322    editor For example, when using NL as a newline substitute (rlwrap
323    -m NL <command>):
324 
325    search_and_replace("NL", "\n", "To be NL ... or not to be", 11,
326    &line, &col) will return "To be \n ... or not to be", put 2 in line
327    and 3 in col because a cursor position of 11 in "To be NL ..."
328    corresponds to the 3rd column on the 2nd line in "To be \n ..."
329 
330    cursorpos, col and line only make sense if repl == "\n", otherwise
331    they may be 0/NULL (search_and_replace works for any repl). The
332    first position on the string corresponds to cursorpos = col = 0 and
333    line = 1.
334 */
335 
336 
337 char *
search_and_replace(char * patt,const char * repl,const char * string,int cursorpos,int * line,int * col)338 search_and_replace(char *patt, const char *repl, const char *string, int cursorpos,
339                    int *line, int *col)
340 {
341   int i, j, k;
342   int pattlen = strlen(patt);
343   int replen = strlen(repl);
344   int stringlen = strlen(string);
345   int cursor_found = FALSE;
346   int current_line = 1;
347   int current_column = 0;
348   size_t scratchsize;
349   char *scratchpad, *result;
350 
351   assert(patt && repl && string);
352   DPRINTF2(DEBUG_READLINE, "string=%s, cursorpos=%d",
353            M(string), cursorpos);
354   scratchsize = max(stringlen, (stringlen * replen) / pattlen) + 1;     /* worst case : repleng > pattlen and string consists of only <patt> */
355   DPRINTF1(DEBUG_READLINE, "Allocating %d bytes for scratchpad", (int) scratchsize);
356   scratchpad = mymalloc(scratchsize);
357 
358 
359   for (i = j = 0; i < stringlen;  ) {
360     if (line && col &&                           /* if col and line are BOTH non-NULL, and .. */
361         i >= cursorpos && !cursor_found) {       /*  ... for the first time, i >= cursorpos: */
362       cursor_found = TRUE;                       /* flag that we're done here */
363       *line = current_line;                      /* update *line */
364       *col = current_column;                     /* update *column */
365     }
366     if (strncmp(patt, string + i, pattlen) == 0) { /* found match */
367       i += pattlen;                                /* update i ("jump over" patt (and, maybe, cursorpos)) */
368       for (k = 0; k < replen; k++)                 /* append repl to scratchpad */
369         scratchpad[j++] = repl[k];
370       current_line++;                              /* update line # (assuming that repl = "\n") */
371       current_column = 0;                          /* reset column */
372     } else {
373       scratchpad[j++] = string[i++];               /* or else just copy things */
374       current_column++;
375     }
376   }
377   if (line && col)
378     DPRINTF2(DEBUG_READLINE, "line=%d, col=%d", *line, *col);
379   scratchpad[j] = '\0';
380   result = mysavestring(scratchpad);
381   free(scratchpad);
382   return (result);
383 }
384 
385 
/* first_of(&string_array) returns the first non-NULL element of string_array.
   The search has no end marker to stop at, so string_array has to contain
   at least one non-NULL element */
char *
first_of(char **strings)
{
  char **candidate = strings;

  while (*candidate == NULL)
    candidate++;
  return *candidate;
}
395 
396 
/* allocate string representation of an integer on the heap */
char *
as_string(int i)
{
  /* NOTE(review): MAXDIGITS is no longer used in this function. The #define is kept
     because it stays visible until the end of the file, in case anything further down relies on it */
#define MAXDIGITS 10 /* let's pray no-one edits multi-line input more than 10000000000 lines long :-) */

  /* Each byte of an int is good for less than 3 decimal digits, so
     3 * sizeof(int) digits plus a '-' sign plus the final '\0' always fit.
     (10 characters don't: "-2147483648" already takes 11) */
  size_t bufsize = 3 * sizeof(int) + 2;
  char *newstring = mymalloc(bufsize);

  snprintf1(newstring, bufsize, "%d", i);
  return (newstring);
}
407 
408 
409 
/* mangle_char_for_debug_log(c, quote_me): return a heap string with a
   readable rendering of c: a symbolic name like <NL> or <ESC> for a few
   well-known characters, <CTRL-X> for the remaining control characters
   1 ... 31, and c itself (between double quotes if quote_me is non-zero)
   for everything else */
char *
mangle_char_for_debug_log(char c, int quote_me)
{
  char rendering[10];
  char shown;
  char *format;
  const char *above_escape = "\\]^_";   /* names for the control characters 28 ... 31 */

  switch (c) {
  case 0:   return mysavestring("<NUL>");
  case 8:   return mysavestring("<BS>");
  case 9:   return mysavestring("<TAB>");
  case 10:  return mysavestring("<NL>");
  case 13:  return mysavestring("<CR>");
  case 27:  return mysavestring("<ESC>");
  case 127: return mysavestring("<DEL>");
  default:  break;
  }

  if (c > 0 && c < 27) {
    format = "<CTRL-%c>";
    shown = c + 64;             /* 1 -> 'A', 2 -> 'B', ... */
  } else if (c > 27 && c < 32) {
    format = "<CTRL-%c>";
    shown = above_escape[c - 28];
  } else if (quote_me) {
    format = "\"%c\"";
    shown = c;
  } else {
    format = "%c";
    shown = c;
  }
  snprintf1(rendering, sizeof(rendering), format, shown);
  return mysavestring(rendering);
}
438 
439 
440 /* mangle_string_for_debug_log(string, len) returns a printable
441    representation of string for the debug log. It will truncate a
442    resulting string longer than len, appending three dots ...  */
443 
444 char *
mangle_string_for_debug_log(const char * string,int maxlen)445 mangle_string_for_debug_log(const char *string, int maxlen)
446 {
447   int total_length;
448   char *mangled_char, *result;
449   const char *p; /* good old K&R-style *p. I have become a fossil... */
450   MBSTATE st;
451 
452   if (!string)
453     return mysavestring("(null)");
454   result = mysavestring("");
455   for(mbc_initstate(&st), p = string, total_length = 0; *p;  mbc_inc(&p, &st)) {
456     if (is_multibyte(p, &st)) {
457         mangled_char = mbc_first(p, &st);
458         total_length += 1;
459       } else {
460         mangled_char = mangle_char_for_debug_log(*p, FALSE);
461         total_length +=  strlen(mangled_char); /* can be more than 1 ("CTRL-A") */
462       }
463     if (maxlen && (total_length > maxlen)) {
464       result = append_and_free_old(result, "...");
465       break;  /* break *before* we append the latest char and exceed maxlen */
466     }
467     result = append_and_free_old(result, mangled_char);
468     free(mangled_char);
469   }
470   return result;
471 }
472 
473 
474 
475 
mangle_buffer_for_debug_log(const char * buffer,int length)476 char *mangle_buffer_for_debug_log(const char *buffer, int length) {
477   char *string = mymalloc(length+1);
478   int debug_saved = debug; /* needed by macro MANGLE_LENGTH */
479   memcpy(string, buffer, length);
480   string[length] = '\0';
481   return mangle_string_for_debug_log(string, MANGLE_LENGTH);
482 }
483 
/* mem2str(mem, size) returns a fresh string representation of mem where all 0 bytes have been
   replaced by the two characters "\\0" */
char *
mem2str(const char *mem, int size)
{
  char *str = mymalloc(2 * size + 1);   /* worst case: "\0\0\0\0.." */
  int in, out = 0;

  for (in = 0; in < size; in++) {
    if (mem[in] == '\0') {
      str[out++] = '\\';
      str[out++] = '0';
    } else {
      str[out++] = mem[in];
    }
  }
  str[out] = '\0';
  return str;
}
500 
501 
/* mystrstr(haystack, needle): plain function wrapper around strstr() */
char *
mystrstr(const char *haystack, const char *needle)
{
  char *found = strstr(haystack, needle);

  return found;
}
507 
508 
509 
scan_metacharacters(const char * string,const char * metacharacters)510 int scan_metacharacters(const char* string, const char *metacharacters) {
511   const char *c;
512   for (c = metacharacters; *c; c++)
513     if (strchr(string, *c))
514       return TRUE;
515   return FALSE;
516 }
517 
518 /* allocate and init array of 4 strings (helper for munge_line_in_editor() and filter) */
519 char **
list4(char * el0,char * el1,char * el2,char * el3)520 list4 (char *el0, char *el1, char *el2, char *el3)
521 {
522   char **list = mymalloc(4*sizeof(char*));
523   list[0] = el0;
524   list[1] = el1;
525   list[2] = el2;
526   list[3] = el3;
527   DPRINTF4(DEBUG_AD_HOC, "el0: <%s> el1: <%s> el2: <%s> el3: <%s>", el0, el1, el2, el3);
528   return list;
529 }
530 
531 
532 /* remove_padding_and_terminate(buf, N) overwrites buf with a copy of
533    its first N bytes, omitting any zero bytes, and then terminates the
534    result with a final zero byte.  Example: if buf="a\0b\0\0cde@#!" then,
535    after calling remove_padding_and_terminate(buf, 8) buf will contain
536    "abcde\0de@#!"
537 
538    We need to call this function on everything we get from the
539    inferior command because (out of sheer programmer laziness) rlwrap
540    uses C strings internally. Zero bytes are only ever used as padding
541    (@@@ I have never seen this happen, by the way), and padding is not
542    used anymore on modern terminals. (except maybe for things like the
543    visual bell) */
544 
545 
remove_padding_and_terminate(char * buf,int length)546 void remove_padding_and_terminate(char *buf, int length) {
547   char *readptr, *copyptr;
548 
549   for (readptr = copyptr = buf; readptr < buf + length; readptr++) {
550     if (*readptr != '\0')
551       *copyptr++ = *readptr;
552   }
553   *copyptr = '\0';
554   if (debug && strlen(buf) != (unsigned int) length)
555     DPRINTF2(DEBUG_TERMIO, "removed %d zero bytes (padding?) from %s", length - (int) strlen(buf), M(buf));
556 }
557 
558 #define ESCAPE  '\033'
559 #define BACKSPACE '\010'
560 #define CARRIAGE_RETURN  '\015'
561 
562 /* unbackspace(&buf) will overwrite buf (up to and including the first
563    '\0') with a copy of itself. Backspaces will move the "copy
564    pointer" one backwards, carriage returns will re-set it to the
   beginning of buf.  Because the re-written string is always shorter
566    than the original, we need not worry about writing outside buf
567 
568    Example: if buf="abc\bd\r\e" then, after calling unbackspace(buf),
569    buf will contain "ebd" @@@ Should be just "e"
570 
571    We need this function because many commands emit "status lines"
572    using backspaces and carriage returns to re-write parts of the line in-place.
   Rlwrap will consider such lines as "prompts" (@@@maybe it shouldn't?)
574    but mayhem results if we feed the \b and \r characters to readline
575 */
576 
577 void
unbackspace_old(char * buf)578 unbackspace_old(char* buf) {
579   char *readptr, *copyptr, *endptr;
580   int seen_bs_or_cr;
581 
582   DPRINTF1(DEBUG_TERMIO,"unbackspace: %s", M(buf));
583   seen_bs_or_cr = FALSE;
584 
585   for (readptr = copyptr = endptr = buf; *readptr; readptr++) {
586 
587     assert(endptr <= readptr);
588     assert(copyptr <= endptr);
589 
590     switch (*readptr) {
591     case BACKSPACE:
592       copyptr = (copyptr > buf ? copyptr - 1 : buf);  /* cannot backspace past beginning of buf */
593       seen_bs_or_cr = TRUE;
594       break;
595     case CARRIAGE_RETURN:
596       copyptr = buf;
597       seen_bs_or_cr = TRUE;
598       break;
599     default:
600       *copyptr++ = *readptr;
601       break;
602     }
603     if (copyptr > endptr)
604       endptr = copyptr;
605   }
606   *endptr = '\0';
607   if (seen_bs_or_cr)
608       DPRINTF1(DEBUG_TERMIO,"unbackspace result: %s", M(buf));
609 }
610 
611 
612 
613 
614 void
unbackspace(char * buf)615 unbackspace(char* buf) {
616   char *readptr, *copyptr;
617   int seen_bs_or_cr;
618 
619   DPRINTF1(DEBUG_TERMIO,"unbackspace: %s", M(buf));
620   seen_bs_or_cr = FALSE;
621 
622   for (readptr = copyptr = buf; *readptr; readptr++) {
623     switch (*readptr) {
624     case BACKSPACE:
625       copyptr = (copyptr > buf ? copyptr - 1 : buf);  /* cannot backspace past beginning of buf          */
626       while (*copyptr == RL_PROMPT_END_IGNORE && copyptr > buf) {    /* skip control codes  ...          */
627         while (*copyptr != RL_PROMPT_START_IGNORE && copyptr > buf)  /* but never past buffer start      */
628           copyptr--;                                                 /* e.g. with pathological "x\002\b" */
629         if (copyptr > buf)
630           copyptr--;
631       }
632       seen_bs_or_cr = TRUE;
633       break;
634     case CARRIAGE_RETURN:
635       copyptr = buf;
636       seen_bs_or_cr = TRUE;
637       break;
638     default:
639       *copyptr++ = *readptr;
640       break;
641     }
642   }
643   *copyptr = '\0';
644   if (seen_bs_or_cr)
645       DPRINTF1(DEBUG_TERMIO,"unbackspace result: %s", M(buf));
646 
647 }
648 
649 
650 #ifdef UNIT_TEST
651 
652 static
test_unbackspace(const char * input,const char * expected_result)653 void test_unbackspace (const char *input, const char *expected_result) {
654   char *scrap = mysavestring(input);
655   unbackspace(scrap);
656   if (strcmp(scrap, expected_result) != 0)
657     myerror(FATAL|NOERRNO, "unbackspace '%s' yielded '%s', expected '%s'",
658               mangle_string_for_debug_log(input,0),
659               mangle_string_for_debug_log(scrap,0),
660               expected_result);
661 }
662 
/*  run with: make clean ; make CFLAGS='-g -DUNIT_TEST=test'; ./rlwrap <argv> */
/*  Test entry point: runs a single unbackspace() check and then exits with status 0.
    NOTE(review): the expected result "sssssssss" consists of characters that don't occur in the
    input, while unbackspace() only ever copies input characters - presumably a placeholder that
    makes the check fail on purpose, so that the actual result shows up in the error message.
    TODO confirm, and replace by the intended result */
TESTFUNC(test, argc, argv, stage) {
  ONLY_AT_STAGE(TEST_AT_PROGRAM_START );
  test_unbackspace("zx\001a\002\bq","sssssssss");
  exit(0);
}
669 
670 #endif
671 
672 
673 
674 /* Readline allows to single out character sequences that take up no
675    physical screen space when displayed by bracketing them with the
676    special markers `RL_PROMPT_START_IGNORE' and `RL_PROMPT_END_IGNORE'
677    (declared in `readline.h').
678 
679    mark_invisible(buf) returns a new copy of buf with sequences of the
680    form ESC[;0-9]*m? marked in this way.
681 
682 */
683 
684 /*
685   (Re-)definitions for testing
686   #undef RL_PROMPT_START_IGNORE
687   #undef  RL_PROMPT_END_IGNORE
688   #undef isprint
689   #define isprint(c) (c != 'x')
690   #define RL_PROMPT_START_IGNORE '('
691   #define RL_PROMPT_END_IGNORE ')'
692   #define ESCAPE 'E'
693 */
694 
695 
696 /* TODO @@@ replace the following obscure and unsafe functions using the regex library */
697 static void  copy_ordinary_char_or_ESC_sequence(const char **original, char **copy);
698 static void match_and_copy(const char *charlist, const char **original, char **copy);
699 static int matches (const char *charlist, char c) ;
700 static void copy_next(int n, const char **original, char **copy);
701 
702 char *
mark_invisible(const char * buf)703 mark_invisible(const char *buf)
704 {
705   int padsize =  (assert(buf != NULL), (3 * strlen(buf) + 1)); /* worst case: every char in buf gets surrounded by RL_PROMPT_{START,END}_IGNORE */
706   char *scratchpad = mymalloc (padsize);
707   char *result = scratchpad;
708   const char **original = &buf;
709   char **copy = &scratchpad;
710   DPRINTF1(DEBUG_AD_HOC, "mark_invisible(%s) ...", M(buf));
711 
712   if (strchr(buf, RL_PROMPT_START_IGNORE))
713     return mysavestring(buf); /* "invisible" parts already marked */
714 
715   while (**original) {
716     copy_ordinary_char_or_ESC_sequence(original, copy);
717     assert(*copy - scratchpad < padsize);
718   }
719   **copy = '\0';
720   DPRINTF1(DEBUG_AD_HOC, "mark_invisible(...) = <%s>", M(result));
721   return(result);
722 }
723 
724 
725 
726 
727 static void
copy_ordinary_char_or_ESC_sequence(const char ** original,char ** copy)728 copy_ordinary_char_or_ESC_sequence (const char **original, char **copy)
729 {
730   if (**original != ESCAPE || ! matches ("[]", *(*original + 1))) {
731     copy_next(1, original, copy);
732     return;       /* not an ESC[ sequence */
733   }
734   *(*copy)++ = RL_PROMPT_START_IGNORE;
735   copy_next(2, original, copy);
736   match_and_copy(";0123456789", original, copy);
737   match_and_copy("m", original, copy);
738   *(*copy)++ = RL_PROMPT_END_IGNORE;
739 }
740 
/* Copy characters from *original to *copy (advancing both pointers) for as long as the next
   character of *original is found in charlist */
static void
match_and_copy(const char *charlist, const char **original, char **copy)
{
  for (;;) {
    if (!matches(charlist, **original))
      break;
    **copy = **original;
    (*copy)++;
    (*original)++;
  }
}
747 
748 static int
matches(const char * charlist,char c)749 matches (const char *charlist, char c)
750 {
751   const char *p;
752   for (p = charlist; *p; p++)
753     if (*p == c)
754       return TRUE;
755   return FALSE;
756 }
757 
758 
/* Copy at most n characters from *original to *copy, advancing both pointers, and stopping
   early at the '\0' that ends *original (which is not copied) */
static void
copy_next(int n, const char **original, char **copy)
{
  int remaining = n;

  while (**original != '\0' && remaining > 0) {
    *(*copy)++ = *(*original)++;
    remaining--;
  }
}
766 
767 
768 
769 
770 /* helper function: returns the number of displayed characters (the "colourless length") of str (which has to have its
771    unprintable sequences marked with RL_PROMPT_*_IGNORE).  Puts a copy without the RL_PROMPT_*_IGNORE characters in
772    *copy_without_ignore_markers (if != NULL)
773 */
774 
/* colourless_strlen(str, pcopy_without_ignore_markers, termwidth): returns the number of
   character cells that remain after skipping everything between RL_PROMPT_START_IGNORE and
   RL_PROMPT_END_IGNORE and after applying the carriage returns and backspaces found outside
   such sequences. If pcopy_without_ignore_markers != NULL, a heap string with the remaining
   characters is stored in *pcopy_without_ignore_markers. termwidth is not used */
int
colourless_strlen(const char *str, char ** pcopy_without_ignore_markers, int UNUSED(termwidth))
{
  int visible  = TRUE;
  int length   = strlen(str);
  int i, colourless_length, colourless_bytes;
  const char *str_ptr, *p;
  char *copy_ptr, *copy_without_ignore_markers;
  MBSTATE st;

  /* one cell per copied character: its bytes (a heap string) and the shift state stored with it */
  typedef struct {
    char *bytes;
    MBSTATE state;
  } mbchar_cell;

  mbchar_cell *cellptr, *copied_cells = mymalloc((length + 1)  * sizeof(mbchar_cell));

  /* The next loop scans str, one multi-byte character at a time, constructing a colourless copy  */
  /* cellptr always points at the next available free cell                                        */
  /* NB: every "continue" inside the switch skips the copying code below it and goes straight to  */
  /* the mbc_inc() of the for statement                                                           */
  for(mbc_initstate(&st), str_ptr = str, colourless_bytes = 0, cellptr = copied_cells;
      *str_ptr;
      mbc_inc(&str_ptr, &st)) {
    assert (cellptr < copied_cells + length);
    switch (*str_ptr) {
    case RL_PROMPT_START_IGNORE:
      visible = FALSE;
      continue;
    case RL_PROMPT_END_IGNORE:
      visible = TRUE;
      continue;
    case '\r':
      if (visible) {                  /* only ever interpret CR (and Backspace) when visible (i.e. outside control sequences) */
        for ( ; cellptr > copied_cells; cellptr--)
          free((cellptr-1)->bytes);   /* free all cells            */
        mbc_initstate(&st);           /* restart with virgin state */
        colourless_bytes = 0;
      }
      continue;
    case '\b':
      if ((visible && cellptr > copied_cells)) {     /* except when invisible, or at beginning of copy ... */
        cellptr -= 1;                                /* ... reset cellptr to previous (multibyte) char     */
        colourless_bytes -= strlen(cellptr->bytes);
        free(cellptr->bytes);
        if (cellptr > copied_cells)
          st = (cellptr -1) -> state;                /* restore corresponding shift state                  */
        else
          mbc_initstate(&st);                        /* or initial state, if at start of line              */
      }
      continue;
    }
    /* any other character: copied into a new cell when visible, silently skipped otherwise */
    if (visible) {
      MBSTATE st_copy   = st;         /* mbc_charwidth() gets a copy, so that st itself is left alone here */
      int nbytes        = mbc_charwidth(str_ptr, &st_copy);
      char *q           = cellptr -> bytes = mymalloc(1 + nbytes);

      colourless_bytes += nbytes;
      mbc_copy(str_ptr,&q, &st); /* copy the possibly multi-byte character at str_ptr to cellptr -> bytes , incrementing q to just past the copy */
      *q                = '\0';
      cellptr -> state  = st; /* remember shift state after reading str_ptr, just in case a backspace comes along later */
      cellptr           += 1;
    }
  } /* end of for loop */

  colourless_length = cellptr - copied_cells;   /* number of cells that survived */

  /* glue the bytes of all surviving cells together, freeing each cell's bytes along the way */
  copy_without_ignore_markers = mymalloc(colourless_bytes + 1);
  for (cellptr = copied_cells, copy_ptr = copy_without_ignore_markers, i = 0; i < colourless_length; i++, cellptr++) {
    for(p = cellptr->bytes; *p; p++, copy_ptr++)
      *copy_ptr = *p;
    free(cellptr->bytes);
  }
  *copy_ptr = '\0';
  free(copied_cells);


  DPRINTF4(DEBUG_READLINE, "colourless_strlen(\"%s\", \"%s\") = %d  chars, %d  bytes",
           M(str), copy_without_ignore_markers, colourless_length, colourless_bytes);

  /* hand the copy over to the caller if it was asked for, otherwise dispose of it */
  if (pcopy_without_ignore_markers)
    *pcopy_without_ignore_markers = copy_without_ignore_markers;
  else
    free(copy_without_ignore_markers);



  return colourless_length;
}
862 
/* helper function: returns the number of displayed characters (the
   "colourless length") of str. In contrast to colourless_strlen(),
   str is passed in *unmarked*: it is first run through mark_invisible(),
   the resulting copy is measured by colourless_strlen() and then freed.

   Until rlwrap 0.44, this function didn't take wide characters into
   consideration, causing problems with long prompts containing wide characters.
*/



int
colourless_strlen_unmarked (const char *str, int termwidth)
{
  char *marked_copy  = mark_invisible(str);
  int visible_length = colourless_strlen(marked_copy, NULL, termwidth);  /* NULL: we don't need the colourless copy itself */

  free(marked_copy);
  return visible_length;
}
881 
882 
/* skip a maximal number (possibly zero) of termwidth-wide
   initial segments of long_line and return the remainder
   (i.e. the last line of long_line on screen)
   The result is always a fresh copy, never long_line itself.
   If long_line is wider than termwidth and contains an ESC character,
   we give up and return a copy of the placeholder "Ehhmm..? > " */


char *
get_last_screenline(char *long_line, int termwidth)
{
  char *whole_line = mysavestring(long_line);
  int   length     = strlen(whole_line);
  int   skipped;
  char *tail;

  /* termwidth == 0 may be the case on some weird systems; otherwise the
     line doesn't extend beyond the right margin
     @@@ are there terminals that put the cursor on the
     next line if length == termwidth?? */
  if (termwidth == 0 || length <= termwidth)
    return whole_line;

  if (strchr(long_line, '\033')) {      /* <ESC> found, give up */
    free (whole_line);
    return mysavestring("Ehhmm..? > ");
  }

  skipped = (length / termwidth) * termwidth;   /* integer arithmetic: 33/10 = 3 */
  tail    = mysavestring(whole_line + skipped);
  free(whole_line);
  return tail;
}
914 
/* lowercase(str) returns a lowercased copy of str. The copy is made with
   mysavestring(), str itself is not modified; the caller frees the result */
char *
lowercase(const char *str) {
  char *result, *p;
  result = mysavestring(str);
  for (p=result; *p; p++)
    *p = (char) tolower((unsigned char) *p);  /* <ctype.h> functions want an unsigned char value (or EOF):
                                                 a negative plain char would be undefined behaviour     */
  return result;
}
924 
925 
926 
927 char *
colour_name_to_ansi_code(const char * colour_name)928 colour_name_to_ansi_code(const char *colour_name) {
929   if (colour_name  && *colour_name && isalpha(*colour_name)) {
930     char *lc_colour_name = mysavestring(lowercase(colour_name));
931     char *bold_code = (isupper(*colour_name) ? "1" : "0");
932 
933 #define isit(c) (strcmp(c,lc_colour_name)==0)
934     char *colour_code =
935       isit("black")   ? "30" :
936       isit("red")     ? "31" :
937       isit("green")   ? "32" :
938       isit("yellow")  ? "33" :
939       isit("blue")    ? "34" :
940       isit("magenta") ? "35" :
941       isit("purple")  ? "35" :
942       isit("cyan")    ? "36" :
943       isit("white")   ? "37" :
944       NULL ;
945 
946 #undef isit
947     if (colour_code)
948       return add3strings(bold_code,";",colour_code);
949     else
950       myerror(FATAL|NOERRNO, "unrecognised colour name '%s'. Use e.g. 'yellow' or 'Blue'.", colour_name);
951   }
952   return mysavestring(colour_name);
953 }
954 
955 
956 
957 
958 
959 
960 
961 
962 /* returns TRUE if string is numeric (i.e. positive or negative integer), otherwise FALSE */
isnumeric(char * string)963 int isnumeric(char *string){
964   char *pstr = string;
965   if (*pstr == '-')  /* allow negative numbers */
966     pstr++;
967   while (*pstr != '\0')
968     if (!isdigit(*pstr++)) return FALSE;
969 
970   return TRUE;
971 }
972 
#define DIGITS_NUMBER 8  /* number of (hex) digits of the length that precedes each message field; the largest length we accept is MAX_FIELD_LENGTH */
#define MAX_FIELD_LENGTH ((1UL <<  (DIGITS_NUMBER * 4 - 1)) -1) /* 2^(4 * DIGITS_NUMBER - 1) - 1, i.e. 0x7fffffff for 8 digits: one bit less than DIGITS_NUMBER (hex)digits could express */
#define MY_HEX_FORMAT(n) ("%0" MY_ITOA(n) "x") /* printf format for a zero-padded hex number of n digits: MY_HEX_FORMAT(DIGITS_NUMBER) is "%08x" (n is macro-expanded before MY_ITOA stringifies it) */
#define MY_ITOA(n) #n /* stringify n */
977 
978 
979 /* fussy strtol with error checking */
980 long
mystrtol(const char * nptr,int base)981 mystrtol(const char *nptr, int base)
982 {
983   char *endptr;
984   long result;
985 
986   errno = 0;
987   result = strtol(nptr, &endptr, base);
988   if (*endptr != '\0')
989     myerror(FATAL|NOERRNO, "invalid representation %s", nptr);
990   if (errno != 0)
991     myerror(FATAL|USE_ERRNO, "strtol error");
992 
993   return result;
994 }
995 
996 
997 
998 
999 /* Encode a length as a string on the heap */
1000 static char *
encode_field_length(int length)1001 encode_field_length(int length)
1002 {
1003   char *encoded_length = mymalloc(DIGITS_NUMBER+1);
1004   sprintf(encoded_length, MY_HEX_FORMAT(DIGITS_NUMBER), length);
1005   return encoded_length;
1006 }
1007 
1008 
1009 /* decode first length field in a message of form "<length 1> <message 1> <length 2> ...."
1010    and advance pointer *ppmessage to the start of <message 1> */
1011 static int
decode_field_length(char ** ppmessage)1012 decode_field_length(char** ppmessage)
1013 {
1014   char hex_string[DIGITS_NUMBER+1];
1015   long length;
1016 
1017   mystrlcpy(hex_string, *ppmessage, DIGITS_NUMBER+1);
1018   length = mystrtol(hex_string, 16);
1019   *ppmessage += DIGITS_NUMBER;
1020   return length;
1021 }
1022 
1023 
/* Test an invariant: decoding an encoded length yields the original length,
   and consumes the encoded string completely */
void
test_field_length_encoding()
{
  int testval = 1423722;
  char *encoded = encode_field_length(testval);
  char *cursor = encoded;          /* decode_field_length() advances its argument: keep the original pointer, to be able to free it */
  int decoded = decode_field_length(&cursor);

  assert(decoded == testval && *cursor == '\0');
  (void) decoded;                  /* silence "unused variable" when compiled with NDEBUG */
  free(encoded);
}
1032 
1033 /* Append <field_length> <field> to message and return the result (after freeing the original message)
1034 <field_length> is a string representation of <field>s length
1035 message can be empty, or, equivalently, NULL.
1036 */
1037 char *
append_field_and_free_old(char * message,const char * field)1038 append_field_and_free_old(char *message, const char *field)
1039 {
1040   long unsigned int length = strlen(field);
1041   char *encoded_length;
1042   if (length > MAX_FIELD_LENGTH)
1043     myerror(FATAL|NOERRNO, "message field\"%s...\" has length %ld, it should be less than %ld",
1044             mangle_string_for_debug_log(field, 10), length,  MAX_FIELD_LENGTH);
1045   encoded_length =  encode_field_length(length);
1046   message = append_and_free_old(message, encoded_length);
1047   message = append_and_free_old(message, field);
1048   free(encoded_length);
1049   return message;
1050 }
1051 
1052 
1053 
1054 char *
merge_fields(char * field,...)1055 merge_fields(char *field, ...)
1056 {
1057   char *varg = field;
1058   char *message = NULL;
1059   va_list vargs;
1060 
1061   va_start(vargs, field);
1062   while (varg != END_FIELD) {
1063     message = append_field_and_free_old(message, varg);
1064     varg = va_arg(vargs, char*);
1065   }
1066   va_end(vargs);
1067   return message;
1068 }
1069 
1070 
1071 
1072 /*
1073 split a message of a string:
1074 
1075     <field_length1> <field1> <field_length 2> <field2>...
1076 
1077 into:
1078 
1079     [<field1>, <field2>, ...]
1080 */
1081 
1082 
1083 char **
split_filter_message(char * message,int * counter)1084 split_filter_message(char *message, int *counter)
1085 {
1086   char *pmessage = message;
1087   int message_length = strlen(message);
1088   char **list, **plist;
1089   int nfields = 0;
1090 
1091   static int smallest_message_size = 0;
1092 
1093   if (smallest_message_size == 0)
1094     smallest_message_size = strlen(append_field_and_free_old(NULL, "")); /* this assumes that the empty message is the smallest possible */
1095 
1096   assert(smallest_message_size > 0);
1097   plist = list = mymalloc(sizeof(char*) * (1 + strlen(message)/smallest_message_size )); /* worst case: "0000000000000000000000" */
1098   nfields = 0;
1099 
1100   while(!(*pmessage == '\0')) {
1101     long length = decode_field_length(&pmessage);
1102 
1103     /* cut out a field from the head of the message: */
1104     char *field = mymalloc(sizeof(char) * (length+1));
1105     mystrlcpy(field, pmessage, length+1);
1106     *plist++ = field;
1107     pmessage += length;
1108     nfields++;
1109     if (pmessage > message + message_length)
1110       myerror(FATAL|NOERRNO, "malformed message; %s", mangle_string_for_debug_log(message, 256));
1111   }
1112   if (counter)
1113     *counter =  nfields;
1114   *plist = 0;
1115   return list;
1116 }
1117 
1118 
1119 
1120 #ifndef HAVE_REGEX_H
/* Without <regex.h> we can neither protect nor clean up anything: just return a
   copy of prompt. Signature and ownership rules are the same as for the regex
   version below: the result is always a fresh string, and prompt is freed
   if (and only if) free_prompt is set */
char *protect_or_cleanup(char *prompt, bool free_prompt) {
  char *copy = mysavestring(prompt); /*essentially a NOP */
  if (free_prompt)
    free(prompt);
  return copy;
}
1124 
1125 #else
1126 
1127 /* regcomp with error checking (and simpler signature) */
my_regcomp(const char * regex,int flags)1128 static regex_t *my_regcomp(const char*regex, int flags) {
1129   regex_t *compiled_regexp;
1130   int compile_error;
1131   if (!*regex)
1132     return NULL;
1133   compiled_regexp = mymalloc(sizeof(regex_t));
1134   compile_error = regcomp(compiled_regexp, regex, flags);
1135   if (compile_error) {
1136     int size = regerror(compile_error, compiled_regexp, NULL, 0);
1137     char *error_message =  mymalloc(size);
1138     regerror(compile_error, compiled_regexp, error_message, size);
1139     myerror(FATAL|NOERRNO, "(Internal error:) in regexp \"%s\": %s", mangle_string_for_debug_log(regex,256), error_message);
1140   }
1141   return compiled_regexp;
1142 }
1143 
1144 
#define TOKEN '@'    /* placeholder within the replacement string of replace_special(): every TOKEN is replaced by the matched text */
#define MAXGROUPS 2  /* number of groups in a pattern "(..)|(..)" as made by protect(); determines the size of the regmatch_t arrays handed to regexec() */
1147 
/* protect("foo", 'a', 'b') = "(a[^b]*b)|(foo)"
   i.e. a new regexp (on the heap) with two groups: the first one matches
   anything from protect_start up to and including the next protect_end,
   the second one is the original pattern */
char *protect(const char *pattern, char protect_start, char protect_end) {
  size_t room = strlen(pattern) + 14;   /* 12 bytes of parentheses, brackets etc. + pattern + final zero byte fit in here */
  char *guarded_pattern = mymalloc(room);

  sprintf(guarded_pattern,"(%c[^%c]*%c)|(%s)", protect_start, protect_end, protect_end, pattern);
  return guarded_pattern;
}
1154 
1155 
1156 
1157 /* Substitute all occurences of the second group in a <compiled_pattern> "(..)|(..)" within <source> with <replacement>
1158    (any TOKENs within the replacement will be replaced by the match) , skipping everything that matches the first group
1159    (which can be said to "protect" against replacement) E.g:
1160 
1161        replace_special("a zzz b zzz", "(a[^a]*b)|(z+)", "(@)") = "a zzz b (zzz)"
1162 
1163    The role of a and b above will be played by the  RL_PROMPT_{START,END}_IGNORE characters                           */
1164 
replace_special(const char * source,regex_t * compiled_pattern,const char * replacement)1165 char *replace_special(const char *source, regex_t *compiled_pattern, const char*replacement) {
1166   const char *source_cursor;
1167   char *copy_with_replacements, *copy_cursor;
1168   int max_copylen;
1169 
1170   assert(source != NULL);
1171   max_copylen = 1 + max(1, strlen(replacement)) * strlen(source);
1172 
1173   if (!compiled_pattern) /* pattern == NULL: just return a copy */
1174     return mysavestring(source);
1175   copy_with_replacements = mymalloc(max_copylen); /* worst case: replace every char in source by replacement (+ 1 final zero byte)  */
1176   source_cursor = source;
1177   copy_cursor = copy_with_replacements;
1178 
1179   while(TRUE) {
1180     regmatch_t matches[MAXGROUPS + 1];          /* whole match + MAXGROUPS groups */
1181     if ((regexec(compiled_pattern, source_cursor, MAXGROUPS + 1, matches , 0) == REG_NOMATCH)) {   /* no (more) matches ...   */
1182       strcpy(copy_cursor,source_cursor);        /* .. copy remainder of source (may be empty), and we're done      */
1183       break;                                    /* 0-terminates copy, even if last match consumed source copletely */
1184     } else {
1185       int i; const char *p;
1186       int protected_start   = matches[1].rm_so;
1187       int protected_end     = matches[1].rm_eo;
1188       int protected_length  = protected_end - protected_start;
1189       int match_start       = matches[2].rm_so;
1190       int match_end         = matches[2].rm_eo;
1191       int match_length      = match_end - match_start;   /* Either the first ("protected") group in alternative_pattern matches, */
1192       assert(!(protected_end > -1 && match_end > -1));   /*  ... or the second (never both - that is the assertion here)         */
1193       assert(protected_length > 0 || match_length > 0);  /*  ... and that match cannot be empty                                  */
1194       if (protected_end > -1)                   /* If it is the first ...                                                        */
1195         for (i = 0; i< protected_end; i++)      /* copy until protected match ends                                               */
1196           *copy_cursor++ =  *source_cursor++;
1197       else {                                    /* if it is the second (the original pattern) ...    */
1198         for (i = 0; i< match_start; i++)        /* ... copy until match starts                       */
1199           *copy_cursor++ =  *source_cursor++;
1200         for(p = replacement; *p; p++)           /* splice replacement                                */
1201           if (*p == TOKEN) {                    /* where there is a TOKEN (may be more than one) ... */
1202             assert(copy_cursor + match_length - copy_with_replacements < max_copylen);
1203             for(i=0; i< match_length ; i++)     /* ... splice matched group                          */
1204               *copy_cursor++ = *(source_cursor + i);
1205           } else                                  /* otherwise, just copy replacement                  */
1206             *copy_cursor++ = *p;
1207         source_cursor += match_length;          /* finished replacing this match, try again          */
1208       }
1209     }
1210     *copy_cursor = '\0';
1211   }
1212   return copy_with_replacements;
1213 }
1214 
1215 
/* All the codes we want to preserve (i.e. keep and put between RL_PROMPT_{START,END}_IGNORE )    */
static
char *protected_codes[]  = { "\x1B\x1B", NULL};

/* As we don't protect (and hence will get rid of) ANSI colour codes when "bleaching" the prompt, */
/* specify separately:                                                                            */
static char *ansi_colour_code_regexp = "(\x1B\\[[0-9;]*m)";          /* colour codes              */



/* All the codes we want to get rid of. cf.                                                                        */
/* https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python */
/* We only cover 7-bit C1 ANSI sequences; the 8-bit are not much used as they interfere with UTF-8, and we         */
/* don't need to absolutely catch everything anyway                                                                */
/* These are POSIX extended regexps, where a backslash has no special meaning within a bracket expression: a       */
/* literal ']' has to come first within the brackets, and a '?' outside brackets has to be escaped to be literal   */
static
char *unwanted_codes[] = { "(\x1B[ -/]*[0-Z\\\\-~])"           /* ANSI X3.41: ESC + I-pattern + F-pattern (except [, which is covered below) */
                           ,"(\x1B[]@-Z\\\\^_])"               /* C1_Fe: ESC + one of @-Z \ ] ^ _  (the ']' comes first, in order to be taken literally) */
                           ,"(\x1B\\[[0-9:;<>=?]*[ -/]*[@-~])" /* CSI: parameter bytes, intermediate bytes (space up to /), final byte  (overlaps with "protected" codes!) (is the meaning of [@-~] locale-dependent?) */
                           ,"(\x1B\\[\\?1h\x1B=)"              /* smkx keypad: ESC [ ? 1 h ESC = (with a literal '?') */
                           ,"(\x1B\\]0;[[:print:]]*\x07)"      /* tsl <window title> fsl */

                         ,NULL};
1238 
1239 
1240 
1241 
1242 /* mark protected codes between RL_PROMPT_{START,END}_IGNORE and erase unwanted codes */
protect_or_cleanup(char * prompt,bool free_prompt)1243 char *protect_or_cleanup(char *prompt, bool free_prompt) {
1244   char *result1, *result;
1245   static char *protected_codes_regexp ;
1246   static regex_t *compiled_and_protected_protected_codes_regexp;
1247   static char *protected_token;
1248   static char *unwanted_codes_regexp;
1249   static regex_t *compiled_and_protected_unwanted_codes_regexp;
1250 
1251   /* protect stuff we want to keep: */
1252 
1253   /* (once)construct the regexp for the protected codes: */
1254   if (!protected_codes_regexp) {
1255     protected_codes_regexp = unsplit_with(-1, &protected_codes[0], "|");
1256     if (!bleach_the_prompt)
1257       protected_codes_regexp = add3strings(protected_codes_regexp, "|", ansi_colour_code_regexp);
1258     compiled_and_protected_protected_codes_regexp = my_regcomp(protect(protected_codes_regexp, RL_PROMPT_START_IGNORE, RL_PROMPT_END_IGNORE), REG_EXTENDED);
1259   }
1260   /* (once) construct the replacement pattern */
1261   if (!protected_token) {
1262      protected_token = mymalloc(4); /* "\x01@\x02" */
1263      sprintf(protected_token,"%c%c%c",RL_PROMPT_START_IGNORE, TOKEN, RL_PROMPT_END_IGNORE);
1264   }
1265   result1 = replace_special(prompt, compiled_and_protected_protected_codes_regexp, protected_token);
1266 
1267   if (!unwanted_codes_regexp) {
1268     unwanted_codes_regexp = unsplit_with(-1, &unwanted_codes[0], "|");
1269     DPRINTF1(DEBUG_AD_HOC, "unwanted_codes_regexp: %s", M(unwanted_codes_regexp));
1270     compiled_and_protected_unwanted_codes_regexp =  my_regcomp(protect(unwanted_codes_regexp, RL_PROMPT_START_IGNORE, RL_PROMPT_END_IGNORE), REG_EXTENDED);
1271   }
1272   result = replace_special(result1, compiled_and_protected_unwanted_codes_regexp, "");
1273    DPRINTF2(DEBUG_READLINE, "protect_or_cleanup(%s) = %s", M(prompt), M(result));
1274   free(result1);
1275 
1276   if (free_prompt)
1277     free(prompt);
1278 
1279   return result;
1280 }
1281 
1282 
1283 /*
1284   returns TRUE if 'string' matches the 'regexp' (or is a superstring
1285   of it, when we don't HAVE_REGEX_H). The regexp is recompiled with
1286   every call, which doesn't really hurt as this function is not called
1287   often: at most twice for every prompt.  'string' and 'regexp' may be
1288   NULL (in which case FALSE is returned)
1289 
1290   Only used for the --forget-regexp and the --prompt-regexp options
1291 */
match_regexp(const char * string,const char * regexp,int case_insensitive)1292 int match_regexp (const char *string, const char *regexp, int case_insensitive) {
1293   int result = FALSE;
1294 
1295   if (!regexp || !string)
1296     return FALSE;
1297 
1298 #ifndef HAVE_REGEX_H
1299   {
1300     static int been_warned = 0;
1301     char *metachars = "*()+?";
1302     char *lc_string = (case_insensitive ? lowercase(string) : mysavestring(string));
1303     char *lc_regexp = (case_insensitive  ? lowercase(regexp) : mysavestring(regexp));
1304 
1305     if (scan_metacharacters(regexp, metachars) && !been_warned++) /* warn only once if the user specifies a metacharacter */
1306       myerror(WARNING|NOERRNO, "one of the regexp metacharacters \"%s\" occurs in regexp(?) \"%s\"\n"
1307              "  ...but on your platform, regexp matching is not supported!", metachars, regexp);
1308 
1309     result = mystrstr(lc_string, lc_regexp);
1310     free(lc_string);
1311     free(lc_regexp);
1312   }
1313 #else
1314   {
1315     regex_t *compiled_regexp = my_regcomp(regexp, REG_EXTENDED|REG_NOSUB|(case_insensitive ? REG_ICASE : 0));
1316     result = !regexec(compiled_regexp, string, 0, NULL, 0);
1317     regfree(compiled_regexp);
1318   }
1319 #endif
1320 
1321 
1322   return result;
1323 }
1324 
1325 
1326 
1327 #ifdef UNIT_TEST
1328 
1329 
TESTFUNC(test_subst,argc,argv,stage)1330 TESTFUNC(test_subst, argc, argv, stage) {
1331   ONLY_AT_STAGE(TEST_AFTER_OPTION_PARSING);
1332   while(TRUE) {
1333      regmatch_t matches[MAXGROUPS + 1];          /* whole match + MAXGROUPS groups */
1334      char *line_read = readline ("go ahead (string pattern): ");
1335      if (strings_are_equal(line_read, "stop"))
1336        break;
1337      char**components = split_with(line_read," ");
1338      if (!(components[0] && components[1]))
1339        continue;
1340      regex_t *re = my_regcomp(protect(components[1],'a','b'), REG_EXTENDED);
1341      bool does_match = regexec(re, components[0], MAXGROUPS + 1, matches , 0) != REG_NOMATCH;
1342      printf("%s\n", does_match  ? "JA" : "NEE");
1343      free_foreign(line_read);
1344   }
1345   cleanup_rlwrap_and_exit(EXIT_SUCCESS);
1346 }
1347 
1348 #endif
1349 
1350 #endif /* def HAVE_REGEX_H */
1351 
1352 
1353 /* scan blocks of client output for "cupcodes" that enter and exit the "alternate screen" and set the global variable screen_is_alternate accordingly */
check_cupcodes(const char * client_output)1354 void check_cupcodes(const char *client_output) {
1355   static char *still_unchecked; /* to avoid missing a cupcode that spans more than 1 read buffer, we keep the last few
1356                                    bytes in a static buffer (still_unchecked) , that we always prepend to the next incoming block */
1357   static int rmcup_len, smcup_len, max_cuplen;
1358   char *output_copy, *outputptr;
1359   int cut_here;
1360 
1361   if (!(commands_children_not_wrapped && term_smcup && term_rmcup))
1362     return; /* check is impossible or unnecessary */
1363 
1364   if (still_unchecked == NULL) { /* init static vars once */
1365     still_unchecked = mysavestring("");
1366     rmcup_len = strlen(term_rmcup);
1367     smcup_len = strlen(term_smcup);
1368     max_cuplen = max(rmcup_len, smcup_len);
1369   }
1370   outputptr = output_copy = append_and_free_old(still_unchecked, client_output);
1371 
1372   /* keep the very end for next time, as it might contain a partial cupcode */
1373   /* in the worst case we will notice the shortest of the two cupcodes twice */
1374   cut_here  = max(0, strlen(output_copy) - max_cuplen);
1375   still_unchecked = mysavestring (&output_copy[cut_here]);
1376 
1377 
1378   while (TRUE) {
1379     char *rmptr = strstr(outputptr, term_rmcup);
1380     char *smptr = strstr(outputptr, term_smcup);
1381     if (rmptr == smptr) {
1382       assert(rmptr == NULL); /* can only fail  if term_smcup is prefix of term_rmcup, or vice versa, which never happens as far as I know */
1383       break;
1384     } else if (rmptr > smptr) { /* rmcup and smcup may occur in the same block of output */
1385       DPRINTF0(DEBUG_READLINE, "Saw rmcup");
1386       screen_is_alternate = FALSE;
1387       outputptr = rmptr + rmcup_len; /* 1 past match */
1388     } else  {
1389       DPRINTF0(DEBUG_READLINE, "Saw smcup");
1390       screen_is_alternate = TRUE;
1391       outputptr = smptr + smcup_len;
1392     }
1393   }
1394   free(output_copy);
1395 }
1396