1 /*
2 ** $Id: lstrlib.c,v 1.1 2002/02/14 10:46:59 jcatki Exp $
3 ** Standard library for string operations and pattern-matching
4 ** See Copyright Notice in lua.h
5 */
6 
7 
8 #include <ctype.h>
9 #include <stddef.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "lua.h"
15 
16 #include "lauxlib.h"
17 #include "lualib.h"
18 
19 
20 
str_len(lua_State * L)21 static int str_len (lua_State *L) {
22   size_t l;
23   luaL_check_lstr(L, 1, &l);
24   lua_pushnumber(L, l);
25   return 1;
26 }
27 
28 
posrelat(long pos,size_t len)29 static long posrelat (long pos, size_t len) {
30   /* relative string position: negative means back from end */
31   return (pos>=0) ? pos : (long)len+pos+1;
32 }
33 
34 
str_sub(lua_State * L)35 static int str_sub (lua_State *L) {
36   size_t l;
37   const char *s = luaL_check_lstr(L, 1, &l);
38   long start = posrelat(luaL_check_long(L, 2), l);
39   long end = posrelat(luaL_opt_long(L, 3, -1), l);
40   if (start < 1) start = 1;
41   if (end > (long)l) end = l;
42   if (start <= end)
43     lua_pushlstring(L, s+start-1, end-start+1);
44   else lua_pushstring(L, "");
45   return 1;
46 }
47 
48 
str_lower(lua_State * L)49 static int str_lower (lua_State *L) {
50   size_t l;
51   size_t i;
52   luaL_Buffer b;
53   const char *s = luaL_check_lstr(L, 1, &l);
54   luaL_buffinit(L, &b);
55   for (i=0; i<l; i++)
56     luaL_putchar(&b, tolower((unsigned char)(s[i])));
57   luaL_pushresult(&b);
58   return 1;
59 }
60 
61 
str_upper(lua_State * L)62 static int str_upper (lua_State *L) {
63   size_t l;
64   size_t i;
65   luaL_Buffer b;
66   const char *s = luaL_check_lstr(L, 1, &l);
67   luaL_buffinit(L, &b);
68   for (i=0; i<l; i++)
69     luaL_putchar(&b, toupper((unsigned char)(s[i])));
70   luaL_pushresult(&b);
71   return 1;
72 }
73 
str_rep(lua_State * L)74 static int str_rep (lua_State *L) {
75   size_t l;
76   luaL_Buffer b;
77   const char *s = luaL_check_lstr(L, 1, &l);
78   int n = luaL_check_int(L, 2);
79   luaL_buffinit(L, &b);
80   while (n-- > 0)
81     luaL_addlstring(&b, s, l);
82   luaL_pushresult(&b);
83   return 1;
84 }
85 
86 
str_byte(lua_State * L)87 static int str_byte (lua_State *L) {
88   size_t l;
89   const char *s = luaL_check_lstr(L, 1, &l);
90   long pos = posrelat(luaL_opt_long(L, 2, 1), l);
91   luaL_arg_check(L, 0<pos && (size_t)pos<=l, 2,  "out of range");
92   lua_pushnumber(L, (unsigned char)s[pos-1]);
93   return 1;
94 }
95 
96 
str_char(lua_State * L)97 static int str_char (lua_State *L) {
98   int n = lua_gettop(L);  /* number of arguments */
99   int i;
100   luaL_Buffer b;
101   luaL_buffinit(L, &b);
102   for (i=1; i<=n; i++) {
103     int c = luaL_check_int(L, i);
104     luaL_arg_check(L, (unsigned char)c == c, i, "invalid value");
105     luaL_putchar(&b, (unsigned char)c);
106   }
107   luaL_pushresult(&b);
108   return 1;
109 }
110 
111 
112 
113 /*
114 ** {======================================================
115 ** PATTERN MATCHING
116 ** =======================================================
117 */
118 
119 #ifndef MAX_CAPTURES
120 #define MAX_CAPTURES 32  /* arbitrary limit */
121 #endif
122 
123 
124 struct Capture {
125   const char *src_end;  /* end ('\0') of source string */
126   int level;  /* total number of captures (finished or unfinished) */
127   struct {
128     const char *init;
129     long len;  /* -1 signals unfinished capture */
130   } capture[MAX_CAPTURES];
131 };
132 
133 
134 #define ESC		'%'
135 #define SPECIALS	"^$*+?.([%-"
136 
137 
check_capture(lua_State * L,int l,struct Capture * cap)138 static int check_capture (lua_State *L, int l, struct Capture *cap) {
139   l -= '1';
140   if (!(0 <= l && l < cap->level && cap->capture[l].len != -1))
141     lua_error(L, "invalid capture index");
142   return l;
143 }
144 
145 
capture_to_close(lua_State * L,struct Capture * cap)146 static int capture_to_close (lua_State *L, struct Capture *cap) {
147   int level = cap->level;
148   for (level--; level>=0; level--)
149     if (cap->capture[level].len == -1) return level;
150   lua_error(L, "invalid pattern capture");
151   return 0;  /* to avoid warnings */
152 }
153 
154 
luaI_classend(lua_State * L,const char * p)155 const char *luaI_classend (lua_State *L, const char *p) {
156   switch (*p++) {
157     case ESC:
158       if (*p == '\0') lua_error(L, "malformed pattern (ends with `%')");
159       return p+1;
160     case '[':
161       if (*p == '^') p++;
162       do {  /* look for a ']' */
163         if (*p == '\0') lua_error(L, "malformed pattern (missing `]')");
164         if (*(p++) == ESC && *p != '\0') p++;  /* skip escapes (e.g. '%]') */
165       } while (*p != ']');
166       return p+1;
167     default:
168       return p;
169   }
170 }
171 
172 
match_class(int c,int cl)173 static int match_class (int c, int cl) {
174   int res;
175   switch (tolower(cl)) {
176     case 'a' : res = isalpha(c); break;
177     case 'c' : res = iscntrl(c); break;
178     case 'd' : res = isdigit(c); break;
179     case 'l' : res = islower(c); break;
180     case 'p' : res = ispunct(c); break;
181     case 's' : res = isspace(c); break;
182     case 'u' : res = isupper(c); break;
183     case 'w' : res = isalnum(c); break;
184     case 'x' : res = isxdigit(c); break;
185     case 'z' : res = (c == '\0'); break;
186     default: return (cl == c);
187   }
188   return (islower(cl) ? res : !res);
189 }
190 
191 
192 
matchbracketclass(int c,const char * p,const char * endclass)193 static int matchbracketclass (int c, const char *p, const char *endclass) {
194   int sig = 1;
195   if (*(p+1) == '^') {
196     sig = 0;
197     p++;  /* skip the '^' */
198   }
199   while (++p < endclass) {
200     if (*p == ESC) {
201       p++;
202       if (match_class(c, (unsigned char)*p))
203         return sig;
204     }
205     else if ((*(p+1) == '-') && (p+2 < endclass)) {
206       p+=2;
207       if ((int)(unsigned char)*(p-2) <= c && c <= (int)(unsigned char)*p)
208         return sig;
209     }
210     else if ((int)(unsigned char)*p == c) return sig;
211   }
212   return !sig;
213 }
214 
215 
216 
luaI_singlematch(int c,const char * p,const char * ep)217 int luaI_singlematch (int c, const char *p, const char *ep) {
218   switch (*p) {
219     case '.':  /* matches any char */
220       return 1;
221     case ESC:
222       return match_class(c, (unsigned char)*(p+1));
223     case '[':
224       return matchbracketclass(c, p, ep-1);
225     default:
226       return ((unsigned char)*p == c);
227   }
228 }
229 
230 
231 static const char *match (lua_State *L, const char *s, const char *p,
232                           struct Capture *cap);
233 
234 
matchbalance(lua_State * L,const char * s,const char * p,struct Capture * cap)235 static const char *matchbalance (lua_State *L, const char *s, const char *p,
236                                  struct Capture *cap) {
237   if (*p == 0 || *(p+1) == 0)
238     lua_error(L, "unbalanced pattern");
239   if (*s != *p) return NULL;
240   else {
241     int b = *p;
242     int e = *(p+1);
243     int cont = 1;
244     while (++s < cap->src_end) {
245       if (*s == e) {
246         if (--cont == 0) return s+1;
247       }
248       else if (*s == b) cont++;
249     }
250   }
251   return NULL;  /* string ends out of balance */
252 }
253 
254 
max_expand(lua_State * L,const char * s,const char * p,const char * ep,struct Capture * cap)255 static const char *max_expand (lua_State *L, const char *s, const char *p,
256                                const char *ep, struct Capture *cap) {
257   long i = 0;  /* counts maximum expand for item */
258   while ((s+i)<cap->src_end && luaI_singlematch((unsigned char)*(s+i), p, ep))
259     i++;
260   /* keeps trying to match with the maximum repetitions */
261   while (i>=0) {
262     const char *res = match(L, (s+i), ep+1, cap);
263     if (res) return res;
264     i--;  /* else didn't match; reduce 1 repetition to try again */
265   }
266   return NULL;
267 }
268 
269 
min_expand(lua_State * L,const char * s,const char * p,const char * ep,struct Capture * cap)270 static const char *min_expand (lua_State *L, const char *s, const char *p,
271                                const char *ep, struct Capture *cap) {
272   for (;;) {
273     const char *res = match(L, s, ep+1, cap);
274     if (res != NULL)
275       return res;
276     else if (s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep))
277       s++;  /* try with one more repetition */
278     else return NULL;
279   }
280 }
281 
282 
start_capture(lua_State * L,const char * s,const char * p,struct Capture * cap)283 static const char *start_capture (lua_State *L, const char *s, const char *p,
284                                   struct Capture *cap) {
285   const char *res;
286   int level = cap->level;
287   if (level >= MAX_CAPTURES) lua_error(L, "too many captures");
288   cap->capture[level].init = s;
289   cap->capture[level].len = -1;
290   cap->level = level+1;
291   if ((res=match(L, s, p+1, cap)) == NULL)  /* match failed? */
292     cap->level--;  /* undo capture */
293   return res;
294 }
295 
296 
end_capture(lua_State * L,const char * s,const char * p,struct Capture * cap)297 static const char *end_capture (lua_State *L, const char *s, const char *p,
298                                 struct Capture *cap) {
299   int l = capture_to_close(L, cap);
300   const char *res;
301   cap->capture[l].len = s - cap->capture[l].init;  /* close capture */
302   if ((res = match(L, s, p+1, cap)) == NULL)  /* match failed? */
303     cap->capture[l].len = -1;  /* undo capture */
304   return res;
305 }
306 
307 
match_capture(lua_State * L,const char * s,int level,struct Capture * cap)308 static const char *match_capture (lua_State *L, const char *s, int level,
309                                   struct Capture *cap) {
310   int l = check_capture(L, level, cap);
311   size_t len = cap->capture[l].len;
312   if ((size_t)(cap->src_end-s) >= len &&
313       memcmp(cap->capture[l].init, s, len) == 0)
314     return s+len;
315   else return NULL;
316 }
317 
318 
match(lua_State * L,const char * s,const char * p,struct Capture * cap)319 static const char *match (lua_State *L, const char *s, const char *p,
320                           struct Capture *cap) {
321   init: /* using goto's to optimize tail recursion */
322   switch (*p) {
323     case '(':  /* start capture */
324       return start_capture(L, s, p, cap);
325     case ')':  /* end capture */
326       return end_capture(L, s, p, cap);
327     case ESC:  /* may be %[0-9] or %b */
328       if (isdigit((unsigned char)(*(p+1)))) {  /* capture? */
329         s = match_capture(L, s, *(p+1), cap);
330         if (s == NULL) return NULL;
331         p+=2; goto init;  /* else return match(L, s, p+2, cap) */
332       }
333       else if (*(p+1) == 'b') {  /* balanced string? */
334         s = matchbalance(L, s, p+2, cap);
335         if (s == NULL) return NULL;
336         p+=4; goto init;  /* else return match(L, s, p+4, cap); */
337       }
338       else goto dflt;  /* case default */
339     case '\0':  /* end of pattern */
340       return s;  /* match succeeded */
341     case '$':
342       if (*(p+1) == '\0')  /* is the '$' the last char in pattern? */
343         return (s == cap->src_end) ? s : NULL;  /* check end of string */
344       else goto dflt;
345     default: dflt: {  /* it is a pattern item */
346       const char *ep = luaI_classend(L, p);  /* points to what is next */
347       int m = s<cap->src_end && luaI_singlematch((unsigned char)*s, p, ep);
348       switch (*ep) {
349         case '?': {  /* optional */
350           const char *res;
351           if (m && ((res=match(L, s+1, ep+1, cap)) != NULL))
352             return res;
353           p=ep+1; goto init;  /* else return match(L, s, ep+1, cap); */
354         }
355         case '*':  /* 0 or more repetitions */
356           return max_expand(L, s, p, ep, cap);
357         case '+':  /* 1 or more repetitions */
358           return (m ? max_expand(L, s+1, p, ep, cap) : NULL);
359         case '-':  /* 0 or more repetitions (minimum) */
360           return min_expand(L, s, p, ep, cap);
361         default:
362           if (!m) return NULL;
363           s++; p=ep; goto init;  /* else return match(L, s+1, ep, cap); */
364       }
365     }
366   }
367 }
368 
369 
370 
lmemfind(const char * s1,size_t l1,const char * s2,size_t l2)371 static const char *lmemfind (const char *s1, size_t l1,
372                              const char *s2, size_t l2) {
373   if (l2 == 0) return s1;  /* empty strings are everywhere */
374   else if (l2 > l1) return NULL;  /* avoids a negative `l1' */
375   else {
376     const char *init;  /* to search for a `*s2' inside `s1' */
377     l2--;  /* 1st char will be checked by `memchr' */
378     l1 = l1-l2;  /* `s2' cannot be found after that */
379     while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
380       init++;   /* 1st char is already checked */
381       if (memcmp(init, s2+1, l2) == 0)
382         return init-1;
383       else {  /* correct `l1' and `s1' to try again */
384         l1 -= init-s1;
385         s1 = init;
386       }
387     }
388     return NULL;  /* not found */
389   }
390 }
391 
392 
push_captures(lua_State * L,struct Capture * cap)393 static int push_captures (lua_State *L, struct Capture *cap) {
394   int i;
395   luaL_checkstack(L, cap->level, "too many captures");
396   for (i=0; i<cap->level; i++) {
397     int l = cap->capture[i].len;
398     if (l == -1) lua_error(L, "unfinished capture");
399     lua_pushlstring(L, cap->capture[i].init, l);
400   }
401   return cap->level;  /* number of strings pushed */
402 }
403 
404 
str_find(lua_State * L)405 static int str_find (lua_State *L) {
406   size_t l1, l2;
407   const char *s = luaL_check_lstr(L, 1, &l1);
408   const char *p = luaL_check_lstr(L, 2, &l2);
409   long init = posrelat(luaL_opt_long(L, 3, 1), l1) - 1;
410   struct Capture cap;
411   luaL_arg_check(L, 0 <= init && (size_t)init <= l1, 3, "out of range");
412   if (lua_gettop(L) > 3 ||  /* extra argument? */
413       strpbrk(p, SPECIALS) == NULL) {  /* or no special characters? */
414     const char *s2 = lmemfind(s+init, l1-init, p, l2);
415     if (s2) {
416       lua_pushnumber(L, s2-s+1);
417       lua_pushnumber(L, s2-s+l2);
418       return 2;
419     }
420   }
421   else {
422     int anchor = (*p == '^') ? (p++, 1) : 0;
423     const char *s1=s+init;
424     cap.src_end = s+l1;
425     do {
426       const char *res;
427       cap.level = 0;
428       if ((res=match(L, s1, p, &cap)) != NULL) {
429         lua_pushnumber(L, s1-s+1);  /* start */
430         lua_pushnumber(L, res-s);   /* end */
431         return push_captures(L, &cap) + 2;
432       }
433     } while (s1++<cap.src_end && !anchor);
434   }
435   lua_pushnil(L);  /* not found */
436   return 1;
437 }
438 
439 
add_s(lua_State * L,luaL_Buffer * b,struct Capture * cap)440 static void add_s (lua_State *L, luaL_Buffer *b, struct Capture *cap) {
441   if (lua_isstring(L, 3)) {
442     const char *news = lua_tostring(L, 3);
443     size_t l = lua_strlen(L, 3);
444     size_t i;
445     for (i=0; i<l; i++) {
446       if (news[i] != ESC)
447         luaL_putchar(b, news[i]);
448       else {
449         i++;  /* skip ESC */
450         if (!isdigit((unsigned char)news[i]))
451           luaL_putchar(b, news[i]);
452         else {
453           int level = check_capture(L, news[i], cap);
454           luaL_addlstring(b, cap->capture[level].init, cap->capture[level].len);
455         }
456       }
457     }
458   }
459   else {  /* is a function */
460     int n;
461     lua_pushvalue(L, 3);
462     n = push_captures(L, cap);
463     lua_rawcall(L, n, 1);
464     if (lua_isstring(L, -1))
465       luaL_addvalue(b);  /* add return to accumulated result */
466     else
467       lua_pop(L, 1);  /* function result is not a string: pop it */
468   }
469 }
470 
471 
str_gsub(lua_State * L)472 static int str_gsub (lua_State *L) {
473   size_t srcl;
474   const char *src = luaL_check_lstr(L, 1, &srcl);
475   const char *p = luaL_check_string(L, 2);
476   int max_s = luaL_opt_int(L, 4, srcl+1);
477   int anchor = (*p == '^') ? (p++, 1) : 0;
478   int n = 0;
479   struct Capture cap;
480   luaL_Buffer b;
481   luaL_arg_check(L,
482     lua_gettop(L) >= 3 && (lua_isstring(L, 3) || lua_isfunction(L, 3)),
483     3, "string or function expected");
484   luaL_buffinit(L, &b);
485   cap.src_end = src+srcl;
486   while (n < max_s) {
487     const char *e;
488     cap.level = 0;
489     e = match(L, src, p, &cap);
490     if (e) {
491       n++;
492       add_s(L, &b, &cap);
493     }
494     if (e && e>src) /* non empty match? */
495       src = e;  /* skip it */
496     else if (src < cap.src_end)
497       luaL_putchar(&b, *src++);
498     else break;
499     if (anchor) break;
500   }
501   luaL_addlstring(&b, src, cap.src_end-src);
502   luaL_pushresult(&b);
503   lua_pushnumber(L, n);  /* number of substitutions */
504   return 2;
505 }
506 
507 /* }====================================================== */
508 
509 
luaI_addquoted(lua_State * L,luaL_Buffer * b,int arg)510 static void luaI_addquoted (lua_State *L, luaL_Buffer *b, int arg) {
511   size_t l;
512   const char *s = luaL_check_lstr(L, arg, &l);
513   luaL_putchar(b, '"');
514   while (l--) {
515     switch (*s) {
516       case '"':  case '\\':  case '\n':
517         luaL_putchar(b, '\\');
518         luaL_putchar(b, *s);
519         break;
520       case '\0': luaL_addlstring(b, "\\000", 4); break;
521       default: luaL_putchar(b, *s);
522     }
523     s++;
524   }
525   luaL_putchar(b, '"');
526 }
527 
528 /* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
529 #define MAX_ITEM	512
530 /* maximum size of each format specification (such as '%-099.99d') */
531 #define MAX_FORMAT	20
532 
str_format(lua_State * L)533 static int str_format (lua_State *L) {
534   int arg = 1;
535   const char *strfrmt = luaL_check_string(L, arg);
536   luaL_Buffer b;
537   luaL_buffinit(L, &b);
538   while (*strfrmt) {
539     if (*strfrmt != '%')
540       luaL_putchar(&b, *strfrmt++);
541     else if (*++strfrmt == '%')
542       luaL_putchar(&b, *strfrmt++);  /* %% */
543     else { /* format item */
544       struct Capture cap;
545       char form[MAX_FORMAT];  /* to store the format ('%...') */
546       char buff[MAX_ITEM];  /* to store the formatted item */
547       const char *initf = strfrmt;
548       form[0] = '%';
549       if (isdigit((unsigned char)*initf) && *(initf+1) == '$') {
550         arg = *initf - '0';
551         initf += 2;  /* skip the 'n$' */
552       }
553       arg++;
554       cap.src_end = strfrmt+strlen(strfrmt)+1;
555       cap.level = 0;
556       strfrmt = match(L, initf, "[-+ #0]*(%d*)%.?(%d*)", &cap);
557       if (cap.capture[0].len > 2 || cap.capture[1].len > 2 ||  /* < 100? */
558           strfrmt-initf > MAX_FORMAT-2)
559         lua_error(L, "invalid format (width or precision too long)");
560       strncpy(form+1, initf, strfrmt-initf+1); /* +1 to include conversion */
561       form[strfrmt-initf+2] = 0;
562       switch (*strfrmt++) {
563         case 'c':  case 'd':  case 'i':
564           sprintf(buff, form, luaL_check_int(L, arg));
565           break;
566         case 'o':  case 'u':  case 'x':  case 'X':
567           sprintf(buff, form, (unsigned int)luaL_check_number(L, arg));
568           break;
569         case 'e':  case 'E': case 'f': case 'g': case 'G':
570           sprintf(buff, form, luaL_check_number(L, arg));
571           break;
572         case 'q':
573           luaI_addquoted(L, &b, arg);
574           continue;  /* skip the "addsize" at the end */
575         case 's': {
576           size_t l;
577           const char *s = luaL_check_lstr(L, arg, &l);
578           if (cap.capture[1].len == 0 && l >= 100) {
579             /* no precision and string is too long to be formatted;
580                keep original string */
581             lua_pushvalue(L, arg);
582             luaL_addvalue(&b);
583             continue;  /* skip the "addsize" at the end */
584           }
585           else {
586             sprintf(buff, form, s);
587             break;
588           }
589         }
590         default:  /* also treat cases 'pnLlh' */
591           lua_error(L, "invalid option in `format'");
592       }
593       luaL_addlstring(&b, buff, strlen(buff));
594     }
595   }
596   luaL_pushresult(&b);
597   return 1;
598 }
599 
600 
601 static const struct luaL_reg strlib[] = {
602 {"strlen", str_len},
603 {"strsub", str_sub},
604 {"strlower", str_lower},
605 {"strupper", str_upper},
606 {"strchar", str_char},
607 {"strrep", str_rep},
608 {"ascii", str_byte},  /* for compatibility with 3.0 and earlier */
609 {"strbyte", str_byte},
610 {"format", str_format},
611 {"strfind", str_find},
612 {"gsub", str_gsub}
613 };
614 
615 
616 /*
617 ** Open string library
618 */
lua_strlibopen(lua_State * L)619 LUALIB_API void lua_strlibopen (lua_State *L) {
620   luaL_openl(L, strlib);
621 }
622