1 /* lstrlibext.c
2 
3    Copyright 2012 Taco Hoekwater <taco@luatex.org>
4 
5    This file is part of LuaTeX.
6 
7    LuaTeX is free software; you can redistribute it and/or modify it under
8    the terms of the GNU General Public License as published by the Free
9    Software Foundation; either version 2 of the License, or (at your
10    option) any later version.
11 
12    LuaTeX is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License along
18    with LuaTeX; if not, see <http://www.gnu.org/licenses/>. */
19 
20 /* The relative ordering of the header files is important here,
21    otherwise some of the defines that are needed for lua_sdump
22    come out wrong.
23  */
24 #include "ptexlib.h"
25 
26 #define LUA_CORE
27 #include "lua.h"
28 #include "luaconf.h"
29 #include "lapi.h"
30 #include "lundump.h"
31 
32 
/* string.explode(s [, sep])

   Splits the string s into a new table (array part, starting at index 1)
   and returns that table.

   sep is optional and defaults to " +".  Only its first two bytes are
   looked at:
     - sep[0] is the separator byte;
     - if sep[1] is '+', runs of separators count as one separator, and
       separators at the start or the end of s do not produce empty
       fields;
     - if sep is the empty string, s is split into its individual bytes.

   An empty s yields a table holding one empty string.

   The subject string is scanned in place with explicit lengths; no
   temporary copy is made, so strings with embedded zero bytes are split
   correctly and nothing can leak if Lua raises a memory error while the
   fields are being pushed.
 */
static int str_split (lua_State *L) {
  size_t l;
  size_t i;
  int n = 1;              /* next table index to fill */
  int mult;               /* non-zero: collapse runs of separators */
  char sep;
  const char *q;          /* start of the field currently being scanned */
  const char *s = luaL_checklstring(L, 1, &l);
  const char *joiner = luaL_optstring(L, 2, " +");
  lua_newtable(L);
  if (l == 0) {
    lua_pushvalue(L, 1);
    lua_rawseti(L, -2, 1);
    return 1;
  }
  if (*joiner == 0) {
    /* empty separator: one field per byte */
    for (i = 0; i < l; i++) {
      lua_pushlstring(L, s + i, 1);
      lua_rawseti(L, -2, n);
      n++;
    }
    return 1;
  }
  sep = *joiner;
  mult = (*(joiner + 1) == '+');
  if (mult) {
    /* drop leading separators */
    while (l > 0 && *s == sep) {
      s++;
      l--;
    }
  }
  q = s;
  for (i = 0; i < l; i++) {
    if (s[i] == sep) {
      lua_pushlstring(L, q, (size_t) ((s + i) - q));
      lua_rawseti(L, -2, n);
      n++;
      if (mult) {
        /* swallow the rest of this run of separators */
        while (i + 1 < l && s[i + 1] == sep) {
          i++;
        }
      }
      q = s + i + 1;
    }
  }
  if (mult && q == s + l) {
    /* the string ended in separators: no trailing empty field */
    return 1;
  }
  /* last field; it is empty when a non-collapsing separator ends s */
  lua_pushlstring(L, q, (size_t) ((s + l) - q));
  lua_rawseti(L, -2, n);
  return 1;
}
96 
/* Iterator body behind string.characters.
   Upvalue 1 is the subject string, upvalue 2 the current byte offset.
   Each call returns the next byte as a one-byte string and advances the
   offset; it returns nothing once the offset reaches the string length. */
static int characters_aux (lua_State *L) {
  size_t len;
  const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
  int pos = lua_tointeger(L, lua_upvalueindex(2));
  if (pos >= (int)len) {
    return 0;  /* string ended */
  }
  /* store the offset for the next call */
  lua_pushinteger(L, pos + 1);
  lua_replace(L, lua_upvalueindex(2));
  lua_pushlstring(L, str + pos, 1);
  return 1;
}
111 
112 
/* string.characters(s): returns an iterator closure over the single
   bytes of s.  Argument 1 is checked with luaL_checklstring, which raises
   a Lua error if it cannot be used as a string. */
static int str_characters (lua_State *L) {
  (void) luaL_checklstring(L, 1, NULL);
  lua_settop(L, 1);          /* upvalue 1: the string, extra arguments dropped */
  lua_pushinteger(L, 0);     /* upvalue 2: start at byte offset 0 */
  lua_pushcclosure(L, characters_aux, 2);
  return 1;
}
120 
121 
/* Shared failure path of the UTF-8 character iterator: stores new_ind as
   the next byte offset (upvalue 2) and returns the three bytes
   EF BF BD, the UTF-8 encoding of U+FFFD, as the iteration value. */
static int utf_failed(lua_State *L, int new_ind) {
  static const char replacement[] = "\xEF\xBF\xBD";
  lua_pushinteger(L, new_ind);  /* where the next call resumes */
  lua_replace(L, lua_upvalueindex(2));
  lua_pushlstring(L, replacement, sizeof(replacement) - 1);
  return 1;
}
129 
/* Iterator body behind string.utfcharacters.
   Upvalue 1 is the subject string, upvalue 2 the current byte offset.
   Each call returns the next UTF-8 sequence (1 to 4 bytes) as a string.
   The sequence length is taken from the lead byte; every trailing byte
   must have the form 10xxxxxx.  On malformed input utf_failed() supplies
   the replacement value and the resume offset:
     - sequence would run past the end: resume at the end of the string;
     - bad trailing byte: resume at that byte;
     - byte that is not a valid lead byte: resume at the next byte. */
static int utfcharacters_aux (lua_State *L) {
  size_t len;
  unsigned char lead;
  int extra;                /* number of trailing bytes the lead announces */
  int k;
  const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
  int pos = lua_tointeger(L, lua_upvalueindex(2));

  if (pos >= (int)len) {
    return 0;  /* end of string */
  }
  lead = (unsigned char) str[pos];
  if ((lead & 0x80) == 0x00) {
    extra = 0;                       /* 0xxxxxxx */
  } else if ((lead & 0xE0) == 0xC0) {
    extra = 1;                       /* 110xxxxx */
  } else if ((lead & 0xF0) == 0xE0) {
    extra = 2;                       /* 1110xxxx */
  } else if ((lead & 0xF8) == 0xF0) {
    extra = 3;                       /* 11110xxx */
  } else {
    return utf_failed(L, pos + 1);   /* not a lead byte */
  }
  if (pos + 1 + extra > (int)len) {
    return utf_failed(L, (int)len);  /* will not fit */
  }
  for (k = 1; k <= extra; k++) {
    unsigned char follow = (unsigned char) str[pos + k];
    if ((follow & 0xC0) != 0x80) {
      return utf_failed(L, pos + k); /* bad follow */
    }
  }
  lua_pushinteger(L, pos + 1 + extra);  /* offset for the next call */
  lua_replace(L, lua_upvalueindex(2));
  lua_pushlstring(L, str + pos, (size_t)(1 + extra));
  return 1;
}
156 
157 
/* string.utfcharacters(s): returns an iterator closure that yields the
   UTF-8 sequences of s as strings.  Argument 1 is checked with
   luaL_checklstring, which raises a Lua error if it cannot be used as a
   string. */
static int str_utfcharacters (lua_State *L) {
  (void) luaL_checklstring(L, 1, NULL);
  lua_settop(L, 1);          /* upvalue 1: the string, extra arguments dropped */
  lua_pushinteger(L, 0);     /* upvalue 2: start at byte offset 0 */
  lua_pushcclosure(L, utfcharacters_aux, 2);
  return 1;
}
165 
166 
/* Iterator body behind string.utfvalues.
   Upvalue 1 is the subject string, upvalue 2 the current byte offset.
   Each call returns the next code point as an integer and advances the
   offset by the number of bytes consumed; it returns nothing at the end
   of the string.

   Decoding is lenient: the sequence length is chosen from the lead byte
   (>= 0xF0: four bytes, >= 0xE0: three, >= 0xC0: two) and trailing bytes
   are only required to be >= 0x80.  When the sequence is incomplete or a
   trailing byte is < 0x80, or the lead byte lies in 0x80..0xBF, the
   result is 0xFFFD and only one byte is consumed. */
static int utfvalues_aux (lua_State *L) {
  size_t len;
  const unsigned char *u;   /* bytes starting at the current offset */
  unsigned char lead;
  unsigned int code = 0xFFFD;
  int used = 1;
  const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
  int pos = lua_tointeger(L, lua_upvalueindex(2));

  if (pos >= (int)len) {
    return 0;  /* string ended */
  }
  u = (const unsigned char *) str + pos;
  lead = u[0];
  if (lead < 0x80) {
    code = lead;
  } else if (lead >= 0xF0) {
    if ((pos + 3) < (int)len && u[1] >= 0x80 && u[2] >= 0x80 && u[3] >= 0x80) {
      used = 4;
      code = ((((lead - 0xF0) * 64 + (u[1] - 0x80)) * 64
               + (u[2] - 0x80)) * 64) + (u[3] - 0x80);
    }
  } else if (lead >= 0xE0) {
    if ((pos + 2) < (int)len && u[1] >= 0x80 && u[2] >= 0x80) {
      used = 3;
      code = (((lead - 0xE0) * 64) + (u[1] - 0x80)) * 64 + (u[2] - 0x80);
    }
  } else if (lead >= 0xC0) {
    if ((pos + 1) < (int)len && u[1] >= 0x80) {
      used = 2;
      code = ((lead - 0xC0) * 64) + (u[1] - 0x80);
    }
  }
  lua_pushinteger(L, pos + used);  /* offset for the next call */
  lua_replace(L, lua_upvalueindex(2));
  lua_pushinteger(L, code);
  return 1;
}
213 
214 
/* string.utfvalues(s): returns an iterator closure that yields the code
   points of s as integers.  Argument 1 is checked with luaL_checklstring,
   which raises a Lua error if it cannot be used as a string. */
static int str_utfvalues (lua_State *L) {
  (void) luaL_checklstring(L, 1, NULL);
  lua_settop(L, 1);          /* upvalue 1: the string, extra arguments dropped */
  lua_pushinteger(L, 0);     /* upvalue 2: start at byte offset 0 */
  lua_pushcclosure(L, utfvalues_aux, 2);
  return 1;
}
222 
223 
224 
/* Iterator body behind string.characterpairs.
   Upvalue 1 is the subject string, upvalue 2 the current byte offset.
   Each call returns two one-byte strings, the bytes at the offset and at
   offset+1, and advances the offset past what was consumed.  When only
   one byte is left the second result is the empty string.  Returns
   nothing at the end of the string. */
static int characterpairs_aux (lua_State *L) {
  size_t len;
  int has_second;
  const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
  int pos = lua_tointeger(L, lua_upvalueindex(2));

  if (pos >= (int)len) {
    return 0;  /* string ended */
  }
  has_second = (pos + 1 < (int)len);
  lua_pushinteger(L, has_second ? (pos + 2) : (pos + 1));  /* next offset */
  lua_replace(L, lua_upvalueindex(2));
  lua_pushlstring(L, str + pos, 1);
  if (has_second) {
    lua_pushlstring(L, str + pos + 1, 1);
  } else {
    lua_pushlstring(L, "", 0);  /* odd string length */
  }
  return 2;
}
249 
250 
/* string.characterpairs(s): returns an iterator closure that yields the
   bytes of s two at a time, as one-byte strings.  Argument 1 is checked
   with luaL_checklstring, which raises a Lua error if it cannot be used
   as a string. */
static int str_characterpairs (lua_State *L) {
  (void) luaL_checklstring(L, 1, NULL);
  lua_settop(L, 1);          /* upvalue 1: the string, extra arguments dropped */
  lua_pushinteger(L, 0);     /* upvalue 2: start at byte offset 0 */
  lua_pushcclosure(L, characterpairs_aux, 2);
  return 1;
}
258 
/* Iterator body behind string.bytes.
   Upvalue 1 is the subject string, upvalue 2 the current byte offset.
   Each call returns the next byte as an integer in 0..255 and advances
   the offset; it returns nothing at the end of the string. */
static int bytes_aux (lua_State *L) {
  size_t len;
  const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
  int pos = lua_tointeger(L, lua_upvalueindex(2));

  if (pos >= (int)len) {
    return 0;  /* string ended */
  }
  lua_pushinteger(L, pos + 1);  /* offset for the next call */
  lua_replace(L, lua_upvalueindex(2));
  lua_pushinteger(L, (unsigned char) str[pos]);  /* byte */
  return 1;
}
273 
/* string.bytes(s): returns an iterator closure that yields the bytes of
   s as integers.  Argument 1 is checked with luaL_checklstring, which
   raises a Lua error if it cannot be used as a string. */
static int str_bytes (lua_State *L) {
  (void) luaL_checklstring(L, 1, NULL);
  lua_settop(L, 1);          /* upvalue 1: the string, extra arguments dropped */
  lua_pushinteger(L, 0);     /* upvalue 2: start at byte offset 0 */
  lua_pushcclosure(L, bytes_aux, 2);
  return 1;
}
281 
/* Iterator body behind string.bytepairs.
   Upvalue 1 is the subject string, upvalue 2 the current byte offset.
   Each call returns two values, the bytes at the offset and at offset+1
   as integers in 0..255, and advances the offset past what was consumed.
   When only one byte is left the second result is nil.  Returns nothing
   at the end of the string. */
static int bytepairs_aux (lua_State *L) {
  size_t len;
  int has_second;
  const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
  int pos = lua_tointeger(L, lua_upvalueindex(2));

  if (pos >= (int)len) {
    return 0;  /* string ended */
  }
  has_second = (pos + 1 < (int)len);
  lua_pushinteger(L, has_second ? (pos + 2) : (pos + 1));  /* next offset */
  lua_replace(L, lua_upvalueindex(2));
  lua_pushinteger(L, (unsigned char) str[pos]);        /* byte one */
  if (has_second) {
    lua_pushinteger(L, (unsigned char) str[pos + 1]);  /* byte two */
  } else {
    lua_pushnil(L);                                    /* odd string length */
  }
  return 2;
}
306 
307 
/* string.bytepairs(s): returns an iterator closure that yields the bytes
   of s two at a time, as integers.  Argument 1 is checked with
   luaL_checklstring, which raises a Lua error if it cannot be used as a
   string. */
static int str_bytepairs (lua_State *L) {
  (void) luaL_checklstring(L, 1, NULL);
  lua_settop(L, 1);          /* upvalue 1: the string, extra arguments dropped */
  lua_pushinteger(L, 0);     /* upvalue 2: start at byte offset 0 */
  lua_pushcclosure(L, bytepairs_aux, 2);
  return 1;
}
315 
316 
317 
/* lua_Writer callback used by str_dump: appends the size bytes at b to
   the luaL_Buffer passed through B.  L is unused.  Always returns 0. */
static int writer (lua_State *L, const void* b, size_t size, void* B) {
  luaL_Buffer *out = (luaL_Buffer *) B;
  const char *chunk = (const char *) b;
  (void) L;
  luaL_addlstring(out, chunk, size);
  return 0;
}
323 
/* Dumps the value on top of the stack through luaU_dump, handing on the
   caller's stripping flag.  writer and data are passed to luaU_dump
   unchanged.  Returns the status of luaU_dump when the top value is a
   Lua function, and 1 when it is not (nothing is dumped in that case). */
static int lua_sdump (lua_State *L, lua_Writer writer, void *data, int stripping) {
  int status = 1;   /* result when the top value is not a Lua function */
  TValue *o;
  lua_lock(L);
  api_checknelems(L, 1);
  o = L->top - 1;
  if (isLfunction(o)) {
    status = luaU_dump(L, getproto(o), writer, data, stripping);
  }
  lua_unlock(L);
  return status;
}
337 
/* string.dump(f [, strip])

   Returns a string holding the binary dump of the function f.  Argument
   1 must be a function, otherwise luaL_checktype raises a Lua error.
   strip is read as a boolean and passed to the dumper as its stripping
   flag.  Raises the Lua error "unable to dump given function" when the
   dump fails.

   The flag is read with lua_toboolean unconditionally: that call yields
   0 for an absent argument, and the flag is no longer silently ignored
   when more than two arguments are passed.
 */
static int str_dump (lua_State *L) {
  luaL_Buffer b;
  int stripping;
  luaL_checktype(L, 1, LUA_TFUNCTION);
  stripping = lua_toboolean(L, 2);
  lua_settop(L, 1);   /* leave only the function, which lua_sdump reads from the top */
  luaL_buffinit(L, &b);
  if (lua_sdump(L, writer, &b, stripping) != 0)
    return luaL_error(L, "unable to dump given function");
  luaL_pushresult(&b);
  return 1;
}
352 
353 
354 
355 static const luaL_Reg strlibext[] = {
356   {"utfvalues", str_utfvalues},
357   {"utfcharacters", str_utfcharacters},
358   {"characters", str_characters},
359   {"characterpairs", str_characterpairs},
360   {"bytes", str_bytes},
361   {"bytepairs", str_bytepairs},
362   {"explode", str_split},
363   {"dump", str_dump},
364   {NULL, NULL}
365 };
366 
/* Stores every function listed in strlibext as a field of the global
   "string" table, replacing any field of the same name.  The stack is
   left as it was found. */
void open_strlibext(lua_State * L)
{
    int i;
    lua_getglobal(L, "string");
    /* the list ends at the entry whose name is NULL */
    for (i = 0; strlibext[i].name != NULL; i++) {
        lua_pushcfunction(L, strlibext[i].func);
        lua_setfield(L, -2, strlibext[i].name);
    }
    lua_pop(L, 1);  /* remove the "string" table again */
}
377 
378