1 /* lstrlibext.c for luajitex
2 
3    Copyright 2013 Luigi Scarso
4 
5    Code from lstrlibext.c for LuaTeX
6    Original version copyright 2012 Taco Hoekwater <taco@luatex.org>
7 
8    This file is part of LuajitTeX.
9 
10    LuajitTeX is free software; you can redistribute it and/or modify it under
11    the terms of the GNU General Public License as published by the Free
12    Software Foundation; either version 2 of the License, or (at your
13    option) any later version.
14 
15    LuajitTeX is distributed in the hope that it will be useful, but WITHOUT
16    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
18    License for more details.
19 
20    You should have received a copy of the GNU General Public License along
21    with LuaTeX; if not, see <http://www.gnu.org/licenses/>.
22 */
23 
24 #include "ptexlib.h"
25 #include <stdio.h>
26 
27 #define LUA_CORE
28 
29 #include "lua.h"
30 #include "lauxlib.h"
31 #include "lualib.h"
32 
33 
34 
35 
36 /*#include "lua51/lj_obj.h"*/
37 /*#include "lua51/lj_gc.h"*/
38 /*#include "lua51/lj_err.h"*/
39 /*#include "lua51/lj_str.h"*/
40 /*#include "lua51/lj_tab.h"*/
41 /*#include "lua51/lj_meta.h"*/
42 /*#include "lua51/lj_state.h"*/
43 /*#include "lua51/lj_ff.h" */
44 /*#include "lua51/lj_bcdump.h"*/
45 /*#include "lua51/lj_char.h"*/
46 /*#include "lua51/lj_lib.h"*/
47 
48 
49 
/*
 * Iterator step behind string.bytepairs.
 *
 * Upvalue 1 holds the subject string, upvalue 2 the current byte offset.
 * Each call yields two values: the byte at the offset and the byte after
 * it (nil when the string has an odd length and only one byte is left).
 * The stored offset is advanced past whatever was consumed.  Returns no
 * values once the offset has reached the end of the string.
 */
static int bytepairs_aux(lua_State *L)
{
    size_t len;
    const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
    int pos = lua_tointeger(L, lua_upvalueindex(2));
    int has_second;

    if (pos >= (int) len)
        return 0;               /* string ended */

    has_second = (pos + 1 < (int) len);

    /* store the offset for the next call before producing the results */
    lua_pushinteger(L, has_second ? (pos + 2) : (pos + 1));
    lua_replace(L, lua_upvalueindex(2));

    lua_pushinteger(L, (unsigned char) str[pos]);           /* byte one */
    if (has_second)
        lua_pushinteger(L, (unsigned char) str[pos + 1]);   /* byte two */
    else
        lua_pushnil(L);                                     /* odd string length */
    return 2;
}
74 
75 
/*
 * Lua entry point registered as "bytepairs": checks that argument 1 is a
 * string and returns a closure over bytepairs_aux whose upvalues are the
 * string itself and a start offset of 0.
 */
static int str_bytepairs(lua_State *L)
{
    (void) luaL_checkstring(L, 1);  /* raises a Lua error on a bad argument */
    lua_settop(L, 1);               /* keep only the subject string */
    lua_pushinteger(L, 0);          /* initial offset */
    lua_pushcclosure(L, bytepairs_aux, 2);
    return 1;
}
83 
84 
/*
 * Iterator step behind string.bytes.
 *
 * Upvalue 1 holds the subject string, upvalue 2 the current byte offset.
 * Each call yields the byte at the offset as an integer (0..255) and moves
 * the stored offset one position forward; it yields nothing once the
 * offset has reached the end of the string.
 */
static int bytes_aux(lua_State *L)
{
    size_t len;
    const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
    int pos = lua_tointeger(L, lua_upvalueindex(2));

    if (pos >= (int) len)
        return 0;               /* string ended */

    lua_pushinteger(L, pos + 1);                    /* next offset */
    lua_replace(L, lua_upvalueindex(2));
    lua_pushinteger(L, (unsigned char) str[pos]);   /* byte */
    return 1;
}
99 
100 
/*
 * Lua entry point registered as "bytes": checks that argument 1 is a
 * string and returns a closure over bytes_aux, capturing the string and a
 * start offset of 0 as upvalues.
 */
static int str_bytes(lua_State *L)
{
    (void) luaL_checkstring(L, 1);  /* raises a Lua error on a bad argument */
    lua_settop(L, 1);               /* drop any extra arguments */
    lua_pushinteger(L, 0);          /* iteration starts at the first byte */
    lua_pushcclosure(L, bytes_aux, 2);
    return 1;
}
108 
109 
/*
 * Failure path shared by the UTF-8 character iterator.
 *
 * Stores new_ind as the iterator offset (upvalue 2 of the running closure)
 * and pushes the three-byte UTF-8 encoding of U+FFFD (EF BF BD) as the
 * iteration result.  Always returns 1, the number of pushed results.
 *
 * The replacement bytes live in a const string literal: the values exceed
 * the range of a signed char, so spelling them as integer initialisers of
 * a plain (and needlessly writable) char array relied on an
 * implementation-defined conversion.
 */
static int utf_failed(lua_State *L, int new_ind)
{
    static const char replacement[] = "\xEF\xBF\xBD";

    lua_pushinteger(L, new_ind);  /* iterator */
    lua_replace(L, lua_upvalueindex(2));
    lua_pushlstring(L, replacement, sizeof replacement - 1);  /* exclude the NUL */
    return 1;
}
117 
118 
/*
 * Iterator step behind string.utfcharacters.
 *
 * Upvalue 1 holds the subject string, upvalue 2 the current byte offset.
 * Each call yields one UTF-8 encoded character as a string of 1..4 bytes
 * and advances the stored offset past it.  Malformed input is reported
 * through utf_failed():
 *   - a sequence that runs past the end of the string: the offset jumps
 *     to the end of the string;
 *   - a byte that is not a continuation byte where one is required: the
 *     offset is set to that byte;
 *   - a byte that cannot start a sequence: the offset advances by one.
 * Returns no values once the offset has reached the end of the string.
 */
static int utfcharacters_aux(lua_State *L)
{
    size_t len;
    const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
    int pos = lua_tointeger(L, lua_upvalueindex(2));
    unsigned char lead;
    int extra;      /* number of continuation bytes the lead byte announces */
    int k;

    if (pos >= (int) len)
        return 0;   /* end of string */

    lead = (unsigned char) str[pos];
    if ((lead & 0x80) == 0x00)
        extra = 0;                      /* 0xxxxxxx */
    else if ((lead & 0xE0) == 0xC0)
        extra = 1;                      /* 110xxxxx */
    else if ((lead & 0xF0) == 0xE0)
        extra = 2;                      /* 1110xxxx */
    else if ((lead & 0xF8) == 0xF0)
        extra = 3;                      /* 11110xxx */
    else
        return utf_failed(L, pos + 1);  /* we found a follow byte! */

    if (pos + 1 + extra > (int) len)
        return utf_failed(L, (int) len);    /* will not fit */

    for (k = 1; k <= extra; k++) {
        unsigned char follow = (unsigned char) str[pos + k];
        if ((follow & 0xC0) != 0x80)
            return utf_failed(L, pos + k);  /* bad follow */
    }

    lua_pushinteger(L, pos + 1 + extra);    /* iterator */
    lua_replace(L, lua_upvalueindex(2));
    lua_pushlstring(L, str + pos, 1 + extra);
    return 1;
}
145 
146 
/*
 * Lua entry point registered as "utfcharacters": checks that argument 1 is
 * a string and returns a closure over utfcharacters_aux with the string
 * and a start offset of 0 as its two upvalues.
 */
static int str_utfcharacters(lua_State *L)
{
    (void) luaL_checkstring(L, 1);  /* raises a Lua error on a bad argument */
    lua_settop(L, 1);               /* only the subject string stays on the stack */
    lua_pushinteger(L, 0);          /* offset of the first character */
    lua_pushcclosure(L, utfcharacters_aux, 2);
    return 1;
}
154 
155 
156 
/*
 * Iterator step behind string.utfvalues.
 *
 * Upvalue 1 holds the subject string, upvalue 2 the current byte offset.
 * Each call decodes one UTF-8 sequence starting at the offset, yields its
 * code point as an integer and advances the stored offset past it.
 * Returns no values once the offset has reached the end of the string.
 *
 * When the bytes at the offset do not form a well-shaped sequence the
 * result is U+FFFD and the offset advances by a single byte.  A sequence
 * is rejected when
 *   - the first byte is a continuation byte (10xxxxxx) or is >= 0xF8,
 *   - the sequence would run past the end of the string, or
 *   - any following byte is not of the form 10xxxxxx.
 * The last check used to be a plain ">= 0x80" test, which let another lead
 * byte pass as a continuation and produced a meaningless code point; the
 * shape rules now match those applied by utfcharacters_aux.  Overlong
 * forms and surrogate values are not rejected.
 */
static int utfvalues_aux(lua_State *L)
{
    size_t ls;
    const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
    int ind = (int) lua_tointeger(L, lua_upvalueindex(2));
    const unsigned char *p;
    unsigned int v = 0xFFFD;    /* result unless a valid sequence is found */
    unsigned char lead;
    int numbytes = 1;           /* bytes consumed by this step */
    int avail;

    if (ind >= (int) ls)
        return 0;               /* string ended */

    p = (const unsigned char *) s + ind;
    avail = (int) ls - ind;     /* bytes left, including the lead byte */
    lead = p[0];

    if (lead < 0x80) {
        v = lead;
    } else {
        unsigned int acc = 0;   /* payload bits collected so far */
        int extra = 0;          /* continuation bytes announced by the lead */

        if ((lead & 0xF8) == 0xF0) {
            extra = 3;
            acc = lead & 0x07u;
        } else if ((lead & 0xF0) == 0xE0) {
            extra = 2;
            acc = lead & 0x0Fu;
        } else if ((lead & 0xE0) == 0xC0) {
            extra = 1;
            acc = lead & 0x1Fu;
        }

        /* extra == 0 here means a stray continuation byte or lead >= 0xF8 */
        if (extra > 0 && extra < avail) {
            int k;
            for (k = 1; k <= extra; k++) {
                if ((p[k] & 0xC0) != 0x80)
                    break;      /* not a continuation byte */
                acc = (acc << 6) | (p[k] & 0x3Fu);
            }
            if (k > extra) {    /* every continuation byte was accepted */
                v = acc;
                numbytes = extra + 1;
            }
        }
    }

    lua_pushinteger(L, ind + numbytes);  /* iterator */
    lua_replace(L, lua_upvalueindex(2));
    lua_pushinteger(L, v);
    return 1;
}
203 
204 
/*
 * Lua entry point registered as "utfvalues": checks that argument 1 is a
 * string and returns a closure over utfvalues_aux; the closure's upvalues
 * are the string and the byte offset at which decoding starts (0).
 */
static int str_utfvalues(lua_State *L)
{
    (void) luaL_checkstring(L, 1);  /* raises a Lua error on a bad argument */
    lua_settop(L, 1);               /* discard anything after the string */
    lua_pushinteger(L, 0);          /* start decoding at offset 0 */
    lua_pushcclosure(L, utfvalues_aux, 2);
    return 1;
}
212 
213 
/*
 * Iterator step behind string.characterpairs.
 *
 * Upvalue 1 holds the subject string, upvalue 2 the current byte offset.
 * Each call yields two one-byte strings: the byte at the offset and the
 * byte after it.  When only one byte is left, the second result is the
 * empty string.  The stored offset is advanced past the consumed bytes.
 * Returns no values once the offset has reached the end of the string.
 */
static int characterpairs_aux(lua_State *L)
{
    size_t len;
    const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
    int pos = lua_tointeger(L, lua_upvalueindex(2));
    int has_second;

    if (pos >= (int) len)
        return 0;               /* string ended */

    has_second = (pos + 1 < (int) len);

    lua_pushinteger(L, has_second ? (pos + 2) : (pos + 1));  /* iterator */
    lua_replace(L, lua_upvalueindex(2));

    lua_pushlstring(L, str + pos, 1);           /* first character */
    if (has_second)
        lua_pushlstring(L, str + pos + 1, 1);   /* second character */
    else
        lua_pushlstring(L, "", 0);              /* odd length: empty string */
    return 2;
}
238 
239 
/*
 * Lua entry point registered as "characterpairs": checks that argument 1
 * is a string and returns a closure over characterpairs_aux that captures
 * the string and an initial offset of 0.
 */
static int str_characterpairs(lua_State *L)
{
    (void) luaL_checkstring(L, 1);  /* raises a Lua error on a bad argument */
    lua_settop(L, 1);               /* keep the subject string only */
    lua_pushinteger(L, 0);          /* first pair starts at offset 0 */
    lua_pushcclosure(L, characterpairs_aux, 2);
    return 1;
}
247 
248 
/*
 * Iterator step behind string.characters.
 *
 * Upvalue 1 holds the subject string, upvalue 2 the current byte offset.
 * Each call yields the byte at the offset as a one-byte string and moves
 * the stored offset one position forward; it yields nothing once the
 * offset has reached the end of the string.
 */
static int characters_aux(lua_State *L)
{
    size_t len;
    const char *str = lua_tolstring(L, lua_upvalueindex(1), &len);
    int pos = lua_tointeger(L, lua_upvalueindex(2));

    if (pos >= (int) len)
        return 0;               /* string ended */

    lua_pushinteger(L, pos + 1);        /* iterator */
    lua_replace(L, lua_upvalueindex(2));
    lua_pushlstring(L, str + pos, 1);   /* the single character */
    return 1;
}
263 
264 
/*
 * Lua entry point registered as "characters": checks that argument 1 is a
 * string and returns a closure over characters_aux holding the string and
 * a start offset of 0 as upvalues.
 */
static int str_characters(lua_State *L)
{
    (void) luaL_checkstring(L, 1);  /* raises a Lua error on a bad argument */
    lua_settop(L, 1);               /* remove surplus arguments */
    lua_pushinteger(L, 0);          /* begin at the first byte */
    lua_pushcclosure(L, characters_aux, 2);
    return 1;
}
272 
/*
 * Lua entry point registered as "explode": splits a string into an array.
 *
 *   argument 1  the subject string
 *   argument 2  optional separator specification, default " +"
 *
 * Returns one table with the pieces at indices 1..n.
 *
 *   - An empty subject gives a table whose only element is the empty
 *     string.
 *   - An empty separator specification splits the subject into single
 *     bytes.
 *   - Otherwise the first byte of the specification is the separator.
 *     If the second byte is '+', runs of separators count as one,
 *     leading separators are skipped and a trailing separator does not
 *     produce a final empty piece.  Without '+', every separator ends a
 *     piece, so empty pieces can occur.
 *
 * The subject is scanned in place using explicit lengths.  The previous
 * implementation worked on a strcpy()'d heap copy, which
 *   - stopped copying at the first embedded NUL, so the scan went on to
 *     read uninitialised memory, and the last piece (measured with
 *     strlen) was truncated;
 *   - leaked the copy whenever lua_pushlstring raised a Lua error;
 *   - terminated the whole process when the allocation failed.
 * Results for subjects without embedded NUL bytes are unchanged.
 */
static int str_split(lua_State *L)
{
    size_t len;
    const char *s = luaL_checklstring(L, 1, &len);
    const char *joiner = luaL_optstring(L, 2, " +");
    const char sep = joiner[0];
    const char *end = s + len;  /* one past the last byte of the subject */
    const char *piece;          /* start of the piece being collected */
    const char *cur;            /* scan position */
    int mult;
    int n = 1;                  /* next table index */

    lua_newtable(L);

    if (len == 0) {
        lua_pushvalue(L, 1);
        lua_rawseti(L, -2, 1);
        return 1;
    }

    if (sep == 0) {
        /* empty separator: one table entry per byte */
        for (cur = s; cur < end; cur++) {
            lua_pushlstring(L, cur, 1);
            lua_rawseti(L, -2, n++);
        }
        return 1;
    }

    /* joiner is non-empty here, so joiner[1] is at worst its terminator */
    mult = (joiner[1] == '+');

    cur = s;
    if (mult) {
        /* leading separators never start a piece */
        while (cur < end && *cur == sep)
            cur++;
    }
    piece = cur;

    while (cur < end) {
        if (*cur == sep) {
            lua_pushlstring(L, piece, (size_t) (cur - piece));
            lua_rawseti(L, -2, n++);
            if (mult) {
                /* swallow the rest of a run of separators */
                while (cur + 1 < end && cur[1] == sep)
                    cur++;
            }
            piece = cur + 1;
        }
        cur++;
    }

    /* in '+' mode a trailing separator does not add an empty piece */
    if (mult && piece == end)
        return 1;

    lua_pushlstring(L, piece, (size_t) (end - piece));
    lua_rawseti(L, -2, n);
    return 1;
}
336 
337 
338 static const luaL_Reg strlibext[] = {
339   {"utfvalues", str_utfvalues},
340   {"utfcharacters", str_utfcharacters},
341   {"characters", str_characters},
342   {"characterpairs", str_characterpairs},
343   {"bytes", str_bytes},
344   {"bytepairs", str_bytepairs},
345   {"explode", str_split},
346   /* {"dump", str_dump} already in luajit */
347   {NULL, NULL}
348 };
349 
350 
351 
352 
353 /* ------------------------------------------------------------------------ */
354 
355 /* lj_libdef.h is generated by buildvm, it's not available on source */
356 /* #include "lua51/lj_libdef.h" */
357 
358 /* LUALIB_API int luaopen_string(lua_State *L) */
359 /* { */
360 /*   GCtab *mt; */
361 /*   global_State *g; */
362 /*   LJ_LIB_REG(L, LUA_STRLIBNAME, string); */
363 /*   luaL_register(L, LUA_STRLIBNAME, strlib); */
364 /*   //LJ_LIB_REG(L, LUA_STRLIBNAME, strlib); */
365 /* #if defined(LUA_COMPAT_GFIND) && !LJ_52 */
366 /*   lua_getfield(L, -1, "gmatch"); */
367 /*   lua_setfield(L, -2, "gfind"); */
368 /* #endif */
369 /*   mt = lj_tab_new(L, 0, 1); */
370 /*   /\* NOBARRIER: basemt is a GC root. *\/ */
371 /*   g = G(L); */
372 /*   setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); */
373 /*   settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); */
374 /*   mt->nomm = (uint8_t)(~(1u<<MM_index)); */
375 /*   return 1; */
376 /* } */
377 
378 /* void open_strlibext(lua_State *L) */
379 /* { */
380 /*   int v; */
381 /*   v = luaopen_string(L); */
382 /* } */
383 
384 
/*
 * Installs every function listed in strlibext as a field of the global
 * "string" table.  The Lua stack is left as it was found.
 */
void open_strlibext(lua_State *L)
{
    const luaL_Reg *entry = strlibext;

    lua_getglobal(L, "string");
    while (entry->name) {
        lua_pushcfunction(L, entry->func);
        lua_setfield(L, -2, entry->name);   /* string[name] = func */
        entry++;
    }
    lua_pop(L, 1);                          /* remove the "string" table */
}
395 
396