1 #include "jsi.h"
2 #include "jsvalue.h"
3 #include "jsbuiltin.h"
4 #include "utf.h"
5 #include "regexp.h"
6
/*
 * Return the value in stack slot idx converted to a string.
 * A value that js_iscoercible() rejects raises a TypeError first; the
 * String.prototype methods in this file use this to validate slot 0.
 */
static const char *checkstring(js_State *J, int idx)
{
	int coercible = js_iscoercible(J, idx);
	if (!coercible) {
		js_typeerror(J, "string function called on null or undefined");
	}
	return js_tostring(J, idx);
}
13
/*
 * Return the rune at rune index i of the UTF-8 string s.
 * Returns 0 when i is negative or when the terminating NUL is reached
 * before index i. J is not used.
 */
int js_runeat(js_State *J, const char *s, int i)
{
	Rune r = 0;
	int k;
	for (k = 0; k <= i; ++k) {
		r = *(unsigned char *)s;
		if (r == 0)
			return 0; /* ran off the end of the string */
		if (r < Runeself)
			s += 1; /* single byte rune */
		else
			s += chartorune(&r, s);
	}
	return r;
}
28
js_utfidxtoptr(const char * s,int i)29 const char *js_utfidxtoptr(const char *s, int i)
30 {
31 Rune rune;
32 while (i-- > 0) {
33 rune = *(unsigned char*)s;
34 if (rune < Runeself) {
35 if (rune == 0)
36 return NULL;
37 ++s;
38 } else
39 s += chartorune(&rune, s);
40 }
41 return s;
42 }
43
js_utfptrtoidx(const char * s,const char * p)44 int js_utfptrtoidx(const char *s, const char *p)
45 {
46 Rune rune;
47 int i = 0;
48 while (s < p) {
49 if (*(unsigned char *)s < Runeself)
50 ++s;
51 else
52 s += chartorune(&rune, s);
53 ++i;
54 }
55 return i;
56 }
57
/*
 * Handler registered as the "new String(...)" half of the String
 * constructor: creates a string object via js_newstring from slot 1
 * converted to a string, or from "" when the stack holds no slot 1.
 */
static void jsB_new_String(js_State *J)
{
	const char *init = "";
	if (js_gettop(J) > 1)
		init = js_tostring(J, 1);
	js_newstring(J, init);
}
62
/*
 * Handler registered as the plain-call half of the String constructor:
 * pushes slot 1 converted to a string, or "" when the stack holds no slot 1.
 */
static void jsB_String(js_State *J)
{
	const char *text = "";
	if (js_gettop(J) > 1)
		text = js_tostring(J, 1);
	js_pushstring(J, text);
}
67
/*
 * String.prototype.toString: push the string held by the String object in
 * slot 0. Raises a TypeError when that object is not of type JS_CSTRING.
 */
static void Sp_toString(js_State *J)
{
	js_Object *obj = js_toobject(J, 0);
	if (obj->type != JS_CSTRING) {
		js_typeerror(J, "not a string");
	}
	js_pushliteral(J, obj->u.s.string);
}
74
/*
 * String.prototype.valueOf: push the string held by the String object in
 * slot 0. Raises a TypeError when that object is not of type JS_CSTRING.
 */
static void Sp_valueOf(js_State *J)
{
	js_Object *obj = js_toobject(J, 0);
	if (obj->type != JS_CSTRING) {
		js_typeerror(J, "not a string");
	}
	js_pushliteral(J, obj->u.s.string);
}
81
/*
 * String.prototype.charAt: push a one-character string holding the rune at
 * the rune index given in slot 1, or "" when js_runeat() reports no rune
 * there (index out of range).
 */
static void Sp_charAt(js_State *J)
{
	char utf[UTFmax + 1];
	const char *str = checkstring(J, 0);
	int index = js_tointeger(J, 1);
	Rune r = js_runeat(J, str, index);
	int width;

	if (r <= 0) {
		js_pushliteral(J, "");
		return;
	}

	/* re-encode the single rune and NUL-terminate it */
	width = runetochar(utf, &r);
	utf[width] = 0;
	js_pushstring(J, utf);
}
95
/*
 * String.prototype.charCodeAt: push the numeric value of the rune at the
 * rune index given in slot 1, or NaN when js_runeat() reports no rune there.
 */
static void Sp_charCodeAt(js_State *J)
{
	const char *str = checkstring(J, 0);
	int index = js_tointeger(J, 1);
	Rune r = js_runeat(J, str, index);
	if (r > 0) {
		js_pushnumber(J, r);
		return;
	}
	js_pushnumber(J, NAN);
}
106
/*
 * String.prototype.concat: push the receiver (slot 0) converted to a string
 * followed by every argument (slots 1..top-1) converted to a string.
 *
 * The receiver is always validated and converted through checkstring(), also
 * when no arguments are given; previously that case returned early without
 * pushing anything, leaving the unconverted receiver on top of the stack.
 *
 * The work buffer is a raw allocation, so the conversions and reallocations
 * run inside a try block that frees it before re-throwing.
 */
static void Sp_concat(js_State *J)
{
	int i, top = js_gettop(J);
	int n;
	char * volatile out; /* read in the error handler after a non-local jump */
	const char *s;

	s = checkstring(J, 0);

	/* no arguments: the result is just the receiver as a string */
	if (top == 1) {
		js_pushstring(J, s);
		return;
	}

	n = strlen(s);
	out = js_malloc(J, n + 1);
	memcpy(out, s, n + 1);

	if (js_try(J)) {
		js_free(J, out);
		js_throw(J);
	}

	for (i = 1; i < top; ++i) {
		int len;
		s = js_tostring(J, i);
		len = strlen(s);
		out = js_realloc(J, out, n + len + 1);
		/* append at the known end instead of rescanning with strcat */
		memcpy(out + n, s, len + 1);
		n += len;
	}

	js_pushstring(J, out);
	js_endtry(J);
	js_free(J, out);
}
138
/*
 * String.prototype.indexOf: push the rune index of the first occurrence of
 * the needle (slot 1) in the receiver at or after the position in slot 2,
 * or -1 when there is none.
 *
 * The scan compares bytes with strncmp but advances and counts by runes, so
 * the reported index is a rune index.
 */
static void Sp_indexOf(js_State *J)
{
	const char *haystack = checkstring(J, 0);
	const char *needle = js_tostring(J, 1);
	int pos = js_tointeger(J, 2);
	int len = strlen(needle);
	int k = 0;
	Rune rune;
	while (*haystack) {
		if (k >= pos && !strncmp(haystack, needle, len)) {
			js_pushnumber(J, k);
			return;
		}
		haystack += chartorune(&rune, haystack);
		++k;
	}
	/*
	 * The loop never tests the position just past the last rune. An empty
	 * needle matches there too, and reaching this point means the start
	 * position was at or beyond the end, so the answer is the string length
	 * (k), e.g. "".indexOf("") is 0 and "abc".indexOf("", 3) is 3.
	 */
	if (len == 0) {
		js_pushnumber(J, k);
		return;
	}
	js_pushnumber(J, -1);
}
157
/*
 * String.prototype.lastIndexOf: push the rune index of the last occurrence
 * of the needle (slot 1) in the receiver that starts at or before the
 * position in slot 2, or -1 when there is none. When slot 2 is undefined the
 * whole string is searched (the byte length is used as the bound, which is
 * never smaller than the rune count).
 */
static void Sp_lastIndexOf(js_State *J)
{
	const char *haystack = checkstring(J, 0);
	const char *needle = js_tostring(J, 1);
	int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
	int len = strlen(needle);
	int k = 0, last = -1;
	Rune rune;

	/* a negative start position is treated as 0, so index 0 is still tested */
	if (pos < 0)
		pos = 0;

	while (*haystack && k <= pos) {
		if (!strncmp(haystack, needle, len))
			last = k;
		haystack += chartorune(&rune, haystack);
		++k;
	}

	/*
	 * The loop never tests the position just past the last rune. An empty
	 * needle matches there as well, provided that position is within the
	 * search bound, e.g. "abc".lastIndexOf("") is 3.
	 */
	if (len == 0 && *haystack == 0 && k <= pos)
		last = k;

	js_pushnumber(J, last);
}
174
/*
 * String.prototype.localeCompare: push the strcmp() ordering of the
 * receiver against slot 1 converted to a string. The comparison is
 * byte-wise; no locale is consulted.
 */
static void Sp_localeCompare(js_State *J)
{
	const char *self = checkstring(J, 0);
	const char *that = js_tostring(J, 1);
	int order = strcmp(self, that);
	js_pushnumber(J, order);
}
181
/*
 * String.prototype.slice: push the runes of the receiver from rune index
 * start (slot 1) up to but not including end (slot 2, defaulting to the
 * length). Negative indices count back from the end of the string, and both
 * are clamped to [0, length].
 *
 * Unlike substring, slice never swaps its arguments: when start is not
 * before end the result is the empty string. The previous code returned the
 * swapped range in that case, e.g. "abc".slice(2, 1) gave "b".
 */
static void Sp_slice(js_State *J)
{
	const char *str = checkstring(J, 0);
	const char *ss, *ee;
	int len = utflen(str);
	int s = js_tointeger(J, 1);
	int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;

	/* negative indices are relative to the end */
	s = s < 0 ? s + len : s;
	e = e < 0 ? e + len : e;

	s = s < 0 ? 0 : s > len ? len : s;
	e = e < 0 ? 0 : e > len ? len : e;

	/* span = max(end - start, 0) */
	if (e < s)
		e = s;

	ss = js_utfidxtoptr(str, s);
	ee = js_utfidxtoptr(ss, e - s);

	js_pushlstring(J, ss, ee - ss);
}
206
/*
 * String.prototype.substring: push the runes of the receiver between the
 * rune indices in slot 1 and slot 2 (slot 2 defaults to the length). Both
 * indices are clamped to [0, length] and the smaller one is used as the
 * start, so the arguments may be given in either order.
 */
static void Sp_substring(js_State *J)
{
	const char *str = checkstring(J, 0);
	const char *first, *last;
	int len = utflen(str);
	int from = js_tointeger(J, 1);
	int to = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;

	if (from < 0)
		from = 0;
	else if (from > len)
		from = len;

	if (to < 0)
		to = 0;
	else if (to > len)
		to = len;

	/* order the two indices so that from <= to */
	if (to < from) {
		int tmp = from;
		from = to;
		to = tmp;
	}

	first = js_utfidxtoptr(str, from);
	last = js_utfidxtoptr(first, to - from);

	js_pushlstring(J, first, last - first);
}
228
/*
 * String.prototype.toLowerCase (also registered as toLocaleLowerCase):
 * push a copy of the receiver with every rune mapped through tolowerrune().
 * The scratch buffer reserves UTFmax bytes per input byte plus the NUL and
 * is freed on both the normal and the error path of the push.
 */
static void Sp_toLowerCase(js_State *J)
{
	const char *p = checkstring(J, 0);
	char *lower = js_malloc(J, UTFmax * strlen(p) + 1);
	char *q = lower;
	Rune r;

	while (*p != 0) {
		p += chartorune(&r, p);
		r = tolowerrune(r);
		q += runetochar(q, &r);
	}
	*q = 0;

	if (js_try(J)) {
		js_free(J, lower);
		js_throw(J);
	}
	js_pushstring(J, lower);
	js_endtry(J);
	js_free(J, lower);
}
250
/*
 * String.prototype.toUpperCase (also registered as toLocaleUpperCase):
 * push a copy of the receiver with every rune mapped through toupperrune().
 * The scratch buffer reserves UTFmax bytes per input byte plus the NUL and
 * is freed on both the normal and the error path of the push.
 */
static void Sp_toUpperCase(js_State *J)
{
	const char *p = checkstring(J, 0);
	char *upper = js_malloc(J, UTFmax * strlen(p) + 1);
	char *q = upper;
	Rune r;

	while (*p != 0) {
		p += chartorune(&r, p);
		r = toupperrune(r);
		q += runetochar(q, &r);
	}
	*q = 0;

	if (js_try(J)) {
		js_free(J, upper);
		js_throw(J);
	}
	js_pushstring(J, upper);
	js_endtry(J);
	js_free(J, upper);
}
272
/*
 * Return 1 when code point c is one of the characters removed by trim
 * (TAB, LF, VT, FF, CR, SPACE, U+00A0, U+FEFF, U+2028, U+2029), else 0.
 */
static int istrim(int c)
{
	switch (c) {
	case 0x9:
	case 0xA:
	case 0xB:
	case 0xC:
	case 0xD:
	case 0x20:
	case 0xA0:
	case 0x2028:
	case 0x2029:
	case 0xFEFF:
		return 1;
	default:
		return 0;
	}
}
278
/*
 * Byte length of the trimmable character whose UTF-8 encoding starts at p,
 * or 0 when p does not start with one (this includes the terminating NUL).
 *
 * The characters are the ones istrim() accepts. ASCII ones are classified
 * by istrim() directly; the others are recognised by their UTF-8 byte
 * sequences. Bytes past p[0] are only inspected after the preceding byte
 * matched a non-zero value, so the terminator is never read past.
 */
static int trimwidth(const char *p)
{
	const unsigned char *u = (const unsigned char *)p;
	if (u[0] < 0x80)
		return istrim(u[0]) ? 1 : 0;
	if (u[0] == 0xC2 && u[1] == 0xA0) /* U+00A0 */
		return 2;
	if (u[0] == 0xE2 && u[1] == 0x80 && (u[2] == 0xA8 || u[2] == 0xA9)) /* U+2028, U+2029 */
		return 3;
	if (u[0] == 0xEF && u[1] == 0xBB && u[2] == 0xBF) /* U+FEFF */
		return 3;
	return 0;
}

/*
 * String.prototype.trim: push the receiver with leading and trailing
 * trimmable characters removed.
 *
 * Classification is done per encoded character rather than per raw byte.
 * Testing single bytes could never see the non-ASCII characters istrim()
 * lists, and where plain char is unsigned it would strip a trailing 0xA0
 * continuation byte out of an unrelated multi-byte character.
 */
static void Sp_trim(js_State *J)
{
	const char *s, *e, *p;
	int n;

	s = checkstring(J, 0);

	/* skip the leading run */
	while ((n = trimwidth(s)) > 0)
		s += n;

	/*
	 * Walk forward remembering the end of the last byte that is not part
	 * of a trimmable character. The lead bytes trimwidth() matches never
	 * occur as continuation bytes, so stepping over other content one byte
	 * at a time cannot produce a false match inside a character.
	 */
	e = p = s;
	while (*p) {
		n = trimwidth(p);
		if (n > 0) {
			p += n;
		} else {
			++p;
			e = p;
		}
	}

	js_pushlstring(J, s, e - s);
}
290
/*
 * String.fromCharCode: push the string made of every argument
 * (slots 1..top-1) converted with js_touint16() and encoded as a rune.
 * The scratch buffer reserves UTFmax bytes per argument plus the NUL and is
 * freed on both the normal and the error path.
 */
static void S_fromCharCode(js_State *J)
{
	int top = js_gettop(J);
	int arg;
	Rune unit;
	char *buf, *w;

	buf = js_malloc(J, (top-1) * UTFmax + 1);
	w = buf;

	if (js_try(J)) {
		js_free(J, buf);
		js_throw(J);
	}

	arg = 1;
	while (arg < top) {
		unit = js_touint16(J, arg);
		w += runetochar(w, &unit);
		++arg;
	}
	*w = 0;
	js_pushstring(J, buf);

	js_endtry(J);
	js_free(J, buf);
}
314
/*
 * String.prototype.match.
 *
 * Slot 1 is used as-is when it is a regexp; otherwise a regexp is built
 * from its string conversion ("" for undefined). Without the global flag
 * the work is delegated to js_RegExp_prototype_exec(). With the global flag
 * every match of the whole pattern is collected into an array, and null is
 * pushed instead when nothing matched.
 */
static void Sp_match(js_State *J)
{
	js_Regexp *re;
	const char *text;
	const char *cur, *end, *mstart, *mend;
	int count;
	Resub m;

	text = checkstring(J, 0);

	if (js_isregexp(J, 1))
		js_copy(J, 1);
	else
		js_newregexp(J, js_isundefined(J, 1) ? "" : js_tostring(J, 1), 0);

	re = js_toregexp(J, -1);
	if ((re->flags & JS_REGEXP_G) == 0) {
		js_RegExp_prototype_exec(J, re, text);
		return;
	}

	re->last = 0;

	js_newarray(J);

	count = 0;
	end = text + strlen(text);
	for (cur = text; cur <= end; ) {
		/* past the first position the text no longer starts a line */
		if (js_regexec(re->prog, cur, &m, cur > text ? REG_NOTBOL : 0))
			break;

		mstart = m.sub[0].sp;
		mend = m.sub[0].ep;

		js_pushlstring(J, mstart, mend - mstart);
		js_setindex(J, -2, count++);

		/* step over an empty match so the scan always makes progress */
		cur = (mend - mstart == 0) ? mend + 1 : mend;
	}

	if (count == 0) {
		js_pop(J, 1);
		js_pushnull(J);
	}
}
365
/*
 * String.prototype.search: push the rune index of the first match of the
 * regexp in the receiver, or -1 when there is no match. Slot 1 is used
 * as-is when it is a regexp; otherwise a regexp is built from its string
 * conversion ("" for undefined).
 */
static void Sp_search(js_State *J)
{
	js_Regexp *re;
	const char *text;
	Resub m;

	text = checkstring(J, 0);

	if (js_isregexp(J, 1))
		js_copy(J, 1);
	else
		js_newregexp(J, js_isundefined(J, 1) ? "" : js_tostring(J, 1), 0);

	re = js_toregexp(J, -1);

	/* js_regexec returns non-zero when nothing matched */
	if (js_regexec(re->prog, text, &m, 0)) {
		js_pushnumber(J, -1);
		return;
	}
	js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
}
388
/*
 * String.prototype.replace for a regexp search value (slot 1).
 *
 * When nothing matches, the receiver is pushed unchanged. Otherwise the
 * result is assembled in a js_Buffer: the text before each match, then the
 * replacement, and finally the text after the last match. With the global
 * flag the search is repeated from the end of each match.
 *
 * The replacement (slot 2) is either a callable, invoked with the matched
 * substring, the captures, an offset and the receiver, or a string in which
 * these references are expanded:
 *   $$  a literal '$'
 *   $&  the matched text
 *   $`  the text before the match
 *   $'  the text after the match
 *   $n / $nn  capture n; an unknown capture is copied through literally
 */
static void Sp_replace_regexp(js_State *J)
{
	js_Regexp *re;
	const char *source, *s, *r;
	js_Buffer *sb = NULL;
	int n, x;
	Resub m;

	source = checkstring(J, 0);
	re = js_toregexp(J, 1);

	/* no match at all: the result is the receiver itself */
	if (js_regexec(re->prog, source, &m, 0)) {
		js_copy(J, 0);
		return;
	}

	re->last = 0;

loop:
	s = m.sub[0].sp;
	n = m.sub[0].ep - m.sub[0].sp;

	if (js_iscallable(J, 2)) {
		js_copy(J, 2);
		js_pushundefined(J);
		for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
			js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
		/*
		 * NOTE(review): this is a byte distance from the current `source`,
		 * which is advanced past each match in global mode, not a rune
		 * index into the original string.
		 */
		js_pushnumber(J, s - source); /* arg x+2: offset within search string */
		js_copy(J, 0); /* arg x+3: search string */
		js_call(J, 2 + x);
		r = js_tostring(J, -1);
		js_putm(J, &sb, source, s);
		js_puts(J, &sb, r);
		js_pop(J, 1);
	} else {
		r = js_tostring(J, 2);
		js_putm(J, &sb, source, s);
		while (*r) {
			if (*r == '$') {
				switch (*(++r)) {
				case 0: --r; /* end of string; back up and fall through */
				case '$': js_putc(J, &sb, '$'); break;
				case '`': js_putm(J, &sb, source, s); break;
				case '\'': js_puts(J, &sb, s + n); break;
				case '&':
					js_putm(J, &sb, s, s + n);
					break;
				case '0': case '1': case '2': case '3': case '4':
				case '5': case '6': case '7': case '8': case '9':
					x = *r - '0';
					if (r[1] >= '0' && r[1] <= '9')
						x = x * 10 + *(++r) - '0';
					if (x > 0 && x < m.nsub) {
						js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
					} else {
						/* unknown capture: copy the reference through */
						js_putc(J, &sb, '$');
						/*
						 * Two digits are needed from 10 upwards; testing
						 * "> 10" would emit the single byte '0' + 10 (':')
						 * for "$10".
						 */
						if (x >= 10) {
							js_putc(J, &sb, '0' + x / 10);
							js_putc(J, &sb, '0' + x % 10);
						} else {
							js_putc(J, &sb, '0' + x);
						}
					}
					break;
				default:
					js_putc(J, &sb, '$');
					js_putc(J, &sb, *r);
					break;
				}
				++r;
			} else {
				js_putc(J, &sb, *r++);
			}
		}
	}

	if (re->flags & JS_REGEXP_G) {
		source = m.sub[0].ep;
		if (n == 0) {
			/* empty match: copy one byte through so the scan advances */
			if (*source)
				js_putc(J, &sb, *source++);
			else
				goto end;
		}
		if (!js_regexec(re->prog, source, &m, REG_NOTBOL))
			goto loop;
	}

end:
	/* text after the last match, then the terminating NUL */
	js_puts(J, &sb, s + n);
	js_putc(J, &sb, 0);

	if (js_try(J)) {
		js_free(J, sb);
		js_throw(J);
	}
	js_pushstring(J, sb ? sb->s : "");
	js_endtry(J);
	js_free(J, sb);
}
489
/*
 * String.prototype.replace for a plain string search value (slot 1).
 *
 * Only the first occurrence (found with strstr) is replaced; when there is
 * none the receiver is pushed unchanged. The replacement (slot 2) is either
 * a callable, invoked with the matched text, its offset from the start of
 * the receiver and the receiver, or a string in which $$, $&, $` and $'
 * are expanded. Any other "$c" pair is copied through literally.
 */
static void Sp_replace_string(js_State *J)
{
	const char *text, *pattern, *hit, *rep;
	js_Buffer *out = NULL;
	int plen;

	text = checkstring(J, 0);
	pattern = js_tostring(J, 1);

	hit = strstr(text, pattern);
	if (hit == NULL) {
		js_copy(J, 0);
		return;
	}
	plen = strlen(pattern);

	if (js_iscallable(J, 2)) {
		js_copy(J, 2);
		js_pushundefined(J);
		js_pushlstring(J, hit, plen); /* arg 1: substring that matched */
		js_pushnumber(J, hit - text); /* arg 2: offset within search string */
		js_copy(J, 0); /* arg 3: search string */
		js_call(J, 3);
		rep = js_tostring(J, -1);
		js_putm(J, &out, text, hit);
		js_puts(J, &out, rep);
		js_puts(J, &out, hit + plen);
		js_putc(J, &out, 0);
		js_pop(J, 1);
	} else {
		rep = js_tostring(J, 2);
		js_putm(J, &out, text, hit);
		for (; *rep; ++rep) {
			if (*rep != '$') {
				js_putc(J, &out, *rep);
				continue;
			}
			switch (*(++rep)) {
			case 0:
				--rep; /* end of string; back up and fall through */
			case '$':
				js_putc(J, &out, '$');
				break;
			case '&':
				js_putm(J, &out, hit, hit + plen);
				break;
			case '`':
				js_putm(J, &out, text, hit);
				break;
			case '\'':
				js_puts(J, &out, hit + plen);
				break;
			default:
				js_putc(J, &out, '$');
				js_putc(J, &out, *rep);
				break;
			}
		}
		js_puts(J, &out, hit + plen);
		js_putc(J, &out, 0);
	}

	if (js_try(J)) {
		js_free(J, out);
		js_throw(J);
	}
	js_pushstring(J, out ? out->s : "");
	js_endtry(J);
	js_free(J, out);
}
549
/*
 * String.prototype.replace: dispatch on the search value in slot 1, to the
 * regexp implementation when it is a regexp and to the plain string
 * implementation otherwise.
 */
static void Sp_replace(js_State *J)
{
	if (!js_isregexp(J, 1)) {
		Sp_replace_string(J);
		return;
	}
	Sp_replace_regexp(J);
}
557
/*
 * String.prototype.split for a regexp separator (slot 1).
 *
 * Pushes an array holding the pieces of the receiver between successive
 * matches; after each piece, the text of every capture group of that match
 * is appended as well. At most `limit` (slot 2, default 1 << 30) elements
 * are stored.
 *
 * p marks the start of the piece being collected and a the position the
 * next search starts from.
 */
static void Sp_split_regexp(js_State *J)
{
	js_Regexp *re;
	const char *text;
	int limit, len, k;
	const char *p, *a, *b, *c, *e;
	Resub m;

	text = checkstring(J, 0);
	re = js_toregexp(J, 1);
	limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;

	js_newarray(J);
	len = 0;

	e = text + strlen(text);

	/* splitting the empty string */
	if (e == text) {
		/* only when the regexp does not match "" is the result [""] */
		if (js_regexec(re->prog, text, &m, 0)) {
			if (len == limit) return;
			js_pushliteral(J, "");
			js_setindex(J, -2, 0);
		}
		return;
	}

	p = a = text;
	while (a < e) {
		if (js_regexec(re->prog, a, &m, a > text ? REG_NOTBOL : 0))
			break; /* no match */

		b = m.sub[0].sp;
		c = m.sub[0].ep;

		/*
		 * An empty match at the end of the last match would split off
		 * nothing, so retry one byte further on. The match must really be
		 * empty: a non-empty match that starts at p is a separator with an
		 * empty piece in front of it, e.g. "abc".split(/a/) is ["", "bc"].
		 */
		if (b == c && b == p) {
			++a;
			continue;
		}

		if (len == limit) return;
		js_pushlstring(J, p, b - p);
		js_setindex(J, -2, len++);

		for (k = 1; k < m.nsub; ++k) {
			if (len == limit) return;
			js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
			js_setindex(J, -2, len++);
		}

		a = p = c;
	}

	/* whatever follows the last separator is the final piece */
	if (len == limit) return;
	js_pushstring(J, p);
	js_setindex(J, -2, len);
}
616
/*
 * String.prototype.split for a plain string separator (slot 1).
 *
 * Pushes an array of at most `limit` (slot 2, default 1 << 30) pieces. An
 * empty separator splits the receiver into its individual runes; otherwise
 * the receiver is cut at every occurrence of the separator found by strstr.
 */
static void Sp_split_string(js_State *J)
{
	const char *str = checkstring(J, 0);
	const char *sep = js_tostring(J, 1);
	int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
	int count, seplen;

	js_newarray(J);

	seplen = strlen(sep);

	/* empty separator: one element per rune */
	if (seplen == 0) {
		Rune r;
		int width;
		count = 0;
		while (*str && count < limit) {
			width = chartorune(&r, str);
			js_pushlstring(J, str, width);
			js_setindex(J, -2, count);
			str += width;
			++count;
		}
		return;
	}

	/* str becomes NULL once the tail after the last separator is stored */
	count = 0;
	while (str && count < limit) {
		const char *hit = strstr(str, sep);
		if (hit == NULL) {
			js_pushstring(J, str);
			js_setindex(J, -2, count);
			str = NULL;
		} else {
			js_pushlstring(J, str, hit-str);
			js_setindex(J, -2, count);
			str = hit + seplen;
		}
		++count;
	}
}
653
/*
 * String.prototype.split: dispatch on the separator in slot 1.
 * An undefined separator yields a one-element array holding a copy of
 * slot 0; a regexp or any other value is handled by the matching helper.
 */
static void Sp_split(js_State *J)
{
	if (js_isundefined(J, 1)) {
		js_newarray(J);
		js_copy(J, 0);
		js_setindex(J, -2, 0);
		return;
	}

	if (js_isregexp(J, 1))
		Sp_split_regexp(J);
	else
		Sp_split_string(J);
}
666
/*
 * Set up the String built-in: give the String prototype object an empty
 * string value, register the prototype methods on it, create the String
 * constructor with its static function, and define it as the global
 * "String" with the JS_DONTENUM flag.
 */
void jsB_initstring(js_State *J)
{
	/* the prototype object itself holds the empty string */
	J->String_prototype->u.s.string = "";
	J->String_prototype->u.s.length = 0;

	/* String.prototype methods */
	js_pushobject(J, J->String_prototype);
	jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
	jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
	jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
	jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
	jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
	jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
	jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
	jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
	jsB_propf(J, "String.prototype.match", Sp_match, 1);
	jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
	jsB_propf(J, "String.prototype.search", Sp_search, 1);
	jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
	jsB_propf(J, "String.prototype.split", Sp_split, 2);
	jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
	/* the locale variants share the plain case-mapping implementations */
	jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
	jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
	jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
	jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);

	/* ES5 */
	jsB_propf(J, "String.prototype.trim", Sp_trim, 0);

	/* constructor and its static functions */
	js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
	jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */

	js_defglobal(J, "String", JS_DONTENUM);
}
702