1 /*
2  *  String built-ins
3  */
4 
5 /* XXX: There are several limitations in the current implementation for
6  * strings with >= 0x80000000UL characters.  In some cases one would need
7  * to be able to represent the range [-0xffffffff,0xffffffff] and so on.
8  * Generally character and byte length are assumed to fit into signed 32
9  * bits (< 0x80000000UL).  Places with issues are not marked explicitly
10  * below in all cases, look for signed type usage (duk_int_t etc) for
11  * offsets/lengths.
12  */
13 
14 #include "duk_internal.h"
15 
16 /*
17  *  Constructor
18  */
19 
duk_bi_string_constructor(duk_context * ctx)20 DUK_INTERNAL duk_ret_t duk_bi_string_constructor(duk_context *ctx) {
21 	/* String constructor needs to distinguish between an argument not given at all
22 	 * vs. given as 'undefined'.  We're a vararg function to handle this properly.
23 	 */
24 
25 	if (duk_get_top(ctx) == 0) {
26 		duk_push_hstring_stridx(ctx, DUK_STRIDX_EMPTY_STRING);
27 	} else {
28 		duk_to_string(ctx, 0);
29 	}
30 	DUK_ASSERT(duk_is_string(ctx, 0));
31 	duk_set_top(ctx, 1);
32 
33 	if (duk_is_constructor_call(ctx)) {
34 		duk_push_object_helper(ctx,
35 		                       DUK_HOBJECT_FLAG_EXTENSIBLE |
36 		                       DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ |
37 		                       DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING),
38 		                       DUK_BIDX_STRING_PROTOTYPE);
39 
40 		/* String object internal value is immutable */
41 		duk_dup(ctx, 0);
42 		duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_INT_VALUE, DUK_PROPDESC_FLAGS_NONE);
43 	}
44 	/* Note: unbalanced stack on purpose */
45 
46 	return 1;
47 }
48 
duk_bi_string_constructor_from_char_code(duk_context * ctx)49 DUK_INTERNAL duk_ret_t duk_bi_string_constructor_from_char_code(duk_context *ctx) {
50 	duk_hthread *thr = (duk_hthread *) ctx;
51 	duk_bufwriter_ctx bw_alloc;
52 	duk_bufwriter_ctx *bw;
53 	duk_idx_t i, n;
54 	duk_ucodepoint_t cp;
55 
56 	/* XXX: It would be nice to build the string directly but ToUint16()
57 	 * coercion is needed so a generic helper would not be very
58 	 * helpful (perhaps coerce the value stack first here and then
59 	 * build a string from a duk_tval number sequence in one go?).
60 	 */
61 
62 	n = duk_get_top(ctx);
63 
64 	bw = &bw_alloc;
65 	DUK_BW_INIT_PUSHBUF(thr, bw, n);  /* initial estimate for ASCII only codepoints */
66 
67 	for (i = 0; i < n; i++) {
68 		/* XXX: could improve bufwriter handling to write multiple codepoints
69 		 * with one ensure call but the relative benefit would be quite small.
70 		 */
71 
72 #if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT)
73 		/* ToUint16() coercion is mandatory in the E5.1 specification, but
74 		 * this non-compliant behavior makes more sense because we support
75 		 * non-BMP codepoints.  Don't use CESU-8 because that'd create
76 		 * surrogate pairs.
77 		 */
78 
79 		cp = (duk_ucodepoint_t) duk_to_uint32(ctx, i);
80 		DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
81 #else
82 		cp = (duk_ucodepoint_t) duk_to_uint16(ctx, i);
83 		DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
84 #endif
85 	}
86 
87 	DUK_BW_COMPACT(thr, bw);
88 	duk_to_string(ctx, -1);
89 	return 1;
90 }
91 
92 /*
93  *  toString(), valueOf()
94  */
95 
duk_bi_string_prototype_to_string(duk_context * ctx)96 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_to_string(duk_context *ctx) {
97 	duk_tval *tv;
98 
99 	duk_push_this(ctx);
100 	tv = duk_require_tval(ctx, -1);
101 	DUK_ASSERT(tv != NULL);
102 
103 	if (DUK_TVAL_IS_STRING(tv)) {
104 		/* return as is */
105 		return 1;
106 	} else if (DUK_TVAL_IS_OBJECT(tv)) {
107 		duk_hobject *h = DUK_TVAL_GET_OBJECT(tv);
108 		DUK_ASSERT(h != NULL);
109 
110 		/* Must be a "string object", i.e. class "String" */
111 		if (DUK_HOBJECT_GET_CLASS_NUMBER(h) != DUK_HOBJECT_CLASS_STRING) {
112 			goto type_error;
113 		}
114 
115 		duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INT_VALUE);
116 		DUK_ASSERT(duk_is_string(ctx, -1));
117 
118 		return 1;
119 	} else {
120 		goto type_error;
121 	}
122 
123 	/* never here, but fall through */
124 
125  type_error:
126 	return DUK_RET_TYPE_ERROR;
127 }
128 
129 /*
130  *  Character and charcode access
131  */
132 
duk_bi_string_prototype_char_at(duk_context * ctx)133 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_at(duk_context *ctx) {
134 	duk_int_t pos;
135 
136 	/* XXX: faster implementation */
137 
138 	(void) duk_push_this_coercible_to_string(ctx);
139 	pos = duk_to_int(ctx, 0);
140 	duk_substring(ctx, -1, pos, pos + 1);
141 	return 1;
142 }
143 
duk_bi_string_prototype_char_code_at(duk_context * ctx)144 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_context *ctx) {
145 	duk_hthread *thr = (duk_hthread *) ctx;
146 	duk_int_t pos;
147 	duk_hstring *h;
148 	duk_bool_t clamped;
149 
150 	/* XXX: faster implementation */
151 
152 	DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *) duk_get_tval(ctx, 0)));
153 
154 	h = duk_push_this_coercible_to_string(ctx);
155 	DUK_ASSERT(h != NULL);
156 
157 	pos = duk_to_int_clamped_raw(ctx,
158 	                             0 /*index*/,
159 	                             0 /*min(incl)*/,
160 	                             DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/,
161 	                             &clamped /*out_clamped*/);
162 	if (clamped) {
163 		duk_push_number(ctx, DUK_DOUBLE_NAN);
164 		return 1;
165 	}
166 
167 	duk_push_u32(ctx, (duk_uint32_t) duk_hstring_char_code_at_raw(thr, h, pos));
168 	return 1;
169 }
170 
171 /*
172  *  substring(), substr(), slice()
173  */
174 
175 /* XXX: any chance of merging these three similar but still slightly
176  * different algorithms so that footprint would be reduced?
177  */
178 
duk_bi_string_prototype_substring(duk_context * ctx)179 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substring(duk_context *ctx) {
180 	duk_hstring *h;
181 	duk_int_t start_pos, end_pos;
182 	duk_int_t len;
183 
184 	h = duk_push_this_coercible_to_string(ctx);
185 	DUK_ASSERT(h != NULL);
186 	len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
187 
188 	/* [ start end str ] */
189 
190 	start_pos = duk_to_int_clamped(ctx, 0, 0, len);
191 	if (duk_is_undefined(ctx, 1)) {
192 		end_pos = len;
193 	} else {
194 		end_pos = duk_to_int_clamped(ctx, 1, 0, len);
195 	}
196 	DUK_ASSERT(start_pos >= 0 && start_pos <= len);
197 	DUK_ASSERT(end_pos >= 0 && end_pos <= len);
198 
199 	if (start_pos > end_pos) {
200 		duk_int_t tmp = start_pos;
201 		start_pos = end_pos;
202 		end_pos = tmp;
203 	}
204 
205 	DUK_ASSERT(end_pos >= start_pos);
206 
207 	duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
208 	return 1;
209 }
210 
211 #ifdef DUK_USE_SECTION_B
duk_bi_string_prototype_substr(duk_context * ctx)212 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) {
213 	duk_hstring *h;
214 	duk_int_t start_pos, end_pos;
215 	duk_int_t len;
216 
217 	/* Unlike non-obsolete String calls, substr() algorithm in E5.1
218 	 * specification will happily coerce undefined and null to strings
219 	 * ("undefined" and "null").
220 	 */
221 	duk_push_this(ctx);
222 	h = duk_to_hstring(ctx, -1);
223 	DUK_ASSERT(h != NULL);
224 	len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
225 
226 	/* [ start length str ] */
227 
228 	/* The implementation for computing of start_pos and end_pos differs
229 	 * from the standard algorithm, but is intended to result in the exactly
230 	 * same behavior.  This is not always obvious.
231 	 */
232 
233 	/* combines steps 2 and 5; -len ensures max() not needed for step 5 */
234 	start_pos = duk_to_int_clamped(ctx, 0, -len, len);
235 	if (start_pos < 0) {
236 		start_pos = len + start_pos;
237 	}
238 	DUK_ASSERT(start_pos >= 0 && start_pos <= len);
239 
240 	/* combines steps 3, 6; step 7 is not needed */
241 	if (duk_is_undefined(ctx, 1)) {
242 		end_pos = len;
243 	} else {
244 		DUK_ASSERT(start_pos <= len);
245 		end_pos = start_pos + duk_to_int_clamped(ctx, 1, 0, len - start_pos);
246 	}
247 	DUK_ASSERT(start_pos >= 0 && start_pos <= len);
248 	DUK_ASSERT(end_pos >= 0 && end_pos <= len);
249 	DUK_ASSERT(end_pos >= start_pos);
250 
251 	duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
252 	return 1;
253 }
254 #else  /* DUK_USE_SECTION_B */
duk_bi_string_prototype_substr(duk_context * ctx)255 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) {
256 	DUK_UNREF(ctx);
257 	return DUK_RET_UNSUPPORTED_ERROR;
258 }
259 #endif  /* DUK_USE_SECTION_B */
260 
duk_bi_string_prototype_slice(duk_context * ctx)261 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_context *ctx) {
262 	duk_hstring *h;
263 	duk_int_t start_pos, end_pos;
264 	duk_int_t len;
265 
266 	h = duk_push_this_coercible_to_string(ctx);
267 	DUK_ASSERT(h != NULL);
268 	len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
269 
270 	/* [ start end str ] */
271 
272 	start_pos = duk_to_int_clamped(ctx, 0, -len, len);
273 	if (start_pos < 0) {
274 		start_pos = len + start_pos;
275 	}
276 	if (duk_is_undefined(ctx, 1)) {
277 		end_pos = len;
278 	} else {
279 		end_pos = duk_to_int_clamped(ctx, 1, -len, len);
280 		if (end_pos < 0) {
281 			end_pos = len + end_pos;
282 		}
283 	}
284 	DUK_ASSERT(start_pos >= 0 && start_pos <= len);
285 	DUK_ASSERT(end_pos >= 0 && end_pos <= len);
286 
287 	if (end_pos < start_pos) {
288 		end_pos = start_pos;
289 	}
290 
291 	DUK_ASSERT(end_pos >= start_pos);
292 
293 	duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
294 	return 1;
295 }
296 
297 /*
298  *  Case conversion
299  */
300 
duk_bi_string_prototype_caseconv_shared(duk_context * ctx)301 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_caseconv_shared(duk_context *ctx) {
302 	duk_hthread *thr = (duk_hthread *) ctx;
303 	duk_small_int_t uppercase = duk_get_current_magic(ctx);
304 
305 	(void) duk_push_this_coercible_to_string(ctx);
306 	duk_unicode_case_convert_string(thr, (duk_bool_t) uppercase);
307 	return 1;
308 }
309 
310 /*
311  *  indexOf() and lastIndexOf()
312  */
313 
duk_bi_string_prototype_indexof_shared(duk_context * ctx)314 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_context *ctx) {
315 	duk_hthread *thr = (duk_hthread *) ctx;
316 	duk_hstring *h_this;
317 	duk_hstring *h_search;
318 	duk_int_t clen_this;
319 	duk_int_t cpos;
320 	duk_int_t bpos;
321 	const duk_uint8_t *p_start, *p_end, *p;
322 	const duk_uint8_t *q_start;
323 	duk_int_t q_blen;
324 	duk_uint8_t firstbyte;
325 	duk_uint8_t t;
326 	duk_small_int_t is_lastindexof = duk_get_current_magic(ctx);  /* 0=indexOf, 1=lastIndexOf */
327 
328 	h_this = duk_push_this_coercible_to_string(ctx);
329 	DUK_ASSERT(h_this != NULL);
330 	clen_this = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h_this);
331 
332 	h_search = duk_to_hstring(ctx, 0);
333 	DUK_ASSERT(h_search != NULL);
334 	q_start = DUK_HSTRING_GET_DATA(h_search);
335 	q_blen = (duk_int_t) DUK_HSTRING_GET_BYTELEN(h_search);
336 
337 	duk_to_number(ctx, 1);
338 	if (duk_is_nan(ctx, 1) && is_lastindexof) {
339 		/* indexOf: NaN should cause pos to be zero.
340 		 * lastIndexOf: NaN should cause pos to be +Infinity
341 		 * (and later be clamped to len).
342 		 */
343 		cpos = clen_this;
344 	} else {
345 		cpos = duk_to_int_clamped(ctx, 1, 0, clen_this);
346 	}
347 
348 	/* Empty searchstring always matches; cpos must be clamped here.
349 	 * (If q_blen were < 0 due to clamped coercion, it would also be
350 	 * caught here.)
351 	 */
352 	if (q_blen <= 0) {
353 		duk_push_int(ctx, cpos);
354 		return 1;
355 	}
356 	DUK_ASSERT(q_blen > 0);
357 
358 	bpos = (duk_int_t) duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t) cpos);
359 
360 	p_start = DUK_HSTRING_GET_DATA(h_this);
361 	p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this);
362 	p = p_start + bpos;
363 
364 	/* This loop is optimized for size.  For speed, there should be
365 	 * two separate loops, and we should ensure that memcmp() can be
366 	 * used without an extra "will searchstring fit" check.  Doing
367 	 * the preconditioning for 'p' and 'p_end' is easy but cpos
368 	 * must be updated if 'p' is wound back (backward scanning).
369 	 */
370 
371 	firstbyte = q_start[0];  /* leading byte of match string */
372 	while (p <= p_end && p >= p_start) {
373 		t = *p;
374 
375 		/* For Ecmascript strings, this check can only match for
376 		 * initial UTF-8 bytes (not continuation bytes).  For other
377 		 * strings all bets are off.
378 		 */
379 
380 		if ((t == firstbyte) && ((duk_size_t) (p_end - p) >= (duk_size_t) q_blen)) {
381 			DUK_ASSERT(q_blen > 0);  /* no issues with memcmp() zero size, even if broken */
382 			if (DUK_MEMCMP((const void *) p, (const void *) q_start, (size_t) q_blen) == 0) {
383 				duk_push_int(ctx, cpos);
384 				return 1;
385 			}
386 		}
387 
388 		/* track cpos while scanning */
389 		if (is_lastindexof) {
390 			/* when going backwards, we decrement cpos 'early';
391 			 * 'p' may point to a continuation byte of the char
392 			 * at offset 'cpos', but that's OK because we'll
393 			 * backtrack all the way to the initial byte.
394 			 */
395 			if ((t & 0xc0) != 0x80) {
396 				cpos--;
397 			}
398 			p--;
399 		} else {
400 			if ((t & 0xc0) != 0x80) {
401 				cpos++;
402 			}
403 			p++;
404 		}
405 	}
406 
407 	/* Not found.  Empty string case is handled specially above. */
408 	duk_push_int(ctx, -1);
409 	return 1;
410 }
411 
412 /*
413  *  replace()
414  */
415 
/* XXX: the current implementation works but is quite clunky; it compiles
 * to almost 1.4 kB of x86 code so it needs to be simplified (better approach,
 * shared helpers, etc).  Some ideas for refactoring:
 *
 * - a primitive to convert a string into a regexp matcher (reduces matching
 *   code at the cost of making matching much slower)
 * - use replace() as a basic helper for match() and split(), which are both
 *   much simpler
 * - an API call combining get_prop and to_boolean
 */
426 
duk_bi_string_prototype_replace(duk_context * ctx)427 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_context *ctx) {
428 	duk_hthread *thr = (duk_hthread *) ctx;
429 	duk_hstring *h_input;
430 	duk_hstring *h_match;
431 	duk_hstring *h_search;
432 	duk_hobject *h_re;
433 	duk_bufwriter_ctx bw_alloc;
434 	duk_bufwriter_ctx *bw;
435 #ifdef DUK_USE_REGEXP_SUPPORT
436 	duk_bool_t is_regexp;
437 	duk_bool_t is_global;
438 #endif
439 	duk_bool_t is_repl_func;
440 	duk_uint32_t match_start_coff, match_start_boff;
441 #ifdef DUK_USE_REGEXP_SUPPORT
442 	duk_int_t match_caps;
443 #endif
444 	duk_uint32_t prev_match_end_boff;
445 	const duk_uint8_t *r_start, *r_end, *r;   /* repl string scan */
446 	duk_size_t tmp_sz;
447 
448 	DUK_ASSERT_TOP(ctx, 2);
449 	h_input = duk_push_this_coercible_to_string(ctx);
450 	DUK_ASSERT(h_input != NULL);
451 
452 	bw = &bw_alloc;
453 	DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input));  /* input size is good output starting point */
454 
455 	DUK_ASSERT_TOP(ctx, 4);
456 
457 	/* stack[0] = search value
458 	 * stack[1] = replace value
459 	 * stack[2] = input string
460 	 * stack[3] = result buffer
461 	 */
462 
463 	h_re = duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP);
464 	if (h_re) {
465 #ifdef DUK_USE_REGEXP_SUPPORT
466 		is_regexp = 1;
467 		is_global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL);
468 
469 		if (is_global) {
470 			/* start match from beginning */
471 			duk_push_int(ctx, 0);
472 			duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
473 		}
474 #else  /* DUK_USE_REGEXP_SUPPORT */
475 		return DUK_RET_UNSUPPORTED_ERROR;
476 #endif  /* DUK_USE_REGEXP_SUPPORT */
477 	} else {
478 		duk_to_string(ctx, 0);
479 #ifdef DUK_USE_REGEXP_SUPPORT
480 		is_regexp = 0;
481 		is_global = 0;
482 #endif
483 	}
484 
485 	if (duk_is_function(ctx, 1)) {
486 		is_repl_func = 1;
487 		r_start = NULL;
488 		r_end = NULL;
489 	} else {
490 		duk_hstring *h_repl;
491 
492 		is_repl_func = 0;
493 		h_repl = duk_to_hstring(ctx, 1);
494 		DUK_ASSERT(h_repl != NULL);
495 		r_start = DUK_HSTRING_GET_DATA(h_repl);
496 		r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl);
497 	}
498 
499 	prev_match_end_boff = 0;
500 
501 	for (;;) {
502 		/*
503 		 *  If matching with a regexp:
504 		 *    - non-global RegExp: lastIndex not touched on a match, zeroed
505 		 *      on a non-match
506 		 *    - global RegExp: on match, lastIndex will be updated by regexp
507 		 *      executor to point to next char after the matching part (so that
508 		 *      characters in the matching part are not matched again)
509 		 *
510 		 *  If matching with a string:
511 		 *    - always non-global match, find first occurrence
512 		 *
513 		 *  We need:
514 		 *    - The character offset of start-of-match for the replacer function
515 		 *    - The byte offsets for start-of-match and end-of-match to implement
516 		 *      the replacement values $&, $`, and $', and to copy non-matching
517 		 *      input string portions (including header and trailer) verbatim.
518 		 *
519 		 *  NOTE: the E5.1 specification is a bit vague how the RegExp should
520 		 *  behave in the replacement process; e.g. is matching done first for
521 		 *  all matches (in the global RegExp case) before any replacer calls
522 		 *  are made?  See: test-bi-string-proto-replace.js for discussion.
523 		 */
524 
525 		DUK_ASSERT_TOP(ctx, 4);
526 
527 #ifdef DUK_USE_REGEXP_SUPPORT
528 		if (is_regexp) {
529 			duk_dup(ctx, 0);
530 			duk_dup(ctx, 2);
531 			duk_regexp_match(thr);  /* [ ... regexp input ] -> [ res_obj ] */
532 			if (!duk_is_object(ctx, -1)) {
533 				duk_pop(ctx);
534 				break;
535 			}
536 
537 			duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
538 			DUK_ASSERT(duk_is_number(ctx, -1));
539 			match_start_coff = duk_get_int(ctx, -1);
540 			duk_pop(ctx);
541 
542 			duk_get_prop_index(ctx, -1, 0);
543 			DUK_ASSERT(duk_is_string(ctx, -1));
544 			h_match = duk_get_hstring(ctx, -1);
545 			DUK_ASSERT(h_match != NULL);
546 			duk_pop(ctx);  /* h_match is borrowed, remains reachable through match_obj */
547 
548 			if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) {
549 				/* This should be equivalent to match() algorithm step 8.f.iii.2:
550 				 * detect an empty match and allow it, but don't allow it twice.
551 				 */
552 				duk_uint32_t last_index;
553 
554 				duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
555 				last_index = (duk_uint32_t) duk_get_uint(ctx, -1);
556 				DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld",
557 				                     (long) last_index, (long) (last_index + 1)));
558 				duk_pop(ctx);
559 				duk_push_int(ctx, last_index + 1);
560 				duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
561 			}
562 
563 			DUK_ASSERT(duk_get_length(ctx, -1) <= DUK_INT_MAX);  /* string limits */
564 			match_caps = (duk_int_t) duk_get_length(ctx, -1);
565 		} else {
566 #else  /* DUK_USE_REGEXP_SUPPORT */
567 		{  /* unconditionally */
568 #endif  /* DUK_USE_REGEXP_SUPPORT */
569 			const duk_uint8_t *p_start, *p_end, *p;   /* input string scan */
570 			const duk_uint8_t *q_start;               /* match string */
571 			duk_size_t q_blen;
572 
573 #ifdef DUK_USE_REGEXP_SUPPORT
574 			DUK_ASSERT(!is_global);  /* single match always */
575 #endif
576 
577 			p_start = DUK_HSTRING_GET_DATA(h_input);
578 			p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
579 			p = p_start;
580 
581 			h_search = duk_get_hstring(ctx, 0);
582 			DUK_ASSERT(h_search != NULL);
583 			q_start = DUK_HSTRING_GET_DATA(h_search);
584 			q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_search);
585 
586 			p_end -= q_blen;  /* ensure full memcmp() fits in while */
587 
588 			match_start_coff = 0;
589 
590 			while (p <= p_end) {
591 				DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input));
592 				if (DUK_MEMCMP((const void *) p, (const void *) q_start, (size_t) q_blen) == 0) {
593 					duk_dup(ctx, 0);
594 					h_match = duk_get_hstring(ctx, -1);
595 					DUK_ASSERT(h_match != NULL);
596 #ifdef DUK_USE_REGEXP_SUPPORT
597 					match_caps = 0;
598 #endif
599 					goto found;
600 				}
601 
602 				/* track utf-8 non-continuation bytes */
603 				if ((p[0] & 0xc0) != 0x80) {
604 					match_start_coff++;
605 				}
606 				p++;
607 			}
608 
609 			/* not found */
610 			break;
611 		}
612 	 found:
613 
614 		/* stack[0] = search value
615 		 * stack[1] = replace value
616 		 * stack[2] = input string
617 		 * stack[3] = result buffer
618 		 * stack[4] = regexp match OR match string
619 		 */
620 
621 		match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff);
622 
623 		tmp_sz = (duk_size_t) (match_start_boff - prev_match_end_boff);
624 		DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
625 
626 		prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match);
627 
628 		if (is_repl_func) {
629 			duk_idx_t idx_args;
630 			duk_hstring *h_repl;
631 
632 			/* regexp res_obj is at index 4 */
633 
634 			duk_dup(ctx, 1);
635 			idx_args = duk_get_top(ctx);
636 
637 #ifdef DUK_USE_REGEXP_SUPPORT
638 			if (is_regexp) {
639 				duk_int_t idx;
640 				duk_require_stack(ctx, match_caps + 2);
641 				for (idx = 0; idx < match_caps; idx++) {
642 					/* match followed by capture(s) */
643 					duk_get_prop_index(ctx, 4, idx);
644 				}
645 			} else {
646 #else  /* DUK_USE_REGEXP_SUPPORT */
647 			{  /* unconditionally */
648 #endif  /* DUK_USE_REGEXP_SUPPORT */
649 				/* match == search string, by definition */
650 				duk_dup(ctx, 0);
651 			}
652 			duk_push_int(ctx, match_start_coff);
653 			duk_dup(ctx, 2);
654 
655 			/* [ ... replacer match [captures] match_char_offset input ] */
656 
657 			duk_call(ctx, duk_get_top(ctx) - idx_args);
658 			h_repl = duk_to_hstring(ctx, -1);  /* -> [ ... repl_value ] */
659 			DUK_ASSERT(h_repl != NULL);
660 
661 			DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl);
662 
663 			duk_pop(ctx);  /* repl_value */
664 		} else {
665 			r = r_start;
666 
667 			while (r < r_end) {
668 				duk_int_t ch1;
669 				duk_int_t ch2;
670 #ifdef DUK_USE_REGEXP_SUPPORT
671 				duk_int_t ch3;
672 #endif
673 				duk_size_t left;
674 
675 				ch1 = *r++;
676 				if (ch1 != DUK_ASC_DOLLAR) {
677 					goto repl_write;
678 				}
679 				left = r_end - r;
680 
681 				if (left <= 0) {
682 					goto repl_write;
683 				}
684 
685 				ch2 = r[0];
686 				switch ((int) ch2) {
687 				case DUK_ASC_DOLLAR: {
688 					ch1 = (1 << 8) + DUK_ASC_DOLLAR;
689 					goto repl_write;
690 				}
691 				case DUK_ASC_AMP: {
692 					DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match);
693 					r++;
694 					continue;
695 				}
696 				case DUK_ASC_GRAVE: {
697 					tmp_sz = (duk_size_t) match_start_boff;
698 					DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz);
699 					r++;
700 					continue;
701 				}
702 				case DUK_ASC_SINGLEQUOTE: {
703 					duk_uint32_t match_end_boff;
704 
705 					/* Use match charlen instead of bytelen, just in case the input and
706 					 * match codepoint encodings would have different lengths.
707 					 */
708 					match_end_boff = duk_heap_strcache_offset_char2byte(thr,
709 					                                                    h_input,
710 					                                                    match_start_coff + DUK_HSTRING_GET_CHARLEN(h_match));
711 
712 					tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff);
713 					DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz);
714 					r++;
715 					continue;
716 				}
717 				default: {
718 #ifdef DUK_USE_REGEXP_SUPPORT
719 					duk_int_t capnum, captmp, capadv;
720 					/* XXX: optional check, match_caps is zero if no regexp,
721 					 * so dollar will be interpreted literally anyway.
722 					 */
723 
724 					if (!is_regexp) {
725 						goto repl_write;
726 					}
727 
728 					if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) {
729 						goto repl_write;
730 					}
731 					capnum = ch2 - DUK_ASC_0;
732 					capadv = 1;
733 
734 					if (left >= 2) {
735 						ch3 = r[1];
736 						if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) {
737 							captmp = capnum * 10 + (ch3 - DUK_ASC_0);
738 							if (captmp < match_caps) {
739 								capnum = captmp;
740 								capadv = 2;
741 							}
742 						}
743 					}
744 
745 					if (capnum > 0 && capnum < match_caps) {
746 						DUK_ASSERT(is_regexp != 0);  /* match_caps == 0 without regexps */
747 
748 						/* regexp res_obj is at offset 4 */
749 						duk_get_prop_index(ctx, 4, (duk_uarridx_t) capnum);
750 						if (duk_is_string(ctx, -1)) {
751 							duk_hstring *h_tmp_str;
752 
753 							h_tmp_str = duk_get_hstring(ctx, -1);
754 							DUK_ASSERT(h_tmp_str != NULL);
755 
756 							DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str);
757 						} else {
758 							/* undefined -> skip (replaced with empty) */
759 						}
760 						duk_pop(ctx);
761 						r += capadv;
762 						continue;
763 					} else {
764 						goto repl_write;
765 					}
766 #else  /* DUK_USE_REGEXP_SUPPORT */
767 					goto repl_write;  /* unconditionally */
768 #endif  /* DUK_USE_REGEXP_SUPPORT */
769 				}  /* default case */
770 				}  /* switch (ch2) */
771 
772 			 repl_write:
773 				/* ch1 = (r_increment << 8) + byte */
774 
775 				DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t) (ch1 & 0xff));
776 				r += ch1 >> 8;
777 			}  /* while repl */
778 		}  /* if (is_repl_func) */
779 
780 		duk_pop(ctx);  /* pop regexp res_obj or match string */
781 
782 #ifdef DUK_USE_REGEXP_SUPPORT
783 		if (!is_global) {
784 #else
785 		{  /* unconditionally; is_global==0 */
786 #endif
787 			break;
788 		}
789 	}
790 
791 	/* trailer */
792 	tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff);
793 	DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
794 
795 	DUK_ASSERT_TOP(ctx, 4);
796 	DUK_BW_COMPACT(thr, bw);
797 	duk_to_string(ctx, -1);
798 	return 1;
799 }
800 
801 /*
802  *  split()
803  */
804 
/* XXX: very messy now, but works; clean up, remove unused variables (nominally
 * used so compiler doesn't complain).
 */
808 
809 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_context *ctx) {
810 	duk_hthread *thr = (duk_hthread *) ctx;
811 	duk_hstring *h_input;
812 	duk_hstring *h_sep;
813 	duk_uint32_t limit;
814 	duk_uint32_t arr_idx;
815 #ifdef DUK_USE_REGEXP_SUPPORT
816 	duk_bool_t is_regexp;
817 #endif
818 	duk_bool_t matched;  /* set to 1 if any match exists (needed for empty input special case) */
819 	duk_uint32_t prev_match_end_coff, prev_match_end_boff;
820 	duk_uint32_t match_start_boff, match_start_coff;
821 	duk_uint32_t match_end_boff, match_end_coff;
822 
823 	DUK_UNREF(thr);
824 
825 	h_input = duk_push_this_coercible_to_string(ctx);
826 	DUK_ASSERT(h_input != NULL);
827 
828 	duk_push_array(ctx);
829 
830 	if (duk_is_undefined(ctx, 1)) {
831 		limit = 0xffffffffUL;
832 	} else {
833 		limit = duk_to_uint32(ctx, 1);
834 	}
835 
836 	if (limit == 0) {
837 		return 1;
838 	}
839 
840 	/* If the separator is a RegExp, make a "clone" of it.  The specification
841 	 * algorithm calls [[Match]] directly for specific indices; we emulate this
842 	 * by tweaking lastIndex and using a "force global" variant of duk_regexp_match()
843 	 * which will use global-style matching even when the RegExp itself is non-global.
844 	 */
845 
846 	if (duk_is_undefined(ctx, 0)) {
847 		/* The spec algorithm first does "R = ToString(separator)" before checking
848 		 * whether separator is undefined.  Since this is side effect free, we can
849 		 * skip the ToString() here.
850 		 */
851 		duk_dup(ctx, 2);
852 		duk_put_prop_index(ctx, 3, 0);
853 		return 1;
854 	} else if (duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) {
855 #ifdef DUK_USE_REGEXP_SUPPORT
856 		duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
857 		duk_dup(ctx, 0);
858 		duk_new(ctx, 1);  /* [ ... RegExp val ] -> [ ... res ] */
859 		duk_replace(ctx, 0);
860 		/* lastIndex is initialized to zero by new RegExp() */
861 		is_regexp = 1;
862 #else
863 		return DUK_RET_UNSUPPORTED_ERROR;
864 #endif
865 	} else {
866 		duk_to_string(ctx, 0);
867 #ifdef DUK_USE_REGEXP_SUPPORT
868 		is_regexp = 0;
869 #endif
870 	}
871 
872 	/* stack[0] = separator (string or regexp)
873 	 * stack[1] = limit
874 	 * stack[2] = input string
875 	 * stack[3] = result array
876 	 */
877 
878 	prev_match_end_boff = 0;
879 	prev_match_end_coff = 0;
880 	arr_idx = 0;
881 	matched = 0;
882 
883 	for (;;) {
884 		/*
885 		 *  The specification uses RegExp [[Match]] to attempt match at specific
886 		 *  offsets.  We don't have such a primitive, so we use an actual RegExp
887 		 *  and tweak lastIndex.  Since the RegExp may be non-global, we use a
888 		 *  special variant which forces global-like behavior for matching.
889 		 */
890 
891 		DUK_ASSERT_TOP(ctx, 4);
892 
893 #ifdef DUK_USE_REGEXP_SUPPORT
894 		if (is_regexp) {
895 			duk_dup(ctx, 0);
896 			duk_dup(ctx, 2);
897 			duk_regexp_match_force_global(thr);  /* [ ... regexp input ] -> [ res_obj ] */
898 			if (!duk_is_object(ctx, -1)) {
899 				duk_pop(ctx);
900 				break;
901 			}
902 			matched = 1;
903 
904 			duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
905 			DUK_ASSERT(duk_is_number(ctx, -1));
906 			match_start_coff = duk_get_int(ctx, -1);
907 			match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff);
908 			duk_pop(ctx);
909 
910 			if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) {
911 				/* don't allow an empty match at the end of the string */
912 				duk_pop(ctx);
913 				break;
914 			}
915 
916 			duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
917 			DUK_ASSERT(duk_is_number(ctx, -1));
918 			match_end_coff = duk_get_int(ctx, -1);
919 			match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_end_coff);
920 			duk_pop(ctx);
921 
922 			/* empty match -> bump and continue */
923 			if (prev_match_end_boff == match_end_boff) {
924 				duk_push_int(ctx, match_end_coff + 1);
925 				duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
926 				duk_pop(ctx);
927 				continue;
928 			}
929 		} else {
930 #else  /* DUK_USE_REGEXP_SUPPORT */
931 		{  /* unconditionally */
932 #endif  /* DUK_USE_REGEXP_SUPPORT */
933 			const duk_uint8_t *p_start, *p_end, *p;   /* input string scan */
934 			const duk_uint8_t *q_start;               /* match string */
935 			duk_size_t q_blen, q_clen;
936 
937 			p_start = DUK_HSTRING_GET_DATA(h_input);
938 			p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
939 			p = p_start + prev_match_end_boff;
940 
941 			h_sep = duk_get_hstring(ctx, 0);
942 			DUK_ASSERT(h_sep != NULL);
943 			q_start = DUK_HSTRING_GET_DATA(h_sep);
944 			q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sep);
945 			q_clen = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_sep);
946 
947 			p_end -= q_blen;  /* ensure full memcmp() fits in while */
948 
949 			match_start_coff = prev_match_end_coff;
950 
951 			if (q_blen == 0) {
952 				/* Handle empty separator case: it will always match, and always
953 				 * triggers the check in step 13.c.iii initially.  Note that we
954 				 * must skip to either end of string or start of first codepoint,
955 				 * skipping over any continuation bytes!
956 				 *
957 				 * Don't allow an empty string to match at the end of the input.
958 				 */
959 
960 				matched = 1;  /* empty separator can always match */
961 
962 				match_start_coff++;
963 				p++;
964 				while (p < p_end) {
965 					if ((p[0] & 0xc0) != 0x80) {
966 						goto found;
967 					}
968 					p++;
969 				}
970 				goto not_found;
971 			}
972 
973 			DUK_ASSERT(q_blen > 0 && q_clen > 0);
974 			while (p <= p_end) {
975 				DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input));
976 				DUK_ASSERT(q_blen > 0);  /* no issues with empty memcmp() */
977 				if (DUK_MEMCMP((const void *) p, (const void *) q_start, (size_t) q_blen) == 0) {
978 					/* never an empty match, so step 13.c.iii can't be triggered */
979 					goto found;
980 				}
981 
982 				/* track utf-8 non-continuation bytes */
983 				if ((p[0] & 0xc0) != 0x80) {
984 					match_start_coff++;
985 				}
986 				p++;
987 			}
988 
989 		 not_found:
990 			/* not found */
991 			break;
992 
993 		 found:
994 			matched = 1;
995 			match_start_boff = (duk_uint32_t) (p - p_start);
996 			match_end_coff = (duk_uint32_t) (match_start_coff + q_clen);  /* constrained by string length */
997 			match_end_boff = (duk_uint32_t) (match_start_boff + q_blen);  /* ditto */
998 
999 			/* empty match (may happen with empty separator) -> bump and continue */
1000 			if (prev_match_end_boff == match_end_boff) {
1001 				prev_match_end_boff++;
1002 				prev_match_end_coff++;
1003 				continue;
1004 			}
1005 		}  /* if (is_regexp) */
1006 
1007 		/* stack[0] = separator (string or regexp)
1008 		 * stack[1] = limit
1009 		 * stack[2] = input string
1010 		 * stack[3] = result array
1011 		 * stack[4] = regexp res_obj (if is_regexp)
1012 		 */
1013 
1014 		DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld",
1015 		                     (long) match_start_boff, (long) match_start_coff,
1016 		                     (long) match_end_boff, (long) match_end_coff,
1017 		                     (long) prev_match_end_boff, (long) prev_match_end_coff));
1018 
1019 		duk_push_lstring(ctx,
1020 		                 (const char *) (DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff),
1021 		                 (duk_size_t) (match_start_boff - prev_match_end_boff));
1022 		duk_put_prop_index(ctx, 3, arr_idx);
1023 		arr_idx++;
1024 		if (arr_idx >= limit) {
1025 			goto hit_limit;
1026 		}
1027 
1028 #ifdef DUK_USE_REGEXP_SUPPORT
1029 		if (is_regexp) {
1030 			duk_size_t i, len;
1031 
1032 			len = duk_get_length(ctx, 4);
1033 			for (i = 1; i < len; i++) {
1034 				DUK_ASSERT(i <= DUK_UARRIDX_MAX);  /* cannot have >4G captures */
1035 				duk_get_prop_index(ctx, 4, (duk_uarridx_t) i);
1036 				duk_put_prop_index(ctx, 3, arr_idx);
1037 				arr_idx++;
1038 				if (arr_idx >= limit) {
1039 					goto hit_limit;
1040 				}
1041 			}
1042 
1043 			duk_pop(ctx);
1044 			/* lastIndex already set up for next match */
1045 		} else {
1046 #else  /* DUK_USE_REGEXP_SUPPORT */
1047 		{  /* unconditionally */
1048 #endif  /* DUK_USE_REGEXP_SUPPORT */
1049 			/* no action */
1050 		}
1051 
1052 		prev_match_end_boff = match_end_boff;
1053 		prev_match_end_coff = match_end_coff;
1054 		continue;
1055 	}  /* for */
1056 
1057 	/* Combined step 11 (empty string special case) and 14-15. */
1058 
1059 	DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld",
1060 	                     (long) prev_match_end_boff, (long) prev_match_end_coff));
1061 
1062 	if (DUK_HSTRING_GET_CHARLEN(h_input) > 0 || !matched) {
1063 		/* Add trailer if:
1064 		 *   a) non-empty input
1065 		 *   b) empty input and no (zero size) match found (step 11)
1066 		 */
1067 
1068 		duk_push_lstring(ctx,
1069 		                 (const char *) DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff,
1070 		                 (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff));
1071 		duk_put_prop_index(ctx, 3, arr_idx);
1072 		/* No arr_idx update or limit check */
1073 	}
1074 
1075 	return 1;
1076 
1077  hit_limit:
1078 #ifdef DUK_USE_REGEXP_SUPPORT
1079 	if (is_regexp) {
1080 		duk_pop(ctx);
1081 	}
1082 #endif
1083 
1084 	return 1;
1085 }
1086 
1087 /*
1088  *  Various
1089  */
1090 
1091 #ifdef DUK_USE_REGEXP_SUPPORT
1092 DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) {
1093 	duk_hobject *h;
1094 
1095 	/* Shared helper for match() steps 3-4, search() steps 3-4. */
1096 
1097 	DUK_ASSERT(index >= 0);
1098 
1099 	if (force_new) {
1100 		goto do_new;
1101 	}
1102 
1103 	h = duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP);
1104 	if (!h) {
1105 		goto do_new;
1106 	}
1107 	return;
1108 
1109  do_new:
1110 	duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
1111 	duk_dup(ctx, index);
1112 	duk_new(ctx, 1);  /* [ ... RegExp val ] -> [ ... res ] */
1113 	duk_replace(ctx, index);
1114 }
1115 #endif  /* DUK_USE_REGEXP_SUPPORT */
1116 
1117 #ifdef DUK_USE_REGEXP_SUPPORT
1118 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) {
1119 	duk_hthread *thr = (duk_hthread *) ctx;
1120 
1121 	/* Easiest way to implement the search required by the specification
1122 	 * is to do a RegExp test() with lastIndex forced to zero.  To avoid
1123 	 * side effects on the argument, "clone" the RegExp if a RegExp was
1124 	 * given as input.
1125 	 *
1126 	 * The global flag of the RegExp should be ignored; setting lastIndex
1127 	 * to zero (which happens when "cloning" the RegExp) should have an
1128 	 * equivalent effect.
1129 	 */
1130 
1131 	DUK_ASSERT_TOP(ctx, 1);
1132 	(void) duk_push_this_coercible_to_string(ctx);  /* at index 1 */
1133 	duk__to_regexp_helper(ctx, 0 /*index*/, 1 /*force_new*/);
1134 
1135 	/* stack[0] = regexp
1136 	 * stack[1] = string
1137 	 */
1138 
1139 	/* Avoid using RegExp.prototype methods, as they're writable and
1140 	 * configurable and may have been changed.
1141 	 */
1142 
1143 	duk_dup(ctx, 0);
1144 	duk_dup(ctx, 1);  /* [ ... re_obj input ] */
1145 	duk_regexp_match(thr);  /* -> [ ... res_obj ] */
1146 
1147 	if (!duk_is_object(ctx, -1)) {
1148 		duk_push_int(ctx, -1);
1149 		return 1;
1150 	}
1151 
1152 	duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
1153 	DUK_ASSERT(duk_is_number(ctx, -1));
1154 	return 1;
1155 }
1156 #else  /* DUK_USE_REGEXP_SUPPORT */
1157 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) {
1158 	DUK_UNREF(ctx);
1159 	return DUK_RET_UNSUPPORTED_ERROR;
1160 }
1161 #endif  /* DUK_USE_REGEXP_SUPPORT */
1162 
1163 #ifdef DUK_USE_REGEXP_SUPPORT
1164 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) {
1165 	duk_hthread *thr = (duk_hthread *) ctx;
1166 	duk_bool_t global;
1167 	duk_int_t prev_last_index;
1168 	duk_int_t this_index;
1169 	duk_int_t arr_idx;
1170 
1171 	DUK_ASSERT_TOP(ctx, 1);
1172 	(void) duk_push_this_coercible_to_string(ctx);
1173 	duk__to_regexp_helper(ctx, 0 /*index*/, 0 /*force_new*/);
1174 	global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL);
1175 	DUK_ASSERT_TOP(ctx, 2);
1176 
1177 	/* stack[0] = regexp
1178 	 * stack[1] = string
1179 	 */
1180 
1181 	if (!global) {
1182 		duk_regexp_match(thr);  /* -> [ res_obj ] */
1183 		return 1;  /* return 'res_obj' */
1184 	}
1185 
1186 	/* Global case is more complex. */
1187 
1188 	/* [ regexp string ] */
1189 
1190 	duk_push_int(ctx, 0);
1191 	duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1192 	duk_push_array(ctx);
1193 
1194 	/* [ regexp string res_arr ] */
1195 
1196 	prev_last_index = 0;
1197 	arr_idx = 0;
1198 
1199 	for (;;) {
1200 		DUK_ASSERT_TOP(ctx, 3);
1201 
1202 		duk_dup(ctx, 0);
1203 		duk_dup(ctx, 1);
1204 		duk_regexp_match(thr);  /* -> [ ... regexp string ] -> [ ... res_obj ] */
1205 
1206 		if (!duk_is_object(ctx, -1)) {
1207 			duk_pop(ctx);
1208 			break;
1209 		}
1210 
1211 		duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1212 		DUK_ASSERT(duk_is_number(ctx, -1));
1213 		this_index = duk_get_int(ctx, -1);
1214 		duk_pop(ctx);
1215 
1216 		if (this_index == prev_last_index) {
1217 			this_index++;
1218 			duk_push_int(ctx, this_index);
1219 			duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1220 		}
1221 		prev_last_index = this_index;
1222 
1223 		duk_get_prop_index(ctx, -1, 0);  /* match string */
1224 		duk_put_prop_index(ctx, 2, arr_idx);
1225 		arr_idx++;
1226 		duk_pop(ctx);  /* res_obj */
1227 	}
1228 
1229 	if (arr_idx == 0) {
1230 		duk_push_null(ctx);
1231 	}
1232 
1233 	return 1;  /* return 'res_arr' or 'null' */
1234 }
1235 #else  /* DUK_USE_REGEXP_SUPPORT */
1236 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) {
1237 	DUK_UNREF(ctx);
1238 	return DUK_RET_UNSUPPORTED_ERROR;
1239 }
1240 #endif  /* DUK_USE_REGEXP_SUPPORT */
1241 
1242 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_concat(duk_context *ctx) {
1243 	/* duk_concat() coerces arguments with ToString() in correct order */
1244 	(void) duk_push_this_coercible_to_string(ctx);
1245 	duk_insert(ctx, 0);  /* this is relatively expensive */
1246 	duk_concat(ctx, duk_get_top(ctx));
1247 	return 1;
1248 }
1249 
1250 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_trim(duk_context *ctx) {
1251 	DUK_ASSERT_TOP(ctx, 0);
1252 	(void) duk_push_this_coercible_to_string(ctx);
1253 	duk_trim(ctx, 0);
1254 	DUK_ASSERT_TOP(ctx, 1);
1255 	return 1;
1256 }
1257 
1258 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_locale_compare(duk_context *ctx) {
1259 	duk_hstring *h1;
1260 	duk_hstring *h2;
1261 	duk_size_t h1_len, h2_len, prefix_len;
1262 	duk_small_int_t ret = 0;
1263 	duk_small_int_t rc;
1264 
1265 	/* The current implementation of localeCompare() is simply a codepoint
1266 	 * by codepoint comparison, implemented with a simple string compare
1267 	 * because UTF-8 should preserve codepoint ordering (assuming valid
1268 	 * shortest UTF-8 encoding).
1269 	 *
1270 	 * The specification requires that the return value must be related
1271 	 * to the sort order: e.g. negative means that 'this' comes before
1272 	 * 'that' in sort order.  We assume an ascending sort order.
1273 	 */
1274 
1275 	/* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */
1276 
1277 	h1 = duk_push_this_coercible_to_string(ctx);
1278 	DUK_ASSERT(h1 != NULL);
1279 
1280 	h2 = duk_to_hstring(ctx, 0);
1281 	DUK_ASSERT(h2 != NULL);
1282 
1283 	h1_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h1);
1284 	h2_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h2);
1285 	prefix_len = (h1_len <= h2_len ? h1_len : h2_len);
1286 
1287 	/* Zero size compare not an issue with DUK_MEMCMP. */
1288 	rc = (duk_small_int_t) DUK_MEMCMP((const void *) DUK_HSTRING_GET_DATA(h1),
1289 	                                  (const void *) DUK_HSTRING_GET_DATA(h2),
1290 	                                  (size_t) prefix_len);
1291 
1292 	if (rc < 0) {
1293 		ret = -1;
1294 		goto done;
1295 	} else if (rc > 0) {
1296 		ret = 1;
1297 		goto done;
1298 	}
1299 
1300 	/* prefix matches, lengths matter now */
1301 	if (h1_len > h2_len) {
1302 		ret = 1;
1303 		goto done;
1304 	} else if (h1_len == h2_len) {
1305 		DUK_ASSERT(ret == 0);
1306 		goto done;
1307 	}
1308 	ret = -1;
1309 	goto done;
1310 
1311  done:
1312 	duk_push_int(ctx, (duk_int_t) ret);
1313 	return 1;
1314 }
1315