1 /*
2  *  Global object built-ins
3  */
4 
5 #include "duk_internal.h"
6 
7 /*
8  *  Encoding/decoding helpers
9  */
10 
11 /* XXX: Could add fast path (for each transform callback) with direct byte
12  * lookups (no shifting) and no explicit check for x < 0x80 before table
13  * lookup.
14  */
15 
/* Bitmask helpers for the character classification tables in this file.
 * Each table byte covers eight consecutive codepoints; the first
 * DUK__MKBITS() argument lands in bit 0, i.e. the lowest codepoint of the
 * group.  The bit order is a bit counterintuitive, but minimizes code size.
 */
#define DUK__MKBITS(a,b,c,d,e,f,g,h) \
	((duk_uint8_t) ( \
		((h) << 7) | ((g) << 6) | ((f) << 5) | ((e) << 4) | \
		((d) << 3) | ((c) << 2) | ((b) << 1) | ((a) << 0)))

/* Evaluates to non-zero if the bit for codepoint 'cp' is set in 'table'.
 * No range check is made here: callers in this file test 'cp < 0x80'
 * before the lookup because the tables only have 16 bytes.
 */
#define DUK__CHECK_BITMASK(table,cp) \
	((table)[(cp) >> 3] & (1 << ((cp) & 0x07)))
24 
25 /* E5.1 Section 15.1.3.3: uriReserved + uriUnescaped + '#' */
26 DUK_LOCAL const duk_uint8_t duk__encode_uriunescaped_table[16] = {
27 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
28 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
29 	DUK__MKBITS(0, 1, 0, 1, 1, 0, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x20-0x2f */
30 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
31 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
32 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
33 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
34 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
35 };
36 
37 /* E5.1 Section 15.1.3.4: uriUnescaped */
38 DUK_LOCAL const duk_uint8_t duk__encode_uricomponent_unescaped_table[16] = {
39 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
40 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
41 	DUK__MKBITS(0, 1, 0, 0, 0, 0, 0, 1), DUK__MKBITS(1, 1, 1, 0, 0, 1, 1, 0),  /* 0x20-0x2f */
42 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
43 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
44 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
45 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
46 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
47 };
48 
49 /* E5.1 Section 15.1.3.1: uriReserved + '#' */
50 DUK_LOCAL const duk_uint8_t duk__decode_uri_reserved_table[16] = {
51 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
52 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
53 	DUK__MKBITS(0, 0, 0, 1, 1, 0, 1, 0), DUK__MKBITS(0, 0, 0, 1, 1, 0, 0, 1),  /* 0x20-0x2f */
54 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
55 	DUK__MKBITS(1, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
56 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
57 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
58 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
59 };
60 
61 /* E5.1 Section 15.1.3.2: empty */
62 DUK_LOCAL const duk_uint8_t duk__decode_uri_component_reserved_table[16] = {
63 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
64 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
65 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x20-0x2f */
66 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
67 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
68 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
69 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
70 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
71 };
72 
73 #if defined(DUK_USE_SECTION_B)
74 /* E5.1 Section B.2.2, step 7. */
75 DUK_LOCAL const duk_uint8_t duk__escape_unescaped_table[16] = {
76 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
77 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
78 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 1, 1),  /* 0x20-0x2f */
79 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
80 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
81 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
82 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
83 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 0)   /* 0x70-0x7f */
84 };
85 #endif  /* DUK_USE_SECTION_B */
86 
87 typedef struct {
88 	duk_hthread *thr;
89 	duk_hstring *h_str;
90 	duk_bufwriter_ctx bw;
91 	const duk_uint8_t *p;
92 	const duk_uint8_t *p_start;
93 	const duk_uint8_t *p_end;
94 } duk__transform_context;
95 
96 typedef void (*duk__transform_callback)(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp);
97 
98 /* XXX: refactor and share with other code */
duk__decode_hex_escape(const duk_uint8_t * p,duk_small_int_t n)99 DUK_LOCAL duk_small_int_t duk__decode_hex_escape(const duk_uint8_t *p, duk_small_int_t n) {
100 	duk_small_int_t ch;
101 	duk_small_int_t t = 0;
102 
103 	while (n > 0) {
104 		t = t * 16;
105 		ch = (duk_small_int_t) duk_hex_dectab[*p++];
106 		if (DUK_LIKELY(ch >= 0)) {
107 			t += ch;
108 		} else {
109 			return -1;
110 		}
111 		n--;
112 	}
113 	return t;
114 }
115 
duk__transform_helper(duk_hthread * thr,duk__transform_callback callback,const void * udata)116 DUK_LOCAL int duk__transform_helper(duk_hthread *thr, duk__transform_callback callback, const void *udata) {
117 	duk__transform_context tfm_ctx_alloc;
118 	duk__transform_context *tfm_ctx = &tfm_ctx_alloc;
119 	duk_codepoint_t cp;
120 
121 	tfm_ctx->thr = thr;
122 
123 	tfm_ctx->h_str = duk_to_hstring(thr, 0);
124 	DUK_ASSERT(tfm_ctx->h_str != NULL);
125 
126 	DUK_BW_INIT_PUSHBUF(thr, &tfm_ctx->bw, DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str));  /* initial size guess */
127 
128 	tfm_ctx->p_start = DUK_HSTRING_GET_DATA(tfm_ctx->h_str);
129 	tfm_ctx->p_end = tfm_ctx->p_start + DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str);
130 	tfm_ctx->p = tfm_ctx->p_start;
131 
132 	while (tfm_ctx->p < tfm_ctx->p_end) {
133 		cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end);
134 		callback(tfm_ctx, udata, cp);
135 	}
136 
137 	DUK_BW_COMPACT(thr, &tfm_ctx->bw);
138 
139 	(void) duk_buffer_to_string(thr, -1);  /* Safe if transform is safe. */
140 	return 1;
141 }
142 
duk__transform_callback_encode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)143 DUK_LOCAL void duk__transform_callback_encode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
144 	duk_uint8_t xutf8_buf[DUK_UNICODE_MAX_XUTF8_LENGTH];
145 	duk_small_int_t len;
146 	duk_codepoint_t cp1, cp2;
147 	duk_small_int_t i, t;
148 	const duk_uint8_t *unescaped_table = (const duk_uint8_t *) udata;
149 
150 	/* UTF-8 encoded bytes escaped as %xx%xx%xx... -> 3 * nbytes.
151 	 * Codepoint range is restricted so this is a slightly too large
152 	 * but doesn't matter.
153 	 */
154 	DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 3 * DUK_UNICODE_MAX_XUTF8_LENGTH);
155 
156 	if (cp < 0) {
157 		goto uri_error;
158 	} else if ((cp < 0x80L) && DUK__CHECK_BITMASK(unescaped_table, cp)) {
159 		DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
160 		return;
161 	} else if (cp >= 0xdc00L && cp <= 0xdfffL) {
162 		goto uri_error;
163 	} else if (cp >= 0xd800L && cp <= 0xdbffL) {
164 		/* Needs lookahead */
165 		if (duk_unicode_decode_xutf8(tfm_ctx->thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end, (duk_ucodepoint_t *) &cp2) == 0) {
166 			goto uri_error;
167 		}
168 		if (!(cp2 >= 0xdc00L && cp2 <= 0xdfffL)) {
169 			goto uri_error;
170 		}
171 		cp1 = cp;
172 		cp = (duk_codepoint_t) (((cp1 - 0xd800L) << 10) + (cp2 - 0xdc00L) + 0x10000L);
173 	} else if (cp > 0x10ffffL) {
174 		/* Although we can allow non-BMP characters (they'll decode
175 		 * back into surrogate pairs), we don't allow extended UTF-8
176 		 * characters; they would encode to URIs which won't decode
177 		 * back because of strict UTF-8 checks in URI decoding.
178 		 * (However, we could just as well allow them here.)
179 		 */
180 		goto uri_error;
181 	} else {
182 		/* Non-BMP characters within valid UTF-8 range: encode as is.
183 		 * They'll decode back into surrogate pairs if the escaped
184 		 * output is decoded.
185 		 */
186 		;
187 	}
188 
189 	len = duk_unicode_encode_xutf8((duk_ucodepoint_t) cp, xutf8_buf);
190 	for (i = 0; i < len; i++) {
191 		t = (duk_small_int_t) xutf8_buf[i];
192 		DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
193 		                      &tfm_ctx->bw,
194 		                      DUK_ASC_PERCENT,
195 		                      (duk_uint8_t) duk_uc_nybbles[t >> 4],
196                                       (duk_uint8_t) duk_uc_nybbles[t & 0x0f]);
197 	}
198 
199 	return;
200 
201  uri_error:
202 	DUK_ERROR_URI(tfm_ctx->thr, DUK_STR_INVALID_INPUT);
203 	DUK_WO_NORETURN(return;);
204 }
205 
duk__transform_callback_decode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)206 DUK_LOCAL void duk__transform_callback_decode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
207 	const duk_uint8_t *reserved_table = (const duk_uint8_t *) udata;
208 	duk_small_uint_t utf8_blen;
209 	duk_codepoint_t min_cp;
210 	duk_small_int_t t;  /* must be signed */
211 	duk_small_uint_t i;
212 
213 	/* Maximum write size: XUTF8 path writes max DUK_UNICODE_MAX_XUTF8_LENGTH,
214 	 * percent escape path writes max two times CESU-8 encoded BMP length.
215 	 */
216 	DUK_BW_ENSURE(tfm_ctx->thr,
217 	              &tfm_ctx->bw,
218 	              (DUK_UNICODE_MAX_XUTF8_LENGTH >= 2 * DUK_UNICODE_MAX_CESU8_BMP_LENGTH ?
219 	              DUK_UNICODE_MAX_XUTF8_LENGTH : DUK_UNICODE_MAX_CESU8_BMP_LENGTH));
220 
221 	if (cp == (duk_codepoint_t) '%') {
222 		const duk_uint8_t *p = tfm_ctx->p;
223 		duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p);  /* bytes left */
224 
225 		DUK_DDD(DUK_DDDPRINT("percent encoding, left=%ld", (long) left));
226 
227 		if (left < 2) {
228 			goto uri_error;
229 		}
230 
231 		t = duk__decode_hex_escape(p, 2);
232 		DUK_DDD(DUK_DDDPRINT("first byte: %ld", (long) t));
233 		if (t < 0) {
234 			goto uri_error;
235 		}
236 
237 		if (t < 0x80) {
238 			if (DUK__CHECK_BITMASK(reserved_table, t)) {
239 				/* decode '%xx' to '%xx' if decoded char in reserved set */
240 				DUK_ASSERT(tfm_ctx->p - 1 >= tfm_ctx->p_start);
241 				DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
242 				                      &tfm_ctx->bw,
243 				                      DUK_ASC_PERCENT,
244 				                      p[0],
245 				                      p[1]);
246 			} else {
247 				DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) t);
248 			}
249 			tfm_ctx->p += 2;
250 			return;
251 		}
252 
253 		/* Decode UTF-8 codepoint from a sequence of hex escapes.  The
254 		 * first byte of the sequence has been decoded to 't'.
255 		 *
256 		 * Note that UTF-8 validation must be strict according to the
257 		 * specification: E5.1 Section 15.1.3, decode algorithm step
258 		 * 4.d.vii.8.  URIError from non-shortest encodings is also
259 		 * specifically noted in the spec.
260 		 */
261 
262 		DUK_ASSERT(t >= 0x80);
263 		if (t < 0xc0) {
264 			/* continuation byte */
265 			goto uri_error;
266 		} else if (t < 0xe0) {
267 			/* 110x xxxx; 2 bytes */
268 			utf8_blen = 2;
269 			min_cp = 0x80L;
270 			cp = t & 0x1f;
271 		} else if (t < 0xf0) {
272 			/* 1110 xxxx; 3 bytes */
273 			utf8_blen = 3;
274 			min_cp = 0x800L;
275 			cp = t & 0x0f;
276 		} else if (t < 0xf8) {
277 			/* 1111 0xxx; 4 bytes */
278 			utf8_blen = 4;
279 			min_cp = 0x10000L;
280 			cp = t & 0x07;
281 		} else {
282 			/* extended utf-8 not allowed for URIs */
283 			goto uri_error;
284 		}
285 
286 		if (left < utf8_blen * 3 - 1) {
287 			/* '%xx%xx...%xx', p points to char after first '%' */
288 			goto uri_error;
289 		}
290 
291 		p += 3;
292 		for (i = 1; i < utf8_blen; i++) {
293 			/* p points to digit part ('%xy', p points to 'x') */
294 			t = duk__decode_hex_escape(p, 2);
295 			DUK_DDD(DUK_DDDPRINT("i=%ld utf8_blen=%ld cp=%ld t=0x%02lx",
296 			                     (long) i, (long) utf8_blen, (long) cp, (unsigned long) t));
297 			if (t < 0) {
298 				goto uri_error;
299 			}
300 			if ((t & 0xc0) != 0x80) {
301 				goto uri_error;
302 			}
303 			cp = (cp << 6) + (t & 0x3f);
304 			p += 3;
305 		}
306 		p--;  /* p overshoots */
307 		tfm_ctx->p = p;
308 
309 		DUK_DDD(DUK_DDDPRINT("final cp=%ld, min_cp=%ld", (long) cp, (long) min_cp));
310 
311 		if (cp < min_cp || cp > 0x10ffffL || (cp >= 0xd800L && cp <= 0xdfffL)) {
312 			goto uri_error;
313 		}
314 
315 		/* The E5.1 algorithm checks whether or not a decoded codepoint
316 		 * is below 0x80 and perhaps may be in the "reserved" set.
317 		 * This seems pointless because the single byte UTF-8 case is
318 		 * handled separately, and non-shortest encodings are rejected.
319 		 * So, 'cp' cannot be below 0x80 here, and thus cannot be in
320 		 * the reserved set.
321 		 */
322 
323 		/* utf-8 validation ensures these */
324 		DUK_ASSERT(cp >= 0x80L && cp <= 0x10ffffL);
325 
326 		if (cp >= 0x10000L) {
327 			cp -= 0x10000L;
328 			DUK_ASSERT(cp < 0x100000L);
329 
330 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp >> 10) + 0xd800L));
331 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp & 0x03ffL) + 0xdc00L));
332 		} else {
333 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
334 		}
335 	} else {
336 		DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
337 	}
338 	return;
339 
340  uri_error:
341 	DUK_ERROR_URI(tfm_ctx->thr, DUK_STR_INVALID_INPUT);
342 	DUK_WO_NORETURN(return;);
343 }
344 
345 #if defined(DUK_USE_SECTION_B)
duk__transform_callback_escape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)346 DUK_LOCAL void duk__transform_callback_escape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
347 	DUK_UNREF(udata);
348 
349 	DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 6);
350 
351 	if (cp < 0) {
352 		goto esc_error;
353 	} else if ((cp < 0x80L) && DUK__CHECK_BITMASK(duk__escape_unescaped_table, cp)) {
354 		DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
355 	} else if (cp < 0x100L) {
356 		DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
357 		                      &tfm_ctx->bw,
358 		                      (duk_uint8_t) DUK_ASC_PERCENT,
359 		                      (duk_uint8_t) duk_uc_nybbles[cp >> 4],
360 		                      (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
361 	} else if (cp < 0x10000L) {
362 		DUK_BW_WRITE_RAW_U8_6(tfm_ctx->thr,
363 		                      &tfm_ctx->bw,
364 		                      (duk_uint8_t) DUK_ASC_PERCENT,
365 		                      (duk_uint8_t) DUK_ASC_LC_U,
366 		                      (duk_uint8_t) duk_uc_nybbles[cp >> 12],
367 		                      (duk_uint8_t) duk_uc_nybbles[(cp >> 8) & 0x0f],
368 		                      (duk_uint8_t) duk_uc_nybbles[(cp >> 4) & 0x0f],
369 		                      (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
370 	} else {
371 		/* Characters outside BMP cannot be escape()'d.  We could
372 		 * encode them as surrogate pairs (for codepoints inside
373 		 * valid UTF-8 range, but not extended UTF-8).  Because
374 		 * escape() and unescape() are legacy functions, we don't.
375 		 */
376 		goto esc_error;
377 	}
378 
379 	return;
380 
381  esc_error:
382 	DUK_ERROR_TYPE(tfm_ctx->thr, DUK_STR_INVALID_INPUT);
383 	DUK_WO_NORETURN(return;);
384 }
385 
duk__transform_callback_unescape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)386 DUK_LOCAL void duk__transform_callback_unescape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
387 	duk_small_int_t t;
388 
389 	DUK_UNREF(udata);
390 
391 	if (cp == (duk_codepoint_t) '%') {
392 		const duk_uint8_t *p = tfm_ctx->p;
393 		duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p);  /* bytes left */
394 
395 		if (left >= 5 && p[0] == 'u' &&
396 		    ((t = duk__decode_hex_escape(p + 1, 4)) >= 0)) {
397 			cp = (duk_codepoint_t) t;
398 			tfm_ctx->p += 5;
399 		} else if (left >= 2 &&
400 		           ((t = duk__decode_hex_escape(p, 2)) >= 0)) {
401 			cp = (duk_codepoint_t) t;
402 			tfm_ctx->p += 2;
403 		}
404 	}
405 
406 	DUK_BW_WRITE_ENSURE_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
407 }
408 #endif  /* DUK_USE_SECTION_B */
409 
410 /*
411  *  Eval
412  *
413  *  Eval needs to handle both a "direct eval" and an "indirect eval".
414  *  Direct eval handling needs access to the caller's activation so that its
415  *  lexical environment can be accessed.  A direct eval is only possible from
416  *  ECMAScript code; an indirect eval call is possible also from C code.
417  *  When an indirect eval call is made from C code, there may not be a
418  *  calling activation at all which needs careful handling.
419  */
420 
duk_bi_global_object_eval(duk_hthread * thr)421 DUK_INTERNAL duk_ret_t duk_bi_global_object_eval(duk_hthread *thr) {
422 	duk_hstring *h;
423 	duk_activation *act_caller;
424 	duk_activation *act_eval;
425 	duk_hcompfunc *func;
426 	duk_hobject *outer_lex_env;
427 	duk_hobject *outer_var_env;
428 	duk_bool_t this_to_global = 1;
429 	duk_small_uint_t comp_flags;
430 	duk_int_t level = -2;
431 	duk_small_uint_t call_flags;
432 
433 	DUK_ASSERT(duk_get_top(thr) == 1 || duk_get_top(thr) == 2);  /* 2 when called by debugger */
434 	DUK_ASSERT(thr->callstack_top >= 1);  /* at least this function exists */
435 	DUK_ASSERT(thr->callstack_curr != NULL);
436 	DUK_ASSERT((thr->callstack_curr->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0 || /* indirect eval */
437 	           (thr->callstack_top >= 2));  /* if direct eval, calling activation must exist */
438 
439 	/*
440 	 *  callstack_top - 1 --> this function
441 	 *  callstack_top - 2 --> caller (may not exist)
442 	 *
443 	 *  If called directly from C, callstack_top might be 1.  If calling
444 	 *  activation doesn't exist, call must be indirect.
445 	 */
446 
447 	h = duk_get_hstring_notsymbol(thr, 0);
448 	if (!h) {
449 		/* Symbol must be returned as is, like any non-string values. */
450 		return 1;  /* return arg as-is */
451 	}
452 
453 #if defined(DUK_USE_DEBUGGER_SUPPORT)
454 	/* NOTE: level is used only by the debugger and should never be present
455 	 * for an ECMAScript eval().
456 	 */
457 	DUK_ASSERT(level == -2);  /* by default, use caller's environment */
458 	if (duk_get_top(thr) >= 2 && duk_is_number(thr, 1)) {
459 		level = duk_get_int(thr, 1);
460 	}
461 	DUK_ASSERT(level <= -2);  /* This is guaranteed by debugger code. */
462 #endif
463 
464 	/* [ source ] */
465 
466 	comp_flags = DUK_COMPILE_EVAL;
467 	act_eval = thr->callstack_curr;  /* this function */
468 	DUK_ASSERT(act_eval != NULL);
469 	act_caller = duk_hthread_get_activation_for_level(thr, level);
470 	if (act_caller != NULL) {
471 		/* Have a calling activation, check for direct eval (otherwise
472 		 * assume indirect eval.
473 		 */
474 		if ((act_caller->flags & DUK_ACT_FLAG_STRICT) &&
475 		    (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL)) {
476 			/* Only direct eval inherits strictness from calling code
477 			 * (E5.1 Section 10.1.1).
478 			 */
479 			comp_flags |= DUK_COMPILE_STRICT;
480 		}
481 	} else {
482 		DUK_ASSERT((act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0);
483 	}
484 
485 	duk_push_hstring_stridx(thr, DUK_STRIDX_INPUT);  /* XXX: copy from caller? */
486 	duk_js_compile(thr,
487 	               (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h),
488 	               (duk_size_t) DUK_HSTRING_GET_BYTELEN(h),
489 	               comp_flags);
490 	func = (duk_hcompfunc *) duk_known_hobject(thr, -1);
491 	DUK_ASSERT(DUK_HOBJECT_IS_COMPFUNC((duk_hobject *) func));
492 
493 	/* [ source template ] */
494 
495 	/* E5 Section 10.4.2 */
496 
497 	if (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) {
498 		DUK_ASSERT(thr->callstack_top >= 2);
499 		DUK_ASSERT(act_caller != NULL);
500 		if (act_caller->lex_env == NULL) {
501 			DUK_ASSERT(act_caller->var_env == NULL);
502 			DUK_DDD(DUK_DDDPRINT("delayed environment initialization"));
503 
504 			/* this may have side effects, so re-lookup act */
505 			duk_js_init_activation_environment_records_delayed(thr, act_caller);
506 		}
507 		DUK_ASSERT(act_caller->lex_env != NULL);
508 		DUK_ASSERT(act_caller->var_env != NULL);
509 
510 		this_to_global = 0;
511 
512 		if (DUK_HOBJECT_HAS_STRICT((duk_hobject *) func)) {
513 			duk_hdecenv *new_env;
514 			duk_hobject *act_lex_env;
515 
516 			DUK_DDD(DUK_DDDPRINT("direct eval call to a strict function -> "
517 			                     "var_env and lex_env to a fresh env, "
518 			                     "this_binding to caller's this_binding"));
519 
520 			act_lex_env = act_caller->lex_env;
521 
522 			new_env = duk_hdecenv_alloc(thr,
523 			                            DUK_HOBJECT_FLAG_EXTENSIBLE |
524 			                            DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_DECENV));
525 			DUK_ASSERT(new_env != NULL);
526 			duk_push_hobject(thr, (duk_hobject *) new_env);
527 
528 			DUK_ASSERT(DUK_HOBJECT_GET_PROTOTYPE(thr->heap, (duk_hobject *) new_env) == NULL);
529 			DUK_HOBJECT_SET_PROTOTYPE(thr->heap, (duk_hobject *) new_env, act_lex_env);
530 			DUK_HOBJECT_INCREF_ALLOWNULL(thr, act_lex_env);
531 			DUK_DDD(DUK_DDDPRINT("new_env allocated: %!iO", (duk_heaphdr *) new_env));
532 
533 			outer_lex_env = (duk_hobject *) new_env;
534 			outer_var_env = (duk_hobject *) new_env;
535 
536 			duk_insert(thr, 0);  /* stash to bottom of value stack to keep new_env reachable for duration of eval */
537 
538 			/* compiler's responsibility */
539 			DUK_ASSERT(DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
540 		} else {
541 			DUK_DDD(DUK_DDDPRINT("direct eval call to a non-strict function -> "
542 			                     "var_env and lex_env to caller's envs, "
543 			                     "this_binding to caller's this_binding"));
544 
545 			outer_lex_env = act_caller->lex_env;
546 			outer_var_env = act_caller->var_env;
547 
548 			/* compiler's responsibility */
549 			DUK_ASSERT(!DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
550 		}
551 	} else {
552 		DUK_DDD(DUK_DDDPRINT("indirect eval call -> var_env and lex_env to "
553 		                     "global object, this_binding to global object"));
554 
555 		this_to_global = 1;
556 		outer_lex_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
557 		outer_var_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
558 	}
559 
560 	/* Eval code doesn't need an automatic .prototype object. */
561 	duk_js_push_closure(thr, func, outer_var_env, outer_lex_env, 0 /*add_auto_proto*/);
562 
563 	/* [ env? source template closure ] */
564 
565 	if (this_to_global) {
566 		DUK_ASSERT(thr->builtins[DUK_BIDX_GLOBAL] != NULL);
567 		duk_push_hobject_bidx(thr, DUK_BIDX_GLOBAL);
568 	} else {
569 		duk_tval *tv;
570 		DUK_ASSERT(thr->callstack_top >= 2);
571 		DUK_ASSERT(act_caller != NULL);
572 		tv = (duk_tval *) (void *) ((duk_uint8_t *) thr->valstack + act_caller->bottom_byteoff - sizeof(duk_tval));  /* this is just beneath bottom */
573 		DUK_ASSERT(tv >= thr->valstack);
574 		duk_push_tval(thr, tv);
575 	}
576 
577 	DUK_DDD(DUK_DDDPRINT("eval -> lex_env=%!iO, var_env=%!iO, this_binding=%!T",
578 	                     (duk_heaphdr *) outer_lex_env,
579 	                     (duk_heaphdr *) outer_var_env,
580 	                     duk_get_tval(thr, -1)));
581 
582 	/* [ env? source template closure this ] */
583 
584 	call_flags = 0;
585 	if (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) {
586 		/* Set DIRECT_EVAL flag for the call; it's not strictly
587 		 * needed for the 'inner' eval call (the eval body) but
588 		 * current new.target implementation expects to find it
589 		 * so it can traverse direct eval chains up to the real
590 		 * calling function.
591 		 */
592 		call_flags |= DUK_CALL_FLAG_DIRECT_EVAL;
593 	}
594 	duk_handle_call_unprotected_nargs(thr, 0, call_flags);
595 
596 	/* [ env? source template result ] */
597 
598 	return 1;
599 }
600 
601 /*
602  *  Parsing of ints and floats
603  */
604 
605 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_parse_int(duk_hthread * thr)606 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_int(duk_hthread *thr) {
607 	duk_int32_t radix;
608 	duk_small_uint_t s2n_flags;
609 
610 	DUK_ASSERT_TOP(thr, 2);
611 	duk_to_string(thr, 0);  /* Reject symbols. */
612 
613 	radix = duk_to_int32(thr, 1);
614 
615 	/* While parseInt() recognizes 0xdeadbeef, it doesn't recognize
616 	 * ES2015 0o123 or 0b10001.
617 	 */
618 	s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
619 	            DUK_S2N_FLAG_ALLOW_GARBAGE |
620 	            DUK_S2N_FLAG_ALLOW_PLUS |
621 	            DUK_S2N_FLAG_ALLOW_MINUS |
622 	            DUK_S2N_FLAG_ALLOW_LEADING_ZERO |
623 	            DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
624 
625 	/* Specification stripPrefix maps to DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT.
626 	 *
627 	 * Don't autodetect octals (from leading zeroes), require user code to
628 	 * provide an explicit radix 8 for parsing octal.  See write-up from Mozilla:
629 	 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseInt#ECMAScript_5_Removes_Octal_Interpretation
630 	 */
631 
632 	if (radix != 0) {
633 		if (radix < 2 || radix > 36) {
634 			goto ret_nan;
635 		}
636 		if (radix != 16) {
637 			s2n_flags &= ~DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
638 		}
639 	} else {
640 		radix = 10;
641 	}
642 
643 	duk_dup_0(thr);
644 	duk_numconv_parse(thr, (duk_small_int_t) radix, s2n_flags);
645 	return 1;
646 
647  ret_nan:
648 	duk_push_nan(thr);
649 	return 1;
650 }
651 #endif  /* DUK_USE_GLOBAL_BUILTIN */
652 
653 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_parse_float(duk_hthread * thr)654 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_float(duk_hthread *thr) {
655 	duk_small_uint_t s2n_flags;
656 
657 	DUK_ASSERT_TOP(thr, 1);
658 	duk_to_string(thr, 0);  /* Reject symbols. */
659 
660 	/* XXX: check flags */
661 	s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
662 	            DUK_S2N_FLAG_ALLOW_EXP |
663 	            DUK_S2N_FLAG_ALLOW_GARBAGE |
664 	            DUK_S2N_FLAG_ALLOW_PLUS |
665 	            DUK_S2N_FLAG_ALLOW_MINUS |
666 	            DUK_S2N_FLAG_ALLOW_INF |
667 	            DUK_S2N_FLAG_ALLOW_FRAC |
668 	            DUK_S2N_FLAG_ALLOW_NAKED_FRAC |
669 	            DUK_S2N_FLAG_ALLOW_EMPTY_FRAC |
670 	            DUK_S2N_FLAG_ALLOW_LEADING_ZERO;
671 
672 	duk_numconv_parse(thr, 10 /*radix*/, s2n_flags);
673 	return 1;
674 }
675 #endif  /* DUK_USE_GLOBAL_BUILTIN */
676 
677 /*
678  *  Number checkers
679  */
680 
681 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_is_nan(duk_hthread * thr)682 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_nan(duk_hthread *thr) {
683 	duk_double_t d = duk_to_number(thr, 0);
684 	duk_push_boolean(thr, (duk_bool_t) DUK_ISNAN(d));
685 	return 1;
686 }
687 #endif  /* DUK_USE_GLOBAL_BUILTIN */
688 
689 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_is_finite(duk_hthread * thr)690 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_finite(duk_hthread *thr) {
691 	duk_double_t d = duk_to_number(thr, 0);
692 	duk_push_boolean(thr, (duk_bool_t) DUK_ISFINITE(d));
693 	return 1;
694 }
695 #endif  /* DUK_USE_GLOBAL_BUILTIN */
696 
697 /*
698  *  URI handling
699  */
700 
701 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_decode_uri(duk_hthread * thr)702 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri(duk_hthread *thr) {
703 	return duk__transform_helper(thr, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_reserved_table);
704 }
705 
duk_bi_global_object_decode_uri_component(duk_hthread * thr)706 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri_component(duk_hthread *thr) {
707 	return duk__transform_helper(thr, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_component_reserved_table);
708 }
709 
duk_bi_global_object_encode_uri(duk_hthread * thr)710 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri(duk_hthread *thr) {
711 	return duk__transform_helper(thr, duk__transform_callback_encode_uri, (const void *) duk__encode_uriunescaped_table);
712 }
713 
duk_bi_global_object_encode_uri_component(duk_hthread * thr)714 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri_component(duk_hthread *thr) {
715 	return duk__transform_helper(thr, duk__transform_callback_encode_uri, (const void *) duk__encode_uricomponent_unescaped_table);
716 }
717 
718 #if defined(DUK_USE_SECTION_B)
duk_bi_global_object_escape(duk_hthread * thr)719 DUK_INTERNAL duk_ret_t duk_bi_global_object_escape(duk_hthread *thr) {
720 	return duk__transform_helper(thr, duk__transform_callback_escape, (const void *) NULL);
721 }
722 
duk_bi_global_object_unescape(duk_hthread * thr)723 DUK_INTERNAL duk_ret_t duk_bi_global_object_unescape(duk_hthread *thr) {
724 	return duk__transform_helper(thr, duk__transform_callback_unescape, (const void *) NULL);
725 }
726 #endif  /* DUK_USE_SECTION_B */
727 #endif  /* DUK_USE_GLOBAL_BUILTIN */
728