1 /*
2  *  Global object built-ins
3  */
4 
5 #include "duk_internal.h"
6 
7 /*
8  *  Encoding/decoding helpers
9  */
10 
11 /* XXX: Could add fast path (for each transform callback) with direct byte
12  * lookups (no shifting) and no explicit check for x < 0x80 before table
13  * lookup.
14  */
15 
16 /* Macros for creating and checking bitmasks for character encoding.
17  * Bit number is a bit counterintuitive, but minimizes code size.
18  */
/* Pack eight 0/1 flags into a single byte.  The first argument becomes
 * bit 0 (least significant) and the last argument becomes bit 7.
 */
#define DUK__MKBITS(a,b,c,d,e,f,g,h) \
	((duk_uint8_t) (((h) << 7) | ((g) << 6) | ((f) << 5) | ((e) << 4) | \
	                ((d) << 3) | ((c) << 2) | ((b) << 1) | ((a) << 0)))
/* Evaluates to nonzero iff bit number 'cp' is set in the byte array 'table'
 * (byte index cp >> 3, bit index cp & 7, least significant bit first).
 * No range check is made; 'cp' is evaluated twice.
 */
#define DUK__CHECK_BITMASK(table,cp) \
	((1 << ((cp) & 0x07)) & (table)[(cp) >> 3])
24 
25 /* E5.1 Section 15.1.3.3: uriReserved + uriUnescaped + '#' */
26 DUK_LOCAL const duk_uint8_t duk__encode_uriunescaped_table[16] = {
27 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
28 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
29 	DUK__MKBITS(0, 1, 0, 1, 1, 0, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x20-0x2f */
30 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
31 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
32 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
33 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
34 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
35 };
36 
37 /* E5.1 Section 15.1.3.4: uriUnescaped */
38 DUK_LOCAL const duk_uint8_t duk__encode_uricomponent_unescaped_table[16] = {
39 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
40 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
41 	DUK__MKBITS(0, 1, 0, 0, 0, 0, 0, 1), DUK__MKBITS(1, 1, 1, 0, 0, 1, 1, 0),  /* 0x20-0x2f */
42 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
43 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
44 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
45 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
46 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
47 };
48 
49 /* E5.1 Section 15.1.3.1: uriReserved + '#' */
50 DUK_LOCAL const duk_uint8_t duk__decode_uri_reserved_table[16] = {
51 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
52 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
53 	DUK__MKBITS(0, 0, 0, 1, 1, 0, 1, 0), DUK__MKBITS(0, 0, 0, 1, 1, 0, 0, 1),  /* 0x20-0x2f */
54 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
55 	DUK__MKBITS(1, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
56 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
57 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
58 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
59 };
60 
61 /* E5.1 Section 15.1.3.2: empty */
62 DUK_LOCAL const duk_uint8_t duk__decode_uri_component_reserved_table[16] = {
63 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
64 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
65 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x20-0x2f */
66 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
67 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
68 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
69 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
70 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
71 };
72 
73 #ifdef DUK_USE_SECTION_B
/* E5.1 Section B.2.1 (escape), step 7: characters copied through unescaped. */
75 DUK_LOCAL const duk_uint8_t duk__escape_unescaped_table[16] = {
76 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
77 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
78 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 1, 1),  /* 0x20-0x2f */
79 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
80 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
81 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
82 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
83 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 0)   /* 0x70-0x7f */
84 };
85 #endif  /* DUK_USE_SECTION_B */
86 
87 #undef DUK__MKBITS
88 
89 typedef struct {
90 	duk_hthread *thr;
91 	duk_hstring *h_str;
92 	duk_bufwriter_ctx bw;
93 	const duk_uint8_t *p;
94 	const duk_uint8_t *p_start;
95 	const duk_uint8_t *p_end;
96 } duk__transform_context;
97 
98 typedef void (*duk__transform_callback)(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp);
99 
100 /* XXX: refactor and share with other code */
duk__decode_hex_escape(const duk_uint8_t * p,duk_small_int_t n)101 DUK_LOCAL duk_small_int_t duk__decode_hex_escape(const duk_uint8_t *p, duk_small_int_t n) {
102 	duk_small_int_t ch;
103 	duk_small_int_t t = 0;
104 
105 	while (n > 0) {
106 		t = t * 16;
107 		ch = (duk_small_int_t) duk_hex_dectab[*p++];
108 		if (DUK_LIKELY(ch >= 0)) {
109 			t += ch;
110 		} else {
111 			return -1;
112 		}
113 		n--;
114 	}
115 	return t;
116 }
117 
duk__transform_helper(duk_context * ctx,duk__transform_callback callback,const void * udata)118 DUK_LOCAL int duk__transform_helper(duk_context *ctx, duk__transform_callback callback, const void *udata) {
119 	duk_hthread *thr = (duk_hthread *) ctx;
120 	duk__transform_context tfm_ctx_alloc;
121 	duk__transform_context *tfm_ctx = &tfm_ctx_alloc;
122 	duk_codepoint_t cp;
123 
124 	tfm_ctx->thr = thr;
125 
126 	tfm_ctx->h_str = duk_to_hstring(ctx, 0);
127 	DUK_ASSERT(tfm_ctx->h_str != NULL);
128 
129 	DUK_BW_INIT_PUSHBUF(thr, &tfm_ctx->bw, DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str));  /* initial size guess */
130 
131 	tfm_ctx->p_start = DUK_HSTRING_GET_DATA(tfm_ctx->h_str);
132 	tfm_ctx->p_end = tfm_ctx->p_start + DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str);
133 	tfm_ctx->p = tfm_ctx->p_start;
134 
135 	while (tfm_ctx->p < tfm_ctx->p_end) {
136 		cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end);
137 		callback(tfm_ctx, udata, cp);
138 	}
139 
140 	DUK_BW_COMPACT(thr, &tfm_ctx->bw);
141 
142 	duk_to_string(ctx, -1);
143 	return 1;
144 }
145 
duk__transform_callback_encode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)146 DUK_LOCAL void duk__transform_callback_encode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
147 	duk_uint8_t xutf8_buf[DUK_UNICODE_MAX_XUTF8_LENGTH];
148 	duk_small_int_t len;
149 	duk_codepoint_t cp1, cp2;
150 	duk_small_int_t i, t;
151 	const duk_uint8_t *unescaped_table = (const duk_uint8_t *) udata;
152 
153 	/* UTF-8 encoded bytes escaped as %xx%xx%xx... -> 3 * nbytes.
154 	 * Codepoint range is restricted so this is a slightly too large
155 	 * but doesn't matter.
156 	 */
157 	DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 3 * DUK_UNICODE_MAX_XUTF8_LENGTH);
158 
159 	if (cp < 0) {
160 		goto uri_error;
161 	} else if ((cp < 0x80L) && DUK__CHECK_BITMASK(unescaped_table, cp)) {
162 		DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
163 		return;
164 	} else if (cp >= 0xdc00L && cp <= 0xdfffL) {
165 		goto uri_error;
166 	} else if (cp >= 0xd800L && cp <= 0xdbffL) {
167 		/* Needs lookahead */
168 		if (duk_unicode_decode_xutf8(tfm_ctx->thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end, (duk_ucodepoint_t *) &cp2) == 0) {
169 			goto uri_error;
170 		}
171 		if (!(cp2 >= 0xdc00L && cp2 <= 0xdfffL)) {
172 			goto uri_error;
173 		}
174 		cp1 = cp;
175 		cp = ((cp1 - 0xd800L) << 10) + (cp2 - 0xdc00L) + 0x10000L;
176 	} else if (cp > 0x10ffffL) {
177 		/* Although we can allow non-BMP characters (they'll decode
178 		 * back into surrogate pairs), we don't allow extended UTF-8
179 		 * characters; they would encode to URIs which won't decode
180 		 * back because of strict UTF-8 checks in URI decoding.
181 		 * (However, we could just as well allow them here.)
182 		 */
183 		goto uri_error;
184 	} else {
185 		/* Non-BMP characters within valid UTF-8 range: encode as is.
186 		 * They'll decode back into surrogate pairs if the escaped
187 		 * output is decoded.
188 		 */
189 		;
190 	}
191 
192 	len = duk_unicode_encode_xutf8((duk_ucodepoint_t) cp, xutf8_buf);
193 	for (i = 0; i < len; i++) {
194 		t = (int) xutf8_buf[i];
195 		DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
196 		                      &tfm_ctx->bw,
197 		                      DUK_ASC_PERCENT,
198 		                      (duk_uint8_t) duk_uc_nybbles[t >> 4],
199                                       (duk_uint8_t) duk_uc_nybbles[t & 0x0f]);
200 	}
201 
202 	return;
203 
204  uri_error:
205 	DUK_ERROR(tfm_ctx->thr, DUK_ERR_URI_ERROR, "invalid input");
206 }
207 
duk__transform_callback_decode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)208 DUK_LOCAL void duk__transform_callback_decode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
209 	const duk_uint8_t *reserved_table = (const duk_uint8_t *) udata;
210 	duk_small_uint_t utf8_blen;
211 	duk_codepoint_t min_cp;
212 	duk_small_int_t t;  /* must be signed */
213 	duk_small_uint_t i;
214 
215 	/* Maximum write size: XUTF8 path writes max DUK_UNICODE_MAX_XUTF8_LENGTH,
216 	 * percent escape path writes max two times CESU-8 encoded BMP length.
217 	 */
218 	DUK_BW_ENSURE(tfm_ctx->thr,
219 	              &tfm_ctx->bw,
220 	              (DUK_UNICODE_MAX_XUTF8_LENGTH >= 2 * DUK_UNICODE_MAX_CESU8_BMP_LENGTH ?
221 	              DUK_UNICODE_MAX_XUTF8_LENGTH : DUK_UNICODE_MAX_CESU8_BMP_LENGTH));
222 
223 	if (cp == (duk_codepoint_t) '%') {
224 		const duk_uint8_t *p = tfm_ctx->p;
225 		duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p);  /* bytes left */
226 
227 		DUK_DDD(DUK_DDDPRINT("percent encoding, left=%ld", (long) left));
228 
229 		if (left < 2) {
230 			goto uri_error;
231 		}
232 
233 		t = duk__decode_hex_escape(p, 2);
234 		DUK_DDD(DUK_DDDPRINT("first byte: %ld", (long) t));
235 		if (t < 0) {
236 			goto uri_error;
237 		}
238 
239 		if (t < 0x80) {
240 			if (DUK__CHECK_BITMASK(reserved_table, t)) {
241 				/* decode '%xx' to '%xx' if decoded char in reserved set */
242 				DUK_ASSERT(tfm_ctx->p - 1 >= tfm_ctx->p_start);
243 				DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
244 				                      &tfm_ctx->bw,
245 				                      DUK_ASC_PERCENT,
246 				                      p[0],
247 				                      p[1]);
248 			} else {
249 				DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) t);
250 			}
251 			tfm_ctx->p += 2;
252 			return;
253 		}
254 
255 		/* Decode UTF-8 codepoint from a sequence of hex escapes.  The
256 		 * first byte of the sequence has been decoded to 't'.
257 		 *
258 		 * Note that UTF-8 validation must be strict according to the
259 		 * specification: E5.1 Section 15.1.3, decode algorithm step
260 		 * 4.d.vii.8.  URIError from non-shortest encodings is also
261 		 * specifically noted in the spec.
262 		 */
263 
264 		DUK_ASSERT(t >= 0x80);
265 		if (t < 0xc0) {
266 			/* continuation byte */
267 			goto uri_error;
268 		} else if (t < 0xe0) {
269 			/* 110x xxxx; 2 bytes */
270 			utf8_blen = 2;
271 			min_cp = 0x80L;
272 			cp = t & 0x1f;
273 		} else if (t < 0xf0) {
274 			/* 1110 xxxx; 3 bytes */
275 			utf8_blen = 3;
276 			min_cp = 0x800L;
277 			cp = t & 0x0f;
278 		} else if (t < 0xf8) {
279 			/* 1111 0xxx; 4 bytes */
280 			utf8_blen = 4;
281 			min_cp = 0x10000L;
282 			cp = t & 0x07;
283 		} else {
284 			/* extended utf-8 not allowed for URIs */
285 			goto uri_error;
286 		}
287 
288 		if (left < utf8_blen * 3 - 1) {
289 			/* '%xx%xx...%xx', p points to char after first '%' */
290 			goto uri_error;
291 		}
292 
293 		p += 3;
294 		for (i = 1; i < utf8_blen; i++) {
295 			/* p points to digit part ('%xy', p points to 'x') */
296 			t = duk__decode_hex_escape(p, 2);
297 			DUK_DDD(DUK_DDDPRINT("i=%ld utf8_blen=%ld cp=%ld t=0x%02lx",
298 			                     (long) i, (long) utf8_blen, (long) cp, (unsigned long) t));
299 			if (t < 0) {
300 				goto uri_error;
301 			}
302 			if ((t & 0xc0) != 0x80) {
303 				goto uri_error;
304 			}
305 			cp = (cp << 6) + (t & 0x3f);
306 			p += 3;
307 		}
308 		p--;  /* p overshoots */
309 		tfm_ctx->p = p;
310 
311 		DUK_DDD(DUK_DDDPRINT("final cp=%ld, min_cp=%ld", (long) cp, (long) min_cp));
312 
313 		if (cp < min_cp || cp > 0x10ffffL || (cp >= 0xd800L && cp <= 0xdfffL)) {
314 			goto uri_error;
315 		}
316 
317 		/* The E5.1 algorithm checks whether or not a decoded codepoint
318 		 * is below 0x80 and perhaps may be in the "reserved" set.
319 		 * This seems pointless because the single byte UTF-8 case is
320 		 * handled separately, and non-shortest encodings are rejected.
321 		 * So, 'cp' cannot be below 0x80 here, and thus cannot be in
322 		 * the reserved set.
323 		 */
324 
325 		/* utf-8 validation ensures these */
326 		DUK_ASSERT(cp >= 0x80L && cp <= 0x10ffffL);
327 
328 		if (cp >= 0x10000L) {
329 			cp -= 0x10000L;
330 			DUK_ASSERT(cp < 0x100000L);
331 
332 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp >> 10) + 0xd800L));
333 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp & 0x03ffUL) + 0xdc00L));
334 		} else {
335 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
336 		}
337 	} else {
338 		DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
339 	}
340 	return;
341 
342  uri_error:
343 	DUK_ERROR(tfm_ctx->thr, DUK_ERR_URI_ERROR, "invalid input");
344 }
345 
346 #ifdef DUK_USE_SECTION_B
duk__transform_callback_escape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)347 DUK_LOCAL void duk__transform_callback_escape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
348 	DUK_UNREF(udata);
349 
350 	DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 6);
351 
352 	if (cp < 0) {
353 		goto esc_error;
354 	} else if ((cp < 0x80L) && DUK__CHECK_BITMASK(duk__escape_unescaped_table, cp)) {
355 		DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
356 	} else if (cp < 0x100L) {
357 		DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
358 		                      &tfm_ctx->bw,
359 		                      (duk_uint8_t) DUK_ASC_PERCENT,
360 		                      (duk_uint8_t) duk_uc_nybbles[cp >> 4],
361 		                      (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
362 	} else if (cp < 0x10000L) {
363 		DUK_BW_WRITE_RAW_U8_6(tfm_ctx->thr,
364 		                      &tfm_ctx->bw,
365 		                      (duk_uint8_t) DUK_ASC_PERCENT,
366 		                      (duk_uint8_t) DUK_ASC_LC_U,
367 		                      (duk_uint8_t) duk_uc_nybbles[cp >> 12],
368 		                      (duk_uint8_t) duk_uc_nybbles[(cp >> 8) & 0x0f],
369 		                      (duk_uint8_t) duk_uc_nybbles[(cp >> 4) & 0x0f],
370 		                      (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
371 	} else {
372 		/* Characters outside BMP cannot be escape()'d.  We could
373 		 * encode them as surrogate pairs (for codepoints inside
374 		 * valid UTF-8 range, but not extended UTF-8).  Because
375 		 * escape() and unescape() are legacy functions, we don't.
376 		 */
377 		goto esc_error;
378 	}
379 
380 	return;
381 
382  esc_error:
383 	DUK_ERROR_TYPE(tfm_ctx->thr, "invalid input");
384 }
385 
duk__transform_callback_unescape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)386 DUK_LOCAL void duk__transform_callback_unescape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
387 	duk_small_int_t t;
388 
389 	DUK_UNREF(udata);
390 
391 	if (cp == (duk_codepoint_t) '%') {
392 		const duk_uint8_t *p = tfm_ctx->p;
393 		duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p);  /* bytes left */
394 
395 		if (left >= 5 && p[0] == 'u' &&
396 		    ((t = duk__decode_hex_escape(p + 1, 4)) >= 0)) {
397 			cp = (duk_codepoint_t) t;
398 			tfm_ctx->p += 5;
399 		} else if (left >= 2 &&
400 		           ((t = duk__decode_hex_escape(p, 2)) >= 0)) {
401 			cp = (duk_codepoint_t) t;
402 			tfm_ctx->p += 2;
403 		}
404 	}
405 
406 	DUK_BW_WRITE_ENSURE_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
407 }
408 #endif  /* DUK_USE_SECTION_B */
409 
410 /*
411  *  Eval
412  *
413  *  Eval needs to handle both a "direct eval" and an "indirect eval".
414  *  Direct eval handling needs access to the caller's activation so that its
415  *  lexical environment can be accessed.  A direct eval is only possible from
416  *  Ecmascript code; an indirect eval call is possible also from C code.
417  *  When an indirect eval call is made from C code, there may not be a
418  *  calling activation at all which needs careful handling.
419  */
420 
duk_bi_global_object_eval(duk_context * ctx)421 DUK_INTERNAL duk_ret_t duk_bi_global_object_eval(duk_context *ctx) {
422 	duk_hthread *thr = (duk_hthread *) ctx;
423 	duk_hstring *h;
424 	duk_activation *act_caller;
425 	duk_activation *act_eval;
426 	duk_activation *act;
427 	duk_hcompiledfunction *func;
428 	duk_hobject *outer_lex_env;
429 	duk_hobject *outer_var_env;
430 	duk_bool_t this_to_global = 1;
431 	duk_small_uint_t comp_flags;
432 	duk_int_t level = -2;
433 
434 	DUK_ASSERT(duk_get_top(ctx) == 1 || duk_get_top(ctx) == 2);  /* 2 when called by debugger */
435 	DUK_ASSERT(thr->callstack_top >= 1);  /* at least this function exists */
436 	DUK_ASSERT(((thr->callstack + thr->callstack_top - 1)->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0 || /* indirect eval */
437 	           (thr->callstack_top >= 2));  /* if direct eval, calling activation must exist */
438 
439 	/*
440 	 *  callstack_top - 1 --> this function
441 	 *  callstack_top - 2 --> caller (may not exist)
442 	 *
443 	 *  If called directly from C, callstack_top might be 1.  If calling
444 	 *  activation doesn't exist, call must be indirect.
445 	 */
446 
447 	h = duk_get_hstring(ctx, 0);
448 	if (!h) {
449 		return 1;  /* return arg as-is */
450 	}
451 
452 #if defined(DUK_USE_DEBUGGER_SUPPORT)
453 	/* NOTE: level is used only by the debugger and should never be present
454 	 * for an Ecmascript eval().
455 	 */
456 	DUK_ASSERT(level == -2);  /* by default, use caller's environment */
457 	if (duk_get_top(ctx) >= 2 && duk_is_number(ctx, 1)) {
458 		level = duk_get_int(ctx, 1);
459 	}
460 	DUK_ASSERT(level <= -2);  /* This is guaranteed by debugger code. */
461 #endif
462 
463 	/* [ source ] */
464 
465 	comp_flags = DUK_JS_COMPILE_FLAG_EVAL;
466 	act_eval = thr->callstack + thr->callstack_top - 1;    /* this function */
467 	if (thr->callstack_top >= (duk_size_t) -level) {
468 		/* Have a calling activation, check for direct eval (otherwise
469 		 * assume indirect eval.
470 		 */
471 		act_caller = thr->callstack + thr->callstack_top + level;  /* caller */
472 		if ((act_caller->flags & DUK_ACT_FLAG_STRICT) &&
473 		    (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL)) {
474 			/* Only direct eval inherits strictness from calling code
475 			 * (E5.1 Section 10.1.1).
476 			 */
477 			comp_flags |= DUK_JS_COMPILE_FLAG_STRICT;
478 		}
479 	} else {
480 		DUK_ASSERT((act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0);
481 	}
482 	act_caller = NULL;  /* avoid dereference after potential callstack realloc */
483 	act_eval = NULL;
484 
485 	duk_push_hstring_stridx(ctx, DUK_STRIDX_INPUT);  /* XXX: copy from caller? */
486 	duk_js_compile(thr,
487 	               (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h),
488 	               (duk_size_t) DUK_HSTRING_GET_BYTELEN(h),
489 	               comp_flags);
490 	func = (duk_hcompiledfunction *) duk_get_hobject(ctx, -1);
491 	DUK_ASSERT(func != NULL);
492 	DUK_ASSERT(DUK_HOBJECT_IS_COMPILEDFUNCTION((duk_hobject *) func));
493 
494 	/* [ source template ] */
495 
496 	/* E5 Section 10.4.2 */
497 	DUK_ASSERT(thr->callstack_top >= 1);
498 	act = thr->callstack + thr->callstack_top - 1;  /* this function */
499 	if (act->flags & DUK_ACT_FLAG_DIRECT_EVAL) {
500 		DUK_ASSERT(thr->callstack_top >= 2);
501 		act = thr->callstack + thr->callstack_top + level;  /* caller */
502 		if (act->lex_env == NULL) {
503 			DUK_ASSERT(act->var_env == NULL);
504 			DUK_DDD(DUK_DDDPRINT("delayed environment initialization"));
505 
506 			/* this may have side effects, so re-lookup act */
507 			duk_js_init_activation_environment_records_delayed(thr, act);
508 			act = thr->callstack + thr->callstack_top + level;
509 		}
510 		DUK_ASSERT(act->lex_env != NULL);
511 		DUK_ASSERT(act->var_env != NULL);
512 
513 		this_to_global = 0;
514 
515 		if (DUK_HOBJECT_HAS_STRICT((duk_hobject *) func)) {
516 			duk_hobject *new_env;
517 			duk_hobject *act_lex_env;
518 
519 			DUK_DDD(DUK_DDDPRINT("direct eval call to a strict function -> "
520 			                     "var_env and lex_env to a fresh env, "
521 			                     "this_binding to caller's this_binding"));
522 
523 			act = thr->callstack + thr->callstack_top + level;  /* caller */
524 			act_lex_env = act->lex_env;
525 			act = NULL;  /* invalidated */
526 
527 			(void) duk_push_object_helper_proto(ctx,
528 			                                    DUK_HOBJECT_FLAG_EXTENSIBLE |
529 			                                    DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_DECENV),
530 			                                    act_lex_env);
531 			new_env = duk_require_hobject(ctx, -1);
532 			DUK_ASSERT(new_env != NULL);
533 			DUK_DDD(DUK_DDDPRINT("new_env allocated: %!iO",
534 			                     (duk_heaphdr *) new_env));
535 
536 			outer_lex_env = new_env;
537 			outer_var_env = new_env;
538 
539 			duk_insert(ctx, 0);  /* stash to bottom of value stack to keep new_env reachable for duration of eval */
540 
541 			/* compiler's responsibility */
542 			DUK_ASSERT(DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
543 		} else {
544 			DUK_DDD(DUK_DDDPRINT("direct eval call to a non-strict function -> "
545 			                     "var_env and lex_env to caller's envs, "
546 			                     "this_binding to caller's this_binding"));
547 
548 			outer_lex_env = act->lex_env;
549 			outer_var_env = act->var_env;
550 
551 			/* compiler's responsibility */
552 			DUK_ASSERT(!DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
553 		}
554 	} else {
555 		DUK_DDD(DUK_DDDPRINT("indirect eval call -> var_env and lex_env to "
556 		                     "global object, this_binding to global object"));
557 
558 		this_to_global = 1;
559 		outer_lex_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
560 		outer_var_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
561 	}
562 	act = NULL;
563 
564 	/* Eval code doesn't need an automatic .prototype object. */
565 	duk_js_push_closure(thr, func, outer_var_env, outer_lex_env, 0 /*add_auto_proto*/);
566 
567 	/* [ source template closure ] */
568 
569 	if (this_to_global) {
570 		DUK_ASSERT(thr->builtins[DUK_BIDX_GLOBAL] != NULL);
571 		duk_push_hobject_bidx(ctx, DUK_BIDX_GLOBAL);
572 	} else {
573 		duk_tval *tv;
574 		DUK_ASSERT(thr->callstack_top >= 2);
575 		act = thr->callstack + thr->callstack_top + level;  /* caller */
576 		tv = thr->valstack + act->idx_bottom - 1;  /* this is just beneath bottom */
577 		DUK_ASSERT(tv >= thr->valstack);
578 		duk_push_tval(ctx, tv);
579 	}
580 
581 	DUK_DDD(DUK_DDDPRINT("eval -> lex_env=%!iO, var_env=%!iO, this_binding=%!T",
582 	                     (duk_heaphdr *) outer_lex_env,
583 	                     (duk_heaphdr *) outer_var_env,
584 	                     duk_get_tval(ctx, -1)));
585 
586 	/* [ source template closure this ] */
587 
588 	duk_call_method(ctx, 0);
589 
590 	/* [ source template result ] */
591 
592 	return 1;
593 }
594 
595 /*
596  *  Parsing of ints and floats
597  */
598 
duk_bi_global_object_parse_int(duk_context * ctx)599 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_int(duk_context *ctx) {
600 	duk_int32_t radix;
601 	duk_small_uint_t s2n_flags;
602 
603 	DUK_ASSERT_TOP(ctx, 2);
604 	duk_to_string(ctx, 0);
605 
606 	radix = duk_to_int32(ctx, 1);
607 
608 	s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
609 	            DUK_S2N_FLAG_ALLOW_GARBAGE |
610 	            DUK_S2N_FLAG_ALLOW_PLUS |
611 	            DUK_S2N_FLAG_ALLOW_MINUS |
612 	            DUK_S2N_FLAG_ALLOW_LEADING_ZERO |
613 	            DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
614 
615 	/* Specification stripPrefix maps to DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT.
616 	 *
617 	 * Don't autodetect octals (from leading zeroes), require user code to
618 	 * provide an explicit radix 8 for parsing octal.  See write-up from Mozilla:
619 	 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseInt#ECMAScript_5_Removes_Octal_Interpretation
620 	 */
621 
622 	if (radix != 0) {
623 		if (radix < 2 || radix > 36) {
624 			goto ret_nan;
625 		}
626 		if (radix != 16) {
627 			s2n_flags &= ~DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
628 		}
629 	} else {
630 		radix = 10;
631 	}
632 
633 	duk_dup(ctx, 0);
634 	duk_numconv_parse(ctx, radix, s2n_flags);
635 	return 1;
636 
637  ret_nan:
638 	duk_push_nan(ctx);
639 	return 1;
640 }
641 
duk_bi_global_object_parse_float(duk_context * ctx)642 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_float(duk_context *ctx) {
643 	duk_small_uint_t s2n_flags;
644 	duk_int32_t radix;
645 
646 	DUK_ASSERT_TOP(ctx, 1);
647 	duk_to_string(ctx, 0);
648 
649 	radix = 10;
650 
651 	/* XXX: check flags */
652 	s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
653 	            DUK_S2N_FLAG_ALLOW_EXP |
654 	            DUK_S2N_FLAG_ALLOW_GARBAGE |
655 	            DUK_S2N_FLAG_ALLOW_PLUS |
656 	            DUK_S2N_FLAG_ALLOW_MINUS |
657 	            DUK_S2N_FLAG_ALLOW_INF |
658 	            DUK_S2N_FLAG_ALLOW_FRAC |
659 	            DUK_S2N_FLAG_ALLOW_NAKED_FRAC |
660 	            DUK_S2N_FLAG_ALLOW_EMPTY_FRAC |
661 	            DUK_S2N_FLAG_ALLOW_LEADING_ZERO;
662 
663 	duk_numconv_parse(ctx, radix, s2n_flags);
664 	return 1;
665 }
666 
667 /*
668  *  Number checkers
669  */
670 
duk_bi_global_object_is_nan(duk_context * ctx)671 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_nan(duk_context *ctx) {
672 	duk_double_t d = duk_to_number(ctx, 0);
673 	duk_push_boolean(ctx, DUK_ISNAN(d));
674 	return 1;
675 }
676 
duk_bi_global_object_is_finite(duk_context * ctx)677 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_finite(duk_context *ctx) {
678 	duk_double_t d = duk_to_number(ctx, 0);
679 	duk_push_boolean(ctx, DUK_ISFINITE(d));
680 	return 1;
681 }
682 
683 /*
684  *  URI handling
685  */
686 
duk_bi_global_object_decode_uri(duk_context * ctx)687 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri(duk_context *ctx) {
688 	return duk__transform_helper(ctx, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_reserved_table);
689 }
690 
duk_bi_global_object_decode_uri_component(duk_context * ctx)691 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri_component(duk_context *ctx) {
692 	return duk__transform_helper(ctx, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_component_reserved_table);
693 }
694 
duk_bi_global_object_encode_uri(duk_context * ctx)695 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri(duk_context *ctx) {
696 	return duk__transform_helper(ctx, duk__transform_callback_encode_uri, (const void *) duk__encode_uriunescaped_table);
697 }
698 
duk_bi_global_object_encode_uri_component(duk_context * ctx)699 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri_component(duk_context *ctx) {
700 	return duk__transform_helper(ctx, duk__transform_callback_encode_uri, (const void *) duk__encode_uricomponent_unescaped_table);
701 }
702 
703 #ifdef DUK_USE_SECTION_B
duk_bi_global_object_escape(duk_context * ctx)704 DUK_INTERNAL duk_ret_t duk_bi_global_object_escape(duk_context *ctx) {
705 	return duk__transform_helper(ctx, duk__transform_callback_escape, (const void *) NULL);
706 }
707 
duk_bi_global_object_unescape(duk_context * ctx)708 DUK_INTERNAL duk_ret_t duk_bi_global_object_unescape(duk_context *ctx) {
709 	return duk__transform_helper(ctx, duk__transform_callback_unescape, (const void *) NULL);
710 }
711 #else  /* DUK_USE_SECTION_B */
duk_bi_global_object_escape(duk_context * ctx)712 DUK_INTERNAL duk_ret_t duk_bi_global_object_escape(duk_context *ctx) {
713 	DUK_UNREF(ctx);
714 	return DUK_RET_UNSUPPORTED_ERROR;
715 }
716 
duk_bi_global_object_unescape(duk_context * ctx)717 DUK_INTERNAL duk_ret_t duk_bi_global_object_unescape(duk_context *ctx) {
718 	DUK_UNREF(ctx);
719 	return DUK_RET_UNSUPPORTED_ERROR;
720 }
721 #endif  /* DUK_USE_SECTION_B */
722 
723 #if defined(DUK_USE_BROWSER_LIKE) && (defined(DUK_USE_FILE_IO) || defined(DUK_USE_DEBUGGER_SUPPORT))
duk_bi_global_object_print_helper(duk_context * ctx)724 DUK_INTERNAL duk_ret_t duk_bi_global_object_print_helper(duk_context *ctx) {
725 	duk_hthread *thr = (duk_hthread *) ctx;
726 	duk_int_t magic;
727 	duk_idx_t nargs;
728 	const duk_uint8_t *buf;
729 	duk_size_t sz_buf;
730 	const char nl = (const char) DUK_ASC_LF;
731 #ifndef DUK_USE_PREFER_SIZE
732 	duk_uint8_t buf_stack[256];
733 #endif
734 #ifdef DUK_USE_FILE_IO
735 	duk_file *f_out;
736 #endif
737 
738 	DUK_UNREF(thr);
739 
740 	magic = duk_get_current_magic(ctx);
741 	DUK_UNREF(magic);
742 
743 	nargs = duk_get_top(ctx);
744 
745 	/* If argument count is 1 and first argument is a buffer, write the buffer
746 	 * as raw data into the file without a newline; this allows exact control
747 	 * over stdout/stderr without an additional entrypoint (useful for now).
748 	 *
749 	 * Otherwise current print/alert semantics are to ToString() coerce
750 	 * arguments, join them with a single space, and append a newline.
751 	 */
752 
753 	if (nargs == 1 && duk_is_buffer(ctx, 0)) {
754 		buf = (const duk_uint8_t *) duk_get_buffer(ctx, 0, &sz_buf);
755 		DUK_ASSERT(buf != NULL);
756 	} else if (nargs > 0) {
757 #ifdef DUK_USE_PREFER_SIZE
758 		/* Compact but lots of churn. */
759 		duk_push_hstring_stridx(thr, DUK_STRIDX_SPACE);
760 		duk_insert(ctx, 0);
761 		duk_join(ctx, nargs);
762 		duk_push_string(thr, "\n");
763 		duk_concat(ctx, 2);
764 		buf = (const duk_uint8_t *) duk_get_lstring(ctx, -1, &sz_buf);
765 		DUK_ASSERT(buf != NULL);
766 #else  /* DUK_USE_PREFER_SIZE */
767 		/* Higher footprint, less churn. */
768 		duk_idx_t i;
769 		duk_size_t sz_str;
770 		const duk_uint8_t *p_str;
771 		duk_uint8_t *p;
772 
773 		sz_buf = (duk_size_t) nargs;  /* spaces (nargs - 1) + newline */
774 		for (i = 0; i < nargs; i++) {
775 			(void) duk_to_lstring(ctx, i, &sz_str);
776 			sz_buf += sz_str;
777 		}
778 
779 		if (sz_buf <= sizeof(buf_stack)) {
780 			p = (duk_uint8_t *) buf_stack;
781 		} else {
782 			p = (duk_uint8_t *) duk_push_fixed_buffer(ctx, sz_buf);
783 			DUK_ASSERT(p != NULL);
784 		}
785 
786 		buf = (const duk_uint8_t *) p;
787 		for (i = 0; i < nargs; i++) {
788 			p_str = (const duk_uint8_t *) duk_get_lstring(ctx, i, &sz_str);
789 			DUK_ASSERT(p_str != NULL);
790 			DUK_MEMCPY((void *) p, (const void *) p_str, sz_str);
791 			p += sz_str;
792 			*p++ = (duk_uint8_t) (i == nargs - 1 ? DUK_ASC_LF : DUK_ASC_SPACE);
793 		}
794 		DUK_ASSERT((const duk_uint8_t *) p == buf + sz_buf);
795 #endif  /* DUK_USE_PREFER_SIZE */
796 	} else {
797 		buf = (const duk_uint8_t *) &nl;
798 		sz_buf = 1;
799 	}
800 
801 	/* 'buf' contains the string to write, 'sz_buf' contains the length
802 	 * (which may be zero).
803 	 */
804 	DUK_ASSERT(buf != NULL);
805 
806 	if (sz_buf == 0) {
807 		return 0;
808 	}
809 
810 #ifdef DUK_USE_FILE_IO
811 	f_out = (magic ? DUK_STDERR : DUK_STDOUT);
812 	DUK_FWRITE((const void *) buf, 1, (size_t) sz_buf, f_out);
813 	DUK_FFLUSH(f_out);
814 #endif
815 
816 #if defined(DUK_USE_DEBUGGER_SUPPORT) && defined(DUK_USE_DEBUGGER_FWD_PRINTALERT)
817 	if (DUK_HEAP_IS_DEBUGGER_ATTACHED(thr->heap)) {
818 		duk_debug_write_notify(thr, magic ? DUK_DBG_CMD_ALERT : DUK_DBG_CMD_PRINT);
819 		duk_debug_write_string(thr, (const char *) buf, sz_buf);
820 		duk_debug_write_eom(thr);
821 	}
822 #endif
823 	return 0;
824 }
825 #elif defined(DUK_USE_BROWSER_LIKE)  /* print provider */
/* Print/alert helper variant compiled when the preceding print provider
 * condition is not met but DUK_USE_BROWSER_LIKE is defined.  The call is
 * accepted and ignored: 'ctx' is not used, nothing is written anywhere,
 * and 0 is returned (the same value the full implementation returns).
 */
DUK_INTERNAL duk_ret_t duk_bi_global_object_print_helper(duk_context *ctx) {
	DUK_UNREF(ctx);  /* silence unused argument warning */
	return 0;
}
830 #else  /* print provider */
/* Print/alert helper variant compiled when none of the earlier print
 * provider conditions are met.  'ctx' is not used; the function only
 * returns DUK_RET_UNSUPPORTED_ERROR (presumably surfaced to the caller
 * as an "unsupported" error -- confirm against the DUK_RET_xxx handling).
 */
DUK_INTERNAL duk_ret_t duk_bi_global_object_print_helper(duk_context *ctx) {
	DUK_UNREF(ctx);  /* silence unused argument warning */
	return DUK_RET_UNSUPPORTED_ERROR;
}
835 #endif  /* print provider */
836 
837 /*
838  *  CommonJS require() and modules support
839  */
840 
841 #if defined(DUK_USE_COMMONJS_MODULES)
duk__bi_global_resolve_module_id(duk_context * ctx,const char * req_id,const char * mod_id)842 DUK_LOCAL void duk__bi_global_resolve_module_id(duk_context *ctx, const char *req_id, const char *mod_id) {
843 	duk_hthread *thr = (duk_hthread *) ctx;
844 	duk_uint8_t buf[DUK_BI_COMMONJS_MODULE_ID_LIMIT];
845 	duk_uint8_t *p;
846 	duk_uint8_t *q;
847 	duk_uint8_t *q_last;  /* last component */
848 	duk_int_t int_rc;
849 
850 	DUK_ASSERT(req_id != NULL);
851 	/* mod_id may be NULL */
852 
853 	/*
854 	 *  A few notes on the algorithm:
855 	 *
856 	 *    - Terms are not allowed to begin with a period unless the term
857 	 *      is either '.' or '..'.  This simplifies implementation (and
858 	 *      is within CommonJS modules specification).
859 	 *
860 	 *    - There are few output bound checks here.  This is on purpose:
861 	 *      the resolution input is length checked and the output is never
862 	 *      longer than the input.  The resolved output is written directly
863 	 *      over the input because it's never longer than the input at any
864 	 *      point in the algorithm.
865 	 *
866 	 *    - Non-ASCII characters are processed as individual bytes and
867 	 *      need no special treatment.  However, U+0000 terminates the
868 	 *      algorithm; this is not an issue because U+0000 is not a
869 	 *      desirable term character anyway.
870 	 */
871 
872 	/*
873 	 *  Set up the resolution input which is the requested ID directly
874 	 *  (if absolute or no current module path) or with current module
875 	 *  ID prepended (if relative and current module path exists).
876 	 *
877 	 *  Suppose current module is 'foo/bar' and relative path is './quux'.
878 	 *  The 'bar' component must be replaced so the initial input here is
879 	 *  'foo/bar/.././quux'.
880 	 */
881 
882 	if (mod_id != NULL && req_id[0] == '.') {
883 		int_rc = DUK_SNPRINTF((char *) buf, sizeof(buf), "%s/../%s", mod_id, req_id);
884 	} else {
885 		int_rc = DUK_SNPRINTF((char *) buf, sizeof(buf), "%s", req_id);
886 	}
887 	if (int_rc >= (duk_int_t) sizeof(buf) || int_rc < 0) {
888 		/* Potentially truncated, NUL not guaranteed in any case.
889 		 * The (int_rc < 0) case should not occur in practice.
890 		 */
891 		DUK_DD(DUK_DDPRINT("resolve error: temporary working module ID doesn't fit into resolve buffer"));
892 		goto resolve_error;
893 	}
894 	DUK_ASSERT(DUK_STRLEN((const char *) buf) < sizeof(buf));  /* at most sizeof(buf) - 1 */
895 
896 	DUK_DDD(DUK_DDDPRINT("input module id: '%s'", (const char *) buf));
897 
898 	/*
899 	 *  Resolution loop.  At the top of the loop we're expecting a valid
900 	 *  term: '.', '..', or a non-empty identifier not starting with a period.
901 	 */
902 
903 	p = buf;
904 	q = buf;
905 	for (;;) {
906 		duk_uint_fast8_t c;
907 
908 		/* Here 'p' always points to the start of a term.
909 		 *
910 		 * We can also unconditionally reset q_last here: if this is
911 		 * the last (non-empty) term q_last will have the right value
912 		 * on loop exit.
913 		 */
914 
915 		DUK_ASSERT(p >= q);  /* output is never longer than input during resolution */
916 
917 		DUK_DDD(DUK_DDDPRINT("resolve loop top: p -> '%s', q=%p, buf=%p",
918 		                     (const char *) p, (void *) q, (void *) buf));
919 
920 		q_last = q;
921 
922 		c = *p++;
923 		if (DUK_UNLIKELY(c == 0)) {
924 			DUK_DD(DUK_DDPRINT("resolve error: requested ID must end with a non-empty term"));
925 			goto resolve_error;
926 		} else if (DUK_UNLIKELY(c == '.')) {
927 			c = *p++;
928 			if (c == '/') {
929 				/* Term was '.' and is eaten entirely (including dup slashes). */
930 				goto eat_dup_slashes;
931 			}
932 			if (c == '.' && *p == '/') {
933 				/* Term was '..', backtrack resolved name by one component.
934 				 *  q[-1] = previous slash (or beyond start of buffer)
935 				 *  q[-2] = last char of previous component (or beyond start of buffer)
936 				 */
937 				p++;  /* eat (first) input slash */
938 				DUK_ASSERT(q >= buf);
939 				if (q == buf) {
940 					DUK_DD(DUK_DDPRINT("resolve error: term was '..' but nothing to backtrack"));
941 					goto resolve_error;
942 				}
943 				DUK_ASSERT(*(q - 1) == '/');
944 				q--;  /* backtrack to last output slash (dups already eliminated) */
945 				for (;;) {
946 					/* Backtrack to previous slash or start of buffer. */
947 					DUK_ASSERT(q >= buf);
948 					if (q == buf) {
949 						break;
950 					}
951 					if (*(q - 1) == '/') {
952 						break;
953 					}
954 					q--;
955 				}
956 				goto eat_dup_slashes;
957 			}
958 			DUK_DD(DUK_DDPRINT("resolve error: term begins with '.' but is not '.' or '..' (not allowed now)"));
959 			goto resolve_error;
960 		} else if (DUK_UNLIKELY(c == '/')) {
961 			/* e.g. require('/foo'), empty terms not allowed */
962 			DUK_DD(DUK_DDPRINT("resolve error: empty term (not allowed now)"));
963 			goto resolve_error;
964 		} else {
965 			for (;;) {
966 				/* Copy term name until end or '/'. */
967 				*q++ = c;
968 				c = *p++;
969 				if (DUK_UNLIKELY(c == 0)) {
970 					/* This was the last term, and q_last was
971 					 * updated to match this term at loop top.
972 					 */
973 					goto loop_done;
974 				} else if (DUK_UNLIKELY(c == '/')) {
975 					*q++ = '/';
976 					break;
977 				} else {
978 					/* write on next loop */
979 				}
980 			}
981 		}
982 
983 	 eat_dup_slashes:
984 		for (;;) {
985 			/* eat dup slashes */
986 			c = *p;
987 			if (DUK_LIKELY(c != '/')) {
988 				break;
989 			}
990 			p++;
991 		}
992 	}
993  loop_done:
994 	/* Output #1: resolved absolute name */
995 	DUK_ASSERT(q >= buf);
996 	duk_push_lstring(ctx, (const char *) buf, (size_t) (q - buf));
997 
998 	/* Output #2: last component name */
999 	DUK_ASSERT(q >= q_last);
1000 	DUK_ASSERT(q_last >= buf);
1001 	duk_push_lstring(ctx, (const char *) q_last, (size_t) (q - q_last));
1002 
1003 	DUK_DD(DUK_DDPRINT("after resolving module name: buf=%p, q_last=%p, q=%p",
1004 	                   (void *) buf, (void *) q_last, (void *) q));
1005 	return;
1006 
1007  resolve_error:
1008 	DUK_ERROR_FMT1(thr, DUK_ERR_TYPE_ERROR, "cannot resolve module id: %s", (const char *) req_id);
1009 }
1010 #endif  /* DUK_USE_COMMONJS_MODULES */
1011 
1012 #if defined(DUK_USE_COMMONJS_MODULES)
/* Stack indices for better readability.  These are absolute value stack
 * positions: duk_bi_global_object_require() pushes the values in exactly
 * this order and leaves them in place for the rest of the call.
 */
#define DUK__IDX_REQUESTED_ID   0  /* Module id requested */
#define DUK__IDX_REQUIRE        1  /* Current require() function */
#define DUK__IDX_REQUIRE_ID     2  /* The base ID of the current require() function, resolution base */
#define DUK__IDX_RESOLVED_ID    3  /* Resolved, normalized absolute module ID */
#define DUK__IDX_LASTCOMP       4  /* Last component name in resolved path */
#define DUK__IDX_DUKTAPE        5  /* Duktape object */
#define DUK__IDX_MODLOADED      6  /* Duktape.modLoaded[] module cache */
#define DUK__IDX_UNDEFINED      7  /* 'undefined', artifact of lookup */
#define DUK__IDX_FRESH_REQUIRE  8  /* New require() function for module, updated resolution base */
#define DUK__IDX_EXPORTS        9  /* Default exports table */
#define DUK__IDX_MODULE         10  /* Module object containing module.exports, etc */

/* CommonJS require(): load a module (or return an already loaded one).
 *
 * Value stack on entry: [ requested_id ]; the requested ID must be a string
 * (duk_require_string() is used for it).  The 'id' property of the currently
 * executing require() function is used as the resolution base if it is a
 * string.
 *
 * Steps:
 *   1. Resolve the requested ID into an absolute ID and last component.
 *   2. If Duktape.modLoaded[resolved_id] exists, return its .exports.
 *   3. Otherwise create a fresh require() function, an exports table and a
 *      module table, and register the module table into Duktape.modLoaded[].
 *   4. Call Duktape.modSearch(resolved_id, fresh_require, exports, module)
 *      using a protected call.
 *   5. If modSearch() returned a string, treat it as module source code:
 *      wrap it into a function, evaluate it, and call the function using
 *      a protected call.
 *   6. Return module.exports (re-read from the module table).
 *
 * If either protected call fails, the Duktape.modLoaded[] entry is deleted
 * and the original error is rethrown.
 *
 * Returns 1 with module.exports on the value stack top; errors are thrown.
 */
DUK_INTERNAL duk_ret_t duk_bi_global_object_require(duk_context *ctx) {
	const char *str_req_id;  /* requested identifier */
	const char *str_mod_id;  /* require.id of current module */
	duk_int_t pcall_rc;

	/* NOTE: we try to minimize code size by avoiding unnecessary pops,
	 * so the stack looks a bit cluttered in this function.  DUK_ASSERT_TOP()
	 * assertions are used to ensure stack configuration is correct at each
	 * step.
	 */

	/*
	 *  Resolve module identifier into canonical absolute form.
	 */

	str_req_id = duk_require_string(ctx, DUK__IDX_REQUESTED_ID);
	duk_push_current_function(ctx);
	duk_get_prop_stridx(ctx, -1, DUK_STRIDX_ID);
	str_mod_id = duk_get_string(ctx, DUK__IDX_REQUIRE_ID);  /* ignore non-strings */
	DUK_DDD(DUK_DDDPRINT("resolve module id: requested=%!T, currentmodule=%!T",
	                     duk_get_tval(ctx, DUK__IDX_REQUESTED_ID),
	                     duk_get_tval(ctx, DUK__IDX_REQUIRE_ID)));
	duk__bi_global_resolve_module_id(ctx, str_req_id, str_mod_id);
	/* The raw string pointers are not needed after resolution; they are
	 * cleared so they are not reused by accident further below.
	 */
	str_req_id = NULL;
	str_mod_id = NULL;
	DUK_DDD(DUK_DDDPRINT("resolved module id: requested=%!T, currentmodule=%!T, result=%!T, lastcomp=%!T",
	                     duk_get_tval(ctx, DUK__IDX_REQUESTED_ID),
	                     duk_get_tval(ctx, DUK__IDX_REQUIRE_ID),
	                     duk_get_tval(ctx, DUK__IDX_RESOLVED_ID),
	                     duk_get_tval(ctx, DUK__IDX_LASTCOMP)));

	/* [ requested_id require require.id resolved_id last_comp ] */
	DUK_ASSERT_TOP(ctx, DUK__IDX_LASTCOMP + 1);

	/*
	 *  Cached module check.
	 *
	 *  If module has been loaded or its loading has already begun without
	 *  finishing, return the same cached value ('exports').  The value is
	 *  registered when module load starts so that circular references can
	 *  be supported to some extent.
	 */

	duk_push_hobject_bidx(ctx, DUK_BIDX_DUKTAPE);
	duk_get_prop_stridx(ctx, DUK__IDX_DUKTAPE, DUK_STRIDX_MOD_LOADED);  /* Duktape.modLoaded */
	(void) duk_require_hobject(ctx, DUK__IDX_MODLOADED);  /* Duktape.modLoaded must be an object */
	DUK_ASSERT_TOP(ctx, DUK__IDX_MODLOADED + 1);

	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
	if (duk_get_prop(ctx, DUK__IDX_MODLOADED)) {
		/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded Duktape.modLoaded[id] ] */
		DUK_DD(DUK_DDPRINT("module already loaded: %!T",
		                   duk_get_tval(ctx, DUK__IDX_RESOLVED_ID)));
		duk_get_prop_stridx(ctx, -1, DUK_STRIDX_EXPORTS);  /* return module.exports */
		return 1;
	}
	DUK_ASSERT_TOP(ctx, DUK__IDX_UNDEFINED + 1);

	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined ] */

	/*
	 *  Module not loaded (and loading not started previously).
	 *
	 *  Create a new require() function with 'id' set to resolved ID
	 *  of module being loaded.  Also create 'exports' and 'module'
	 *  tables.  The module table is registered to Duktape.modLoaded[]
	 *  further below, before the user module search callback is called,
	 *  and the registration is deleted again if loading fails.
	 */

	DUK_D(DUK_DPRINT("loading module %!T, resolution base %!T, requested ID %!T -> resolved ID %!T, last component %!T",
                         duk_get_tval(ctx, DUK__IDX_RESOLVED_ID),
                         duk_get_tval(ctx, DUK__IDX_REQUIRE_ID),
                         duk_get_tval(ctx, DUK__IDX_REQUESTED_ID),
                         duk_get_tval(ctx, DUK__IDX_RESOLVED_ID),
                         duk_get_tval(ctx, DUK__IDX_LASTCOMP)));

	/* Fresh require: require.id is left configurable (but not writable)
	 * so that it is not easy to accidentally tweak it, but it can still be
	 * done with Object.defineProperty().
	 *
	 * XXX: require.id could also be just made non-configurable, as there
	 * is no practical reason to touch it.
	 */
	duk_push_c_function(ctx, duk_bi_global_object_require, 1 /*nargs*/);
	duk_push_hstring_stridx(ctx, DUK_STRIDX_REQUIRE);
	duk_xdef_prop_stridx(ctx, DUK__IDX_FRESH_REQUIRE, DUK_STRIDX_NAME, DUK_PROPDESC_FLAGS_NONE);
	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
	duk_xdef_prop_stridx(ctx, DUK__IDX_FRESH_REQUIRE, DUK_STRIDX_ID, DUK_PROPDESC_FLAGS_C);  /* a fresh require() with require.id = resolved target module id */

	/* Module table:
	 * - module.exports: initial exports table (may be replaced by user)
	 * - module.id is non-writable and non-configurable, as the CommonJS
	 *   spec suggests this if possible
	 * - module.filename: not set, defaults to resolved ID if not explicitly
	 *   set by modSearch() (note capitalization, not .fileName, matches Node.js)
	 * - module.name: not set, defaults to last component of resolved ID if
	 *   not explicitly set by modSearch()
	 */
	duk_push_object(ctx);  /* exports */
	duk_push_object(ctx);  /* module */
	duk_dup(ctx, DUK__IDX_EXPORTS);
	duk_xdef_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_EXPORTS, DUK_PROPDESC_FLAGS_WC);  /* module.exports = exports */
	duk_dup(ctx, DUK__IDX_RESOLVED_ID);  /* resolved id: require(id) must return this same module */
	duk_xdef_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_ID, DUK_PROPDESC_FLAGS_NONE);  /* module.id = resolved_id */
	duk_compact(ctx, DUK__IDX_MODULE);  /* module table remains registered to modLoaded, minimize its size */
	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 1);

	DUK_DD(DUK_DDPRINT("module table created: %!T", duk_get_tval(ctx, DUK__IDX_MODULE)));

	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module ] */

	/* Register the module table early to modLoaded[] so that we can
	 * support circular references even in modSearch().  If an error
	 * is thrown, we'll delete the reference.
	 */
	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
	duk_dup(ctx, DUK__IDX_MODULE);
	duk_put_prop(ctx, DUK__IDX_MODLOADED);  /* Duktape.modLoaded[resolved_id] = module */

	/*
	 *  Call user provided module search function and build the wrapped
	 *  module source code (if necessary).  The module search function
	 *  can be used to implement pure Ecmascript, pure C, and mixed
	 *  Ecmascript/C modules.
	 *
	 *  The module search function can operate on the exports table directly
	 *  (e.g. DLL code can register values to it).  It can also return a
	 *  string which is interpreted as module source code (if a non-string
	 *  is returned the module is assumed to be a pure C one).  If a module
	 *  cannot be found, an error must be thrown by the user callback.
	 *
	 *  Because Duktape.modLoaded[] already contains the module being
	 *  loaded, circular references for C modules should also work
	 *  (although expected to be quite rare).
	 */

	/* Wrapper prefix is pushed before the modSearch() call so that the
	 * prefix, the returned source, and the suffix end up adjacent on the
	 * stack for the duk_concat() below.
	 */
	duk_push_string(ctx, "(function(require,exports,module){");

	/* Duktape.modSearch(resolved_id, fresh_require, exports, module). */
	duk_get_prop_stridx(ctx, DUK__IDX_DUKTAPE, DUK_STRIDX_MOD_SEARCH);  /* Duktape.modSearch */
	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
	duk_dup(ctx, DUK__IDX_FRESH_REQUIRE);
	duk_dup(ctx, DUK__IDX_EXPORTS);
	duk_dup(ctx, DUK__IDX_MODULE);  /* [ ... Duktape.modSearch resolved_id fresh_require exports module ] */
	pcall_rc = duk_pcall(ctx, 4 /*nargs*/);  /* -> [ ... source ] */
	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 3);

	if (pcall_rc != DUK_EXEC_SUCCESS) {
		/* Delete entry in Duktape.modLoaded[] and rethrow. */
		goto delete_rethrow;
	}

	/* If user callback did not return source code, module loading
	 * is finished (user callback initialized exports table directly).
	 */
	if (!duk_is_string(ctx, -1)) {
		/* User callback did not return source code, so module loading
		 * is finished: just return the final module.exports and
		 * we're done.
		 */
		goto return_exports;
	}

	/* Finish the wrapped module source.  Force module.filename as the
	 * function .fileName so it gets set for functions defined within a
	 * module.  This also ensures loggers created within the module get
	 * the module ID (or overridden filename) as their default logger name.
	 * (Note capitalization: .filename matches Node.js while .fileName is
	 * used elsewhere in Duktape.)
	 */
	duk_push_string(ctx, "})");
	duk_concat(ctx, 3);  /* wrapper prefix + source + wrapper suffix */
	if (!duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_FILENAME)) {
		/* module.filename for .fileName, default to resolved ID if
		 * not present.
		 */
		duk_pop(ctx);
		duk_dup(ctx, DUK__IDX_RESOLVED_ID);
	}
	duk_eval_raw(ctx, NULL, 0, DUK_COMPILE_EVAL);

	/* Module has now evaluated to a wrapped module function.  Force its
	 * .name to match module.name (defaults to last component of resolved
	 * ID) so that it is shown in stack traces too.  Note that we must not
	 * introduce an actual name binding into the function scope (which is
	 * usually the case with a named function) because it would affect the
	 * scope seen by the module and shadow accesses to globals of the same name.
	 * This is now done by compiling the function as anonymous and then forcing
	 * its .name without setting a "has name binding" flag.
	 */

	duk_push_hstring_stridx(ctx, DUK_STRIDX_NAME);
	if (!duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_NAME)) {
		/* module.name for .name, default to last component if
		 * not present.
		 */
		duk_pop(ctx);
		duk_dup(ctx, DUK__IDX_LASTCOMP);
	}
	duk_def_prop(ctx, -3, DUK_DEFPROP_HAVE_VALUE | DUK_DEFPROP_FORCE);  /* [ ... mod_func key value ] -> [ ... mod_func ] */

	/*
	 *  Call the wrapped module function.
	 *
	 *  Use a protected call so that we can update Duktape.modLoaded[resolved_id]
	 *  even if the module throws an error.
	 */

	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module mod_func ] */
	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 2);

	duk_dup(ctx, DUK__IDX_EXPORTS);  /* exports (this binding) */
	duk_dup(ctx, DUK__IDX_FRESH_REQUIRE);  /* fresh require (argument) */
	duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_EXPORTS);  /* relookup exports from module.exports in case it was changed by modSearch */
	duk_dup(ctx, DUK__IDX_MODULE);  /* module (argument) */
	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 6);

	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module mod_func exports fresh_require exports module ] */

	pcall_rc = duk_pcall_method(ctx, 3 /*nargs*/);
	if (pcall_rc != DUK_EXEC_SUCCESS) {
		/* Module loading failed.  Node.js will forget the module
		 * registration so that another require() will try to load
		 * the module again.  Mimic that behavior.
		 */
		goto delete_rethrow;
	}

	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module result(ignored) ] */
	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 2);

	/* fall through */

 return_exports:
	/* Re-read module.exports from the module table rather than using the
	 * initial exports table at DUK__IDX_EXPORTS: the user may have
	 * replaced module.exports.
	 */
	duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_EXPORTS);
	duk_compact(ctx, -1);  /* compact the exports table */
	return 1;  /* return module.exports */

 delete_rethrow:
	/* Value stack top is the error from the failed protected call; the
	 * key pushed here is consumed by duk_del_prop() so the error is on
	 * top again for duk_throw().
	 */
	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
	duk_del_prop(ctx, DUK__IDX_MODLOADED);  /* delete Duktape.modLoaded[resolved_id] */
	duk_throw(ctx);  /* rethrow original error */
	return 0;  /* not reachable */
}

#undef DUK__IDX_REQUESTED_ID
#undef DUK__IDX_REQUIRE
#undef DUK__IDX_REQUIRE_ID
#undef DUK__IDX_RESOLVED_ID
#undef DUK__IDX_LASTCOMP
#undef DUK__IDX_DUKTAPE
#undef DUK__IDX_MODLOADED
#undef DUK__IDX_UNDEFINED
#undef DUK__IDX_FRESH_REQUIRE
#undef DUK__IDX_EXPORTS
#undef DUK__IDX_MODULE
1283 #else
/* require() variant compiled when DUK_USE_COMMONJS_MODULES is not defined:
 * 'ctx' is not used and the function only returns DUK_RET_UNSUPPORTED_ERROR
 * (presumably surfaced to the caller as an "unsupported" error -- confirm
 * against the DUK_RET_xxx handling).
 */
DUK_INTERNAL duk_ret_t duk_bi_global_object_require(duk_context *ctx) {
	DUK_UNREF(ctx);  /* silence unused argument warning */
	return DUK_RET_UNSUPPORTED_ERROR;
}
1288 #endif  /* DUK_USE_COMMONJS_MODULES */
1289