1 /*
2  *  Global object built-ins
3  */
4 
5 #include "duk_internal.h"
6 
7 /*
8  *  Encoding/decoding helpers
9  */
10 
11 /* XXX: Could add fast path (for each transform callback) with direct byte
12  * lookups (no shifting) and no explicit check for x < 0x80 before table
13  * lookup.
14  */
15 
/* Bitmask helpers for the 16-byte (128 bit) ASCII lookup tables below.
 *
 * DUK__MKBITS() packs eight 0/1 flags into one table byte.  The first
 * argument ends up in bit 0 (least significant) and the last argument in
 * bit 7, so flags are listed in ascending codepoint order.
 *
 * DUK__CHECK_BITMASK() evaluates to a non-zero value when the bit for
 * codepoint 'cp' is set in 'table': the byte index is cp >> 3 and the bit
 * index is cp & 0x07.  The macro does no range checking; callers in this
 * file only use it after checking that cp is in the range [0,0x7f].
 */
#define DUK__MKBITS(a,b,c,d,e,f,g,h) \
	((duk_uint8_t) ( \
		((a) << 0) | \
		((b) << 1) | \
		((c) << 2) | \
		((d) << 3) | \
		((e) << 4) | \
		((f) << 5) | \
		((g) << 6) | \
		((h) << 7) \
	))
#define DUK__CHECK_BITMASK(table,cp) \
	((table)[(cp) >> 3] & (1 << ((cp) & 0x07)))
24 
25 /* E5.1 Section 15.1.3.3: uriReserved + uriUnescaped + '#' */
26 DUK_LOCAL const duk_uint8_t duk__encode_uriunescaped_table[16] = {
27 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
28 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
29 	DUK__MKBITS(0, 1, 0, 1, 1, 0, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x20-0x2f */
30 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
31 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
32 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
33 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
34 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
35 };
36 
37 /* E5.1 Section 15.1.3.4: uriUnescaped */
38 DUK_LOCAL const duk_uint8_t duk__encode_uricomponent_unescaped_table[16] = {
39 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
40 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
41 	DUK__MKBITS(0, 1, 0, 0, 0, 0, 0, 1), DUK__MKBITS(1, 1, 1, 0, 0, 1, 1, 0),  /* 0x20-0x2f */
42 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
43 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
44 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
45 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
46 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
47 };
48 
49 /* E5.1 Section 15.1.3.1: uriReserved + '#' */
50 DUK_LOCAL const duk_uint8_t duk__decode_uri_reserved_table[16] = {
51 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
52 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
53 	DUK__MKBITS(0, 0, 0, 1, 1, 0, 1, 0), DUK__MKBITS(0, 0, 0, 1, 1, 0, 0, 1),  /* 0x20-0x2f */
54 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
55 	DUK__MKBITS(1, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
56 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
57 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
58 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
59 };
60 
61 /* E5.1 Section 15.1.3.2: empty */
62 DUK_LOCAL const duk_uint8_t duk__decode_uri_component_reserved_table[16] = {
63 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
64 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
65 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x20-0x2f */
66 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
67 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
68 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
69 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
70 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
71 };
72 
73 #ifdef DUK_USE_SECTION_B
74 /* E5.1 Section B.2.2, step 7. */
75 DUK_LOCAL const duk_uint8_t duk__escape_unescaped_table[16] = {
76 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
77 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
78 	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 1, 1),  /* 0x20-0x2f */
79 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
80 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
81 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
82 	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
83 	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 0)   /* 0x70-0x7f */
84 };
85 #endif  /* DUK_USE_SECTION_B */
86 
87 #undef DUK__MKBITS
88 
89 typedef struct {
90 	duk_hthread *thr;
91 	duk_hstring *h_str;
92 	duk_bufwriter_ctx bw;
93 	const duk_uint8_t *p;
94 	const duk_uint8_t *p_start;
95 	const duk_uint8_t *p_end;
96 } duk__transform_context;
97 
98 typedef void (*duk__transform_callback)(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp);
99 
100 /* XXX: refactor and share with other code */
duk__decode_hex_escape(const duk_uint8_t * p,duk_small_int_t n)101 DUK_LOCAL duk_small_int_t duk__decode_hex_escape(const duk_uint8_t *p, duk_small_int_t n) {
102 	duk_small_int_t ch;
103 	duk_small_int_t t = 0;
104 
105 	while (n > 0) {
106 		t = t * 16;
107 		ch = (duk_small_int_t) duk_hex_dectab[*p++];
108 		if (DUK_LIKELY(ch >= 0)) {
109 			t += ch;
110 		} else {
111 			return -1;
112 		}
113 		n--;
114 	}
115 	return t;
116 }
117 
duk__transform_helper(duk_context * ctx,duk__transform_callback callback,const void * udata)118 DUK_LOCAL int duk__transform_helper(duk_context *ctx, duk__transform_callback callback, const void *udata) {
119 	duk_hthread *thr = (duk_hthread *) ctx;
120 	duk__transform_context tfm_ctx_alloc;
121 	duk__transform_context *tfm_ctx = &tfm_ctx_alloc;
122 	duk_codepoint_t cp;
123 
124 	tfm_ctx->thr = thr;
125 
126 	tfm_ctx->h_str = duk_to_hstring(ctx, 0);
127 	DUK_ASSERT(tfm_ctx->h_str != NULL);
128 
129 	DUK_BW_INIT_PUSHBUF(thr, &tfm_ctx->bw, DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str));  /* initial size guess */
130 
131 	tfm_ctx->p_start = DUK_HSTRING_GET_DATA(tfm_ctx->h_str);
132 	tfm_ctx->p_end = tfm_ctx->p_start + DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str);
133 	tfm_ctx->p = tfm_ctx->p_start;
134 
135 	while (tfm_ctx->p < tfm_ctx->p_end) {
136 		cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end);
137 		callback(tfm_ctx, udata, cp);
138 	}
139 
140 	DUK_BW_COMPACT(thr, &tfm_ctx->bw);
141 
142 	duk_to_string(ctx, -1);
143 	return 1;
144 }
145 
duk__transform_callback_encode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)146 DUK_LOCAL void duk__transform_callback_encode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
147 	duk_uint8_t xutf8_buf[DUK_UNICODE_MAX_XUTF8_LENGTH];
148 	duk_small_int_t len;
149 	duk_codepoint_t cp1, cp2;
150 	duk_small_int_t i, t;
151 	const duk_uint8_t *unescaped_table = (const duk_uint8_t *) udata;
152 
153 	/* UTF-8 encoded bytes escaped as %xx%xx%xx... -> 3 * nbytes.
154 	 * Codepoint range is restricted so this is a slightly too large
155 	 * but doesn't matter.
156 	 */
157 	DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 3 * DUK_UNICODE_MAX_XUTF8_LENGTH);
158 
159 	if (cp < 0) {
160 		goto uri_error;
161 	} else if ((cp < 0x80L) && DUK__CHECK_BITMASK(unescaped_table, cp)) {
162 		DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
163 		return;
164 	} else if (cp >= 0xdc00L && cp <= 0xdfffL) {
165 		goto uri_error;
166 	} else if (cp >= 0xd800L && cp <= 0xdbffL) {
167 		/* Needs lookahead */
168 		if (duk_unicode_decode_xutf8(tfm_ctx->thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end, (duk_ucodepoint_t *) &cp2) == 0) {
169 			goto uri_error;
170 		}
171 		if (!(cp2 >= 0xdc00L && cp2 <= 0xdfffL)) {
172 			goto uri_error;
173 		}
174 		cp1 = cp;
175 		cp = ((cp1 - 0xd800L) << 10) + (cp2 - 0xdc00L) + 0x10000L;
176 	} else if (cp > 0x10ffffL) {
177 		/* Although we can allow non-BMP characters (they'll decode
178 		 * back into surrogate pairs), we don't allow extended UTF-8
179 		 * characters; they would encode to URIs which won't decode
180 		 * back because of strict UTF-8 checks in URI decoding.
181 		 * (However, we could just as well allow them here.)
182 		 */
183 		goto uri_error;
184 	} else {
185 		/* Non-BMP characters within valid UTF-8 range: encode as is.
186 		 * They'll decode back into surrogate pairs if the escaped
187 		 * output is decoded.
188 		 */
189 		;
190 	}
191 
192 	len = duk_unicode_encode_xutf8((duk_ucodepoint_t) cp, xutf8_buf);
193 	for (i = 0; i < len; i++) {
194 		t = (int) xutf8_buf[i];
195 		DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
196 		                      &tfm_ctx->bw,
197 		                      DUK_ASC_PERCENT,
198 		                      (duk_uint8_t) duk_uc_nybbles[t >> 4],
199                                       (duk_uint8_t) duk_uc_nybbles[t & 0x0f]);
200 	}
201 
202 	return;
203 
204  uri_error:
205 	DUK_ERROR(tfm_ctx->thr, DUK_ERR_URI_ERROR, "invalid input");
206 }
207 
duk__transform_callback_decode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)208 DUK_LOCAL void duk__transform_callback_decode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
209 	const duk_uint8_t *reserved_table = (const duk_uint8_t *) udata;
210 	duk_small_uint_t utf8_blen;
211 	duk_codepoint_t min_cp;
212 	duk_small_int_t t;  /* must be signed */
213 	duk_small_uint_t i;
214 
215 	/* Maximum write size: XUTF8 path writes max DUK_UNICODE_MAX_XUTF8_LENGTH,
216 	 * percent escape path writes max two times CESU-8 encoded BMP length.
217 	 */
218 	DUK_BW_ENSURE(tfm_ctx->thr,
219 	              &tfm_ctx->bw,
220 	              (DUK_UNICODE_MAX_XUTF8_LENGTH >= 2 * DUK_UNICODE_MAX_CESU8_BMP_LENGTH ?
221 	              DUK_UNICODE_MAX_XUTF8_LENGTH : DUK_UNICODE_MAX_CESU8_BMP_LENGTH));
222 
223 	if (cp == (duk_codepoint_t) '%') {
224 		const duk_uint8_t *p = tfm_ctx->p;
225 		duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p);  /* bytes left */
226 
227 		DUK_DDD(DUK_DDDPRINT("percent encoding, left=%ld", (long) left));
228 
229 		if (left < 2) {
230 			goto uri_error;
231 		}
232 
233 		t = duk__decode_hex_escape(p, 2);
234 		DUK_DDD(DUK_DDDPRINT("first byte: %ld", (long) t));
235 		if (t < 0) {
236 			goto uri_error;
237 		}
238 
239 		if (t < 0x80) {
240 			if (DUK__CHECK_BITMASK(reserved_table, t)) {
241 				/* decode '%xx' to '%xx' if decoded char in reserved set */
242 				DUK_ASSERT(tfm_ctx->p - 1 >= tfm_ctx->p_start);
243 				DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
244 				                      &tfm_ctx->bw,
245 				                      DUK_ASC_PERCENT,
246 				                      p[0],
247 				                      p[1]);
248 			} else {
249 				DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) t);
250 			}
251 			tfm_ctx->p += 2;
252 			return;
253 		}
254 
255 		/* Decode UTF-8 codepoint from a sequence of hex escapes.  The
256 		 * first byte of the sequence has been decoded to 't'.
257 		 *
258 		 * Note that UTF-8 validation must be strict according to the
259 		 * specification: E5.1 Section 15.1.3, decode algorithm step
260 		 * 4.d.vii.8.  URIError from non-shortest encodings is also
261 		 * specifically noted in the spec.
262 		 */
263 
264 		DUK_ASSERT(t >= 0x80);
265 		if (t < 0xc0) {
266 			/* continuation byte */
267 			goto uri_error;
268 		} else if (t < 0xe0) {
269 			/* 110x xxxx; 2 bytes */
270 			utf8_blen = 2;
271 			min_cp = 0x80L;
272 			cp = t & 0x1f;
273 		} else if (t < 0xf0) {
274 			/* 1110 xxxx; 3 bytes */
275 			utf8_blen = 3;
276 			min_cp = 0x800L;
277 			cp = t & 0x0f;
278 		} else if (t < 0xf8) {
279 			/* 1111 0xxx; 4 bytes */
280 			utf8_blen = 4;
281 			min_cp = 0x10000L;
282 			cp = t & 0x07;
283 		} else {
284 			/* extended utf-8 not allowed for URIs */
285 			goto uri_error;
286 		}
287 
288 		if (left < utf8_blen * 3 - 1) {
289 			/* '%xx%xx...%xx', p points to char after first '%' */
290 			goto uri_error;
291 		}
292 
293 		p += 3;
294 		for (i = 1; i < utf8_blen; i++) {
295 			/* p points to digit part ('%xy', p points to 'x') */
296 			t = duk__decode_hex_escape(p, 2);
297 			DUK_DDD(DUK_DDDPRINT("i=%ld utf8_blen=%ld cp=%ld t=0x%02lx",
298 			                     (long) i, (long) utf8_blen, (long) cp, (unsigned long) t));
299 			if (t < 0) {
300 				goto uri_error;
301 			}
302 			if ((t & 0xc0) != 0x80) {
303 				goto uri_error;
304 			}
305 			cp = (cp << 6) + (t & 0x3f);
306 			p += 3;
307 		}
308 		p--;  /* p overshoots */
309 		tfm_ctx->p = p;
310 
311 		DUK_DDD(DUK_DDDPRINT("final cp=%ld, min_cp=%ld", (long) cp, (long) min_cp));
312 
313 		if (cp < min_cp || cp > 0x10ffffL || (cp >= 0xd800L && cp <= 0xdfffL)) {
314 			goto uri_error;
315 		}
316 
317 		/* The E5.1 algorithm checks whether or not a decoded codepoint
318 		 * is below 0x80 and perhaps may be in the "reserved" set.
319 		 * This seems pointless because the single byte UTF-8 case is
320 		 * handled separately, and non-shortest encodings are rejected.
321 		 * So, 'cp' cannot be below 0x80 here, and thus cannot be in
322 		 * the reserved set.
323 		 */
324 
325 		/* utf-8 validation ensures these */
326 		DUK_ASSERT(cp >= 0x80L && cp <= 0x10ffffL);
327 
328 		if (cp >= 0x10000L) {
329 			cp -= 0x10000L;
330 			DUK_ASSERT(cp < 0x100000L);
331 
332 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp >> 10) + 0xd800L));
333 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp & 0x03ffUL) + 0xdc00L));
334 		} else {
335 			DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
336 		}
337 	} else {
338 		DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
339 	}
340 	return;
341 
342  uri_error:
343 	DUK_ERROR(tfm_ctx->thr, DUK_ERR_URI_ERROR, "invalid input");
344 }
345 
346 #ifdef DUK_USE_SECTION_B
duk__transform_callback_escape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)347 DUK_LOCAL void duk__transform_callback_escape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
348 	DUK_UNREF(udata);
349 
350 	DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 6);
351 
352 	if (cp < 0) {
353 		goto esc_error;
354 	} else if ((cp < 0x80L) && DUK__CHECK_BITMASK(duk__escape_unescaped_table, cp)) {
355 		DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
356 	} else if (cp < 0x100L) {
357 		DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
358 		                      &tfm_ctx->bw,
359 		                      (duk_uint8_t) DUK_ASC_PERCENT,
360 		                      (duk_uint8_t) duk_uc_nybbles[cp >> 4],
361 		                      (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
362 	} else if (cp < 0x10000L) {
363 		DUK_BW_WRITE_RAW_U8_6(tfm_ctx->thr,
364 		                      &tfm_ctx->bw,
365 		                      (duk_uint8_t) DUK_ASC_PERCENT,
366 		                      (duk_uint8_t) DUK_ASC_LC_U,
367 		                      (duk_uint8_t) duk_uc_nybbles[cp >> 12],
368 		                      (duk_uint8_t) duk_uc_nybbles[(cp >> 8) & 0x0f],
369 		                      (duk_uint8_t) duk_uc_nybbles[(cp >> 4) & 0x0f],
370 		                      (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
371 	} else {
372 		/* Characters outside BMP cannot be escape()'d.  We could
373 		 * encode them as surrogate pairs (for codepoints inside
374 		 * valid UTF-8 range, but not extended UTF-8).  Because
375 		 * escape() and unescape() are legacy functions, we don't.
376 		 */
377 		goto esc_error;
378 	}
379 
380 	return;
381 
382  esc_error:
383 	DUK_ERROR_TYPE(tfm_ctx->thr, "invalid input");
384 }
385 
duk__transform_callback_unescape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)386 DUK_LOCAL void duk__transform_callback_unescape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
387 	duk_small_int_t t;
388 
389 	DUK_UNREF(udata);
390 
391 	if (cp == (duk_codepoint_t) '%') {
392 		const duk_uint8_t *p = tfm_ctx->p;
393 		duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p);  /* bytes left */
394 
395 		if (left >= 5 && p[0] == 'u' &&
396 		    ((t = duk__decode_hex_escape(p + 1, 4)) >= 0)) {
397 			cp = (duk_codepoint_t) t;
398 			tfm_ctx->p += 5;
399 		} else if (left >= 2 &&
400 		           ((t = duk__decode_hex_escape(p, 2)) >= 0)) {
401 			cp = (duk_codepoint_t) t;
402 			tfm_ctx->p += 2;
403 		}
404 	}
405 
406 	DUK_BW_WRITE_ENSURE_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
407 }
408 #endif  /* DUK_USE_SECTION_B */
409 
410 /*
411  *  Eval
412  *
413  *  Eval needs to handle both a "direct eval" and an "indirect eval".
414  *  Direct eval handling needs access to the caller's activation so that its
415  *  lexical environment can be accessed.  A direct eval is only possible from
416  *  Ecmascript code; an indirect eval call is possible also from C code.
417  *  When an indirect eval call is made from C code, there may not be a
418  *  calling activation at all which needs careful handling.
419  */
420 
duk_bi_global_object_eval(duk_context * ctx)421 DUK_INTERNAL duk_ret_t duk_bi_global_object_eval(duk_context *ctx) {
422 	duk_hthread *thr = (duk_hthread *) ctx;
423 	duk_hstring *h;
424 	duk_activation *act_caller;
425 	duk_activation *act_eval;
426 	duk_activation *act;
427 	duk_hcompiledfunction *func;
428 	duk_hobject *outer_lex_env;
429 	duk_hobject *outer_var_env;
430 	duk_bool_t this_to_global = 1;
431 	duk_small_uint_t comp_flags;
432 	duk_int_t level = -2;
433 
434 	DUK_ASSERT(duk_get_top(ctx) == 1 || duk_get_top(ctx) == 2);  /* 2 when called by debugger */
435 	DUK_ASSERT(thr->callstack_top >= 1);  /* at least this function exists */
436 	DUK_ASSERT(((thr->callstack + thr->callstack_top - 1)->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0 || /* indirect eval */
437 	           (thr->callstack_top >= 2));  /* if direct eval, calling activation must exist */
438 
439 	/*
440 	 *  callstack_top - 1 --> this function
441 	 *  callstack_top - 2 --> caller (may not exist)
442 	 *
443 	 *  If called directly from C, callstack_top might be 1.  If calling
444 	 *  activation doesn't exist, call must be indirect.
445 	 */
446 
447 	h = duk_get_hstring(ctx, 0);
448 	if (!h) {
449 		return 1;  /* return arg as-is */
450 	}
451 
452 #if defined(DUK_USE_DEBUGGER_SUPPORT)
453 	/* NOTE: level is used only by the debugger and should never be present
454 	 * for an Ecmascript eval().
455 	 */
456 	DUK_ASSERT(level == -2);  /* by default, use caller's environment */
457 	if (duk_get_top(ctx) >= 2 && duk_is_number(ctx, 1)) {
458 		level = duk_get_int(ctx, 1);
459 	}
460 	DUK_ASSERT(level <= -2);  /* This is guaranteed by debugger code. */
461 #endif
462 
463 	/* [ source ] */
464 
465 	comp_flags = DUK_JS_COMPILE_FLAG_EVAL;
466 	act_eval = thr->callstack + thr->callstack_top - 1;    /* this function */
467 	if (thr->callstack_top >= (duk_size_t) -level) {
468 		/* Have a calling activation, check for direct eval (otherwise
469 		 * assume indirect eval.
470 		 */
471 		act_caller = thr->callstack + thr->callstack_top + level;  /* caller */
472 		if ((act_caller->flags & DUK_ACT_FLAG_STRICT) &&
473 		    (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL)) {
474 			/* Only direct eval inherits strictness from calling code
475 			 * (E5.1 Section 10.1.1).
476 			 */
477 			comp_flags |= DUK_JS_COMPILE_FLAG_STRICT;
478 		}
479 	} else {
480 		DUK_ASSERT((act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0);
481 	}
482 	act_caller = NULL;  /* avoid dereference after potential callstack realloc */
483 	act_eval = NULL;
484 
485 	duk_push_hstring_stridx(ctx, DUK_STRIDX_INPUT);  /* XXX: copy from caller? */
486 	duk_js_compile(thr,
487 	               (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h),
488 	               (duk_size_t) DUK_HSTRING_GET_BYTELEN(h),
489 	               comp_flags);
490 	func = (duk_hcompiledfunction *) duk_get_hobject(ctx, -1);
491 	DUK_ASSERT(func != NULL);
492 	DUK_ASSERT(DUK_HOBJECT_IS_COMPILEDFUNCTION((duk_hobject *) func));
493 
494 	/* [ source template ] */
495 
496 	/* E5 Section 10.4.2 */
497 	DUK_ASSERT(thr->callstack_top >= 1);
498 	act = thr->callstack + thr->callstack_top - 1;  /* this function */
499 	if (act->flags & DUK_ACT_FLAG_DIRECT_EVAL) {
500 		DUK_ASSERT(thr->callstack_top >= 2);
501 		act = thr->callstack + thr->callstack_top + level;  /* caller */
502 		if (act->lex_env == NULL) {
503 			DUK_ASSERT(act->var_env == NULL);
504 			DUK_DDD(DUK_DDDPRINT("delayed environment initialization"));
505 
506 			/* this may have side effects, so re-lookup act */
507 			duk_js_init_activation_environment_records_delayed(thr, act);
508 			act = thr->callstack + thr->callstack_top + level;
509 		}
510 		DUK_ASSERT(act->lex_env != NULL);
511 		DUK_ASSERT(act->var_env != NULL);
512 
513 		this_to_global = 0;
514 
515 		if (DUK_HOBJECT_HAS_STRICT((duk_hobject *) func)) {
516 			duk_hobject *new_env;
517 			duk_hobject *act_lex_env;
518 
519 			DUK_DDD(DUK_DDDPRINT("direct eval call to a strict function -> "
520 			                     "var_env and lex_env to a fresh env, "
521 			                     "this_binding to caller's this_binding"));
522 
523 			act_lex_env = act->lex_env;
524 			act = NULL;  /* invalidated */
525 
526 			(void) duk_push_object_helper_proto(ctx,
527 			                                    DUK_HOBJECT_FLAG_EXTENSIBLE |
528 			                                    DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_DECENV),
529 			                                    act_lex_env);
530 			new_env = duk_require_hobject(ctx, -1);
531 			DUK_ASSERT(new_env != NULL);
532 			DUK_DDD(DUK_DDDPRINT("new_env allocated: %!iO",
533 			                     (duk_heaphdr *) new_env));
534 
535 			outer_lex_env = new_env;
536 			outer_var_env = new_env;
537 
538 			duk_insert(ctx, 0);  /* stash to bottom of value stack to keep new_env reachable for duration of eval */
539 
540 			/* compiler's responsibility */
541 			DUK_ASSERT(DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
542 		} else {
543 			DUK_DDD(DUK_DDDPRINT("direct eval call to a non-strict function -> "
544 			                     "var_env and lex_env to caller's envs, "
545 			                     "this_binding to caller's this_binding"));
546 
547 			outer_lex_env = act->lex_env;
548 			outer_var_env = act->var_env;
549 
550 			/* compiler's responsibility */
551 			DUK_ASSERT(!DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
552 		}
553 	} else {
554 		DUK_DDD(DUK_DDDPRINT("indirect eval call -> var_env and lex_env to "
555 		                     "global object, this_binding to global object"));
556 
557 		this_to_global = 1;
558 		outer_lex_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
559 		outer_var_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
560 	}
561 	act = NULL;
562 
563 	/* Eval code doesn't need an automatic .prototype object. */
564 	duk_js_push_closure(thr, func, outer_var_env, outer_lex_env, 0 /*add_auto_proto*/);
565 
566 	/* [ source template closure ] */
567 
568 	if (this_to_global) {
569 		DUK_ASSERT(thr->builtins[DUK_BIDX_GLOBAL] != NULL);
570 		duk_push_hobject_bidx(ctx, DUK_BIDX_GLOBAL);
571 	} else {
572 		duk_tval *tv;
573 		DUK_ASSERT(thr->callstack_top >= 2);
574 		act = thr->callstack + thr->callstack_top + level;  /* caller */
575 		tv = thr->valstack + act->idx_bottom - 1;  /* this is just beneath bottom */
576 		DUK_ASSERT(tv >= thr->valstack);
577 		duk_push_tval(ctx, tv);
578 	}
579 
580 	DUK_DDD(DUK_DDDPRINT("eval -> lex_env=%!iO, var_env=%!iO, this_binding=%!T",
581 	                     (duk_heaphdr *) outer_lex_env,
582 	                     (duk_heaphdr *) outer_var_env,
583 	                     duk_get_tval(ctx, -1)));
584 
585 	/* [ source template closure this ] */
586 
587 	duk_call_method(ctx, 0);
588 
589 	/* [ source template result ] */
590 
591 	return 1;
592 }
593 
594 /*
595  *  Parsing of ints and floats
596  */
597 
duk_bi_global_object_parse_int(duk_context * ctx)598 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_int(duk_context *ctx) {
599 	duk_int32_t radix;
600 	duk_small_uint_t s2n_flags;
601 
602 	DUK_ASSERT_TOP(ctx, 2);
603 	duk_to_string(ctx, 0);
604 
605 	radix = duk_to_int32(ctx, 1);
606 
607 	s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
608 	            DUK_S2N_FLAG_ALLOW_GARBAGE |
609 	            DUK_S2N_FLAG_ALLOW_PLUS |
610 	            DUK_S2N_FLAG_ALLOW_MINUS |
611 	            DUK_S2N_FLAG_ALLOW_LEADING_ZERO |
612 	            DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
613 
614 	/* Specification stripPrefix maps to DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT.
615 	 *
616 	 * Don't autodetect octals (from leading zeroes), require user code to
617 	 * provide an explicit radix 8 for parsing octal.  See write-up from Mozilla:
618 	 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseInt#ECMAScript_5_Removes_Octal_Interpretation
619 	 */
620 
621 	if (radix != 0) {
622 		if (radix < 2 || radix > 36) {
623 			goto ret_nan;
624 		}
625 		if (radix != 16) {
626 			s2n_flags &= ~DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
627 		}
628 	} else {
629 		radix = 10;
630 	}
631 
632 	duk_dup(ctx, 0);
633 	duk_numconv_parse(ctx, radix, s2n_flags);
634 	return 1;
635 
636  ret_nan:
637 	duk_push_nan(ctx);
638 	return 1;
639 }
640 
duk_bi_global_object_parse_float(duk_context * ctx)641 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_float(duk_context *ctx) {
642 	duk_small_uint_t s2n_flags;
643 	duk_int32_t radix;
644 
645 	DUK_ASSERT_TOP(ctx, 1);
646 	duk_to_string(ctx, 0);
647 
648 	radix = 10;
649 
650 	/* XXX: check flags */
651 	s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
652 	            DUK_S2N_FLAG_ALLOW_EXP |
653 	            DUK_S2N_FLAG_ALLOW_GARBAGE |
654 	            DUK_S2N_FLAG_ALLOW_PLUS |
655 	            DUK_S2N_FLAG_ALLOW_MINUS |
656 	            DUK_S2N_FLAG_ALLOW_INF |
657 	            DUK_S2N_FLAG_ALLOW_FRAC |
658 	            DUK_S2N_FLAG_ALLOW_NAKED_FRAC |
659 	            DUK_S2N_FLAG_ALLOW_EMPTY_FRAC |
660 	            DUK_S2N_FLAG_ALLOW_LEADING_ZERO;
661 
662 	duk_numconv_parse(ctx, radix, s2n_flags);
663 	return 1;
664 }
665 
666 /*
667  *  Number checkers
668  */
669 
duk_bi_global_object_is_nan(duk_context * ctx)670 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_nan(duk_context *ctx) {
671 	duk_double_t d = duk_to_number(ctx, 0);
672 	duk_push_boolean(ctx, DUK_ISNAN(d));
673 	return 1;
674 }
675 
duk_bi_global_object_is_finite(duk_context * ctx)676 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_finite(duk_context *ctx) {
677 	duk_double_t d = duk_to_number(ctx, 0);
678 	duk_push_boolean(ctx, DUK_ISFINITE(d));
679 	return 1;
680 }
681 
682 /*
683  *  URI handling
684  */
685 
duk_bi_global_object_decode_uri(duk_context * ctx)686 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri(duk_context *ctx) {
687 	return duk__transform_helper(ctx, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_reserved_table);
688 }
689 
duk_bi_global_object_decode_uri_component(duk_context * ctx)690 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri_component(duk_context *ctx) {
691 	return duk__transform_helper(ctx, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_component_reserved_table);
692 }
693 
duk_bi_global_object_encode_uri(duk_context * ctx)694 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri(duk_context *ctx) {
695 	return duk__transform_helper(ctx, duk__transform_callback_encode_uri, (const void *) duk__encode_uriunescaped_table);
696 }
697 
duk_bi_global_object_encode_uri_component(duk_context * ctx)698 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri_component(duk_context *ctx) {
699 	return duk__transform_helper(ctx, duk__transform_callback_encode_uri, (const void *) duk__encode_uricomponent_unescaped_table);
700 }
701 
702 #ifdef DUK_USE_SECTION_B
duk_bi_global_object_escape(duk_context * ctx)703 DUK_INTERNAL duk_ret_t duk_bi_global_object_escape(duk_context *ctx) {
704 	return duk__transform_helper(ctx, duk__transform_callback_escape, (const void *) NULL);
705 }
706 
duk_bi_global_object_unescape(duk_context * ctx)707 DUK_INTERNAL duk_ret_t duk_bi_global_object_unescape(duk_context *ctx) {
708 	return duk__transform_helper(ctx, duk__transform_callback_unescape, (const void *) NULL);
709 }
710 #else  /* DUK_USE_SECTION_B */
duk_bi_global_object_escape(duk_context * ctx)711 DUK_INTERNAL duk_ret_t duk_bi_global_object_escape(duk_context *ctx) {
712 	DUK_UNREF(ctx);
713 	return DUK_RET_UNSUPPORTED_ERROR;
714 }
715 
duk_bi_global_object_unescape(duk_context * ctx)716 DUK_INTERNAL duk_ret_t duk_bi_global_object_unescape(duk_context *ctx) {
717 	DUK_UNREF(ctx);
718 	return DUK_RET_UNSUPPORTED_ERROR;
719 }
720 #endif  /* DUK_USE_SECTION_B */
721 
722 #if defined(DUK_USE_BROWSER_LIKE) && (defined(DUK_USE_FILE_IO) || defined(DUK_USE_DEBUGGER_SUPPORT))
duk_bi_global_object_print_helper(duk_context * ctx)723 DUK_INTERNAL duk_ret_t duk_bi_global_object_print_helper(duk_context *ctx) {
724 	duk_hthread *thr = (duk_hthread *) ctx;
725 	duk_int_t magic;
726 	duk_idx_t nargs;
727 	const duk_uint8_t *buf;
728 	duk_size_t sz_buf;
729 	const char nl = (const char) DUK_ASC_LF;
730 #ifndef DUK_USE_PREFER_SIZE
731 	duk_uint8_t buf_stack[256];
732 #endif
733 #ifdef DUK_USE_FILE_IO
734 	duk_file *f_out;
735 #endif
736 
737 	DUK_UNREF(thr);
738 
739 	magic = duk_get_current_magic(ctx);
740 	DUK_UNREF(magic);
741 
742 	nargs = duk_get_top(ctx);
743 
744 	/* If argument count is 1 and first argument is a buffer, write the buffer
745 	 * as raw data into the file without a newline; this allows exact control
746 	 * over stdout/stderr without an additional entrypoint (useful for now).
747 	 *
748 	 * Otherwise current print/alert semantics are to ToString() coerce
749 	 * arguments, join them with a single space, and append a newline.
750 	 */
751 
752 	if (nargs == 1 && duk_is_buffer(ctx, 0)) {
753 		buf = (const duk_uint8_t *) duk_get_buffer(ctx, 0, &sz_buf);
754 		DUK_ASSERT(buf != NULL);
755 	} else if (nargs > 0) {
756 #ifdef DUK_USE_PREFER_SIZE
757 		/* Compact but lots of churn. */
758 		duk_push_hstring_stridx(thr, DUK_STRIDX_SPACE);
759 		duk_insert(ctx, 0);
760 		duk_join(ctx, nargs);
761 		duk_push_string(thr, "\n");
762 		duk_concat(ctx, 2);
763 		buf = (const duk_uint8_t *) duk_get_lstring(ctx, -1, &sz_buf);
764 		DUK_ASSERT(buf != NULL);
765 #else  /* DUK_USE_PREFER_SIZE */
766 		/* Higher footprint, less churn. */
767 		duk_idx_t i;
768 		duk_size_t sz_str;
769 		const duk_uint8_t *p_str;
770 		duk_uint8_t *p;
771 
772 		sz_buf = (duk_size_t) nargs;  /* spaces (nargs - 1) + newline */
773 		for (i = 0; i < nargs; i++) {
774 			(void) duk_to_lstring(ctx, i, &sz_str);
775 			sz_buf += sz_str;
776 		}
777 
778 		if (sz_buf <= sizeof(buf_stack)) {
779 			p = (duk_uint8_t *) buf_stack;
780 		} else {
781 			p = (duk_uint8_t *) duk_push_fixed_buffer(ctx, sz_buf);
782 			DUK_ASSERT(p != NULL);
783 		}
784 
785 		buf = (const duk_uint8_t *) p;
786 		for (i = 0; i < nargs; i++) {
787 			p_str = (const duk_uint8_t *) duk_get_lstring(ctx, i, &sz_str);
788 			DUK_ASSERT(p_str != NULL);
789 			DUK_MEMCPY((void *) p, (const void *) p_str, sz_str);
790 			p += sz_str;
791 			*p++ = (duk_uint8_t) (i == nargs - 1 ? DUK_ASC_LF : DUK_ASC_SPACE);
792 		}
793 		DUK_ASSERT((const duk_uint8_t *) p == buf + sz_buf);
794 #endif  /* DUK_USE_PREFER_SIZE */
795 	} else {
796 		buf = (const duk_uint8_t *) &nl;
797 		sz_buf = 1;
798 	}
799 
800 	/* 'buf' contains the string to write, 'sz_buf' contains the length
801 	 * (which may be zero).
802 	 */
803 	DUK_ASSERT(buf != NULL);
804 
805 	if (sz_buf == 0) {
806 		return 0;
807 	}
808 
809 #ifdef DUK_USE_FILE_IO
810 	f_out = (magic ? DUK_STDERR : DUK_STDOUT);
811 	DUK_FWRITE((const void *) buf, 1, (size_t) sz_buf, f_out);
812 	DUK_FFLUSH(f_out);
813 #endif
814 
815 #if defined(DUK_USE_DEBUGGER_SUPPORT) && defined(DUK_USE_DEBUGGER_FWD_PRINTALERT)
816 	if (DUK_HEAP_IS_DEBUGGER_ATTACHED(thr->heap)) {
817 		duk_debug_write_notify(thr, magic ? DUK_DBG_CMD_ALERT : DUK_DBG_CMD_PRINT);
818 		duk_debug_write_string(thr, (const char *) buf, sz_buf);
819 		duk_debug_write_eom(thr);
820 	}
821 #endif
822 	return 0;
823 }
824 #elif defined(DUK_USE_BROWSER_LIKE)  /* print provider */
duk_bi_global_object_print_helper(duk_context * ctx)825 DUK_INTERNAL duk_ret_t duk_bi_global_object_print_helper(duk_context *ctx) {
826 	DUK_UNREF(ctx);
827 	return 0;
828 }
829 #else  /* print provider */
duk_bi_global_object_print_helper(duk_context * ctx)830 DUK_INTERNAL duk_ret_t duk_bi_global_object_print_helper(duk_context *ctx) {
831 	DUK_UNREF(ctx);
832 	return DUK_RET_UNSUPPORTED_ERROR;
833 }
834 #endif  /* print provider */
835 
836 /*
837  *  CommonJS require() and modules support
838  */
839 
840 #if defined(DUK_USE_COMMONJS_MODULES)
duk__bi_global_resolve_module_id(duk_context * ctx,const char * req_id,const char * mod_id)841 DUK_LOCAL void duk__bi_global_resolve_module_id(duk_context *ctx, const char *req_id, const char *mod_id) {
842 	duk_hthread *thr = (duk_hthread *) ctx;
843 	duk_uint8_t buf[DUK_BI_COMMONJS_MODULE_ID_LIMIT];
844 	duk_uint8_t *p;
845 	duk_uint8_t *q;
846 	duk_uint8_t *q_last;  /* last component */
847 	duk_int_t int_rc;
848 
849 	DUK_ASSERT(req_id != NULL);
850 	/* mod_id may be NULL */
851 
852 	/*
853 	 *  A few notes on the algorithm:
854 	 *
855 	 *    - Terms are not allowed to begin with a period unless the term
856 	 *      is either '.' or '..'.  This simplifies implementation (and
857 	 *      is within CommonJS modules specification).
858 	 *
859 	 *    - There are few output bound checks here.  This is on purpose:
860 	 *      the resolution input is length checked and the output is never
861 	 *      longer than the input.  The resolved output is written directly
862 	 *      over the input because it's never longer than the input at any
863 	 *      point in the algorithm.
864 	 *
865 	 *    - Non-ASCII characters are processed as individual bytes and
866 	 *      need no special treatment.  However, U+0000 terminates the
867 	 *      algorithm; this is not an issue because U+0000 is not a
868 	 *      desirable term character anyway.
869 	 */
870 
871 	/*
872 	 *  Set up the resolution input which is the requested ID directly
873 	 *  (if absolute or no current module path) or with current module
874 	 *  ID prepended (if relative and current module path exists).
875 	 *
876 	 *  Suppose current module is 'foo/bar' and relative path is './quux'.
877 	 *  The 'bar' component must be replaced so the initial input here is
878 	 *  'foo/bar/.././quux'.
879 	 */
880 
881 	if (mod_id != NULL && req_id[0] == '.') {
882 		int_rc = DUK_SNPRINTF((char *) buf, sizeof(buf), "%s/../%s", mod_id, req_id);
883 	} else {
884 		int_rc = DUK_SNPRINTF((char *) buf, sizeof(buf), "%s", req_id);
885 	}
886 	if (int_rc >= (duk_int_t) sizeof(buf) || int_rc < 0) {
887 		/* Potentially truncated, NUL not guaranteed in any case.
888 		 * The (int_rc < 0) case should not occur in practice.
889 		 */
890 		DUK_DD(DUK_DDPRINT("resolve error: temporary working module ID doesn't fit into resolve buffer"));
891 		goto resolve_error;
892 	}
893 	DUK_ASSERT(DUK_STRLEN((const char *) buf) < sizeof(buf));  /* at most sizeof(buf) - 1 */
894 
895 	DUK_DDD(DUK_DDDPRINT("input module id: '%s'", (const char *) buf));
896 
897 	/*
898 	 *  Resolution loop.  At the top of the loop we're expecting a valid
899 	 *  term: '.', '..', or a non-empty identifier not starting with a period.
900 	 */
901 
902 	p = buf;
903 	q = buf;
904 	for (;;) {
905 		duk_uint_fast8_t c;
906 
907 		/* Here 'p' always points to the start of a term.
908 		 *
909 		 * We can also unconditionally reset q_last here: if this is
910 		 * the last (non-empty) term q_last will have the right value
911 		 * on loop exit.
912 		 */
913 
914 		DUK_ASSERT(p >= q);  /* output is never longer than input during resolution */
915 
916 		DUK_DDD(DUK_DDDPRINT("resolve loop top: p -> '%s', q=%p, buf=%p",
917 		                     (const char *) p, (void *) q, (void *) buf));
918 
919 		q_last = q;
920 
921 		c = *p++;
922 		if (DUK_UNLIKELY(c == 0)) {
923 			DUK_DD(DUK_DDPRINT("resolve error: requested ID must end with a non-empty term"));
924 			goto resolve_error;
925 		} else if (DUK_UNLIKELY(c == '.')) {
926 			c = *p++;
927 			if (c == '/') {
928 				/* Term was '.' and is eaten entirely (including dup slashes). */
929 				goto eat_dup_slashes;
930 			}
931 			if (c == '.' && *p == '/') {
932 				/* Term was '..', backtrack resolved name by one component.
933 				 *  q[-1] = previous slash (or beyond start of buffer)
934 				 *  q[-2] = last char of previous component (or beyond start of buffer)
935 				 */
936 				p++;  /* eat (first) input slash */
937 				DUK_ASSERT(q >= buf);
938 				if (q == buf) {
939 					DUK_DD(DUK_DDPRINT("resolve error: term was '..' but nothing to backtrack"));
940 					goto resolve_error;
941 				}
942 				DUK_ASSERT(*(q - 1) == '/');
943 				q--;  /* backtrack to last output slash (dups already eliminated) */
944 				for (;;) {
945 					/* Backtrack to previous slash or start of buffer. */
946 					DUK_ASSERT(q >= buf);
947 					if (q == buf) {
948 						break;
949 					}
950 					if (*(q - 1) == '/') {
951 						break;
952 					}
953 					q--;
954 				}
955 				goto eat_dup_slashes;
956 			}
957 			DUK_DD(DUK_DDPRINT("resolve error: term begins with '.' but is not '.' or '..' (not allowed now)"));
958 			goto resolve_error;
959 		} else if (DUK_UNLIKELY(c == '/')) {
960 			/* e.g. require('/foo'), empty terms not allowed */
961 			DUK_DD(DUK_DDPRINT("resolve error: empty term (not allowed now)"));
962 			goto resolve_error;
963 		} else {
964 			for (;;) {
965 				/* Copy term name until end or '/'. */
966 				*q++ = c;
967 				c = *p++;
968 				if (DUK_UNLIKELY(c == 0)) {
969 					/* This was the last term, and q_last was
970 					 * updated to match this term at loop top.
971 					 */
972 					goto loop_done;
973 				} else if (DUK_UNLIKELY(c == '/')) {
974 					*q++ = '/';
975 					break;
976 				} else {
977 					/* write on next loop */
978 				}
979 			}
980 		}
981 
982 	 eat_dup_slashes:
983 		for (;;) {
984 			/* eat dup slashes */
985 			c = *p;
986 			if (DUK_LIKELY(c != '/')) {
987 				break;
988 			}
989 			p++;
990 		}
991 	}
992  loop_done:
993 	/* Output #1: resolved absolute name */
994 	DUK_ASSERT(q >= buf);
995 	duk_push_lstring(ctx, (const char *) buf, (size_t) (q - buf));
996 
997 	/* Output #2: last component name */
998 	DUK_ASSERT(q >= q_last);
999 	DUK_ASSERT(q_last >= buf);
1000 	duk_push_lstring(ctx, (const char *) q_last, (size_t) (q - q_last));
1001 
1002 	DUK_DD(DUK_DDPRINT("after resolving module name: buf=%p, q_last=%p, q=%p",
1003 	                   (void *) buf, (void *) q_last, (void *) q));
1004 	return;
1005 
1006  resolve_error:
1007 	DUK_ERROR_FMT1(thr, DUK_ERR_TYPE_ERROR, "cannot resolve module id: %s", (const char *) req_id);
1008 }
1009 #endif  /* DUK_USE_COMMONJS_MODULES */
1010 
1011 #if defined(DUK_USE_COMMONJS_MODULES)
1012 /* Stack indices for better readability */
1013 #define DUK__IDX_REQUESTED_ID   0  /* Module id requested */
1014 #define DUK__IDX_REQUIRE        1  /* Current require() function */
1015 #define DUK__IDX_REQUIRE_ID     2  /* The base ID of the current require() function, resolution base */
1016 #define DUK__IDX_RESOLVED_ID    3  /* Resolved, normalized absolute module ID */
1017 #define DUK__IDX_LASTCOMP       4  /* Last component name in resolved path */
1018 #define DUK__IDX_DUKTAPE        5  /* Duktape object */
1019 #define DUK__IDX_MODLOADED      6  /* Duktape.modLoaded[] module cache */
1020 #define DUK__IDX_UNDEFINED      7  /* 'undefined', artifact of lookup */
1021 #define DUK__IDX_FRESH_REQUIRE  8  /* New require() function for module, updated resolution base */
1022 #define DUK__IDX_EXPORTS        9  /* Default exports table */
1023 #define DUK__IDX_MODULE         10  /* Module object containing module.exports, etc */
1024 
duk_bi_global_object_require(duk_context * ctx)1025 DUK_INTERNAL duk_ret_t duk_bi_global_object_require(duk_context *ctx) {
1026 	const char *str_req_id;  /* requested identifier */
1027 	const char *str_mod_id;  /* require.id of current module */
1028 	duk_int_t pcall_rc;
1029 
1030 	/* NOTE: we try to minimize code size by avoiding unnecessary pops,
1031 	 * so the stack looks a bit cluttered in this function.  DUK_ASSERT_TOP()
1032 	 * assertions are used to ensure stack configuration is correct at each
1033 	 * step.
1034 	 */
1035 
1036 	/*
1037 	 *  Resolve module identifier into canonical absolute form.
1038 	 */
1039 
1040 	str_req_id = duk_require_string(ctx, DUK__IDX_REQUESTED_ID);
1041 	duk_push_current_function(ctx);
1042 	duk_get_prop_stridx(ctx, -1, DUK_STRIDX_ID);
1043 	str_mod_id = duk_get_string(ctx, DUK__IDX_REQUIRE_ID);  /* ignore non-strings */
1044 	DUK_DDD(DUK_DDDPRINT("resolve module id: requested=%!T, currentmodule=%!T",
1045 	                     duk_get_tval(ctx, DUK__IDX_REQUESTED_ID),
1046 	                     duk_get_tval(ctx, DUK__IDX_REQUIRE_ID)));
1047 	duk__bi_global_resolve_module_id(ctx, str_req_id, str_mod_id);
1048 	str_req_id = NULL;
1049 	str_mod_id = NULL;
1050 	DUK_DDD(DUK_DDDPRINT("resolved module id: requested=%!T, currentmodule=%!T, result=%!T, lastcomp=%!T",
1051 	                     duk_get_tval(ctx, DUK__IDX_REQUESTED_ID),
1052 	                     duk_get_tval(ctx, DUK__IDX_REQUIRE_ID),
1053 	                     duk_get_tval(ctx, DUK__IDX_RESOLVED_ID),
1054 	                     duk_get_tval(ctx, DUK__IDX_LASTCOMP)));
1055 
1056 	/* [ requested_id require require.id resolved_id last_comp ] */
1057 	DUK_ASSERT_TOP(ctx, DUK__IDX_LASTCOMP + 1);
1058 
1059 	/*
1060 	 *  Cached module check.
1061 	 *
1062 	 *  If module has been loaded or its loading has already begun without
1063 	 *  finishing, return the same cached value ('exports').  The value is
1064 	 *  registered when module load starts so that circular references can
1065 	 *  be supported to some extent.
1066 	 */
1067 
1068 	duk_push_hobject_bidx(ctx, DUK_BIDX_DUKTAPE);
1069 	duk_get_prop_stridx(ctx, DUK__IDX_DUKTAPE, DUK_STRIDX_MOD_LOADED);  /* Duktape.modLoaded */
1070 	(void) duk_require_hobject(ctx, DUK__IDX_MODLOADED);
1071 	DUK_ASSERT_TOP(ctx, DUK__IDX_MODLOADED + 1);
1072 
1073 	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
1074 	if (duk_get_prop(ctx, DUK__IDX_MODLOADED)) {
1075 		/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded Duktape.modLoaded[id] ] */
1076 		DUK_DD(DUK_DDPRINT("module already loaded: %!T",
1077 		                   duk_get_tval(ctx, DUK__IDX_RESOLVED_ID)));
1078 		duk_get_prop_stridx(ctx, -1, DUK_STRIDX_EXPORTS);  /* return module.exports */
1079 		return 1;
1080 	}
1081 	DUK_ASSERT_TOP(ctx, DUK__IDX_UNDEFINED + 1);
1082 
1083 	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined ] */
1084 
1085 	/*
1086 	 *  Module not loaded (and loading not started previously).
1087 	 *
1088 	 *  Create a new require() function with 'id' set to resolved ID
1089 	 *  of module being loaded.  Also create 'exports' and 'module'
1090 	 *  tables but don't register exports to the loaded table yet.
1091 	 *  We don't want to do that unless the user module search callbacks
1092 	 *  succeeds in finding the module.
1093 	 */
1094 
1095 	DUK_D(DUK_DPRINT("loading module %!T, resolution base %!T, requested ID %!T -> resolved ID %!T, last component %!T",
1096                          duk_get_tval(ctx, DUK__IDX_RESOLVED_ID),
1097                          duk_get_tval(ctx, DUK__IDX_REQUIRE_ID),
1098                          duk_get_tval(ctx, DUK__IDX_REQUESTED_ID),
1099                          duk_get_tval(ctx, DUK__IDX_RESOLVED_ID),
1100                          duk_get_tval(ctx, DUK__IDX_LASTCOMP)));
1101 
1102 	/* Fresh require: require.id is left configurable (but not writable)
1103 	 * so that is not easy to accidentally tweak it, but it can still be
1104 	 * done with Object.defineProperty().
1105 	 *
1106 	 * XXX: require.id could also be just made non-configurable, as there
1107 	 * is no practical reason to touch it.
1108 	 */
1109 	duk_push_c_function(ctx, duk_bi_global_object_require, 1 /*nargs*/);
1110 	duk_push_hstring_stridx(ctx, DUK_STRIDX_REQUIRE);
1111 	duk_xdef_prop_stridx(ctx, DUK__IDX_FRESH_REQUIRE, DUK_STRIDX_NAME, DUK_PROPDESC_FLAGS_NONE);
1112 	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
1113 	duk_xdef_prop_stridx(ctx, DUK__IDX_FRESH_REQUIRE, DUK_STRIDX_ID, DUK_PROPDESC_FLAGS_C);  /* a fresh require() with require.id = resolved target module id */
1114 
1115 	/* Module table:
1116 	 * - module.exports: initial exports table (may be replaced by user)
1117 	 * - module.id is non-writable and non-configurable, as the CommonJS
1118 	 *   spec suggests this if possible
1119 	 * - module.filename: not set, defaults to resolved ID if not explicitly
1120 	 *   set by modSearch() (note capitalization, not .fileName, matches Node.js)
1121 	 * - module.name: not set, defaults to last component of resolved ID if
1122 	 *   not explicitly set by modSearch()
1123 	 */
1124 	duk_push_object(ctx);  /* exports */
1125 	duk_push_object(ctx);  /* module */
1126 	duk_dup(ctx, DUK__IDX_EXPORTS);
1127 	duk_xdef_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_EXPORTS, DUK_PROPDESC_FLAGS_WC);  /* module.exports = exports */
1128 	duk_dup(ctx, DUK__IDX_RESOLVED_ID);  /* resolved id: require(id) must return this same module */
1129 	duk_xdef_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_ID, DUK_PROPDESC_FLAGS_NONE);  /* module.id = resolved_id */
1130 	duk_compact(ctx, DUK__IDX_MODULE);  /* module table remains registered to modLoaded, minimize its size */
1131 	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 1);
1132 
1133 	DUK_DD(DUK_DDPRINT("module table created: %!T", duk_get_tval(ctx, DUK__IDX_MODULE)));
1134 
1135 	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module ] */
1136 
1137 	/* Register the module table early to modLoaded[] so that we can
1138 	 * support circular references even in modSearch().  If an error
1139 	 * is thrown, we'll delete the reference.
1140 	 */
1141 	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
1142 	duk_dup(ctx, DUK__IDX_MODULE);
1143 	duk_put_prop(ctx, DUK__IDX_MODLOADED);  /* Duktape.modLoaded[resolved_id] = module */
1144 
1145 	/*
1146 	 *  Call user provided module search function and build the wrapped
1147 	 *  module source code (if necessary).  The module search function
1148 	 *  can be used to implement pure Ecmacsript, pure C, and mixed
1149 	 *  Ecmascript/C modules.
1150 	 *
1151 	 *  The module search function can operate on the exports table directly
1152 	 *  (e.g. DLL code can register values to it).  It can also return a
1153 	 *  string which is interpreted as module source code (if a non-string
1154 	 *  is returned the module is assumed to be a pure C one).  If a module
1155 	 *  cannot be found, an error must be thrown by the user callback.
1156 	 *
1157 	 *  Because Duktape.modLoaded[] already contains the module being
1158 	 *  loaded, circular references for C modules should also work
1159 	 *  (although expected to be quite rare).
1160 	 */
1161 
1162 	duk_push_string(ctx, "(function(require,exports,module){");
1163 
1164 	/* Duktape.modSearch(resolved_id, fresh_require, exports, module). */
1165 	duk_get_prop_stridx(ctx, DUK__IDX_DUKTAPE, DUK_STRIDX_MOD_SEARCH);  /* Duktape.modSearch */
1166 	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
1167 	duk_dup(ctx, DUK__IDX_FRESH_REQUIRE);
1168 	duk_dup(ctx, DUK__IDX_EXPORTS);
1169 	duk_dup(ctx, DUK__IDX_MODULE);  /* [ ... Duktape.modSearch resolved_id last_comp fresh_require exports module ] */
1170 	pcall_rc = duk_pcall(ctx, 4 /*nargs*/);  /* -> [ ... source ] */
1171 	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 3);
1172 
1173 	if (pcall_rc != DUK_EXEC_SUCCESS) {
1174 		/* Delete entry in Duktape.modLoaded[] and rethrow. */
1175 		goto delete_rethrow;
1176 	}
1177 
1178 	/* If user callback did not return source code, module loading
1179 	 * is finished (user callback initialized exports table directly).
1180 	 */
1181 	if (!duk_is_string(ctx, -1)) {
1182 		/* User callback did not return source code, so module loading
1183 		 * is finished: just update modLoaded with final module.exports
1184 		 * and we're done.
1185 		 */
1186 		goto return_exports;
1187 	}
1188 
1189 	/* Finish the wrapped module source.  Force module.filename as the
1190 	 * function .fileName so it gets set for functions defined within a
1191 	 * module.  This also ensures loggers created within the module get
1192 	 * the module ID (or overridden filename) as their default logger name.
1193 	 * (Note capitalization: .filename matches Node.js while .fileName is
1194 	 * used elsewhere in Duktape.)
1195 	 */
1196 	duk_push_string(ctx, "\n})");  /* Newline allows module last line to contain a // comment. */
1197 	duk_concat(ctx, 3);
1198 	if (!duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_FILENAME)) {
1199 		/* module.filename for .fileName, default to resolved ID if
1200 		 * not present.
1201 		 */
1202 		duk_pop(ctx);
1203 		duk_dup(ctx, DUK__IDX_RESOLVED_ID);
1204 	}
1205 	duk_eval_raw(ctx, NULL, 0, DUK_COMPILE_EVAL);
1206 
1207 	/* Module has now evaluated to a wrapped module function.  Force its
1208 	 * .name to match module.name (defaults to last component of resolved
1209 	 * ID) so that it is shown in stack traces too.  Note that we must not
1210 	 * introduce an actual name binding into the function scope (which is
1211 	 * usually the case with a named function) because it would affect the
1212 	 * scope seen by the module and shadow accesses to globals of the same name.
1213 	 * This is now done by compiling the function as anonymous and then forcing
1214 	 * its .name without setting a "has name binding" flag.
1215 	 */
1216 
1217 	duk_push_hstring_stridx(ctx, DUK_STRIDX_NAME);
1218 	if (!duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_NAME)) {
1219 		/* module.name for .name, default to last component if
1220 		 * not present.
1221 		 */
1222 		duk_pop(ctx);
1223 		duk_dup(ctx, DUK__IDX_LASTCOMP);
1224 	}
1225 	duk_def_prop(ctx, -3, DUK_DEFPROP_HAVE_VALUE | DUK_DEFPROP_FORCE);
1226 
1227 	/*
1228 	 *  Call the wrapped module function.
1229 	 *
1230 	 *  Use a protected call so that we can update Duktape.modLoaded[resolved_id]
1231 	 *  even if the module throws an error.
1232 	 */
1233 
1234 	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module mod_func ] */
1235 	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 2);
1236 
1237 	duk_dup(ctx, DUK__IDX_EXPORTS);  /* exports (this binding) */
1238 	duk_dup(ctx, DUK__IDX_FRESH_REQUIRE);  /* fresh require (argument) */
1239 	duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_EXPORTS);  /* relookup exports from module.exports in case it was changed by modSearch */
1240 	duk_dup(ctx, DUK__IDX_MODULE);  /* module (argument) */
1241 	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 6);
1242 
1243 	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module mod_func exports fresh_require exports module ] */
1244 
1245 	pcall_rc = duk_pcall_method(ctx, 3 /*nargs*/);
1246 	if (pcall_rc != DUK_EXEC_SUCCESS) {
1247 		/* Module loading failed.  Node.js will forget the module
1248 		 * registration so that another require() will try to load
1249 		 * the module again.  Mimic that behavior.
1250 		 */
1251 		goto delete_rethrow;
1252 	}
1253 
1254 	/* [ requested_id require require.id resolved_id last_comp Duktape Duktape.modLoaded undefined fresh_require exports module result(ignored) ] */
1255 	DUK_ASSERT_TOP(ctx, DUK__IDX_MODULE + 2);
1256 
1257 	/* fall through */
1258 
1259  return_exports:
1260 	duk_get_prop_stridx(ctx, DUK__IDX_MODULE, DUK_STRIDX_EXPORTS);
1261 	duk_compact(ctx, -1);  /* compact the exports table */
1262 	return 1;  /* return module.exports */
1263 
1264  delete_rethrow:
1265 	duk_dup(ctx, DUK__IDX_RESOLVED_ID);
1266 	duk_del_prop(ctx, DUK__IDX_MODLOADED);  /* delete Duktape.modLoaded[resolved_id] */
1267 	duk_throw(ctx);  /* rethrow original error */
1268 	return 0;  /* not reachable */
1269 }
1270 
1271 #undef DUK__IDX_REQUESTED_ID
1272 #undef DUK__IDX_REQUIRE
1273 #undef DUK__IDX_REQUIRE_ID
1274 #undef DUK__IDX_RESOLVED_ID
1275 #undef DUK__IDX_LASTCOMP
1276 #undef DUK__IDX_DUKTAPE
1277 #undef DUK__IDX_MODLOADED
1278 #undef DUK__IDX_UNDEFINED
1279 #undef DUK__IDX_FRESH_REQUIRE
1280 #undef DUK__IDX_EXPORTS
1281 #undef DUK__IDX_MODULE
1282 #else
duk_bi_global_object_require(duk_context * ctx)1283 DUK_INTERNAL duk_ret_t duk_bi_global_object_require(duk_context *ctx) {
1284 	DUK_UNREF(ctx);
1285 	return DUK_RET_UNSUPPORTED_ERROR;
1286 }
1287 #endif  /* DUK_USE_COMMONJS_MODULES */
1288