1 /*
2 * Global object built-ins
3 */
4
5 #include "duk_internal.h"
6
7 /*
8 * Encoding/decoding helpers
9 */
10
11 /* XXX: Could add fast path (for each transform callback) with direct byte
12 * lookups (no shifting) and no explicit check for x < 0x80 before table
13 * lookup.
14 */
15
/* Helpers for building and querying ASCII membership bitmask tables.
 * Each table byte covers eight consecutive codepoints and the lowest
 * codepoint of the group lives in bit 0.  The bit order looks a bit odd
 * in the table initializers, but it minimizes code size.
 */
#define DUK__MKBITS(b0,b1,b2,b3,b4,b5,b6,b7)  ((duk_uint8_t) ( \
	((b7) << 7) | ((b6) << 6) | ((b5) << 5) | ((b4) << 4) | \
	((b3) << 3) | ((b2) << 2) | ((b1) << 1) | ((b0) << 0) \
	))
/* Non-zero if the bit for codepoint 'idx' is set in 'tbl'. */
#define DUK__CHECK_BITMASK(tbl,idx)  ((tbl)[(idx) >> 3] & (1 << ((idx) & 0x07)))
24
/* E5.1 Section 15.1.3.3: uriReserved + uriUnescaped + '#'
 *
 * Bitmask of ASCII characters that encodeURI() copies to the output
 * without percent escaping (bit set = emit as is).  Two table bytes per
 * row, each row covering 16 codepoints.
 */
DUK_LOCAL const duk_uint8_t duk__encode_uriunescaped_table[16] = {
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
	DUK__MKBITS(0, 1, 0, 1, 1, 0, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x20-0x2f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
};
36
/* E5.1 Section 15.1.3.4: uriUnescaped
 *
 * Bitmask of ASCII characters that encodeURIComponent() copies to the
 * output without percent escaping (bit set = emit as is).
 */
DUK_LOCAL const duk_uint8_t duk__encode_uricomponent_unescaped_table[16] = {
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
	DUK__MKBITS(0, 1, 0, 0, 0, 0, 0, 1), DUK__MKBITS(1, 1, 1, 0, 0, 1, 1, 0),  /* 0x20-0x2f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 1, 0),  /* 0x70-0x7f */
};
48
/* E5.1 Section 15.1.3.1: uriReserved + '#'
 *
 * Bitmask used by decodeURI(): when a single '%xx' escape decodes to an
 * ASCII character whose bit is set here, the escape is copied to the
 * output in its escaped form instead of being decoded.
 */
DUK_LOCAL const duk_uint8_t duk__decode_uri_reserved_table[16] = {
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
	DUK__MKBITS(0, 0, 0, 1, 1, 0, 1, 0), DUK__MKBITS(0, 0, 0, 1, 1, 0, 0, 1),  /* 0x20-0x2f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 0, 1),  /* 0x30-0x3f */
	DUK__MKBITS(1, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
};
60
/* E5.1 Section 15.1.3.2: empty
 *
 * Reserved set for decodeURIComponent(): no bits are set, so every valid
 * single-byte '%xx' escape is decoded to its character.
 */
DUK_LOCAL const duk_uint8_t duk__decode_uri_component_reserved_table[16] = {
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x20-0x2f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x40-0x4f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x50-0x5f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x60-0x6f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x70-0x7f */
};
72
73 #if defined(DUK_USE_SECTION_B)
/* E5.1 Section B.2.2, step 7.
 *
 * Bitmask of ASCII characters that escape() copies to the output as is
 * (bit set = not escaped).
 */
DUK_LOCAL const duk_uint8_t duk__escape_unescaped_table[16] = {
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x00-0x0f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0),  /* 0x10-0x1f */
	DUK__MKBITS(0, 0, 0, 0, 0, 0, 0, 0), DUK__MKBITS(0, 0, 1, 1, 0, 1, 1, 1),  /* 0x20-0x2f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 0, 0, 0, 0, 0, 0),  /* 0x30-0x3f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x40-0x4f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 1),  /* 0x50-0x5f */
	DUK__MKBITS(0, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1),  /* 0x60-0x6f */
	DUK__MKBITS(1, 1, 1, 1, 1, 1, 1, 1), DUK__MKBITS(1, 1, 1, 0, 0, 0, 0, 0)   /* 0x70-0x7f */
};
85 #endif /* DUK_USE_SECTION_B */
86
/* State shared between duk__transform_helper() and the per-codepoint
 * transform callbacks.
 */
typedef struct {
	duk_hthread *thr;            /* calling thread */
	duk_hstring *h_str;          /* input string: argument 0 coerced to a string */
	duk_bufwriter_ctx bw;        /* buffer writer accumulating the output */
	const duk_uint8_t *p;        /* current read position; callbacks may advance it to consume lookahead */
	const duk_uint8_t *p_start;  /* first byte of the input string data */
	const duk_uint8_t *p_end;    /* one past the last byte of the input string data */
} duk__transform_context;

/* Callback invoked once for each codepoint 'cp' decoded from the input.
 * 'udata' is the opaque pointer that was given to duk__transform_helper().
 */
typedef void (*duk__transform_callback)(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp);
97
98 /* XXX: refactor and share with other code */
duk__decode_hex_escape(const duk_uint8_t * p,duk_small_int_t n)99 DUK_LOCAL duk_small_int_t duk__decode_hex_escape(const duk_uint8_t *p, duk_small_int_t n) {
100 duk_small_int_t ch;
101 duk_small_int_t t = 0;
102
103 while (n > 0) {
104 t = t * 16;
105 ch = (duk_small_int_t) duk_hex_dectab[*p++];
106 if (DUK_LIKELY(ch >= 0)) {
107 t += ch;
108 } else {
109 return -1;
110 }
111 n--;
112 }
113 return t;
114 }
115
duk__transform_helper(duk_hthread * thr,duk__transform_callback callback,const void * udata)116 DUK_LOCAL int duk__transform_helper(duk_hthread *thr, duk__transform_callback callback, const void *udata) {
117 duk__transform_context tfm_ctx_alloc;
118 duk__transform_context *tfm_ctx = &tfm_ctx_alloc;
119 duk_codepoint_t cp;
120
121 tfm_ctx->thr = thr;
122
123 tfm_ctx->h_str = duk_to_hstring(thr, 0);
124 DUK_ASSERT(tfm_ctx->h_str != NULL);
125
126 DUK_BW_INIT_PUSHBUF(thr, &tfm_ctx->bw, DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str)); /* initial size guess */
127
128 tfm_ctx->p_start = DUK_HSTRING_GET_DATA(tfm_ctx->h_str);
129 tfm_ctx->p_end = tfm_ctx->p_start + DUK_HSTRING_GET_BYTELEN(tfm_ctx->h_str);
130 tfm_ctx->p = tfm_ctx->p_start;
131
132 while (tfm_ctx->p < tfm_ctx->p_end) {
133 cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end);
134 callback(tfm_ctx, udata, cp);
135 }
136
137 DUK_BW_COMPACT(thr, &tfm_ctx->bw);
138
139 (void) duk_buffer_to_string(thr, -1); /* Safe if transform is safe. */
140 return 1;
141 }
142
duk__transform_callback_encode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)143 DUK_LOCAL void duk__transform_callback_encode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
144 duk_uint8_t xutf8_buf[DUK_UNICODE_MAX_XUTF8_LENGTH];
145 duk_small_int_t len;
146 duk_codepoint_t cp1, cp2;
147 duk_small_int_t i, t;
148 const duk_uint8_t *unescaped_table = (const duk_uint8_t *) udata;
149
150 /* UTF-8 encoded bytes escaped as %xx%xx%xx... -> 3 * nbytes.
151 * Codepoint range is restricted so this is a slightly too large
152 * but doesn't matter.
153 */
154 DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 3 * DUK_UNICODE_MAX_XUTF8_LENGTH);
155
156 if (cp < 0) {
157 goto uri_error;
158 } else if ((cp < 0x80L) && DUK__CHECK_BITMASK(unescaped_table, cp)) {
159 DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
160 return;
161 } else if (cp >= 0xdc00L && cp <= 0xdfffL) {
162 goto uri_error;
163 } else if (cp >= 0xd800L && cp <= 0xdbffL) {
164 /* Needs lookahead */
165 if (duk_unicode_decode_xutf8(tfm_ctx->thr, &tfm_ctx->p, tfm_ctx->p_start, tfm_ctx->p_end, (duk_ucodepoint_t *) &cp2) == 0) {
166 goto uri_error;
167 }
168 if (!(cp2 >= 0xdc00L && cp2 <= 0xdfffL)) {
169 goto uri_error;
170 }
171 cp1 = cp;
172 cp = (duk_codepoint_t) (((cp1 - 0xd800L) << 10) + (cp2 - 0xdc00L) + 0x10000L);
173 } else if (cp > 0x10ffffL) {
174 /* Although we can allow non-BMP characters (they'll decode
175 * back into surrogate pairs), we don't allow extended UTF-8
176 * characters; they would encode to URIs which won't decode
177 * back because of strict UTF-8 checks in URI decoding.
178 * (However, we could just as well allow them here.)
179 */
180 goto uri_error;
181 } else {
182 /* Non-BMP characters within valid UTF-8 range: encode as is.
183 * They'll decode back into surrogate pairs if the escaped
184 * output is decoded.
185 */
186 ;
187 }
188
189 len = duk_unicode_encode_xutf8((duk_ucodepoint_t) cp, xutf8_buf);
190 for (i = 0; i < len; i++) {
191 t = (duk_small_int_t) xutf8_buf[i];
192 DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
193 &tfm_ctx->bw,
194 DUK_ASC_PERCENT,
195 (duk_uint8_t) duk_uc_nybbles[t >> 4],
196 (duk_uint8_t) duk_uc_nybbles[t & 0x0f]);
197 }
198
199 return;
200
201 uri_error:
202 DUK_ERROR_URI(tfm_ctx->thr, DUK_STR_INVALID_INPUT);
203 DUK_WO_NORETURN(return;);
204 }
205
duk__transform_callback_decode_uri(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)206 DUK_LOCAL void duk__transform_callback_decode_uri(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
207 const duk_uint8_t *reserved_table = (const duk_uint8_t *) udata;
208 duk_small_uint_t utf8_blen;
209 duk_codepoint_t min_cp;
210 duk_small_int_t t; /* must be signed */
211 duk_small_uint_t i;
212
213 /* Maximum write size: XUTF8 path writes max DUK_UNICODE_MAX_XUTF8_LENGTH,
214 * percent escape path writes max two times CESU-8 encoded BMP length.
215 */
216 DUK_BW_ENSURE(tfm_ctx->thr,
217 &tfm_ctx->bw,
218 (DUK_UNICODE_MAX_XUTF8_LENGTH >= 2 * DUK_UNICODE_MAX_CESU8_BMP_LENGTH ?
219 DUK_UNICODE_MAX_XUTF8_LENGTH : DUK_UNICODE_MAX_CESU8_BMP_LENGTH));
220
221 if (cp == (duk_codepoint_t) '%') {
222 const duk_uint8_t *p = tfm_ctx->p;
223 duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p); /* bytes left */
224
225 DUK_DDD(DUK_DDDPRINT("percent encoding, left=%ld", (long) left));
226
227 if (left < 2) {
228 goto uri_error;
229 }
230
231 t = duk__decode_hex_escape(p, 2);
232 DUK_DDD(DUK_DDDPRINT("first byte: %ld", (long) t));
233 if (t < 0) {
234 goto uri_error;
235 }
236
237 if (t < 0x80) {
238 if (DUK__CHECK_BITMASK(reserved_table, t)) {
239 /* decode '%xx' to '%xx' if decoded char in reserved set */
240 DUK_ASSERT(tfm_ctx->p - 1 >= tfm_ctx->p_start);
241 DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
242 &tfm_ctx->bw,
243 DUK_ASC_PERCENT,
244 p[0],
245 p[1]);
246 } else {
247 DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) t);
248 }
249 tfm_ctx->p += 2;
250 return;
251 }
252
253 /* Decode UTF-8 codepoint from a sequence of hex escapes. The
254 * first byte of the sequence has been decoded to 't'.
255 *
256 * Note that UTF-8 validation must be strict according to the
257 * specification: E5.1 Section 15.1.3, decode algorithm step
258 * 4.d.vii.8. URIError from non-shortest encodings is also
259 * specifically noted in the spec.
260 */
261
262 DUK_ASSERT(t >= 0x80);
263 if (t < 0xc0) {
264 /* continuation byte */
265 goto uri_error;
266 } else if (t < 0xe0) {
267 /* 110x xxxx; 2 bytes */
268 utf8_blen = 2;
269 min_cp = 0x80L;
270 cp = t & 0x1f;
271 } else if (t < 0xf0) {
272 /* 1110 xxxx; 3 bytes */
273 utf8_blen = 3;
274 min_cp = 0x800L;
275 cp = t & 0x0f;
276 } else if (t < 0xf8) {
277 /* 1111 0xxx; 4 bytes */
278 utf8_blen = 4;
279 min_cp = 0x10000L;
280 cp = t & 0x07;
281 } else {
282 /* extended utf-8 not allowed for URIs */
283 goto uri_error;
284 }
285
286 if (left < utf8_blen * 3 - 1) {
287 /* '%xx%xx...%xx', p points to char after first '%' */
288 goto uri_error;
289 }
290
291 p += 3;
292 for (i = 1; i < utf8_blen; i++) {
293 /* p points to digit part ('%xy', p points to 'x') */
294 t = duk__decode_hex_escape(p, 2);
295 DUK_DDD(DUK_DDDPRINT("i=%ld utf8_blen=%ld cp=%ld t=0x%02lx",
296 (long) i, (long) utf8_blen, (long) cp, (unsigned long) t));
297 if (t < 0) {
298 goto uri_error;
299 }
300 if ((t & 0xc0) != 0x80) {
301 goto uri_error;
302 }
303 cp = (cp << 6) + (t & 0x3f);
304 p += 3;
305 }
306 p--; /* p overshoots */
307 tfm_ctx->p = p;
308
309 DUK_DDD(DUK_DDDPRINT("final cp=%ld, min_cp=%ld", (long) cp, (long) min_cp));
310
311 if (cp < min_cp || cp > 0x10ffffL || (cp >= 0xd800L && cp <= 0xdfffL)) {
312 goto uri_error;
313 }
314
315 /* The E5.1 algorithm checks whether or not a decoded codepoint
316 * is below 0x80 and perhaps may be in the "reserved" set.
317 * This seems pointless because the single byte UTF-8 case is
318 * handled separately, and non-shortest encodings are rejected.
319 * So, 'cp' cannot be below 0x80 here, and thus cannot be in
320 * the reserved set.
321 */
322
323 /* utf-8 validation ensures these */
324 DUK_ASSERT(cp >= 0x80L && cp <= 0x10ffffL);
325
326 if (cp >= 0x10000L) {
327 cp -= 0x10000L;
328 DUK_ASSERT(cp < 0x100000L);
329
330 DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp >> 10) + 0xd800L));
331 DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, ((cp & 0x03ffL) + 0xdc00L));
332 } else {
333 DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
334 }
335 } else {
336 DUK_BW_WRITE_RAW_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
337 }
338 return;
339
340 uri_error:
341 DUK_ERROR_URI(tfm_ctx->thr, DUK_STR_INVALID_INPUT);
342 DUK_WO_NORETURN(return;);
343 }
344
345 #if defined(DUK_USE_SECTION_B)
duk__transform_callback_escape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)346 DUK_LOCAL void duk__transform_callback_escape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
347 DUK_UNREF(udata);
348
349 DUK_BW_ENSURE(tfm_ctx->thr, &tfm_ctx->bw, 6);
350
351 if (cp < 0) {
352 goto esc_error;
353 } else if ((cp < 0x80L) && DUK__CHECK_BITMASK(duk__escape_unescaped_table, cp)) {
354 DUK_BW_WRITE_RAW_U8(tfm_ctx->thr, &tfm_ctx->bw, (duk_uint8_t) cp);
355 } else if (cp < 0x100L) {
356 DUK_BW_WRITE_RAW_U8_3(tfm_ctx->thr,
357 &tfm_ctx->bw,
358 (duk_uint8_t) DUK_ASC_PERCENT,
359 (duk_uint8_t) duk_uc_nybbles[cp >> 4],
360 (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
361 } else if (cp < 0x10000L) {
362 DUK_BW_WRITE_RAW_U8_6(tfm_ctx->thr,
363 &tfm_ctx->bw,
364 (duk_uint8_t) DUK_ASC_PERCENT,
365 (duk_uint8_t) DUK_ASC_LC_U,
366 (duk_uint8_t) duk_uc_nybbles[cp >> 12],
367 (duk_uint8_t) duk_uc_nybbles[(cp >> 8) & 0x0f],
368 (duk_uint8_t) duk_uc_nybbles[(cp >> 4) & 0x0f],
369 (duk_uint8_t) duk_uc_nybbles[cp & 0x0f]);
370 } else {
371 /* Characters outside BMP cannot be escape()'d. We could
372 * encode them as surrogate pairs (for codepoints inside
373 * valid UTF-8 range, but not extended UTF-8). Because
374 * escape() and unescape() are legacy functions, we don't.
375 */
376 goto esc_error;
377 }
378
379 return;
380
381 esc_error:
382 DUK_ERROR_TYPE(tfm_ctx->thr, DUK_STR_INVALID_INPUT);
383 DUK_WO_NORETURN(return;);
384 }
385
duk__transform_callback_unescape(duk__transform_context * tfm_ctx,const void * udata,duk_codepoint_t cp)386 DUK_LOCAL void duk__transform_callback_unescape(duk__transform_context *tfm_ctx, const void *udata, duk_codepoint_t cp) {
387 duk_small_int_t t;
388
389 DUK_UNREF(udata);
390
391 if (cp == (duk_codepoint_t) '%') {
392 const duk_uint8_t *p = tfm_ctx->p;
393 duk_size_t left = (duk_size_t) (tfm_ctx->p_end - p); /* bytes left */
394
395 if (left >= 5 && p[0] == 'u' &&
396 ((t = duk__decode_hex_escape(p + 1, 4)) >= 0)) {
397 cp = (duk_codepoint_t) t;
398 tfm_ctx->p += 5;
399 } else if (left >= 2 &&
400 ((t = duk__decode_hex_escape(p, 2)) >= 0)) {
401 cp = (duk_codepoint_t) t;
402 tfm_ctx->p += 2;
403 }
404 }
405
406 DUK_BW_WRITE_ENSURE_XUTF8(tfm_ctx->thr, &tfm_ctx->bw, cp);
407 }
408 #endif /* DUK_USE_SECTION_B */
409
410 /*
411 * Eval
412 *
413 * Eval needs to handle both a "direct eval" and an "indirect eval".
414 * Direct eval handling needs access to the caller's activation so that its
415 * lexical environment can be accessed. A direct eval is only possible from
416 * ECMAScript code; an indirect eval call is possible also from C code.
417 * When an indirect eval call is made from C code, there may not be a
418 * calling activation at all which needs careful handling.
419 */
420
/* Global eval() built-in.
 *
 * Compiles the source string at value stack index 0 as eval code and runs
 * it.  If argument 0 is not a plain (non-Symbol) string, it is returned
 * without evaluation.  For a direct eval the code runs with the caller's
 * 'this' binding and environments (or a fresh declarative environment
 * chained to the caller's lexical environment if the compiled code is
 * strict); for an indirect eval both the environments and 'this' are the
 * global ones.
 *
 * Always returns 1 (one return value on the value stack).
 */
DUK_INTERNAL duk_ret_t duk_bi_global_object_eval(duk_hthread *thr) {
	duk_hstring *h;
	duk_activation *act_caller;
	duk_activation *act_eval;
	duk_hcompfunc *func;
	duk_hobject *outer_lex_env;
	duk_hobject *outer_var_env;
	duk_bool_t this_to_global = 1;
	duk_small_uint_t comp_flags;
	duk_int_t level = -2;
	duk_small_uint_t call_flags;

	DUK_ASSERT(duk_get_top(thr) == 1 || duk_get_top(thr) == 2);  /* 2 when called by debugger */
	DUK_ASSERT(thr->callstack_top >= 1);  /* at least this function exists */
	DUK_ASSERT(thr->callstack_curr != NULL);
	DUK_ASSERT((thr->callstack_curr->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0 || /* indirect eval */
	           (thr->callstack_top >= 2));  /* if direct eval, calling activation must exist */

	/*
	 *  callstack_top - 1 --> this function
	 *  callstack_top - 2 --> caller (may not exist)
	 *
	 *  If called directly from C, callstack_top might be 1.  If calling
	 *  activation doesn't exist, call must be indirect.
	 */

	h = duk_get_hstring_notsymbol(thr, 0);
	if (!h) {
		/* Symbol must be returned as is, like any non-string values. */
		return 1;  /* return arg as-is */
	}

#if defined(DUK_USE_DEBUGGER_SUPPORT)
	/* NOTE: level is used only by the debugger and should never be present
	 * for an ECMAScript eval().
	 */
	DUK_ASSERT(level == -2);  /* by default, use caller's environment */
	if (duk_get_top(thr) >= 2 && duk_is_number(thr, 1)) {
		level = duk_get_int(thr, 1);
	}
	DUK_ASSERT(level <= -2);  /* This is guaranteed by debugger code. */
#endif

	/* [ source ] */

	comp_flags = DUK_COMPILE_EVAL;
	act_eval = thr->callstack_curr;  /* this function */
	DUK_ASSERT(act_eval != NULL);
	act_caller = duk_hthread_get_activation_for_level(thr, level);
	if (act_caller != NULL) {
		/* Have a calling activation, check for direct eval (otherwise
		 * assume indirect eval).
		 */
		if ((act_caller->flags & DUK_ACT_FLAG_STRICT) &&
		    (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL)) {
			/* Only direct eval inherits strictness from calling code
			 * (E5.1 Section 10.1.1).
			 */
			comp_flags |= DUK_COMPILE_STRICT;
		}
	} else {
		/* No calling activation: the eval must be indirect. */
		DUK_ASSERT((act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) == 0);
	}

	/* Push the filename argument for the compiler, then compile the
	 * source into a function template.
	 */
	duk_push_hstring_stridx(thr, DUK_STRIDX_INPUT);  /* XXX: copy from caller? */
	duk_js_compile(thr,
	               (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h),
	               (duk_size_t) DUK_HSTRING_GET_BYTELEN(h),
	               comp_flags);
	func = (duk_hcompfunc *) duk_known_hobject(thr, -1);
	DUK_ASSERT(DUK_HOBJECT_IS_COMPFUNC((duk_hobject *) func));

	/* [ source template ] */

	/* E5 Section 10.4.2 */

	if (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) {
		DUK_ASSERT(thr->callstack_top >= 2);
		DUK_ASSERT(act_caller != NULL);
		if (act_caller->lex_env == NULL) {
			DUK_ASSERT(act_caller->var_env == NULL);
			DUK_DDD(DUK_DDDPRINT("delayed environment initialization"));

			/* NOTE(review): the call below may have side effects;
			 * act_caller is used afterwards without being looked up
			 * again -- confirm the pointer stays valid.
			 */
			duk_js_init_activation_environment_records_delayed(thr, act_caller);
		}
		DUK_ASSERT(act_caller->lex_env != NULL);
		DUK_ASSERT(act_caller->var_env != NULL);

		this_to_global = 0;

		if (DUK_HOBJECT_HAS_STRICT((duk_hobject *) func)) {
			duk_hdecenv *new_env;
			duk_hobject *act_lex_env;

			DUK_DDD(DUK_DDDPRINT("direct eval call to a strict function -> "
			                     "var_env and lex_env to a fresh env, "
			                     "this_binding to caller's this_binding"));

			act_lex_env = act_caller->lex_env;

			new_env = duk_hdecenv_alloc(thr,
			                            DUK_HOBJECT_FLAG_EXTENSIBLE |
			                            DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_DECENV));
			DUK_ASSERT(new_env != NULL);
			duk_push_hobject(thr, (duk_hobject *) new_env);

			/* Chain the fresh environment to the caller's lexical
			 * environment.
			 */
			DUK_ASSERT(DUK_HOBJECT_GET_PROTOTYPE(thr->heap, (duk_hobject *) new_env) == NULL);
			DUK_HOBJECT_SET_PROTOTYPE(thr->heap, (duk_hobject *) new_env, act_lex_env);
			DUK_HOBJECT_INCREF_ALLOWNULL(thr, act_lex_env);
			DUK_DDD(DUK_DDDPRINT("new_env allocated: %!iO", (duk_heaphdr *) new_env));

			outer_lex_env = (duk_hobject *) new_env;
			outer_var_env = (duk_hobject *) new_env;

			duk_insert(thr, 0);  /* stash to bottom of value stack to keep new_env reachable for duration of eval */

			/* compiler's responsibility */
			DUK_ASSERT(DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
		} else {
			DUK_DDD(DUK_DDDPRINT("direct eval call to a non-strict function -> "
			                     "var_env and lex_env to caller's envs, "
			                     "this_binding to caller's this_binding"));

			outer_lex_env = act_caller->lex_env;
			outer_var_env = act_caller->var_env;

			/* compiler's responsibility */
			DUK_ASSERT(!DUK_HOBJECT_HAS_NEWENV((duk_hobject *) func));
		}
	} else {
		DUK_DDD(DUK_DDDPRINT("indirect eval call -> var_env and lex_env to "
		                     "global object, this_binding to global object"));

		this_to_global = 1;
		outer_lex_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
		outer_var_env = thr->builtins[DUK_BIDX_GLOBAL_ENV];
	}

	/* Eval code doesn't need an automatic .prototype object. */
	duk_js_push_closure(thr, func, outer_var_env, outer_lex_env, 0 /*add_auto_proto*/);

	/* [ env? source template closure ] */

	/* Push the 'this' binding for the eval code. */
	if (this_to_global) {
		DUK_ASSERT(thr->builtins[DUK_BIDX_GLOBAL] != NULL);
		duk_push_hobject_bidx(thr, DUK_BIDX_GLOBAL);
	} else {
		duk_tval *tv;
		DUK_ASSERT(thr->callstack_top >= 2);
		DUK_ASSERT(act_caller != NULL);
		tv = (duk_tval *) (void *) ((duk_uint8_t *) thr->valstack + act_caller->bottom_byteoff - sizeof(duk_tval));  /* this is just beneath bottom */
		DUK_ASSERT(tv >= thr->valstack);
		duk_push_tval(thr, tv);
	}

	DUK_DDD(DUK_DDDPRINT("eval -> lex_env=%!iO, var_env=%!iO, this_binding=%!T",
	                     (duk_heaphdr *) outer_lex_env,
	                     (duk_heaphdr *) outer_var_env,
	                     duk_get_tval(thr, -1)));

	/* [ env? source template closure this ] */

	call_flags = 0;
	if (act_eval->flags & DUK_ACT_FLAG_DIRECT_EVAL) {
		/* Set DIRECT_EVAL flag for the call; it's not strictly
		 * needed for the 'inner' eval call (the eval body) but
		 * current new.target implementation expects to find it
		 * so it can traverse direct eval chains up to the real
		 * calling function.
		 */
		call_flags |= DUK_CALL_FLAG_DIRECT_EVAL;
	}
	duk_handle_call_unprotected_nargs(thr, 0, call_flags);

	/* [ env? source template result ] */

	return 1;
}
600
601 /*
602 * Parsing of ints and floats
603 */
604
605 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_parse_int(duk_hthread * thr)606 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_int(duk_hthread *thr) {
607 duk_int32_t radix;
608 duk_small_uint_t s2n_flags;
609
610 DUK_ASSERT_TOP(thr, 2);
611 duk_to_string(thr, 0); /* Reject symbols. */
612
613 radix = duk_to_int32(thr, 1);
614
615 /* While parseInt() recognizes 0xdeadbeef, it doesn't recognize
616 * ES2015 0o123 or 0b10001.
617 */
618 s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
619 DUK_S2N_FLAG_ALLOW_GARBAGE |
620 DUK_S2N_FLAG_ALLOW_PLUS |
621 DUK_S2N_FLAG_ALLOW_MINUS |
622 DUK_S2N_FLAG_ALLOW_LEADING_ZERO |
623 DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
624
625 /* Specification stripPrefix maps to DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT.
626 *
627 * Don't autodetect octals (from leading zeroes), require user code to
628 * provide an explicit radix 8 for parsing octal. See write-up from Mozilla:
629 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/parseInt#ECMAScript_5_Removes_Octal_Interpretation
630 */
631
632 if (radix != 0) {
633 if (radix < 2 || radix > 36) {
634 goto ret_nan;
635 }
636 if (radix != 16) {
637 s2n_flags &= ~DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;
638 }
639 } else {
640 radix = 10;
641 }
642
643 duk_dup_0(thr);
644 duk_numconv_parse(thr, (duk_small_int_t) radix, s2n_flags);
645 return 1;
646
647 ret_nan:
648 duk_push_nan(thr);
649 return 1;
650 }
651 #endif /* DUK_USE_GLOBAL_BUILTIN */
652
653 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_parse_float(duk_hthread * thr)654 DUK_INTERNAL duk_ret_t duk_bi_global_object_parse_float(duk_hthread *thr) {
655 duk_small_uint_t s2n_flags;
656
657 DUK_ASSERT_TOP(thr, 1);
658 duk_to_string(thr, 0); /* Reject symbols. */
659
660 /* XXX: check flags */
661 s2n_flags = DUK_S2N_FLAG_TRIM_WHITE |
662 DUK_S2N_FLAG_ALLOW_EXP |
663 DUK_S2N_FLAG_ALLOW_GARBAGE |
664 DUK_S2N_FLAG_ALLOW_PLUS |
665 DUK_S2N_FLAG_ALLOW_MINUS |
666 DUK_S2N_FLAG_ALLOW_INF |
667 DUK_S2N_FLAG_ALLOW_FRAC |
668 DUK_S2N_FLAG_ALLOW_NAKED_FRAC |
669 DUK_S2N_FLAG_ALLOW_EMPTY_FRAC |
670 DUK_S2N_FLAG_ALLOW_LEADING_ZERO;
671
672 duk_numconv_parse(thr, 10 /*radix*/, s2n_flags);
673 return 1;
674 }
675 #endif /* DUK_USE_GLOBAL_BUILTIN */
676
677 /*
678 * Number checkers
679 */
680
681 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_is_nan(duk_hthread * thr)682 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_nan(duk_hthread *thr) {
683 duk_double_t d = duk_to_number(thr, 0);
684 duk_push_boolean(thr, (duk_bool_t) DUK_ISNAN(d));
685 return 1;
686 }
687 #endif /* DUK_USE_GLOBAL_BUILTIN */
688
689 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_is_finite(duk_hthread * thr)690 DUK_INTERNAL duk_ret_t duk_bi_global_object_is_finite(duk_hthread *thr) {
691 duk_double_t d = duk_to_number(thr, 0);
692 duk_push_boolean(thr, (duk_bool_t) DUK_ISFINITE(d));
693 return 1;
694 }
695 #endif /* DUK_USE_GLOBAL_BUILTIN */
696
697 /*
698 * URI handling
699 */
700
701 #if defined(DUK_USE_GLOBAL_BUILTIN)
duk_bi_global_object_decode_uri(duk_hthread * thr)702 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri(duk_hthread *thr) {
703 return duk__transform_helper(thr, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_reserved_table);
704 }
705
duk_bi_global_object_decode_uri_component(duk_hthread * thr)706 DUK_INTERNAL duk_ret_t duk_bi_global_object_decode_uri_component(duk_hthread *thr) {
707 return duk__transform_helper(thr, duk__transform_callback_decode_uri, (const void *) duk__decode_uri_component_reserved_table);
708 }
709
duk_bi_global_object_encode_uri(duk_hthread * thr)710 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri(duk_hthread *thr) {
711 return duk__transform_helper(thr, duk__transform_callback_encode_uri, (const void *) duk__encode_uriunescaped_table);
712 }
713
duk_bi_global_object_encode_uri_component(duk_hthread * thr)714 DUK_INTERNAL duk_ret_t duk_bi_global_object_encode_uri_component(duk_hthread *thr) {
715 return duk__transform_helper(thr, duk__transform_callback_encode_uri, (const void *) duk__encode_uricomponent_unescaped_table);
716 }
717
718 #if defined(DUK_USE_SECTION_B)
duk_bi_global_object_escape(duk_hthread * thr)719 DUK_INTERNAL duk_ret_t duk_bi_global_object_escape(duk_hthread *thr) {
720 return duk__transform_helper(thr, duk__transform_callback_escape, (const void *) NULL);
721 }
722
duk_bi_global_object_unescape(duk_hthread * thr)723 DUK_INTERNAL duk_ret_t duk_bi_global_object_unescape(duk_hthread *thr) {
724 return duk__transform_helper(thr, duk__transform_callback_unescape, (const void *) NULL);
725 }
726 #endif /* DUK_USE_SECTION_B */
727 #endif /* DUK_USE_GLOBAL_BUILTIN */
728