1 /*
2  *  String manipulation
3  */
4 
5 #include "duk_internal.h"
6 
duk__concat_and_join_helper(duk_context * ctx,duk_idx_t count_in,duk_bool_t is_join)7 DUK_LOCAL void duk__concat_and_join_helper(duk_context *ctx, duk_idx_t count_in, duk_bool_t is_join) {
8 	duk_hthread *thr = (duk_hthread *) ctx;
9 	duk_uint_t count;
10 	duk_uint_t i;
11 	duk_size_t idx;
12 	duk_size_t len;
13 	duk_hstring *h;
14 	duk_uint8_t *buf;
15 
16 	DUK_ASSERT_CTX_VALID(ctx);
17 
18 	if (DUK_UNLIKELY(count_in <= 0)) {
19 		if (count_in < 0) {
20 			DUK_ERROR_API(thr, DUK_STR_INVALID_COUNT);
21 			return;
22 		}
23 		DUK_ASSERT(count_in == 0);
24 		duk_push_hstring_stridx(ctx, DUK_STRIDX_EMPTY_STRING);
25 		return;
26 	}
27 	count = (duk_uint_t) count_in;
28 
29 	if (is_join) {
30 		duk_size_t t1, t2, limit;
31 		h = duk_to_hstring(ctx, -((duk_idx_t) count) - 1);
32 		DUK_ASSERT(h != NULL);
33 
34 		/* A bit tricky overflow test, see doc/code-issues.rst. */
35 		t1 = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h);
36 		t2 = (duk_size_t) (count - 1);
37 		limit = (duk_size_t) DUK_HSTRING_MAX_BYTELEN;
38 		if (DUK_UNLIKELY(t2 != 0 && t1 > limit / t2)) {
39 			/* Combined size of separators already overflows */
40 			goto error_overflow;
41 		}
42 		len = (duk_size_t) (t1 * t2);
43 	} else {
44 		len = (duk_size_t) 0;
45 	}
46 
47 	for (i = count; i >= 1; i--) {
48 		duk_size_t new_len;
49 		duk_to_string(ctx, -((duk_idx_t) i));
50 		h = duk_require_hstring(ctx, -((duk_idx_t) i));
51 		new_len = len + (duk_size_t) DUK_HSTRING_GET_BYTELEN(h);
52 
53 		/* Impose a string maximum length, need to handle overflow
54 		 * correctly.
55 		 */
56 		if (new_len < len ||  /* wrapped */
57 		    new_len > (duk_size_t) DUK_HSTRING_MAX_BYTELEN) {
58 			goto error_overflow;
59 		}
60 		len = new_len;
61 	}
62 
63 	DUK_DDD(DUK_DDDPRINT("join/concat %lu strings, total length %lu bytes",
64 	                     (unsigned long) count, (unsigned long) len));
65 
66 	/* use stack allocated buffer to ensure reachability in errors (e.g. intern error) */
67 	buf = (duk_uint8_t *) duk_push_fixed_buffer(ctx, len);
68 	DUK_ASSERT(buf != NULL);
69 
70 	/* [... (sep) str1 str2 ... strN buf] */
71 
72 	idx = 0;
73 	for (i = count; i >= 1; i--) {
74 		if (is_join && i != count) {
75 			h = duk_require_hstring(ctx, -((duk_idx_t) count) - 2);  /* extra -1 for buffer */
76 			DUK_MEMCPY(buf + idx, DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h));
77 			idx += DUK_HSTRING_GET_BYTELEN(h);
78 		}
79 		h = duk_require_hstring(ctx, -((duk_idx_t) i) - 1);  /* extra -1 for buffer */
80 		DUK_MEMCPY(buf + idx, DUK_HSTRING_GET_DATA(h), DUK_HSTRING_GET_BYTELEN(h));
81 		idx += DUK_HSTRING_GET_BYTELEN(h);
82 	}
83 
84 	DUK_ASSERT(idx == len);
85 
86 	/* [... (sep) str1 str2 ... strN buf] */
87 
88 	/* get rid of the strings early to minimize memory use before intern */
89 
90 	if (is_join) {
91 		duk_replace(ctx, -((duk_idx_t) count) - 2);  /* overwrite sep */
92 		duk_pop_n(ctx, count);
93 	} else {
94 		duk_replace(ctx, -((duk_idx_t) count) - 1);  /* overwrite str1 */
95 		duk_pop_n(ctx, count-1);
96 	}
97 
98 	/* [... buf] */
99 
100 	(void) duk_to_string(ctx, -1);
101 
102 	/* [... res] */
103 	return;
104 
105  error_overflow:
106 	DUK_ERROR_RANGE(thr, DUK_STR_CONCAT_RESULT_TOO_LONG);
107 }
108 
duk_concat(duk_context * ctx,duk_idx_t count)109 DUK_EXTERNAL void duk_concat(duk_context *ctx, duk_idx_t count) {
110 	DUK_ASSERT_CTX_VALID(ctx);
111 
112 	duk__concat_and_join_helper(ctx, count, 0 /*is_join*/);
113 }
114 
duk_join(duk_context * ctx,duk_idx_t count)115 DUK_EXTERNAL void duk_join(duk_context *ctx, duk_idx_t count) {
116 	DUK_ASSERT_CTX_VALID(ctx);
117 
118 	duk__concat_and_join_helper(ctx, count, 1 /*is_join*/);
119 }
120 
/* XXX: could map/decode be unified with the duk_unicode_support.c code?
 * Case conversion also needs the surrounding characters, though.
 */
124 
duk_decode_string(duk_context * ctx,duk_idx_t index,duk_decode_char_function callback,void * udata)125 DUK_EXTERNAL void duk_decode_string(duk_context *ctx, duk_idx_t index, duk_decode_char_function callback, void *udata) {
126 	duk_hthread *thr = (duk_hthread *) ctx;
127 	duk_hstring *h_input;
128 	const duk_uint8_t *p, *p_start, *p_end;
129 	duk_codepoint_t cp;
130 
131 	DUK_ASSERT_CTX_VALID(ctx);
132 
133 	h_input = duk_require_hstring(ctx, index);
134 	DUK_ASSERT(h_input != NULL);
135 
136 	p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_input);
137 	p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
138 	p = p_start;
139 
140 	for (;;) {
141 		if (p >= p_end) {
142 			break;
143 		}
144 		cp = (int) duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end);
145 		callback(udata, cp);
146 	}
147 }
148 
duk_map_string(duk_context * ctx,duk_idx_t index,duk_map_char_function callback,void * udata)149 DUK_EXTERNAL void duk_map_string(duk_context *ctx, duk_idx_t index, duk_map_char_function callback, void *udata) {
150 	duk_hthread *thr = (duk_hthread *) ctx;
151 	duk_hstring *h_input;
152 	duk_bufwriter_ctx bw_alloc;
153 	duk_bufwriter_ctx *bw;
154 	const duk_uint8_t *p, *p_start, *p_end;
155 	duk_codepoint_t cp;
156 
157 	DUK_ASSERT_CTX_VALID(ctx);
158 
159 	index = duk_normalize_index(ctx, index);
160 
161 	h_input = duk_require_hstring(ctx, index);
162 	DUK_ASSERT(h_input != NULL);
163 
164 	bw = &bw_alloc;
165 	DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input));  /* reasonable output estimate */
166 
167 	p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_input);
168 	p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
169 	p = p_start;
170 
171 	for (;;) {
172 		/* XXX: could write output in chunks with fewer ensure calls,
173 		 * but relative benefit would be small here.
174 		 */
175 
176 		if (p >= p_end) {
177 			break;
178 		}
179 		cp = (int) duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end);
180 		cp = callback(udata, cp);
181 
182 		DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
183 	}
184 
185 	DUK_BW_COMPACT(thr, bw);
186 	duk_to_string(ctx, -1);
187 	duk_replace(ctx, index);
188 }
189 
duk_substring(duk_context * ctx,duk_idx_t index,duk_size_t start_offset,duk_size_t end_offset)190 DUK_EXTERNAL void duk_substring(duk_context *ctx, duk_idx_t index, duk_size_t start_offset, duk_size_t end_offset) {
191 	duk_hthread *thr = (duk_hthread *) ctx;
192 	duk_hstring *h;
193 	duk_hstring *res;
194 	duk_size_t start_byte_offset;
195 	duk_size_t end_byte_offset;
196 
197 	DUK_ASSERT_CTX_VALID(ctx);
198 
199 	index = duk_require_normalize_index(ctx, index);
200 	h = duk_require_hstring(ctx, index);
201 	DUK_ASSERT(h != NULL);
202 
203 	if (end_offset >= DUK_HSTRING_GET_CHARLEN(h)) {
204 		end_offset = DUK_HSTRING_GET_CHARLEN(h);
205 	}
206 	if (start_offset > end_offset) {
207 		start_offset = end_offset;
208 	}
209 
210 	DUK_ASSERT_DISABLE(start_offset >= 0);
211 	DUK_ASSERT(start_offset <= end_offset && start_offset <= DUK_HSTRING_GET_CHARLEN(h));
212 	DUK_ASSERT_DISABLE(end_offset >= 0);
213 	DUK_ASSERT(end_offset >= start_offset && end_offset <= DUK_HSTRING_GET_CHARLEN(h));
214 
215 	/* guaranteed by string limits */
216 	DUK_ASSERT(start_offset <= DUK_UINT32_MAX);
217 	DUK_ASSERT(end_offset <= DUK_UINT32_MAX);
218 
219 	start_byte_offset = (duk_size_t) duk_heap_strcache_offset_char2byte(thr, h, (duk_uint_fast32_t) start_offset);
220 	end_byte_offset = (duk_size_t) duk_heap_strcache_offset_char2byte(thr, h, (duk_uint_fast32_t) end_offset);
221 
222 	DUK_ASSERT(end_byte_offset >= start_byte_offset);
223 	DUK_ASSERT(end_byte_offset - start_byte_offset <= DUK_UINT32_MAX);  /* guaranteed by string limits */
224 
225 	/* no size check is necessary */
226 	res = duk_heap_string_intern_checked(thr,
227 	                                     DUK_HSTRING_GET_DATA(h) + start_byte_offset,
228 	                                     (duk_uint32_t) (end_byte_offset - start_byte_offset));
229 
230 	duk_push_hstring(ctx, res);
231 	duk_replace(ctx, index);
232 }
233 
234 /* XXX: this is quite clunky.  Add Unicode helpers to scan backwards and
235  * forwards with a callback to process codepoints?
236  */
duk_trim(duk_context * ctx,duk_idx_t index)237 DUK_EXTERNAL void duk_trim(duk_context *ctx, duk_idx_t index) {
238 	duk_hthread *thr = (duk_hthread *) ctx;
239 	duk_hstring *h;
240 	const duk_uint8_t *p, *p_start, *p_end, *p_tmp1, *p_tmp2;  /* pointers for scanning */
241 	const duk_uint8_t *q_start, *q_end;  /* start (incl) and end (excl) of trimmed part */
242 	duk_codepoint_t cp;
243 
244 	DUK_ASSERT_CTX_VALID(ctx);
245 
246 	index = duk_require_normalize_index(ctx, index);
247 	h = duk_require_hstring(ctx, index);
248 	DUK_ASSERT(h != NULL);
249 
250 	p_start = DUK_HSTRING_GET_DATA(h);
251 	p_end = p_start + DUK_HSTRING_GET_BYTELEN(h);
252 
253 	p = p_start;
254 	while (p < p_end) {
255 		p_tmp1 = p;
256 		cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &p_tmp1, p_start, p_end);
257 		if (!(duk_unicode_is_whitespace(cp) || duk_unicode_is_line_terminator(cp))) {
258 			break;
259 		}
260 		p = p_tmp1;
261 	}
262 	q_start = p;
263 	if (p == p_end) {
264 		/* entire string is whitespace */
265 		q_end = p;
266 		goto scan_done;
267 	}
268 
269 	p = p_end;
270 	while (p > p_start) {
271 		p_tmp1 = p;
272 		while (p > p_start) {
273 			p--;
274 			if (((*p) & 0xc0) != 0x80) {
275 				break;
276 			}
277 		}
278 		p_tmp2 = p;
279 
280 		cp = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &p_tmp2, p_start, p_end);
281 		if (!(duk_unicode_is_whitespace(cp) || duk_unicode_is_line_terminator(cp))) {
282 			p = p_tmp1;
283 			break;
284 		}
285 	}
286 	q_end = p;
287 
288  scan_done:
289 	/* This may happen when forward and backward scanning disagree
290 	 * (possible for non-extended-UTF-8 strings).
291 	 */
292 	if (q_end < q_start) {
293 		q_end = q_start;
294 	}
295 
296 	DUK_ASSERT(q_start >= p_start && q_start <= p_end);
297 	DUK_ASSERT(q_end >= p_start && q_end <= p_end);
298 	DUK_ASSERT(q_end >= q_start);
299 
300 	DUK_DDD(DUK_DDDPRINT("trim: p_start=%p, p_end=%p, q_start=%p, q_end=%p",
301 	                     (const void *) p_start, (const void *) p_end,
302 	                     (const void *) q_start, (const void *) q_end));
303 
304 	if (q_start == p_start && q_end == p_end) {
305 		DUK_DDD(DUK_DDDPRINT("nothing was trimmed: avoid interning (hashing etc)"));
306 		return;
307 	}
308 
309 	duk_push_lstring(ctx, (const char *) q_start, (duk_size_t) (q_end - q_start));
310 	duk_replace(ctx, index);
311 }
312 
duk_char_code_at(duk_context * ctx,duk_idx_t index,duk_size_t char_offset)313 DUK_EXTERNAL duk_codepoint_t duk_char_code_at(duk_context *ctx, duk_idx_t index, duk_size_t char_offset) {
314 	duk_hthread *thr = (duk_hthread *) ctx;
315 	duk_hstring *h;
316 	duk_ucodepoint_t cp;
317 
318 	DUK_ASSERT_CTX_VALID(ctx);
319 
320 	h = duk_require_hstring(ctx, index);
321 	DUK_ASSERT(h != NULL);
322 
323 	DUK_ASSERT_DISABLE(char_offset >= 0);  /* always true, arg is unsigned */
324 	if (char_offset >= DUK_HSTRING_GET_CHARLEN(h)) {
325 		return 0;
326 	}
327 
328 	DUK_ASSERT(char_offset <= DUK_UINT_MAX);  /* guaranteed by string limits */
329 	cp = duk_hstring_char_code_at_raw(thr, h, (duk_uint_t) char_offset);
330 	return (duk_codepoint_t) cp;
331 }
332