1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "util/Text.h"
8
9 #include "mozilla/Assertions.h"
10 #include "mozilla/Maybe.h"
11 #include "mozilla/PodOperations.h"
12 #include "mozilla/Utf8.h"
13
14 #include <stddef.h>
15 #include <stdint.h>
16
17 #include "gc/GC.h"
18 #include "js/GCAPI.h"
19 #include "util/Unicode.h"
20 #include "vm/JSContext.h"
21 #include "vm/StringType.h"
22
23 using namespace JS;
24 using namespace js;
25
26 using js::gc::AutoSuppressGC;
27 using mozilla::DecodeOneUtf8CodePoint;
28 using mozilla::IsAscii;
29 using mozilla::Maybe;
30 using mozilla::PodCopy;
31 using mozilla::Utf8Unit;
32
/*
 * Search the half-open range [s, limit) for the first code unit equal to |c|.
 * Returns a pointer to that unit, or nullptr when nothing in the range
 * matches (which includes the case of an empty range).
 */
template <typename CharT>
const CharT* js_strchr_limit(const CharT* s, char16_t c, const CharT* limit) {
  for (const CharT* cursor = s; cursor < limit; ++cursor) {
    if (*cursor == c) {
      return cursor;
    }
  }
  return nullptr;
}
43
44 template const Latin1Char* js_strchr_limit(const Latin1Char* s, char16_t c,
45 const Latin1Char* limit);
46
47 template const char16_t* js_strchr_limit(const char16_t* s, char16_t c,
48 const char16_t* limit);
49
/*
 * Write the NUL-terminated char16_t string |s| to |f|, one unit at a time
 * through fputwc. Returns 1 once the terminator has been reached, or WEOF
 * (converted to int32_t) as soon as a single write fails.
 */
int32_t js_fputs(const char16_t* s, FILE* f) {
  for (const char16_t* unit = s; *unit != 0; ++unit) {
    if (fputwc(wchar_t(*unit), f) == WEOF) {
      return WEOF;
    }
  }
  return 1;
}
59
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char * s)60 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx,
61 const char* s) {
62 return DuplicateStringToArena(destArenaId, cx, s, strlen(s));
63 }
64
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char * s,size_t n)65 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx,
66 const char* s, size_t n) {
67 auto ret = cx->make_pod_arena_array<char>(destArenaId, n + 1);
68 if (!ret) {
69 return nullptr;
70 }
71 PodCopy(ret.get(), s, n);
72 ret[n] = '\0';
73 return ret;
74 }
75
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char16_t * s)76 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
77 JSContext* cx,
78 const char16_t* s) {
79 return DuplicateStringToArena(destArenaId, cx, s, js_strlen(s));
80 }
81
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char16_t * s,size_t n)82 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
83 JSContext* cx, const char16_t* s,
84 size_t n) {
85 auto ret = cx->make_pod_arena_array<char16_t>(destArenaId, n + 1);
86 if (!ret) {
87 return nullptr;
88 }
89 PodCopy(ret.get(), s, n);
90 ret[n] = '\0';
91 return ret;
92 }
93
DuplicateStringToArena(arena_id_t destArenaId,const char * s)94 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s) {
95 return DuplicateStringToArena(destArenaId, s, strlen(s));
96 }
97
DuplicateStringToArena(arena_id_t destArenaId,const char * s,size_t n)98 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s,
99 size_t n) {
100 UniqueChars ret(js_pod_arena_malloc<char>(destArenaId, n + 1));
101 if (!ret) {
102 return nullptr;
103 }
104 PodCopy(ret.get(), s, n);
105 ret[n] = '\0';
106 return ret;
107 }
108
DuplicateStringToArena(arena_id_t destArenaId,const char16_t * s)109 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
110 const char16_t* s) {
111 return DuplicateStringToArena(destArenaId, s, js_strlen(s));
112 }
113
DuplicateStringToArena(arena_id_t destArenaId,const char16_t * s,size_t n)114 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
115 const char16_t* s, size_t n) {
116 UniqueTwoByteChars ret(js_pod_arena_malloc<char16_t>(destArenaId, n + 1));
117 if (!ret) {
118 return nullptr;
119 }
120 PodCopy(ret.get(), s, n);
121 ret[n] = '\0';
122 return ret;
123 }
124
DuplicateString(JSContext * cx,const char * s,size_t n)125 UniqueChars js::DuplicateString(JSContext* cx, const char* s, size_t n) {
126 return DuplicateStringToArena(js::MallocArena, cx, s, n);
127 }
128
DuplicateString(JSContext * cx,const char * s)129 UniqueChars js::DuplicateString(JSContext* cx, const char* s) {
130 return DuplicateStringToArena(js::MallocArena, cx, s);
131 }
132
DuplicateString(JSContext * cx,const char16_t * s)133 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s) {
134 return DuplicateStringToArena(js::MallocArena, cx, s);
135 }
136
DuplicateString(JSContext * cx,const char16_t * s,size_t n)137 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s,
138 size_t n) {
139 return DuplicateStringToArena(js::MallocArena, cx, s, n);
140 }
141
DuplicateString(const char * s)142 UniqueChars js::DuplicateString(const char* s) {
143 return DuplicateStringToArena(js::MallocArena, s);
144 }
145
DuplicateString(const char * s,size_t n)146 UniqueChars js::DuplicateString(const char* s, size_t n) {
147 return DuplicateStringToArena(js::MallocArena, s, n);
148 }
149
DuplicateString(const char16_t * s)150 UniqueTwoByteChars js::DuplicateString(const char16_t* s) {
151 return DuplicateStringToArena(js::MallocArena, s);
152 }
153
DuplicateString(const char16_t * s,size_t n)154 UniqueTwoByteChars js::DuplicateString(const char16_t* s, size_t n) {
155 return DuplicateStringToArena(js::MallocArena, s, n);
156 }
157
InflateString(JSContext * cx,const char * bytes,size_t length)158 char16_t* js::InflateString(JSContext* cx, const char* bytes, size_t length) {
159 char16_t* chars = cx->pod_malloc<char16_t>(length + 1);
160 if (!chars) {
161 return nullptr;
162 }
163 CopyAndInflateChars(chars, bytes, length);
164 chars[length] = '\0';
165 return chars;
166 }
167
/*
 * Encode the single UCS-4 character |ucs4Char| as UTF-8 into |utf8Buffer|,
 * which must provide room for at least 4 bytes. Returns the number of UTF-8
 * bytes written.
 */
uint32_t js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char) {
  MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax);

  // Values below 0x80 are stored unchanged in a single byte.
  if (ucs4Char < 0x80) {
    utf8Buffer[0] = uint8_t(ucs4Char);
    return 1;
  }

  // Two bytes carry 11 payload bits; every additional byte carries 5 more.
  uint32_t utf8Length = 2;
  for (uint32_t rest = ucs4Char >> 11; rest != 0; rest >>= 5) {
    utf8Length++;
  }

  MOZ_ASSERT(utf8Length <= 4);

  // Fill the trailing bytes from last to first; each one takes the low six
  // bits still pending and is tagged with the 0x80 marker.
  uint32_t pending = ucs4Char;
  for (uint32_t i = utf8Length - 1; i > 0; i--) {
    utf8Buffer[i] = uint8_t((pending & 0x3F) | 0x80);
    pending >>= 6;
  }

  // The first byte has |utf8Length| leading one-bits, followed by whatever
  // bits are left over.
  utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + pending);
  return utf8Length;
}
198
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,JSLinearString * str,uint32_t quote)199 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
200 GenericPrinter* out, JSLinearString* str,
201 uint32_t quote) {
202 size_t len = str->length();
203 AutoCheckCannotGC nogc;
204 return str->hasLatin1Chars()
205 ? PutEscapedStringImpl(buffer, bufferSize, out,
206 str->latin1Chars(nogc), len, quote)
207 : PutEscapedStringImpl(buffer, bufferSize, out,
208 str->twoByteChars(nogc), len, quote);
209 }
210
211 template <typename CharT>
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,const CharT * chars,size_t length,uint32_t quote)212 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
213 GenericPrinter* out, const CharT* chars,
214 size_t length, uint32_t quote) {
215 enum {
216 STOP,
217 FIRST_QUOTE,
218 LAST_QUOTE,
219 CHARS,
220 ESCAPE_START,
221 ESCAPE_MORE
222 } state;
223
224 MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"');
225 MOZ_ASSERT_IF(!buffer, bufferSize == 0);
226 MOZ_ASSERT_IF(out, !buffer);
227
228 if (bufferSize == 0) {
229 buffer = nullptr;
230 } else {
231 bufferSize--;
232 }
233
234 const CharT* charsEnd = chars + length;
235 size_t n = 0;
236 state = FIRST_QUOTE;
237 unsigned shift = 0;
238 unsigned hex = 0;
239 unsigned u = 0;
240 char c = 0; /* to quell GCC warnings */
241
242 for (;;) {
243 switch (state) {
244 case STOP:
245 goto stop;
246 case FIRST_QUOTE:
247 state = CHARS;
248 goto do_quote;
249 case LAST_QUOTE:
250 state = STOP;
251 do_quote:
252 if (quote == 0) {
253 continue;
254 }
255 c = (char)quote;
256 break;
257 case CHARS:
258 if (chars == charsEnd) {
259 state = LAST_QUOTE;
260 continue;
261 }
262 u = *chars++;
263 if (u < ' ') {
264 if (u != 0) {
265 const char* escape = strchr(js_EscapeMap, (int)u);
266 if (escape) {
267 u = escape[1];
268 goto do_escape;
269 }
270 }
271 goto do_hex_escape;
272 }
273 if (u < 127) {
274 if (u == quote || u == '\\') {
275 goto do_escape;
276 }
277 c = (char)u;
278 } else if (u < 0x100) {
279 goto do_hex_escape;
280 } else {
281 shift = 16;
282 hex = u;
283 u = 'u';
284 goto do_escape;
285 }
286 break;
287 do_hex_escape:
288 shift = 8;
289 hex = u;
290 u = 'x';
291 do_escape:
292 c = '\\';
293 state = ESCAPE_START;
294 break;
295 case ESCAPE_START:
296 MOZ_ASSERT(' ' <= u && u < 127);
297 c = (char)u;
298 state = ESCAPE_MORE;
299 break;
300 case ESCAPE_MORE:
301 if (shift == 0) {
302 state = CHARS;
303 continue;
304 }
305 shift -= 4;
306 u = 0xF & (hex >> shift);
307 c = (char)(u + (u < 10 ? '0' : 'A' - 10));
308 break;
309 }
310 if (buffer) {
311 MOZ_ASSERT(n <= bufferSize);
312 if (n != bufferSize) {
313 buffer[n] = c;
314 } else {
315 buffer[n] = '\0';
316 buffer = nullptr;
317 }
318 } else if (out) {
319 if (!out->put(&c, 1)) {
320 return size_t(-1);
321 }
322 }
323 n++;
324 }
325 stop:
326 if (buffer) {
327 buffer[n] = '\0';
328 }
329 return n;
330 }
331
/*
 * Report whether |flag| appears as a complete item of the comma-separated
 * list |str|. An occurrence counts only if it begins at the start of |str| or
 * right after a ',' and ends at the end of |str| or right before a ','.
 *
 * An empty |flag| matches only an empty item (an empty |str|, a leading or
 * trailing ',', or two adjacent commas).
 */
bool js::ContainsFlag(const char* str, const char* flag) {
  const size_t flaglen = strlen(flag);

  // Resume the search after the rejected occurrence. For an empty flag that
  // would be a step of zero and the search would never advance, so always
  // move on by at least one character.
  const size_t step = flaglen != 0 ? flaglen : 1;

  for (const char* index = strstr(str, flag); index;
       index = strstr(index + step, flag)) {
    const bool startsItem = index == str || index[-1] == ',';
    const bool endsItem = index[flaglen] == '\0' || index[flaglen] == ',';
    if (startsItem && endsItem) {
      return true;
    }
    // Only an empty flag can match at the terminator; there is nothing left
    // to search after it.
    if (*index == '\0') {
      break;
    }
  }
  return false;
}
344
345 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
346 GenericPrinter* out,
347 const Latin1Char* chars, size_t length,
348 uint32_t quote);
349
350 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
351 GenericPrinter* out, const char* chars,
352 size_t length, uint32_t quote);
353
354 template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
355 GenericPrinter* out,
356 const char16_t* chars, size_t length,
357 uint32_t quote);
358
359 template size_t js::PutEscapedString(char* buffer, size_t bufferSize,
360 const Latin1Char* chars, size_t length,
361 uint32_t quote);
362
363 template size_t js::PutEscapedString(char* buffer, size_t bufferSize,
364 const char16_t* chars, size_t length,
365 uint32_t quote);
366
CountCodePoints(const Utf8Unit * begin,const Utf8Unit * end)367 size_t js::unicode::CountCodePoints(const Utf8Unit* begin,
368 const Utf8Unit* end) {
369 MOZ_ASSERT(begin <= end);
370
371 size_t count = 0;
372 const Utf8Unit* ptr = begin;
373 while (ptr < end) {
374 count++;
375
376 Utf8Unit lead = *ptr++;
377 if (IsAscii(lead)) {
378 continue;
379 }
380
381 #ifdef DEBUG
382 Maybe<char32_t> cp =
383 #endif
384 DecodeOneUtf8CodePoint(lead, &ptr, end);
385 MOZ_ASSERT(cp.isSome());
386 }
387 MOZ_ASSERT(ptr == end, "bad code unit count in line?");
388
389 return count;
390 }
391
// Count the code points encoded by the UTF-16 code units in [begin, end).
// A lead surrogate directly followed by a trail surrogate counts as a single
// code point; every other unit, including an unpaired surrogate, counts as
// one on its own.
size_t js::unicode::CountCodePoints(const char16_t* begin,
                                    const char16_t* end) {
  MOZ_ASSERT(begin <= end);

  size_t codePoints = 0;
  const char16_t* cursor = begin;
  while (cursor < end) {
    const char16_t unit = *cursor++;
    codePoints++;

    // Swallow the second half of a surrogate pair so it is not counted again.
    if (IsLeadSurrogate(unit) && cursor < end && IsTrailSurrogate(*cursor)) {
      cursor++;
    }
  }
  MOZ_ASSERT(cursor == end, "should have consumed the full range");

  return codePoints;
}
414