1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "util/Text.h"
8 
9 #include "mozilla/Assertions.h"
10 #include "mozilla/Maybe.h"
11 #include "mozilla/PodOperations.h"
12 #include "mozilla/Utf8.h"
13 
14 #include <stddef.h>
15 #include <stdint.h>
16 
17 #include "gc/GC.h"
18 #include "js/GCAPI.h"
19 #include "util/Unicode.h"
20 #include "vm/JSContext.h"
21 #include "vm/StringType.h"
22 
23 using namespace JS;
24 using namespace js;
25 
26 using js::gc::AutoSuppressGC;
27 using mozilla::DecodeOneUtf8CodePoint;
28 using mozilla::IsAscii;
29 using mozilla::Maybe;
30 using mozilla::PodCopy;
31 using mozilla::Utf8Unit;
32 
// Scan the half-open range [s, limit) for the first code unit that compares
// equal to |c|.  Returns a pointer to that unit, or nullptr if the range holds
// no match (which includes the empty range).
template <typename CharT>
const CharT* js_strchr_limit(const CharT* s, char16_t c, const CharT* limit) {
  for (const CharT* cursor = s; cursor < limit; ++cursor) {
    if (*cursor == c) {
      return cursor;
    }
  }
  return nullptr;
}
43 
// Explicit instantiations of js_strchr_limit: this file emits the Latin1Char
// and char16_t versions of the template defined above.
template const Latin1Char* js_strchr_limit(const Latin1Char* s, char16_t c,
                                           const Latin1Char* limit);

template const char16_t* js_strchr_limit(const char16_t* s, char16_t c,
                                         const char16_t* limit);
49 
// Write the NUL-terminated char16_t string |s| to |f|, one code unit at a time
// through fputwc.  The terminator itself is not written.  Returns 1 once every
// unit has been written, or WEOF (converted to int32_t) as soon as fputwc
// reports failure.
int32_t js_fputs(const char16_t* s, FILE* f) {
  for (const char16_t* unit = s; *unit != 0; ++unit) {
    if (fputwc(wchar_t(*unit), f) == WEOF) {
      return WEOF;
    }
  }
  return 1;
}
59 
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char * s)60 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx,
61                                        const char* s) {
62   return DuplicateStringToArena(destArenaId, cx, s, strlen(s));
63 }
64 
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char * s,size_t n)65 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, JSContext* cx,
66                                        const char* s, size_t n) {
67   auto ret = cx->make_pod_arena_array<char>(destArenaId, n + 1);
68   if (!ret) {
69     return nullptr;
70   }
71   PodCopy(ret.get(), s, n);
72   ret[n] = '\0';
73   return ret;
74 }
75 
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char16_t * s)76 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
77                                               JSContext* cx,
78                                               const char16_t* s) {
79   return DuplicateStringToArena(destArenaId, cx, s, js_strlen(s));
80 }
81 
DuplicateStringToArena(arena_id_t destArenaId,JSContext * cx,const char16_t * s,size_t n)82 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
83                                               JSContext* cx, const char16_t* s,
84                                               size_t n) {
85   auto ret = cx->make_pod_arena_array<char16_t>(destArenaId, n + 1);
86   if (!ret) {
87     return nullptr;
88   }
89   PodCopy(ret.get(), s, n);
90   ret[n] = '\0';
91   return ret;
92 }
93 
DuplicateStringToArena(arena_id_t destArenaId,const char * s)94 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s) {
95   return DuplicateStringToArena(destArenaId, s, strlen(s));
96 }
97 
DuplicateStringToArena(arena_id_t destArenaId,const char * s,size_t n)98 UniqueChars js::DuplicateStringToArena(arena_id_t destArenaId, const char* s,
99                                        size_t n) {
100   UniqueChars ret(js_pod_arena_malloc<char>(destArenaId, n + 1));
101   if (!ret) {
102     return nullptr;
103   }
104   PodCopy(ret.get(), s, n);
105   ret[n] = '\0';
106   return ret;
107 }
108 
DuplicateStringToArena(arena_id_t destArenaId,const char16_t * s)109 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
110                                               const char16_t* s) {
111   return DuplicateStringToArena(destArenaId, s, js_strlen(s));
112 }
113 
DuplicateStringToArena(arena_id_t destArenaId,const char16_t * s,size_t n)114 UniqueTwoByteChars js::DuplicateStringToArena(arena_id_t destArenaId,
115                                               const char16_t* s, size_t n) {
116   UniqueTwoByteChars ret(js_pod_arena_malloc<char16_t>(destArenaId, n + 1));
117   if (!ret) {
118     return nullptr;
119   }
120   PodCopy(ret.get(), s, n);
121   ret[n] = '\0';
122   return ret;
123 }
124 
DuplicateString(JSContext * cx,const char * s,size_t n)125 UniqueChars js::DuplicateString(JSContext* cx, const char* s, size_t n) {
126   return DuplicateStringToArena(js::MallocArena, cx, s, n);
127 }
128 
DuplicateString(JSContext * cx,const char * s)129 UniqueChars js::DuplicateString(JSContext* cx, const char* s) {
130   return DuplicateStringToArena(js::MallocArena, cx, s);
131 }
132 
DuplicateString(JSContext * cx,const char16_t * s)133 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s) {
134   return DuplicateStringToArena(js::MallocArena, cx, s);
135 }
136 
DuplicateString(JSContext * cx,const char16_t * s,size_t n)137 UniqueTwoByteChars js::DuplicateString(JSContext* cx, const char16_t* s,
138                                        size_t n) {
139   return DuplicateStringToArena(js::MallocArena, cx, s, n);
140 }
141 
DuplicateString(const char * s)142 UniqueChars js::DuplicateString(const char* s) {
143   return DuplicateStringToArena(js::MallocArena, s);
144 }
145 
DuplicateString(const char * s,size_t n)146 UniqueChars js::DuplicateString(const char* s, size_t n) {
147   return DuplicateStringToArena(js::MallocArena, s, n);
148 }
149 
DuplicateString(const char16_t * s)150 UniqueTwoByteChars js::DuplicateString(const char16_t* s) {
151   return DuplicateStringToArena(js::MallocArena, s);
152 }
153 
DuplicateString(const char16_t * s,size_t n)154 UniqueTwoByteChars js::DuplicateString(const char16_t* s, size_t n) {
155   return DuplicateStringToArena(js::MallocArena, s, n);
156 }
157 
InflateString(JSContext * cx,const char * bytes,size_t length)158 char16_t* js::InflateString(JSContext* cx, const char* bytes, size_t length) {
159   char16_t* chars = cx->pod_malloc<char16_t>(length + 1);
160   if (!chars) {
161     return nullptr;
162   }
163   CopyAndInflateChars(chars, bytes, length);
164   chars[length] = '\0';
165   return chars;
166 }
167 
168 /*
169  * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
170  * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
171  */
OneUcs4ToUtf8Char(uint8_t * utf8Buffer,uint32_t ucs4Char)172 uint32_t js::OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char) {
173   MOZ_ASSERT(ucs4Char <= unicode::NonBMPMax);
174 
175   if (ucs4Char < 0x80) {
176     utf8Buffer[0] = uint8_t(ucs4Char);
177     return 1;
178   }
179 
180   uint32_t a = ucs4Char >> 11;
181   uint32_t utf8Length = 2;
182   while (a) {
183     a >>= 5;
184     utf8Length++;
185   }
186 
187   MOZ_ASSERT(utf8Length <= 4);
188 
189   uint32_t i = utf8Length;
190   while (--i) {
191     utf8Buffer[i] = uint8_t((ucs4Char & 0x3F) | 0x80);
192     ucs4Char >>= 6;
193   }
194 
195   utf8Buffer[0] = uint8_t(0x100 - (1 << (8 - utf8Length)) + ucs4Char);
196   return utf8Length;
197 }
198 
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,JSLinearString * str,uint32_t quote)199 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
200                                 GenericPrinter* out, JSLinearString* str,
201                                 uint32_t quote) {
202   size_t len = str->length();
203   AutoCheckCannotGC nogc;
204   return str->hasLatin1Chars()
205              ? PutEscapedStringImpl(buffer, bufferSize, out,
206                                     str->latin1Chars(nogc), len, quote)
207              : PutEscapedStringImpl(buffer, bufferSize, out,
208                                     str->twoByteChars(nogc), len, quote);
209 }
210 
211 template <typename CharT>
PutEscapedStringImpl(char * buffer,size_t bufferSize,GenericPrinter * out,const CharT * chars,size_t length,uint32_t quote)212 size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
213                                 GenericPrinter* out, const CharT* chars,
214                                 size_t length, uint32_t quote) {
215   enum {
216     STOP,
217     FIRST_QUOTE,
218     LAST_QUOTE,
219     CHARS,
220     ESCAPE_START,
221     ESCAPE_MORE
222   } state;
223 
224   MOZ_ASSERT(quote == 0 || quote == '\'' || quote == '"');
225   MOZ_ASSERT_IF(!buffer, bufferSize == 0);
226   MOZ_ASSERT_IF(out, !buffer);
227 
228   if (bufferSize == 0) {
229     buffer = nullptr;
230   } else {
231     bufferSize--;
232   }
233 
234   const CharT* charsEnd = chars + length;
235   size_t n = 0;
236   state = FIRST_QUOTE;
237   unsigned shift = 0;
238   unsigned hex = 0;
239   unsigned u = 0;
240   char c = 0; /* to quell GCC warnings */
241 
242   for (;;) {
243     switch (state) {
244       case STOP:
245         goto stop;
246       case FIRST_QUOTE:
247         state = CHARS;
248         goto do_quote;
249       case LAST_QUOTE:
250         state = STOP;
251       do_quote:
252         if (quote == 0) {
253           continue;
254         }
255         c = (char)quote;
256         break;
257       case CHARS:
258         if (chars == charsEnd) {
259           state = LAST_QUOTE;
260           continue;
261         }
262         u = *chars++;
263         if (u < ' ') {
264           if (u != 0) {
265             const char* escape = strchr(js_EscapeMap, (int)u);
266             if (escape) {
267               u = escape[1];
268               goto do_escape;
269             }
270           }
271           goto do_hex_escape;
272         }
273         if (u < 127) {
274           if (u == quote || u == '\\') {
275             goto do_escape;
276           }
277           c = (char)u;
278         } else if (u < 0x100) {
279           goto do_hex_escape;
280         } else {
281           shift = 16;
282           hex = u;
283           u = 'u';
284           goto do_escape;
285         }
286         break;
287       do_hex_escape:
288         shift = 8;
289         hex = u;
290         u = 'x';
291       do_escape:
292         c = '\\';
293         state = ESCAPE_START;
294         break;
295       case ESCAPE_START:
296         MOZ_ASSERT(' ' <= u && u < 127);
297         c = (char)u;
298         state = ESCAPE_MORE;
299         break;
300       case ESCAPE_MORE:
301         if (shift == 0) {
302           state = CHARS;
303           continue;
304         }
305         shift -= 4;
306         u = 0xF & (hex >> shift);
307         c = (char)(u + (u < 10 ? '0' : 'A' - 10));
308         break;
309     }
310     if (buffer) {
311       MOZ_ASSERT(n <= bufferSize);
312       if (n != bufferSize) {
313         buffer[n] = c;
314       } else {
315         buffer[n] = '\0';
316         buffer = nullptr;
317       }
318     } else if (out) {
319       if (!out->put(&c, 1)) {
320         return size_t(-1);
321       }
322     }
323     n++;
324   }
325 stop:
326   if (buffer) {
327     buffer[n] = '\0';
328   }
329   return n;
330 }
331 
ContainsFlag(const char * str,const char * flag)332 bool js::ContainsFlag(const char* str, const char* flag) {
333   size_t flaglen = strlen(flag);
334   const char* index = strstr(str, flag);
335   while (index) {
336     if ((index == str || index[-1] == ',') &&
337         (index[flaglen] == 0 || index[flaglen] == ',')) {
338       return true;
339     }
340     index = strstr(index + flaglen, flag);
341   }
342   return false;
343 }
344 
// Explicit instantiations of js::PutEscapedStringImpl for Latin1Char, char and
// char16_t input.
template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
                                         GenericPrinter* out,
                                         const Latin1Char* chars, size_t length,
                                         uint32_t quote);

template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
                                         GenericPrinter* out, const char* chars,
                                         size_t length, uint32_t quote);

template size_t js::PutEscapedStringImpl(char* buffer, size_t bufferSize,
                                         GenericPrinter* out,
                                         const char16_t* chars, size_t length,
                                         uint32_t quote);

// Explicit instantiations of js::PutEscapedString (the buffer-only entry
// point, which takes no GenericPrinter) for Latin1Char and char16_t input.
template size_t js::PutEscapedString(char* buffer, size_t bufferSize,
                                     const Latin1Char* chars, size_t length,
                                     uint32_t quote);

template size_t js::PutEscapedString(char* buffer, size_t bufferSize,
                                     const char16_t* chars, size_t length,
                                     uint32_t quote);
366 
CountCodePoints(const Utf8Unit * begin,const Utf8Unit * end)367 size_t js::unicode::CountCodePoints(const Utf8Unit* begin,
368                                     const Utf8Unit* end) {
369   MOZ_ASSERT(begin <= end);
370 
371   size_t count = 0;
372   const Utf8Unit* ptr = begin;
373   while (ptr < end) {
374     count++;
375 
376     Utf8Unit lead = *ptr++;
377     if (IsAscii(lead)) {
378       continue;
379     }
380 
381 #ifdef DEBUG
382     Maybe<char32_t> cp =
383 #endif
384         DecodeOneUtf8CodePoint(lead, &ptr, end);
385     MOZ_ASSERT(cp.isSome());
386   }
387   MOZ_ASSERT(ptr == end, "bad code unit count in line?");
388 
389   return count;
390 }
391 
CountCodePoints(const char16_t * begin,const char16_t * end)392 size_t js::unicode::CountCodePoints(const char16_t* begin,
393                                     const char16_t* end) {
394   MOZ_ASSERT(begin <= end);
395 
396   size_t count = 0;
397 
398   const char16_t* ptr = begin;
399   while (ptr < end) {
400     count++;
401 
402     if (!IsLeadSurrogate(*ptr++)) {
403       continue;
404     }
405 
406     if (ptr < end && IsTrailSurrogate(*ptr)) {
407       ptr++;
408     }
409   }
410   MOZ_ASSERT(ptr == end, "should have consumed the full range");
411 
412   return count;
413 }
414