1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef jsstr_h
8 #define jsstr_h
9 
10 #include "mozilla/HashFunctions.h"
11 #include "mozilla/PodOperations.h"
12 #include "mozilla/UniquePtr.h"
13 
14 #include "jsutil.h"
15 #include "NamespaceImports.h"
16 
17 #include "gc/Rooting.h"
18 #include "js/RootingAPI.h"
19 #include "vm/Printer.h"
20 #include "vm/Unicode.h"
21 
22 class JSAutoByteString;
23 class JSLinearString;
24 
25 namespace js {
26 
27 class StringBuffer;
28 
29 template <AllowGC allowGC>
30 extern JSString*
31 ConcatStrings(ExclusiveContext* cx,
32               typename MaybeRooted<JSString*, allowGC>::HandleType left,
33               typename MaybeRooted<JSString*, allowGC>::HandleType right);
34 
35 // Return s advanced past any Unicode white space characters.
36 template <typename CharT>
37 static inline const CharT*
SkipSpace(const CharT * s,const CharT * end)38 SkipSpace(const CharT* s, const CharT* end)
39 {
40     MOZ_ASSERT(s <= end);
41 
42     while (s < end && unicode::IsSpace(*s))
43         s++;
44 
45     return s;
46 }
47 
// Return less than, equal to, or greater than zero depending on whether
// s1 is less than, equal to, or greater than s2.
//
// The first min(len1, len2) code units are compared pairwise; the first
// non-zero difference s1[i] - s2[i] is returned. If that common prefix is
// identical, the shorter sequence orders first.
//
// NOTE: the length difference is computed in size_t and then narrowed to
// int32_t, so its sign is only meaningful while the two lengths differ by
// less than 2^31.
template <typename Char1, typename Char2>
inline int32_t
CompareChars(const Char1* s1, size_t len1, const Char2* s2, size_t len2)
{
    // Length of the prefix both sequences have in common.
    size_t n = len1 < len2 ? len1 : len2;
    for (size_t i = 0; i < n; i++) {
        if (int32_t cmp = s1[i] - s2[i])
            return cmp;
    }

    return int32_t(len1 - len2);
}
62 
63 extern int32_t
64 CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2);
65 
66 }  /* namespace js */
67 
68 struct JSSubString {
69     JSLinearString* base;
70     size_t          offset;
71     size_t          length;
72 
JSSubStringJSSubString73     JSSubString() { mozilla::PodZero(this); }
74 
initEmptyJSSubString75     void initEmpty(JSLinearString* base) {
76         this->base = base;
77         offset = length = 0;
78     }
initJSSubString79     void init(JSLinearString* base, size_t offset, size_t length) {
80         this->base = base;
81         this->offset = offset;
82         this->length = length;
83     }
84 };
85 
/*
 * Shorthands for ASCII (7-bit) decimal and hex conversion.
 * Manually inline isdigit for performance; MSVC doesn't do this for us.
 *
 * JS7_ISDEC / JS7_ISOCT use unsigned wraparound: anything below '0' becomes
 * a huge unsigned value and fails the single <= comparison.
 * JS7_UNDEC / JS7_UNOCT / JS7_UNHEX do not validate their argument; check it
 * with the matching JS7_IS* macro first.
 * JS7_ISHEX / JS7_ISLET compare |c| as unsigned so that negative values are
 * rejected before they can reach isxdigit()/isalpha(), whose behavior is
 * undefined for such arguments.
 * Several of these macros evaluate |c| more than once; do not pass an
 * expression with side effects.
 */
#define JS7_ISDEC(c)    ((((unsigned)(c)) - '0') <= 9)
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISOCT(c)    ((((unsigned)(c)) - '0') <= 7)
#define JS7_UNOCT(c)    (JS7_UNDEC(c))
#define JS7_ISHEX(c)    (((unsigned)(c)) < 128 && isxdigit(c))
#define JS7_UNHEX(c)    ((unsigned)(JS7_ISDEC(c) ? (c) - '0' : 10 + tolower(c) - 'a'))
#define JS7_ISLET(c)    (((unsigned)(c)) < 128 && isalpha(c))
97 
/*
 * char16_t string helpers. Declarations only; the definitions are not part
 * of this header.
 * NOTE(review): js_strlen and js_strcmp presumably mirror strlen/strcmp for
 * null-terminated char16_t strings, and js_strchr_limit presumably searches
 * [s, limit) for |c| -- confirm against the definitions.
 */
extern size_t
js_strlen(const char16_t* s);

extern int32_t
js_strcmp(const char16_t* lhs, const char16_t* rhs);

template <typename CharT>
extern const CharT*
js_strchr_limit(const CharT* s, char16_t c, const CharT* limit);
107 
108 static MOZ_ALWAYS_INLINE void
js_strncpy(char16_t * dst,const char16_t * src,size_t nelem)109 js_strncpy(char16_t* dst, const char16_t* src, size_t nelem)
110 {
111     return mozilla::PodCopy(dst, src, nelem);
112 }
113 
114 namespace js {
115 
116 /* Initialize the String class, returning its prototype object. */
117 extern JSObject*
118 InitStringClass(JSContext* cx, HandleObject obj);
119 
120 /*
121  * Convert a value to a printable C string.
122  */
123 extern const char*
124 ValueToPrintable(JSContext* cx, const Value&, JSAutoByteString* bytes, bool asSource = false);
125 
126 extern mozilla::UniquePtr<char[], JS::FreePolicy>
127 DuplicateString(ExclusiveContext* cx, const char* s);
128 
129 extern mozilla::UniquePtr<char16_t[], JS::FreePolicy>
130 DuplicateString(ExclusiveContext* cx, const char16_t* s);
131 
132 // This variant does not report OOMs, you must arrange for OOMs to be reported
133 // yourself.
134 extern mozilla::UniquePtr<char16_t[], JS::FreePolicy>
135 DuplicateString(const char16_t* s);
136 
137 /*
138  * Convert a non-string value to a string, returning null after reporting an
139  * error, otherwise returning a new string reference.
140  */
141 template <AllowGC allowGC>
142 extern JSString*
143 ToStringSlow(ExclusiveContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg);
144 
145 /*
146  * Convert the given value to a string.  This method includes an inline
147  * fast-path for the case where the value is already a string; if the value is
148  * known not to be a string, use ToStringSlow instead.
149  */
150 template <AllowGC allowGC>
151 static MOZ_ALWAYS_INLINE JSString*
ToString(JSContext * cx,JS::HandleValue v)152 ToString(JSContext* cx, JS::HandleValue v)
153 {
154     if (v.isString())
155         return v.toString();
156     return ToStringSlow<allowGC>(cx, v);
157 }
158 
159 /*
160  * This function implements E-262-3 section 9.8, toString. Convert the given
161  * value to a string of characters appended to the given buffer. On error, the
162  * passed buffer may have partial results appended.
163  */
164 inline bool
165 ValueToStringBuffer(JSContext* cx, const Value& v, StringBuffer& sb);
166 
167 /*
168  * Convert a value to its source expression, returning null after reporting
169  * an error, otherwise returning a new string reference.
170  */
171 extern JSString*
172 ValueToSource(JSContext* cx, HandleValue v);
173 
174 /*
175  * Convert a JSString to its source expression; returns null after reporting an
176  * error, otherwise returns a new string reference. No Handle needed since the
177  * input is dead after the GC.
178  */
179 extern JSString*
180 StringToSource(JSContext* cx, JSString* str);
181 
182 /*
183  * Test if strings are equal. The caller can call the function even if str1
184  * or str2 are not GC-allocated things.
185  */
186 extern bool
187 EqualStrings(JSContext* cx, JSString* str1, JSString* str2, bool* result);
188 
189 /* Use the infallible method instead! */
190 extern bool
191 EqualStrings(JSContext* cx, JSLinearString* str1, JSLinearString* str2, bool* result) = delete;
192 
193 /* EqualStrings is infallible on linear strings. */
194 extern bool
195 EqualStrings(JSLinearString* str1, JSLinearString* str2);
196 
197 extern bool
198 EqualChars(JSLinearString* str1, JSLinearString* str2);
199 
200 /*
201  * Return less than, equal to, or greater than zero depending on whether
202  * str1 is less than, equal to, or greater than str2.
203  */
204 extern bool
205 CompareStrings(JSContext* cx, JSString* str1, JSString* str2, int32_t* result);
206 
207 /*
208  * Same as CompareStrings but for atoms.  Don't use this to just test
209  * for equality; use this when you need an ordering on atoms.
210  */
211 extern int32_t
212 CompareAtoms(JSAtom* atom1, JSAtom* atom2);
213 
214 /*
215  * Return true if the string matches the given sequence of ASCII bytes.
216  */
217 extern bool
218 StringEqualsAscii(JSLinearString* str, const char* asciiBytes);
219 
220 /* Return true if the string contains a pattern anywhere inside it. */
221 extern bool
222 StringHasPattern(JSLinearString* text, const char16_t* pat, uint32_t patlen);
223 
224 extern int
225 StringFindPattern(JSLinearString* text, JSLinearString* pat, size_t start);
226 
227 /* Return true if the string contains a pattern at |start|. */
228 extern bool
229 HasSubstringAt(JSLinearString* text, JSLinearString* pat, size_t start);
230 
231 template <typename CharT>
232 extern bool
233 HasRegExpMetaChars(const CharT* chars, size_t length);
234 
235 extern bool
236 StringHasRegExpMetaChars(JSLinearString* str);
237 
238 template <typename Char1, typename Char2>
239 inline bool
240 EqualChars(const Char1* s1, const Char2* s2, size_t len);
241 
242 template <typename Char1>
243 inline bool
EqualChars(const Char1 * s1,const Char1 * s2,size_t len)244 EqualChars(const Char1* s1, const Char1* s2, size_t len)
245 {
246     return mozilla::PodEqual(s1, s2, len);
247 }
248 
/*
 * Mixed-character-type overload: compare the first |len| elements of s1 and
 * s2 one by one with !=, returning false at the first mismatch and true when
 * all of them match (including when |len| is 0).
 */
template <typename Char1, typename Char2>
inline bool
EqualChars(const Char1* s1, const Char2* s2, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (s1[i] != s2[i])
            return false;
    }
    return true;
}
259 
260 /*
261  * Computes |str|'s substring for the range [beginInt, beginInt + lengthInt).
262  * Negative, overlarge, swapped, etc. |beginInt| and |lengthInt| are forbidden
263  * and constitute API misuse.
264  */
265 JSString*
266 SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt, int32_t lengthInt);
267 
268 /*
269  * Inflate bytes in ASCII encoding to char16_t code units. Return null on error,
270  * otherwise return the char16_t buffer that was malloc'ed. length is updated to
271  * the length of the new string (in char16_t code units). A null char is
272  * appended, but it is not included in the length.
273  */
274 extern char16_t*
275 InflateString(ExclusiveContext* cx, const char* bytes, size_t* length);
276 
/*
 * Inflate bytes to JS chars in an existing buffer. 'dst' must be large
 * enough for 'srclen' char16_t code units. The buffer is NOT null-terminated.
 *
 * Each source byte is converted to unsigned char before it is widened, so
 * bytes with the high bit set become 0x80..0xFF instead of being
 * sign-extended.
 */
inline void
CopyAndInflateChars(char16_t* dst, const char* src, size_t srclen)
{
    for (size_t i = 0; i < srclen; i++)
        dst[i] = static_cast<unsigned char>(src[i]);
}
287 
288 inline void
CopyAndInflateChars(char16_t * dst,const JS::Latin1Char * src,size_t srclen)289 CopyAndInflateChars(char16_t* dst, const JS::Latin1Char* src, size_t srclen)
290 {
291     for (size_t i = 0; i < srclen; i++)
292         dst[i] = src[i];
293 }
294 
295 /*
296  * Deflate JS chars to bytes into a buffer. 'bytes' must be large enough for
297  * 'length chars. The buffer is NOT null-terminated. The destination length
298  * must to be initialized with the buffer size and will contain on return the
299  * number of copied bytes.
300  */
301 template <typename CharT>
302 extern bool
303 DeflateStringToBuffer(JSContext* maybecx, const CharT* chars,
304                       size_t charsLength, char* bytes, size_t* length);
305 
306 /*
307  * The String.prototype.replace fast-native entry point is exported for joined
308  * function optimization in js{interp,tracer}.cpp.
309  */
310 extern bool
311 str_replace(JSContext* cx, unsigned argc, js::Value* vp);
312 
313 extern bool
314 str_fromCharCode(JSContext* cx, unsigned argc, Value* vp);
315 
316 extern bool
317 str_fromCharCode_one_arg(JSContext* cx, HandleValue code, MutableHandleValue rval);
318 
319 /* String methods exposed so they can be installed in the self-hosting global. */
320 
321 extern bool
322 str_indexOf(JSContext* cx, unsigned argc, Value* vp);
323 
324 extern bool
325 str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp);
326 
327 extern bool
328 str_startsWith(JSContext* cx, unsigned argc, Value* vp);
329 
330 extern bool
331 str_toLowerCase(JSContext* cx, unsigned argc, Value* vp);
332 
333 extern bool
334 str_toUpperCase(JSContext* cx, unsigned argc, Value* vp);
335 
336 extern bool
337 str_toString(JSContext* cx, unsigned argc, Value* vp);
338 
339 extern bool
340 str_charAt(JSContext* cx, unsigned argc, Value* vp);
341 
342 extern bool
343 str_charCodeAt_impl(JSContext* cx, HandleString string, HandleValue index, MutableHandleValue res);
344 
345 extern bool
346 str_charCodeAt(JSContext* cx, unsigned argc, Value* vp);
347 /*
348  * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
349  * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
350  */
351 extern uint32_t
352 OneUcs4ToUtf8Char(uint8_t* utf8Buffer, uint32_t ucs4Char);
353 
354 extern size_t
355 PutEscapedStringImpl(char* buffer, size_t size, GenericPrinter* out, JSLinearString* str,
356                      uint32_t quote);
357 
358 template <typename CharT>
359 extern size_t
360 PutEscapedStringImpl(char* buffer, size_t bufferSize, GenericPrinter* out, const CharT* chars,
361                      size_t length, uint32_t quote);
362 
363 /*
364  * Write str into buffer escaping any non-printable or non-ASCII character
365  * using \escapes for JS string literals.
366  * Guarantees that a NUL is at the end of the buffer unless size is 0. Returns
367  * the length of the written output, NOT including the NUL. Thus, a return
368  * value of size or more means that the output was truncated. If buffer
369  * is null, just returns the length of the output. If quote is not 0, it must
370  * be a single or double quote character that will quote the output.
371 */
372 inline size_t
PutEscapedString(char * buffer,size_t size,JSLinearString * str,uint32_t quote)373 PutEscapedString(char* buffer, size_t size, JSLinearString* str, uint32_t quote)
374 {
375     size_t n = PutEscapedStringImpl(buffer, size, nullptr, str, quote);
376 
377     /* PutEscapedStringImpl can only fail with a file. */
378     MOZ_ASSERT(n != size_t(-1));
379     return n;
380 }
381 
382 template <typename CharT>
383 inline size_t
PutEscapedString(char * buffer,size_t bufferSize,const CharT * chars,size_t length,uint32_t quote)384 PutEscapedString(char* buffer, size_t bufferSize, const CharT* chars, size_t length, uint32_t quote)
385 {
386     size_t n = PutEscapedStringImpl(buffer, bufferSize, nullptr, chars, length, quote);
387 
388     /* PutEscapedStringImpl can only fail with a file. */
389     MOZ_ASSERT(n != size_t(-1));
390     return n;
391 }
392 
393 inline bool
EscapedStringPrinter(GenericPrinter & out,JSLinearString * str,uint32_t quote)394 EscapedStringPrinter(GenericPrinter& out, JSLinearString* str, uint32_t quote)
395 {
396     return PutEscapedStringImpl(nullptr, 0, &out, str, quote) != size_t(-1);
397 }
398 
399 inline bool
EscapedStringPrinter(GenericPrinter & out,const char * chars,size_t length,uint32_t quote)400 EscapedStringPrinter(GenericPrinter& out, const char* chars, size_t length, uint32_t quote)
401 {
402     return PutEscapedStringImpl(nullptr, 0, &out, chars, length, quote) != size_t(-1);
403 }
404 
405 /*
406  * Write str into file escaping any non-printable or non-ASCII character.
407  * If quote is not 0, it must be a single or double quote character that
408  * will quote the output.
409 */
410 inline bool
FileEscapedString(FILE * fp,JSLinearString * str,uint32_t quote)411 FileEscapedString(FILE* fp, JSLinearString* str, uint32_t quote)
412 {
413     Fprinter out(fp);
414     bool res = EscapedStringPrinter(out, str, quote);
415     out.finish();
416     return res;
417 }
418 
419 inline bool
FileEscapedString(FILE * fp,const char * chars,size_t length,uint32_t quote)420 FileEscapedString(FILE* fp, const char* chars, size_t length, uint32_t quote)
421 {
422     Fprinter out(fp);
423     bool res = EscapedStringPrinter(out, chars, length, quote);
424     out.finish();
425     return res;
426 }
427 
428 bool
429 str_match(JSContext* cx, unsigned argc, Value* vp);
430 
431 bool
432 str_search(JSContext* cx, unsigned argc, Value* vp);
433 
434 bool
435 str_split(JSContext* cx, unsigned argc, Value* vp);
436 
437 JSObject*
438 str_split_string(JSContext* cx, HandleObjectGroup group, HandleString str, HandleString sep);
439 
440 JSString*
441 str_replace_string_raw(JSContext* cx, HandleString string, HandleString pattern,
442                        HandleString replacement);
443 
444 extern bool
445 StringConstructor(JSContext* cx, unsigned argc, Value* vp);
446 
447 } /* namespace js */
448 
449 #endif /* jsstr_h */
450