1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/builtins/builtins-utils-inl.h"
6 #include "src/builtins/builtins.h"
7 #include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
8 #include "src/logging/counters.h"
9 #include "src/numbers/conversions.h"
10 #include "src/objects/objects-inl.h"
11 #ifdef V8_INTL_SUPPORT
12 #include "src/objects/intl-objects.h"
13 #endif
14 #include "src/regexp/regexp-utils.h"
15 #include "src/strings/string-builder-inl.h"
16 #include "src/strings/string-case.h"
17 #include "src/strings/unicode-inl.h"
18 #include "src/strings/unicode.h"
19 
20 namespace v8 {
21 namespace internal {
22 
23 namespace {  // for String.fromCodePoint
24 
IsValidCodePoint(Isolate * isolate,Handle<Object> value)25 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
26   if (!value->IsNumber() &&
27       !Object::ToNumber(isolate, value).ToHandle(&value)) {
28     return false;
29   }
30 
31   if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
32       value->Number()) {
33     return false;
34   }
35 
36   if (value->Number() < 0 || value->Number() > 0x10FFFF) {
37     return false;
38   }
39 
40   return true;
41 }
42 
43 static constexpr uc32 kInvalidCodePoint = static_cast<uc32>(-1);
44 
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)45 uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
46   Handle<Object> value = args.at(1 + index);
47   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
48       isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint);
49   if (!IsValidCodePoint(isolate, value)) {
50     isolate->Throw(*isolate->factory()->NewRangeError(
51         MessageTemplate::kInvalidCodePoint, value));
52     return kInvalidCodePoint;
53   }
54   return DoubleToUint32(value->Number());
55 }
56 
57 }  // namespace
58 
59 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint)60 BUILTIN(StringFromCodePoint) {
61   HandleScope scope(isolate);
62   int const length = args.length() - 1;
63   if (length == 0) return ReadOnlyRoots(isolate).empty_string();
64   DCHECK_LT(0, length);
65 
66   // Optimistically assume that the resulting String contains only one byte
67   // characters.
68   std::vector<uint8_t> one_byte_buffer;
69   one_byte_buffer.reserve(length);
70   uc32 code = 0;
71   int index;
72   for (index = 0; index < length; index++) {
73     code = NextCodePoint(isolate, args, index);
74     if (code == kInvalidCodePoint) {
75       return ReadOnlyRoots(isolate).exception();
76     }
77     if (code > String::kMaxOneByteCharCode) {
78       break;
79     }
80     one_byte_buffer.push_back(code);
81   }
82 
83   if (index == length) {
84     RETURN_RESULT_OR_FAILURE(
85         isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
86                      one_byte_buffer.data(), one_byte_buffer.size())));
87   }
88 
89   std::vector<uc16> two_byte_buffer;
90   two_byte_buffer.reserve(length - index);
91 
92   while (true) {
93     if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
94       two_byte_buffer.push_back(code);
95     } else {
96       two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
97       two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
98     }
99 
100     if (++index == length) {
101       break;
102     }
103     code = NextCodePoint(isolate, args, index);
104     if (code == kInvalidCodePoint) {
105       return ReadOnlyRoots(isolate).exception();
106     }
107   }
108 
109   Handle<SeqTwoByteString> result;
110   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
111       isolate, result,
112       isolate->factory()->NewRawTwoByteString(
113           static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
114 
115   DisallowHeapAllocation no_gc;
116   CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
117             one_byte_buffer.size());
118   CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
119             two_byte_buffer.data(), two_byte_buffer.size());
120 
121   return *result;
122 }
123 
124 // ES6 section 21.1.3.9
125 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf) {
  HandleScope handle_scope(isolate);
  // Missing arguments are passed on as undefined; all of the actual work is
  // done by String::LastIndexOf.
  Handle<Object> search_string = args.atOrUndefined(isolate, 1);
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  return String::LastIndexOf(isolate, args.receiver(), search_string,
                             position);
}
132 
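// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
//
// This function is implementation specific. With V8_INTL_SUPPORT the
// comparison is delegated to Intl::StringLocaleCompare; without it, we do not
// do anything locale specific and simply compare character codes.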
BUILTIN(StringPrototypeLocaleCompare)137 BUILTIN(StringPrototypeLocaleCompare) {
138   HandleScope handle_scope(isolate);
139 
140   isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
141   const char* method = "String.prototype.localeCompare";
142 
143 #ifdef V8_INTL_SUPPORT
144   TO_THIS_STRING(str1, method);
145   Handle<String> str2;
146   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
147       isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
148   RETURN_RESULT_OR_FAILURE(
149       isolate, Intl::StringLocaleCompare(
150                    isolate, str1, str2, args.atOrUndefined(isolate, 2),
151                    args.atOrUndefined(isolate, 3), method));
152 #else
153   DCHECK_LE(2, args.length());
154 
155   TO_THIS_STRING(str1, method);
156   Handle<String> str2;
157   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
158                                      Object::ToString(isolate, args.at(1)));
159 
160   if (str1.is_identical_to(str2)) return Smi::zero();  // Equal.
161   int str1_length = str1->length();
162   int str2_length = str2->length();
163 
164   // Decide trivial cases without flattening.
165   if (str1_length == 0) {
166     if (str2_length == 0) return Smi::zero();  // Equal.
167     return Smi::FromInt(-str2_length);
168   } else {
169     if (str2_length == 0) return Smi::FromInt(str1_length);
170   }
171 
172   int end = str1_length < str2_length ? str1_length : str2_length;
173 
174   // No need to flatten if we are going to find the answer on the first
175   // character. At this point we know there is at least one character
176   // in each string, due to the trivial case handling above.
177   int d = str1->Get(0) - str2->Get(0);
178   if (d != 0) return Smi::FromInt(d);
179 
180   str1 = String::Flatten(isolate, str1);
181   str2 = String::Flatten(isolate, str2);
182 
183   DisallowHeapAllocation no_gc;
184   String::FlatContent flat1 = str1->GetFlatContent(no_gc);
185   String::FlatContent flat2 = str2->GetFlatContent(no_gc);
186 
187   for (int i = 0; i < end; i++) {
188     if (flat1.Get(i) != flat2.Get(i)) {
189       return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
190     }
191   }
192 
193   return Smi::FromInt(str1_length - str2_length);
194 #endif  // !V8_INTL_SUPPORT
195 }
196 
197 #ifndef V8_INTL_SUPPORT
198 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
199 //
200 // Simply checks the argument is valid and returns the string itself.
201 // If internationalization is enabled, then intl.js will override this function
202 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(string, "String.prototype.normalize");

  // No form given: the receiver string is returned as is.
  Handle<Object> form_input = args.atOrUndefined(isolate, 1);
  if (form_input->IsUndefined(isolate)) return *string;

  Handle<String> form;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
                                     Object::ToString(isolate, form_input));

  // The form is only validated; no normalization is performed here.
  Factory* factory = isolate->factory();
  const bool is_known_form =
      String::Equals(isolate, form, factory->NFC_string()) ||
      String::Equals(isolate, form, factory->NFD_string()) ||
      String::Equals(isolate, form, factory->NFKC_string()) ||
      String::Equals(isolate, form, factory->NFKD_string());

  if (!is_known_form) {
    Handle<String> valid_forms =
        factory->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate,
        NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
  }

  return *string;
}
227 #endif  // !V8_INTL_SUPPORT
228 
229 
230 #ifndef V8_INTL_SUPPORT
231 namespace {
232 
ToUpperOverflows(uc32 character)233 inline bool ToUpperOverflows(uc32 character) {
234   // y with umlauts and the micro sign are the only characters that stop
235   // fitting into one-byte when converting to uppercase.
236   static const uc32 yuml_code = 0xFF;
237   static const uc32 micro_code = 0xB5;
238   return (character == yuml_code || character == micro_code);
239 }
240 
241 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String string,SeqString result,int result_length,unibrow::Mapping<Converter,128> * mapping)242 V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
243     Isolate* isolate, String string, SeqString result, int result_length,
244     unibrow::Mapping<Converter, 128>* mapping) {
245   DisallowHeapAllocation no_gc;
246   // We try this twice, once with the assumption that the result is no longer
247   // than the input and, if that assumption breaks, again with the exact
248   // length.  This may not be pretty, but it is nicer than what was here before
249   // and I hereby claim my vaffel-is.
250   //
251   // NOTE: This assumes that the upper/lower case of an ASCII
252   // character is also ASCII.  This is currently the case, but it
253   // might break in the future if we implement more context and locale
254   // dependent upper/lower conversions.
255   bool has_changed_character = false;
256 
257   // Convert all characters to upper case, assuming that they will fit
258   // in the buffer
259   StringCharacterStream stream(string);
260   unibrow::uchar chars[Converter::kMaxWidth];
261   // We can assume that the string is not empty
262   uc32 current = stream.GetNext();
263   bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString();
264   for (int i = 0; i < result_length;) {
265     bool has_next = stream.HasMore();
266     uc32 next = has_next ? stream.GetNext() : 0;
267     int char_length = mapping->get(current, next, chars);
268     if (char_length == 0) {
269       // The case conversion of this character is the character itself.
270       result.Set(i, current);
271       i++;
272     } else if (char_length == 1 &&
273                (ignore_overflow || !ToUpperOverflows(current))) {
274       // Common case: converting the letter resulted in one character.
275       DCHECK(static_cast<uc32>(chars[0]) != current);
276       result.Set(i, chars[0]);
277       has_changed_character = true;
278       i++;
279     } else if (result_length == string.length()) {
280       bool overflows = ToUpperOverflows(current);
281       // We've assumed that the result would be as long as the
282       // input but here is a character that converts to several
283       // characters.  No matter, we calculate the exact length
284       // of the result and try the whole thing again.
285       //
286       // Note that this leaves room for optimization.  We could just
287       // memcpy what we already have to the result string.  Also,
288       // the result string is the last object allocated we could
289       // "realloc" it and probably, in the vast majority of cases,
290       // extend the existing string to be able to hold the full
291       // result.
292       int next_length = 0;
293       if (has_next) {
294         next_length = mapping->get(next, 0, chars);
295         if (next_length == 0) next_length = 1;
296       }
297       int current_length = i + char_length + next_length;
298       while (stream.HasMore()) {
299         current = stream.GetNext();
300         overflows |= ToUpperOverflows(current);
301         // NOTE: we use 0 as the next character here because, while
302         // the next character may affect what a character converts to,
303         // it does not in any case affect the length of what it convert
304         // to.
305         int char_length = mapping->get(current, 0, chars);
306         if (char_length == 0) char_length = 1;
307         current_length += char_length;
308         if (current_length > String::kMaxLength) {
309           AllowHeapAllocation allocate_error_and_return;
310           THROW_NEW_ERROR_RETURN_FAILURE(isolate,
311                                          NewInvalidStringLengthError());
312         }
313       }
314       // Try again with the real length.  Return signed if we need
315       // to allocate a two-byte string for to uppercase.
316       return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
317                                              : Smi::FromInt(current_length);
318     } else {
319       for (int j = 0; j < char_length; j++) {
320         result.Set(i, chars[j]);
321         i++;
322       }
323       has_changed_character = true;
324     }
325     current = next;
326   }
327   if (has_changed_character) {
328     return result;
329   } else {
330     // If we didn't actually change anything in doing the conversion
331     // we simple return the result and let the converted string
332     // become garbage; there is no reason to keep two identical strings
333     // alive.
334     return string;
335   }
336 }
337 
338 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)339 V8_WARN_UNUSED_RESULT static Object ConvertCase(
340     Handle<String> s, Isolate* isolate,
341     unibrow::Mapping<Converter, 128>* mapping) {
342   s = String::Flatten(isolate, s);
343   int length = s->length();
344   // Assume that the string is not empty; we need this assumption later
345   if (length == 0) return *s;
346 
347   // Simpler handling of ASCII strings.
348   //
349   // NOTE: This assumes that the upper/lower case of an ASCII
350   // character is also ASCII.  This is currently the case, but it
351   // might break in the future if we implement more context and locale
352   // dependent upper/lower conversions.
353   if (String::IsOneByteRepresentationUnderneath(*s)) {
354     // Same length as input.
355     Handle<SeqOneByteString> result =
356         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
357     DisallowHeapAllocation no_gc;
358     String::FlatContent flat_content = s->GetFlatContent(no_gc);
359     DCHECK(flat_content.IsFlat());
360     bool has_changed_character = false;
361     int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
362         reinterpret_cast<char*>(result->GetChars(no_gc)),
363         reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()),
364         length, &has_changed_character);
365     // If not ASCII, we discard the result and take the 2 byte path.
366     if (index_to_first_unprocessed == length)
367       return has_changed_character ? *result : *s;
368   }
369 
370   Handle<SeqString> result;  // Same length as input.
371   if (s->IsOneByteRepresentation()) {
372     result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
373   } else {
374     result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
375   }
376 
377   Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
378   if (answer.IsException(isolate) || answer.IsString()) return answer;
379 
380   DCHECK(answer.IsSmi());
381   length = Smi::ToInt(answer);
382   if (s->IsOneByteRepresentation() && length > 0) {
383     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
384         isolate, result, isolate->factory()->NewRawOneByteString(length));
385   } else {
386     if (length < 0) length = -length;
387     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
388         isolate, result, isolate->factory()->NewRawTwoByteString(length));
389   }
390   return ConvertCaseHelper(isolate, *s, *result, length, mapping);
391 }
392 
393 }  // namespace
394 
// Fallback String.prototype.toLocaleLowerCase: converts the receiver with the
// isolate's to-lower mapping, exactly like StringPrototypeToLowerCase.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLocaleLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(receiver, isolate, lower_mapping);
}
401 
// Fallback String.prototype.toLocaleUpperCase: converts the receiver with the
// isolate's to-upper mapping, exactly like StringPrototypeToUpperCase.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLocaleUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(receiver, isolate, upper_mapping);
}
408 
// Fallback String.prototype.toLowerCase: converts the receiver to a string
// and lower-cases it with the isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(receiver, isolate, lower_mapping);
}
415 
// Fallback String.prototype.toUpperCase: converts the receiver to a string
// and upper-cases it with the isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(receiver, isolate, upper_mapping);
}
422 #endif  // !V8_INTL_SUPPORT
423 
// ES6 #sec-string.raw
BUILTIN(StringRaw)425 BUILTIN(StringRaw) {
426   HandleScope scope(isolate);
427   Handle<Object> templ = args.atOrUndefined(isolate, 1);
428   const uint32_t argc = args.length();
429   Handle<String> raw_string =
430       isolate->factory()->NewStringFromAsciiChecked("raw");
431 
432   Handle<Object> cooked;
433   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
434                                      Object::ToObject(isolate, templ));
435 
436   Handle<Object> raw;
437   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
438       isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
439   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
440                                      Object::ToObject(isolate, raw));
441   Handle<Object> raw_len;
442   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
443       isolate, raw_len,
444       Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
445 
446   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
447                                      Object::ToLength(isolate, raw_len));
448 
449   IncrementalStringBuilder result_builder(isolate);
450   // Intentional spec violation: we ignore {length} values >= 2^32, because
451   // assuming non-empty chunks they would generate too-long strings anyway.
452   const double raw_len_number = raw_len->Number();
453   const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
454                               ? std::numeric_limits<uint32_t>::max()
455                               : static_cast<uint32_t>(raw_len_number);
456   if (length > 0) {
457     Handle<Object> first_element;
458     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
459                                        Object::GetElement(isolate, raw, 0));
460 
461     Handle<String> first_string;
462     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
463         isolate, first_string, Object::ToString(isolate, first_element));
464     result_builder.AppendString(first_string);
465 
466     for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
467       if (arg_i < argc) {
468         Handle<String> argument_string;
469         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
470             isolate, argument_string,
471             Object::ToString(isolate, args.at(arg_i)));
472         result_builder.AppendString(argument_string);
473       }
474 
475       Handle<Object> element;
476       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
477                                          Object::GetElement(isolate, raw, i));
478 
479       Handle<String> element_string;
480       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
481                                          Object::ToString(isolate, element));
482       result_builder.AppendString(element_string);
483     }
484   }
485 
486   RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
487 }
488 
489 }  // namespace internal
490 }  // namespace v8
491