1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/builtins/builtins-utils-inl.h"
6 #include "src/builtins/builtins.h"
7 #include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
8 #include "src/logging/counters.h"
9 #include "src/numbers/conversions.h"
10 #include "src/objects/objects-inl.h"
11 #ifdef V8_INTL_SUPPORT
12 #include "src/objects/intl-objects.h"
13 #endif
14 #include "src/base/strings.h"
15 #include "src/regexp/regexp-utils.h"
16 #include "src/strings/string-builder-inl.h"
17 #include "src/strings/string-case.h"
18 #include "src/strings/unicode-inl.h"
19 #include "src/strings/unicode.h"
20 
21 namespace v8 {
22 namespace internal {
23 
24 namespace {  // for String.fromCodePoint
25 
IsValidCodePoint(Isolate * isolate,Handle<Object> value)26 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
27   if (!value->IsNumber() &&
28       !Object::ToNumber(isolate, value).ToHandle(&value)) {
29     return false;
30   }
31 
32   if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
33       value->Number()) {
34     return false;
35   }
36 
37   if (value->Number() < 0 || value->Number() > 0x10FFFF) {
38     return false;
39   }
40 
41   return true;
42 }
43 
44 static constexpr base::uc32 kInvalidCodePoint = static_cast<base::uc32>(-1);
45 
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)46 base::uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
47   Handle<Object> value = args.at(1 + index);
48   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
49       isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint);
50   if (!IsValidCodePoint(isolate, value)) {
51     isolate->Throw(*isolate->factory()->NewRangeError(
52         MessageTemplate::kInvalidCodePoint, value));
53     return kInvalidCodePoint;
54   }
55   return DoubleToUint32(value->Number());
56 }
57 
58 }  // namespace
59 
60 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint)61 BUILTIN(StringFromCodePoint) {
62   HandleScope scope(isolate);
63   int const length = args.length() - 1;
64   if (length == 0) return ReadOnlyRoots(isolate).empty_string();
65   DCHECK_LT(0, length);
66 
67   // Optimistically assume that the resulting String contains only one byte
68   // characters.
69   std::vector<uint8_t> one_byte_buffer;
70   one_byte_buffer.reserve(length);
71   base::uc32 code = 0;
72   int index;
73   for (index = 0; index < length; index++) {
74     code = NextCodePoint(isolate, args, index);
75     if (code == kInvalidCodePoint) {
76       return ReadOnlyRoots(isolate).exception();
77     }
78     if (code > String::kMaxOneByteCharCode) {
79       break;
80     }
81     one_byte_buffer.push_back(code);
82   }
83 
84   if (index == length) {
85     RETURN_RESULT_OR_FAILURE(
86         isolate, isolate->factory()->NewStringFromOneByte(base::Vector<uint8_t>(
87                      one_byte_buffer.data(), one_byte_buffer.size())));
88   }
89 
90   std::vector<base::uc16> two_byte_buffer;
91   two_byte_buffer.reserve(length - index);
92 
93   while (true) {
94     if (code <=
95         static_cast<base::uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
96       two_byte_buffer.push_back(code);
97     } else {
98       two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
99       two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
100     }
101 
102     if (++index == length) {
103       break;
104     }
105     code = NextCodePoint(isolate, args, index);
106     if (code == kInvalidCodePoint) {
107       return ReadOnlyRoots(isolate).exception();
108     }
109   }
110 
111   Handle<SeqTwoByteString> result;
112   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
113       isolate, result,
114       isolate->factory()->NewRawTwoByteString(
115           static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
116 
117   DisallowGarbageCollection no_gc;
118   CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
119             one_byte_buffer.size());
120   CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
121             two_byte_buffer.data(), two_byte_buffer.size());
122 
123   return *result;
124 }
125 
126 // ES6 section 21.1.3.9
127 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf) {
  HandleScope handle_scope(isolate);
  // Missing arguments are passed on as undefined.
  Handle<Object> search = args.atOrUndefined(isolate, 1);
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  return String::LastIndexOf(isolate, args.receiver(), search, position);
}
134 
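// ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
//
// This function is implementation specific.  With V8_INTL_SUPPORT the
// comparison is delegated to Intl::StringLocaleCompare; otherwise we do not
// do anything locale specific and compare the strings code unit by code unit.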
BUILTIN(StringPrototypeLocaleCompare)139 BUILTIN(StringPrototypeLocaleCompare) {
140   HandleScope handle_scope(isolate);
141 
142   isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
143   static const char* const kMethod = "String.prototype.localeCompare";
144 
145 #ifdef V8_INTL_SUPPORT
146   TO_THIS_STRING(str1, kMethod);
147   Handle<String> str2;
148   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
149       isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
150   base::Optional<int> result = Intl::StringLocaleCompare(
151       isolate, str1, str2, args.atOrUndefined(isolate, 2),
152       args.atOrUndefined(isolate, 3), kMethod);
153   if (!result.has_value()) {
154     DCHECK(isolate->has_pending_exception());
155     return ReadOnlyRoots(isolate).exception();
156   }
157   return Smi::FromInt(result.value());
158 #else
159   DCHECK_LE(2, args.length());
160 
161   TO_THIS_STRING(str1, kMethod);
162   Handle<String> str2;
163   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
164                                      Object::ToString(isolate, args.at(1)));
165 
166   if (str1.is_identical_to(str2)) return Smi::zero();  // Equal.
167   int str1_length = str1->length();
168   int str2_length = str2->length();
169 
170   // Decide trivial cases without flattening.
171   if (str1_length == 0) {
172     if (str2_length == 0) return Smi::zero();  // Equal.
173     return Smi::FromInt(-str2_length);
174   } else {
175     if (str2_length == 0) return Smi::FromInt(str1_length);
176   }
177 
178   int end = str1_length < str2_length ? str1_length : str2_length;
179 
180   // No need to flatten if we are going to find the answer on the first
181   // character. At this point we know there is at least one character
182   // in each string, due to the trivial case handling above.
183   int d = str1->Get(0) - str2->Get(0);
184   if (d != 0) return Smi::FromInt(d);
185 
186   str1 = String::Flatten(isolate, str1);
187   str2 = String::Flatten(isolate, str2);
188 
189   DisallowGarbageCollection no_gc;
190   String::FlatContent flat1 = str1->GetFlatContent(no_gc);
191   String::FlatContent flat2 = str2->GetFlatContent(no_gc);
192 
193   for (int i = 0; i < end; i++) {
194     if (flat1.Get(i) != flat2.Get(i)) {
195       return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
196     }
197   }
198 
199   return Smi::FromInt(str1_length - str2_length);
200 #endif  // !V8_INTL_SUPPORT
201 }
202 
203 #ifndef V8_INTL_SUPPORT
204 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
205 //
206 // Simply checks the argument is valid and returns the string itself.
207 // If internationalization is enabled, then intl.js will override this function
208 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.normalize");

  // Without a form argument there is nothing to validate.
  Handle<Object> form_input = args.atOrUndefined(isolate, 1);
  if (form_input->IsUndefined(isolate)) return *receiver;

  Handle<String> form;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
                                     Object::ToString(isolate, form_input));

  // Only the four normalization form names are accepted; the receiver is
  // returned unchanged for all of them.
  auto* factory = isolate->factory();
  const bool is_valid_form =
      String::Equals(isolate, form, factory->NFC_string()) ||
      String::Equals(isolate, form, factory->NFD_string()) ||
      String::Equals(isolate, form, factory->NFKC_string()) ||
      String::Equals(isolate, form, factory->NFKD_string());
  if (is_valid_form) return *receiver;

  // Anything else is a RangeError that lists the accepted names.
  Handle<String> valid_forms =
      factory->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
  THROW_NEW_ERROR_RETURN_FAILURE(
      isolate, NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
}
233 #endif  // !V8_INTL_SUPPORT
234 
235 
236 #ifndef V8_INTL_SUPPORT
237 namespace {
238 
ToUpperOverflows(base::uc32 character)239 inline bool ToUpperOverflows(base::uc32 character) {
240   // y with umlauts and the micro sign are the only characters that stop
241   // fitting into one-byte when converting to uppercase.
242   static const base::uc32 yuml_code = 0xFF;
243   static const base::uc32 micro_code = 0xB5;
244   return (character == yuml_code || character == micro_code);
245 }
246 
247 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String string,SeqString result,int result_length,unibrow::Mapping<Converter,128> * mapping)248 V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
249     Isolate* isolate, String string, SeqString result, int result_length,
250     unibrow::Mapping<Converter, 128>* mapping) {
251   DisallowGarbageCollection no_gc;
252   // We try this twice, once with the assumption that the result is no longer
253   // than the input and, if that assumption breaks, again with the exact
254   // length.  This may not be pretty, but it is nicer than what was here before
255   // and I hereby claim my vaffel-is.
256   //
257   // NOTE: This assumes that the upper/lower case of an ASCII
258   // character is also ASCII.  This is currently the case, but it
259   // might break in the future if we implement more context and locale
260   // dependent upper/lower conversions.
261   bool has_changed_character = false;
262 
263   // Convert all characters to upper case, assuming that they will fit
264   // in the buffer
265   StringCharacterStream stream(string);
266   unibrow::uchar chars[Converter::kMaxWidth];
267   // We can assume that the string is not empty
268   base::uc32 current = stream.GetNext();
269   bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString();
270   for (int i = 0; i < result_length;) {
271     bool has_next = stream.HasMore();
272     base::uc32 next = has_next ? stream.GetNext() : 0;
273     int char_length = mapping->get(current, next, chars);
274     if (char_length == 0) {
275       // The case conversion of this character is the character itself.
276       result.Set(i, current);
277       i++;
278     } else if (char_length == 1 &&
279                (ignore_overflow || !ToUpperOverflows(current))) {
280       // Common case: converting the letter resulted in one character.
281       DCHECK(static_cast<base::uc32>(chars[0]) != current);
282       result.Set(i, chars[0]);
283       has_changed_character = true;
284       i++;
285     } else if (result_length == string.length()) {
286       bool overflows = ToUpperOverflows(current);
287       // We've assumed that the result would be as long as the
288       // input but here is a character that converts to several
289       // characters.  No matter, we calculate the exact length
290       // of the result and try the whole thing again.
291       //
292       // Note that this leaves room for optimization.  We could just
293       // memcpy what we already have to the result string.  Also,
294       // the result string is the last object allocated we could
295       // "realloc" it and probably, in the vast majority of cases,
296       // extend the existing string to be able to hold the full
297       // result.
298       int next_length = 0;
299       if (has_next) {
300         next_length = mapping->get(next, 0, chars);
301         if (next_length == 0) next_length = 1;
302       }
303       int current_length = i + char_length + next_length;
304       while (stream.HasMore()) {
305         current = stream.GetNext();
306         overflows |= ToUpperOverflows(current);
307         // NOTE: we use 0 as the next character here because, while
308         // the next character may affect what a character converts to,
309         // it does not in any case affect the length of what it convert
310         // to.
311         int char_length = mapping->get(current, 0, chars);
312         if (char_length == 0) char_length = 1;
313         current_length += char_length;
314         if (current_length > String::kMaxLength) {
315           AllowGarbageCollection allocate_error_and_return;
316           THROW_NEW_ERROR_RETURN_FAILURE(isolate,
317                                          NewInvalidStringLengthError());
318         }
319       }
320       // Try again with the real length.  Return signed if we need
321       // to allocate a two-byte string for to uppercase.
322       return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
323                                              : Smi::FromInt(current_length);
324     } else {
325       for (int j = 0; j < char_length; j++) {
326         result.Set(i, chars[j]);
327         i++;
328       }
329       has_changed_character = true;
330     }
331     current = next;
332   }
333   if (has_changed_character) {
334     return result;
335   } else {
336     // If we didn't actually change anything in doing the conversion
337     // we simple return the result and let the converted string
338     // become garbage; there is no reason to keep two identical strings
339     // alive.
340     return string;
341   }
342 }
343 
344 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)345 V8_WARN_UNUSED_RESULT static Object ConvertCase(
346     Handle<String> s, Isolate* isolate,
347     unibrow::Mapping<Converter, 128>* mapping) {
348   s = String::Flatten(isolate, s);
349   int length = s->length();
350   // Assume that the string is not empty; we need this assumption later
351   if (length == 0) return *s;
352 
353   // Simpler handling of ASCII strings.
354   //
355   // NOTE: This assumes that the upper/lower case of an ASCII
356   // character is also ASCII.  This is currently the case, but it
357   // might break in the future if we implement more context and locale
358   // dependent upper/lower conversions.
359   if (String::IsOneByteRepresentationUnderneath(*s)) {
360     // Same length as input.
361     Handle<SeqOneByteString> result =
362         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
363     DisallowGarbageCollection no_gc;
364     String::FlatContent flat_content = s->GetFlatContent(no_gc);
365     DCHECK(flat_content.IsFlat());
366     bool has_changed_character = false;
367     int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
368         reinterpret_cast<char*>(result->GetChars(no_gc)),
369         reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()),
370         length, &has_changed_character);
371     // If not ASCII, we discard the result and take the 2 byte path.
372     if (index_to_first_unprocessed == length)
373       return has_changed_character ? *result : *s;
374   }
375 
376   Handle<SeqString> result;  // Same length as input.
377   if (s->IsOneByteRepresentation()) {
378     result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
379   } else {
380     result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
381   }
382 
383   Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
384   if (answer.IsException(isolate) || answer.IsString()) return answer;
385 
386   DCHECK(answer.IsSmi());
387   length = Smi::ToInt(answer);
388   if (s->IsOneByteRepresentation() && length > 0) {
389     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
390         isolate, result, isolate->factory()->NewRawOneByteString(length));
391   } else {
392     if (length < 0) length = -length;
393     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
394         isolate, result, isolate->factory()->NewRawTwoByteString(length));
395   }
396   return ConvertCaseHelper(isolate, *s, *result, length, mapping);
397 }
398 
399 }  // namespace
400 
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLocaleLowerCase");
  // Uses the same lower-case mapping as String.prototype.toLowerCase.
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(receiver, isolate, lower_mapping);
}
407 
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLocaleUpperCase");
  // Uses the same upper-case mapping as String.prototype.toUpperCase.
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(receiver, isolate, upper_mapping);
}
414 
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toLowerCase");
  // Convert with the isolate's lower-case mapping.
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(receiver, isolate, lower_mapping);
}
421 
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(receiver, "String.prototype.toUpperCase");
  // Convert with the isolate's upper-case mapping.
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(receiver, isolate, upper_mapping);
}
428 #endif  // !V8_INTL_SUPPORT
429 
430 // ES6 #sec-string.prototype.raw
BUILTIN(StringRaw)431 BUILTIN(StringRaw) {
432   HandleScope scope(isolate);
433   Handle<Object> templ = args.atOrUndefined(isolate, 1);
434   const uint32_t argc = args.length();
435   Handle<String> raw_string =
436       isolate->factory()->NewStringFromAsciiChecked("raw");
437 
438   Handle<Object> cooked;
439   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
440                                      Object::ToObject(isolate, templ));
441 
442   Handle<Object> raw;
443   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
444       isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
445   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
446                                      Object::ToObject(isolate, raw));
447   Handle<Object> raw_len;
448   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
449       isolate, raw_len,
450       Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
451 
452   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
453                                      Object::ToLength(isolate, raw_len));
454 
455   IncrementalStringBuilder result_builder(isolate);
456   // Intentional spec violation: we ignore {length} values >= 2^32, because
457   // assuming non-empty chunks they would generate too-long strings anyway.
458   const double raw_len_number = raw_len->Number();
459   const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
460                               ? std::numeric_limits<uint32_t>::max()
461                               : static_cast<uint32_t>(raw_len_number);
462   if (length > 0) {
463     Handle<Object> first_element;
464     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
465                                        Object::GetElement(isolate, raw, 0));
466 
467     Handle<String> first_string;
468     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
469         isolate, first_string, Object::ToString(isolate, first_element));
470     result_builder.AppendString(first_string);
471 
472     for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
473       if (arg_i < argc) {
474         Handle<String> argument_string;
475         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
476             isolate, argument_string,
477             Object::ToString(isolate, args.at(arg_i)));
478         result_builder.AppendString(argument_string);
479       }
480 
481       Handle<Object> element;
482       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
483                                          Object::GetElement(isolate, raw, i));
484 
485       Handle<String> element_string;
486       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
487                                          Object::ToString(isolate, element));
488       result_builder.AppendString(element_string);
489     }
490   }
491 
492   RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
493 }
494 
495 }  // namespace internal
496 }  // namespace v8
497