1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
#include <limits>

#include "src/builtins/builtins-utils.h"
#include "src/builtins/builtins.h"
#include "src/conversions.h"
#include "src/counters.h"
#include "src/objects-inl.h"
#include "src/regexp/regexp-utils.h"
#include "src/string-builder.h"
#include "src/string-case.h"
#include "src/unicode-inl.h"
#include "src/unicode.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 namespace {  // for String.fromCodePoint
20 
IsValidCodePoint(Isolate * isolate,Handle<Object> value)21 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
22   if (!value->IsNumber() && !Object::ToNumber(value).ToHandle(&value)) {
23     return false;
24   }
25 
26   if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
27       value->Number()) {
28     return false;
29   }
30 
31   if (value->Number() < 0 || value->Number() > 0x10FFFF) {
32     return false;
33   }
34 
35   return true;
36 }
37 
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)38 uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
39   Handle<Object> value = args.at(1 + index);
40   ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value, Object::ToNumber(value), -1);
41   if (!IsValidCodePoint(isolate, value)) {
42     isolate->Throw(*isolate->factory()->NewRangeError(
43         MessageTemplate::kInvalidCodePoint, value));
44     return -1;
45   }
46   return DoubleToUint32(value->Number());
47 }
48 
49 }  // namespace
50 
51 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint)52 BUILTIN(StringFromCodePoint) {
53   HandleScope scope(isolate);
54   int const length = args.length() - 1;
55   if (length == 0) return isolate->heap()->empty_string();
56   DCHECK_LT(0, length);
57 
58   // Optimistically assume that the resulting String contains only one byte
59   // characters.
60   std::vector<uint8_t> one_byte_buffer;
61   one_byte_buffer.reserve(length);
62   uc32 code = 0;
63   int index;
64   for (index = 0; index < length; index++) {
65     code = NextCodePoint(isolate, args, index);
66     if (code < 0) {
67       return isolate->heap()->exception();
68     }
69     if (code > String::kMaxOneByteCharCode) {
70       break;
71     }
72     one_byte_buffer.push_back(code);
73   }
74 
75   if (index == length) {
76     RETURN_RESULT_OR_FAILURE(
77         isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
78                      one_byte_buffer.data(), one_byte_buffer.size())));
79   }
80 
81   std::vector<uc16> two_byte_buffer;
82   two_byte_buffer.reserve(length - index);
83 
84   while (true) {
85     if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
86       two_byte_buffer.push_back(code);
87     } else {
88       two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
89       two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
90     }
91 
92     if (++index == length) {
93       break;
94     }
95     code = NextCodePoint(isolate, args, index);
96     if (code < 0) {
97       return isolate->heap()->exception();
98     }
99   }
100 
101   Handle<SeqTwoByteString> result;
102   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
103       isolate, result,
104       isolate->factory()->NewRawTwoByteString(
105           static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
106 
107   CopyChars(result->GetChars(), one_byte_buffer.data(), one_byte_buffer.size());
108   CopyChars(result->GetChars() + one_byte_buffer.size(), two_byte_buffer.data(),
109             two_byte_buffer.size());
110 
111   return *result;
112 }
113 
114 // ES6 section 21.1.3.6
115 // String.prototype.endsWith ( searchString [ , endPosition ] )
BUILTIN(StringPrototypeEndsWith)116 BUILTIN(StringPrototypeEndsWith) {
117   HandleScope handle_scope(isolate);
118   TO_THIS_STRING(str, "String.prototype.endsWith");
119 
120   // Check if the search string is a regExp and fail if it is.
121   Handle<Object> search = args.atOrUndefined(isolate, 1);
122   Maybe<bool> is_reg_exp = RegExpUtils::IsRegExp(isolate, search);
123   if (is_reg_exp.IsNothing()) {
124     DCHECK(isolate->has_pending_exception());
125     return isolate->heap()->exception();
126   }
127   if (is_reg_exp.FromJust()) {
128     THROW_NEW_ERROR_RETURN_FAILURE(
129         isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
130                               isolate->factory()->NewStringFromStaticChars(
131                                   "String.prototype.endsWith")));
132   }
133   Handle<String> search_string;
134   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
135                                      Object::ToString(isolate, search));
136 
137   Handle<Object> position = args.atOrUndefined(isolate, 2);
138   int end;
139 
140   if (position->IsUndefined(isolate)) {
141     end = str->length();
142   } else {
143     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
144                                        Object::ToInteger(isolate, position));
145     end = str->ToValidIndex(*position);
146   }
147 
148   int start = end - search_string->length();
149   if (start < 0) return isolate->heap()->false_value();
150 
151   str = String::Flatten(str);
152   search_string = String::Flatten(search_string);
153 
154   DisallowHeapAllocation no_gc;  // ensure vectors stay valid
155   String::FlatContent str_content = str->GetFlatContent();
156   String::FlatContent search_content = search_string->GetFlatContent();
157 
158   if (str_content.IsOneByte() && search_content.IsOneByte()) {
159     Vector<const uint8_t> str_vector = str_content.ToOneByteVector();
160     Vector<const uint8_t> search_vector = search_content.ToOneByteVector();
161 
162     return isolate->heap()->ToBoolean(memcmp(str_vector.start() + start,
163                                              search_vector.start(),
164                                              search_string->length()) == 0);
165   }
166 
167   FlatStringReader str_reader(isolate, str);
168   FlatStringReader search_reader(isolate, search_string);
169 
170   for (int i = 0; i < search_string->length(); i++) {
171     if (str_reader.Get(start + i) != search_reader.Get(i)) {
172       return isolate->heap()->false_value();
173     }
174   }
175   return isolate->heap()->true_value();
176 }
177 
178 // ES6 section 21.1.3.9
179 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf)180 BUILTIN(StringPrototypeLastIndexOf) {
181   HandleScope handle_scope(isolate);
182   return String::LastIndexOf(isolate, args.receiver(),
183                              args.atOrUndefined(isolate, 1),
184                              args.atOrUndefined(isolate, 2));
185 }
186 
187 // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
188 //
189 // This function is implementation specific.  For now, we do not
190 // do anything locale specific.
191 // If internationalization is enabled, then intl.js will override this function
192 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeLocaleCompare)193 BUILTIN(StringPrototypeLocaleCompare) {
194   HandleScope handle_scope(isolate);
195   DCHECK_EQ(2, args.length());
196 
197   TO_THIS_STRING(str1, "String.prototype.localeCompare");
198   Handle<String> str2;
199   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
200                                      Object::ToString(isolate, args.at(1)));
201 
202   if (str1.is_identical_to(str2)) return Smi::kZero;  // Equal.
203   int str1_length = str1->length();
204   int str2_length = str2->length();
205 
206   // Decide trivial cases without flattening.
207   if (str1_length == 0) {
208     if (str2_length == 0) return Smi::kZero;  // Equal.
209     return Smi::FromInt(-str2_length);
210   } else {
211     if (str2_length == 0) return Smi::FromInt(str1_length);
212   }
213 
214   int end = str1_length < str2_length ? str1_length : str2_length;
215 
216   // No need to flatten if we are going to find the answer on the first
217   // character. At this point we know there is at least one character
218   // in each string, due to the trivial case handling above.
219   int d = str1->Get(0) - str2->Get(0);
220   if (d != 0) return Smi::FromInt(d);
221 
222   str1 = String::Flatten(str1);
223   str2 = String::Flatten(str2);
224 
225   DisallowHeapAllocation no_gc;
226   String::FlatContent flat1 = str1->GetFlatContent();
227   String::FlatContent flat2 = str2->GetFlatContent();
228 
229   for (int i = 0; i < end; i++) {
230     if (flat1.Get(i) != flat2.Get(i)) {
231       return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
232     }
233   }
234 
235   return Smi::FromInt(str1_length - str2_length);
236 }
237 
238 #ifndef V8_INTL_SUPPORT
239 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
240 //
241 // Simply checks the argument is valid and returns the string itself.
242 // If internationalization is enabled, then intl.js will override this function
243 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize)244 BUILTIN(StringPrototypeNormalize) {
245   HandleScope handle_scope(isolate);
246   TO_THIS_STRING(string, "String.prototype.normalize");
247 
248   Handle<Object> form_input = args.atOrUndefined(isolate, 1);
249   if (form_input->IsUndefined(isolate)) return *string;
250 
251   Handle<String> form;
252   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
253                                      Object::ToString(isolate, form_input));
254 
255   if (!(String::Equals(form,
256                        isolate->factory()->NewStringFromStaticChars("NFC")) ||
257         String::Equals(form,
258                        isolate->factory()->NewStringFromStaticChars("NFD")) ||
259         String::Equals(form,
260                        isolate->factory()->NewStringFromStaticChars("NFKC")) ||
261         String::Equals(form,
262                        isolate->factory()->NewStringFromStaticChars("NFKD")))) {
263     Handle<String> valid_forms =
264         isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
265     THROW_NEW_ERROR_RETURN_FAILURE(
266         isolate,
267         NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
268   }
269 
270   return *string;
271 }
272 #endif  // !V8_INTL_SUPPORT
273 
// ES6 section 21.1.3.18
// String.prototype.startsWith ( searchString [ , position ] )
BUILTIN(StringPrototypeStartsWith) {
  HandleScope handle_scope(isolate);
  TO_THIS_STRING(subject, "String.prototype.startsWith");

  // A RegExp as search argument is rejected with a TypeError.
  Handle<Object> search = args.atOrUndefined(isolate, 1);
  Maybe<bool> search_is_regexp = RegExpUtils::IsRegExp(isolate, search);
  if (search_is_regexp.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  if (search_is_regexp.FromJust()) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
                              isolate->factory()->NewStringFromStaticChars(
                                  "String.prototype.startsWith")));
  }
  Handle<String> needle;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, needle,
                                     Object::ToString(isolate, search));

  // The start position defaults to 0 when not supplied.
  Handle<Object> position = args.atOrUndefined(isolate, 2);
  int start = 0;
  if (!position->IsUndefined(isolate)) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
                                       Object::ToInteger(isolate, position));
    start = subject->ToValidIndex(*position);
  }

  // The needle cannot match if it would run past the end of the subject.
  const int needle_length = needle->length();
  if (start + needle_length > subject->length()) {
    return isolate->heap()->false_value();
  }

  FlatStringReader subject_reader(isolate, String::Flatten(subject));
  FlatStringReader needle_reader(isolate, String::Flatten(needle));

  for (int offset = 0; offset < needle_length; offset++) {
    if (subject_reader.Get(start + offset) != needle_reader.Get(offset)) {
      return isolate->heap()->false_value();
    }
  }
  return isolate->heap()->true_value();
}
320 
321 #ifndef V8_INTL_SUPPORT
322 namespace {
323 
ToUpperOverflows(uc32 character)324 inline bool ToUpperOverflows(uc32 character) {
325   // y with umlauts and the micro sign are the only characters that stop
326   // fitting into one-byte when converting to uppercase.
327   static const uc32 yuml_code = 0xFF;
328   static const uc32 micro_code = 0xB5;
329   return (character == yuml_code || character == micro_code);
330 }
331 
332 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String * string,SeqString * result,int result_length,unibrow::Mapping<Converter,128> * mapping)333 V8_WARN_UNUSED_RESULT static Object* ConvertCaseHelper(
334     Isolate* isolate, String* string, SeqString* result, int result_length,
335     unibrow::Mapping<Converter, 128>* mapping) {
336   DisallowHeapAllocation no_gc;
337   // We try this twice, once with the assumption that the result is no longer
338   // than the input and, if that assumption breaks, again with the exact
339   // length.  This may not be pretty, but it is nicer than what was here before
340   // and I hereby claim my vaffel-is.
341   //
342   // NOTE: This assumes that the upper/lower case of an ASCII
343   // character is also ASCII.  This is currently the case, but it
344   // might break in the future if we implement more context and locale
345   // dependent upper/lower conversions.
346   bool has_changed_character = false;
347 
348   // Convert all characters to upper case, assuming that they will fit
349   // in the buffer
350   StringCharacterStream stream(string);
351   unibrow::uchar chars[Converter::kMaxWidth];
352   // We can assume that the string is not empty
353   uc32 current = stream.GetNext();
354   bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
355   for (int i = 0; i < result_length;) {
356     bool has_next = stream.HasMore();
357     uc32 next = has_next ? stream.GetNext() : 0;
358     int char_length = mapping->get(current, next, chars);
359     if (char_length == 0) {
360       // The case conversion of this character is the character itself.
361       result->Set(i, current);
362       i++;
363     } else if (char_length == 1 &&
364                (ignore_overflow || !ToUpperOverflows(current))) {
365       // Common case: converting the letter resulted in one character.
366       DCHECK(static_cast<uc32>(chars[0]) != current);
367       result->Set(i, chars[0]);
368       has_changed_character = true;
369       i++;
370     } else if (result_length == string->length()) {
371       bool overflows = ToUpperOverflows(current);
372       // We've assumed that the result would be as long as the
373       // input but here is a character that converts to several
374       // characters.  No matter, we calculate the exact length
375       // of the result and try the whole thing again.
376       //
377       // Note that this leaves room for optimization.  We could just
378       // memcpy what we already have to the result string.  Also,
379       // the result string is the last object allocated we could
380       // "realloc" it and probably, in the vast majority of cases,
381       // extend the existing string to be able to hold the full
382       // result.
383       int next_length = 0;
384       if (has_next) {
385         next_length = mapping->get(next, 0, chars);
386         if (next_length == 0) next_length = 1;
387       }
388       int current_length = i + char_length + next_length;
389       while (stream.HasMore()) {
390         current = stream.GetNext();
391         overflows |= ToUpperOverflows(current);
392         // NOTE: we use 0 as the next character here because, while
393         // the next character may affect what a character converts to,
394         // it does not in any case affect the length of what it convert
395         // to.
396         int char_length = mapping->get(current, 0, chars);
397         if (char_length == 0) char_length = 1;
398         current_length += char_length;
399         if (current_length > String::kMaxLength) {
400           AllowHeapAllocation allocate_error_and_return;
401           THROW_NEW_ERROR_RETURN_FAILURE(isolate,
402                                          NewInvalidStringLengthError());
403         }
404       }
405       // Try again with the real length.  Return signed if we need
406       // to allocate a two-byte string for to uppercase.
407       return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
408                                              : Smi::FromInt(current_length);
409     } else {
410       for (int j = 0; j < char_length; j++) {
411         result->Set(i, chars[j]);
412         i++;
413       }
414       has_changed_character = true;
415     }
416     current = next;
417   }
418   if (has_changed_character) {
419     return result;
420   } else {
421     // If we didn't actually change anything in doing the conversion
422     // we simple return the result and let the converted string
423     // become garbage; there is no reason to keep two identical strings
424     // alive.
425     return string;
426   }
427 }
428 
429 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)430 V8_WARN_UNUSED_RESULT static Object* ConvertCase(
431     Handle<String> s, Isolate* isolate,
432     unibrow::Mapping<Converter, 128>* mapping) {
433   s = String::Flatten(s);
434   int length = s->length();
435   // Assume that the string is not empty; we need this assumption later
436   if (length == 0) return *s;
437 
438   // Simpler handling of ASCII strings.
439   //
440   // NOTE: This assumes that the upper/lower case of an ASCII
441   // character is also ASCII.  This is currently the case, but it
442   // might break in the future if we implement more context and locale
443   // dependent upper/lower conversions.
444   if (s->IsOneByteRepresentationUnderneath()) {
445     // Same length as input.
446     Handle<SeqOneByteString> result =
447         isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
448     DisallowHeapAllocation no_gc;
449     String::FlatContent flat_content = s->GetFlatContent();
450     DCHECK(flat_content.IsFlat());
451     bool has_changed_character = false;
452     int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
453         reinterpret_cast<char*>(result->GetChars()),
454         reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
455         length, &has_changed_character);
456     // If not ASCII, we discard the result and take the 2 byte path.
457     if (index_to_first_unprocessed == length)
458       return has_changed_character ? *result : *s;
459   }
460 
461   Handle<SeqString> result;  // Same length as input.
462   if (s->IsOneByteRepresentation()) {
463     result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
464   } else {
465     result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
466   }
467 
468   Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
469   if (answer->IsException(isolate) || answer->IsString()) return answer;
470 
471   DCHECK(answer->IsSmi());
472   length = Smi::ToInt(answer);
473   if (s->IsOneByteRepresentation() && length > 0) {
474     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
475         isolate, result, isolate->factory()->NewRawOneByteString(length));
476   } else {
477     if (length < 0) length = -length;
478     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
479         isolate, result, isolate->factory()->NewRawTwoByteString(length));
480   }
481   return ConvertCaseHelper(isolate, *s, *result, length, mapping);
482 }
483 
484 }  // namespace
485 
// String.prototype.toLocaleLowerCase fallback: converts the receiver string
// with the isolate's to-lower mapping.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(string, isolate, lower_mapping);
}
492 
// String.prototype.toLocaleUpperCase fallback: converts the receiver string
// with the isolate's to-upper mapping.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(string, isolate, upper_mapping);
}
499 
// String.prototype.toLowerCase: converts the receiver string with the
// isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(string, isolate, lower_mapping);
}
506 
// String.prototype.toUpperCase: converts the receiver string with the
// isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(string, "String.prototype.toUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(string, isolate, upper_mapping);
}
513 #endif  // !V8_INTL_SUPPORT
514 
515 // ES6 #sec-string.prototype.raw
BUILTIN(StringRaw)516 BUILTIN(StringRaw) {
517   HandleScope scope(isolate);
518   Handle<Object> templ = args.atOrUndefined(isolate, 1);
519   const uint32_t argc = args.length();
520   Handle<String> raw_string =
521       isolate->factory()->NewStringFromAsciiChecked("raw");
522 
523   Handle<Object> cooked;
524   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
525                                      Object::ToObject(isolate, templ));
526 
527   Handle<Object> raw;
528   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
529                                      Object::GetProperty(cooked, raw_string));
530   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
531                                      Object::ToObject(isolate, raw));
532   Handle<Object> raw_len;
533   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
534       isolate, raw_len,
535       Object::GetProperty(raw, isolate->factory()->length_string()));
536 
537   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
538                                      Object::ToLength(isolate, raw_len));
539 
540   IncrementalStringBuilder result_builder(isolate);
541   const uint32_t length = static_cast<uint32_t>(raw_len->Number());
542   if (length > 0) {
543     Handle<Object> first_element;
544     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
545                                        Object::GetElement(isolate, raw, 0));
546 
547     Handle<String> first_string;
548     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
549         isolate, first_string, Object::ToString(isolate, first_element));
550     result_builder.AppendString(first_string);
551 
552     for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
553       if (arg_i < argc) {
554         Handle<String> argument_string;
555         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
556             isolate, argument_string,
557             Object::ToString(isolate, args.at(arg_i)));
558         result_builder.AppendString(argument_string);
559       }
560 
561       Handle<Object> element;
562       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
563                                          Object::GetElement(isolate, raw, i));
564 
565       Handle<String> element_string;
566       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
567                                          Object::ToString(isolate, element));
568       result_builder.AppendString(element_string);
569     }
570   }
571 
572   RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
573 }
574 
575 }  // namespace internal
576 }  // namespace v8
577