1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-utils-inl.h"
6 #include "src/builtins/builtins.h"
7 #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
8 #include "src/logging/counters.h"
9 #include "src/numbers/conversions.h"
10 #include "src/objects/objects-inl.h"
11 #ifdef V8_INTL_SUPPORT
12 #include "src/objects/intl-objects.h"
13 #endif
14 #include "src/regexp/regexp-utils.h"
15 #include "src/strings/string-builder-inl.h"
16 #include "src/strings/string-case.h"
17 #include "src/strings/unicode-inl.h"
18 #include "src/strings/unicode.h"
19
20 namespace v8 {
21 namespace internal {
22
23 namespace { // for String.fromCodePoint
24
IsValidCodePoint(Isolate * isolate,Handle<Object> value)25 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
26 if (!value->IsNumber() &&
27 !Object::ToNumber(isolate, value).ToHandle(&value)) {
28 return false;
29 }
30
31 if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
32 value->Number()) {
33 return false;
34 }
35
36 if (value->Number() < 0 || value->Number() > 0x10FFFF) {
37 return false;
38 }
39
40 return true;
41 }
42
43 static constexpr uc32 kInvalidCodePoint = static_cast<uc32>(-1);
44
NextCodePoint(Isolate * isolate,BuiltinArguments args,int index)45 uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
46 Handle<Object> value = args.at(1 + index);
47 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
48 isolate, value, Object::ToNumber(isolate, value), kInvalidCodePoint);
49 if (!IsValidCodePoint(isolate, value)) {
50 isolate->Throw(*isolate->factory()->NewRangeError(
51 MessageTemplate::kInvalidCodePoint, value));
52 return kInvalidCodePoint;
53 }
54 return DoubleToUint32(value->Number());
55 }
56
57 } // namespace
58
59 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
BUILTIN(StringFromCodePoint)60 BUILTIN(StringFromCodePoint) {
61 HandleScope scope(isolate);
62 int const length = args.length() - 1;
63 if (length == 0) return ReadOnlyRoots(isolate).empty_string();
64 DCHECK_LT(0, length);
65
66 // Optimistically assume that the resulting String contains only one byte
67 // characters.
68 std::vector<uint8_t> one_byte_buffer;
69 one_byte_buffer.reserve(length);
70 uc32 code = 0;
71 int index;
72 for (index = 0; index < length; index++) {
73 code = NextCodePoint(isolate, args, index);
74 if (code == kInvalidCodePoint) {
75 return ReadOnlyRoots(isolate).exception();
76 }
77 if (code > String::kMaxOneByteCharCode) {
78 break;
79 }
80 one_byte_buffer.push_back(code);
81 }
82
83 if (index == length) {
84 RETURN_RESULT_OR_FAILURE(
85 isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
86 one_byte_buffer.data(), one_byte_buffer.size())));
87 }
88
89 std::vector<uc16> two_byte_buffer;
90 two_byte_buffer.reserve(length - index);
91
92 while (true) {
93 if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
94 two_byte_buffer.push_back(code);
95 } else {
96 two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
97 two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
98 }
99
100 if (++index == length) {
101 break;
102 }
103 code = NextCodePoint(isolate, args, index);
104 if (code == kInvalidCodePoint) {
105 return ReadOnlyRoots(isolate).exception();
106 }
107 }
108
109 Handle<SeqTwoByteString> result;
110 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
111 isolate, result,
112 isolate->factory()->NewRawTwoByteString(
113 static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
114
115 DisallowHeapAllocation no_gc;
116 CopyChars(result->GetChars(no_gc), one_byte_buffer.data(),
117 one_byte_buffer.size());
118 CopyChars(result->GetChars(no_gc) + one_byte_buffer.size(),
119 two_byte_buffer.data(), two_byte_buffer.size());
120
121 return *result;
122 }
123
124 // ES6 section 21.1.3.9
125 // String.prototype.lastIndexOf ( searchString [ , position ] )
BUILTIN(StringPrototypeLastIndexOf)126 BUILTIN(StringPrototypeLastIndexOf) {
127 HandleScope handle_scope(isolate);
128 return String::LastIndexOf(isolate, args.receiver(),
129 args.atOrUndefined(isolate, 1),
130 args.atOrUndefined(isolate, 2));
131 }
132
133 // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
134 //
135 // This function is implementation specific. For now, we do not
136 // do anything locale specific.
BUILTIN(StringPrototypeLocaleCompare)137 BUILTIN(StringPrototypeLocaleCompare) {
138 HandleScope handle_scope(isolate);
139
140 isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
141 const char* method = "String.prototype.localeCompare";
142
143 #ifdef V8_INTL_SUPPORT
144 TO_THIS_STRING(str1, method);
145 Handle<String> str2;
146 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
147 isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
148 RETURN_RESULT_OR_FAILURE(
149 isolate, Intl::StringLocaleCompare(
150 isolate, str1, str2, args.atOrUndefined(isolate, 2),
151 args.atOrUndefined(isolate, 3), method));
152 #else
153 DCHECK_LE(2, args.length());
154
155 TO_THIS_STRING(str1, method);
156 Handle<String> str2;
157 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
158 Object::ToString(isolate, args.at(1)));
159
160 if (str1.is_identical_to(str2)) return Smi::zero(); // Equal.
161 int str1_length = str1->length();
162 int str2_length = str2->length();
163
164 // Decide trivial cases without flattening.
165 if (str1_length == 0) {
166 if (str2_length == 0) return Smi::zero(); // Equal.
167 return Smi::FromInt(-str2_length);
168 } else {
169 if (str2_length == 0) return Smi::FromInt(str1_length);
170 }
171
172 int end = str1_length < str2_length ? str1_length : str2_length;
173
174 // No need to flatten if we are going to find the answer on the first
175 // character. At this point we know there is at least one character
176 // in each string, due to the trivial case handling above.
177 int d = str1->Get(0) - str2->Get(0);
178 if (d != 0) return Smi::FromInt(d);
179
180 str1 = String::Flatten(isolate, str1);
181 str2 = String::Flatten(isolate, str2);
182
183 DisallowHeapAllocation no_gc;
184 String::FlatContent flat1 = str1->GetFlatContent(no_gc);
185 String::FlatContent flat2 = str2->GetFlatContent(no_gc);
186
187 for (int i = 0; i < end; i++) {
188 if (flat1.Get(i) != flat2.Get(i)) {
189 return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
190 }
191 }
192
193 return Smi::FromInt(str1_length - str2_length);
194 #endif // !V8_INTL_SUPPORT
195 }
196
197 #ifndef V8_INTL_SUPPORT
198 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
199 //
200 // Simply checks the argument is valid and returns the string itself.
201 // If internationalization is enabled, then intl.js will override this function
202 // and provide the proper functionality, so this is just a fallback.
BUILTIN(StringPrototypeNormalize)203 BUILTIN(StringPrototypeNormalize) {
204 HandleScope handle_scope(isolate);
205 TO_THIS_STRING(string, "String.prototype.normalize");
206
207 Handle<Object> form_input = args.atOrUndefined(isolate, 1);
208 if (form_input->IsUndefined(isolate)) return *string;
209
210 Handle<String> form;
211 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
212 Object::ToString(isolate, form_input));
213
214 if (!(String::Equals(isolate, form, isolate->factory()->NFC_string()) ||
215 String::Equals(isolate, form, isolate->factory()->NFD_string()) ||
216 String::Equals(isolate, form, isolate->factory()->NFKC_string()) ||
217 String::Equals(isolate, form, isolate->factory()->NFKD_string()))) {
218 Handle<String> valid_forms =
219 isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
220 THROW_NEW_ERROR_RETURN_FAILURE(
221 isolate,
222 NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
223 }
224
225 return *string;
226 }
227 #endif // !V8_INTL_SUPPORT
228
229
230 #ifndef V8_INTL_SUPPORT
231 namespace {
232
ToUpperOverflows(uc32 character)233 inline bool ToUpperOverflows(uc32 character) {
234 // y with umlauts and the micro sign are the only characters that stop
235 // fitting into one-byte when converting to uppercase.
236 static const uc32 yuml_code = 0xFF;
237 static const uc32 micro_code = 0xB5;
238 return (character == yuml_code || character == micro_code);
239 }
240
241 template <class Converter>
ConvertCaseHelper(Isolate * isolate,String string,SeqString result,int result_length,unibrow::Mapping<Converter,128> * mapping)242 V8_WARN_UNUSED_RESULT static Object ConvertCaseHelper(
243 Isolate* isolate, String string, SeqString result, int result_length,
244 unibrow::Mapping<Converter, 128>* mapping) {
245 DisallowHeapAllocation no_gc;
246 // We try this twice, once with the assumption that the result is no longer
247 // than the input and, if that assumption breaks, again with the exact
248 // length. This may not be pretty, but it is nicer than what was here before
249 // and I hereby claim my vaffel-is.
250 //
251 // NOTE: This assumes that the upper/lower case of an ASCII
252 // character is also ASCII. This is currently the case, but it
253 // might break in the future if we implement more context and locale
254 // dependent upper/lower conversions.
255 bool has_changed_character = false;
256
257 // Convert all characters to upper case, assuming that they will fit
258 // in the buffer
259 StringCharacterStream stream(string);
260 unibrow::uchar chars[Converter::kMaxWidth];
261 // We can assume that the string is not empty
262 uc32 current = stream.GetNext();
263 bool ignore_overflow = Converter::kIsToLower || result.IsSeqTwoByteString();
264 for (int i = 0; i < result_length;) {
265 bool has_next = stream.HasMore();
266 uc32 next = has_next ? stream.GetNext() : 0;
267 int char_length = mapping->get(current, next, chars);
268 if (char_length == 0) {
269 // The case conversion of this character is the character itself.
270 result.Set(i, current);
271 i++;
272 } else if (char_length == 1 &&
273 (ignore_overflow || !ToUpperOverflows(current))) {
274 // Common case: converting the letter resulted in one character.
275 DCHECK(static_cast<uc32>(chars[0]) != current);
276 result.Set(i, chars[0]);
277 has_changed_character = true;
278 i++;
279 } else if (result_length == string.length()) {
280 bool overflows = ToUpperOverflows(current);
281 // We've assumed that the result would be as long as the
282 // input but here is a character that converts to several
283 // characters. No matter, we calculate the exact length
284 // of the result and try the whole thing again.
285 //
286 // Note that this leaves room for optimization. We could just
287 // memcpy what we already have to the result string. Also,
288 // the result string is the last object allocated we could
289 // "realloc" it and probably, in the vast majority of cases,
290 // extend the existing string to be able to hold the full
291 // result.
292 int next_length = 0;
293 if (has_next) {
294 next_length = mapping->get(next, 0, chars);
295 if (next_length == 0) next_length = 1;
296 }
297 int current_length = i + char_length + next_length;
298 while (stream.HasMore()) {
299 current = stream.GetNext();
300 overflows |= ToUpperOverflows(current);
301 // NOTE: we use 0 as the next character here because, while
302 // the next character may affect what a character converts to,
303 // it does not in any case affect the length of what it convert
304 // to.
305 int char_length = mapping->get(current, 0, chars);
306 if (char_length == 0) char_length = 1;
307 current_length += char_length;
308 if (current_length > String::kMaxLength) {
309 AllowHeapAllocation allocate_error_and_return;
310 THROW_NEW_ERROR_RETURN_FAILURE(isolate,
311 NewInvalidStringLengthError());
312 }
313 }
314 // Try again with the real length. Return signed if we need
315 // to allocate a two-byte string for to uppercase.
316 return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
317 : Smi::FromInt(current_length);
318 } else {
319 for (int j = 0; j < char_length; j++) {
320 result.Set(i, chars[j]);
321 i++;
322 }
323 has_changed_character = true;
324 }
325 current = next;
326 }
327 if (has_changed_character) {
328 return result;
329 } else {
330 // If we didn't actually change anything in doing the conversion
331 // we simple return the result and let the converted string
332 // become garbage; there is no reason to keep two identical strings
333 // alive.
334 return string;
335 }
336 }
337
338 template <class Converter>
ConvertCase(Handle<String> s,Isolate * isolate,unibrow::Mapping<Converter,128> * mapping)339 V8_WARN_UNUSED_RESULT static Object ConvertCase(
340 Handle<String> s, Isolate* isolate,
341 unibrow::Mapping<Converter, 128>* mapping) {
342 s = String::Flatten(isolate, s);
343 int length = s->length();
344 // Assume that the string is not empty; we need this assumption later
345 if (length == 0) return *s;
346
347 // Simpler handling of ASCII strings.
348 //
349 // NOTE: This assumes that the upper/lower case of an ASCII
350 // character is also ASCII. This is currently the case, but it
351 // might break in the future if we implement more context and locale
352 // dependent upper/lower conversions.
353 if (String::IsOneByteRepresentationUnderneath(*s)) {
354 // Same length as input.
355 Handle<SeqOneByteString> result =
356 isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
357 DisallowHeapAllocation no_gc;
358 String::FlatContent flat_content = s->GetFlatContent(no_gc);
359 DCHECK(flat_content.IsFlat());
360 bool has_changed_character = false;
361 int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
362 reinterpret_cast<char*>(result->GetChars(no_gc)),
363 reinterpret_cast<const char*>(flat_content.ToOneByteVector().begin()),
364 length, &has_changed_character);
365 // If not ASCII, we discard the result and take the 2 byte path.
366 if (index_to_first_unprocessed == length)
367 return has_changed_character ? *result : *s;
368 }
369
370 Handle<SeqString> result; // Same length as input.
371 if (s->IsOneByteRepresentation()) {
372 result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
373 } else {
374 result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
375 }
376
377 Object answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
378 if (answer.IsException(isolate) || answer.IsString()) return answer;
379
380 DCHECK(answer.IsSmi());
381 length = Smi::ToInt(answer);
382 if (s->IsOneByteRepresentation() && length > 0) {
383 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
384 isolate, result, isolate->factory()->NewRawOneByteString(length));
385 } else {
386 if (length < 0) length = -length;
387 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
388 isolate, result, isolate->factory()->NewRawTwoByteString(length));
389 }
390 return ConvertCaseHelper(isolate, *s, *result, length, mapping);
391 }
392
393 } // namespace
394
// String.prototype.toLocaleLowerCase for builds without V8_INTL_SUPPORT.
// No locale is consulted: the receiver is converted with the isolate's
// to-lower mapping.
BUILTIN(StringPrototypeToLocaleLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(this_string, "String.prototype.toLocaleLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(this_string, isolate, lower_mapping);
}
401
// String.prototype.toLocaleUpperCase for builds without V8_INTL_SUPPORT.
// No locale is consulted: the receiver is converted with the isolate's
// to-upper mapping.
BUILTIN(StringPrototypeToLocaleUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(this_string, "String.prototype.toLocaleUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(this_string, isolate, upper_mapping);
}
408
// String.prototype.toLowerCase for builds without V8_INTL_SUPPORT: converts
// the receiver string with the isolate's to-lower mapping.
BUILTIN(StringPrototypeToLowerCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(this_string, "String.prototype.toLowerCase");
  auto* lower_mapping = isolate->runtime_state()->to_lower_mapping();
  return ConvertCase(this_string, isolate, lower_mapping);
}
415
// String.prototype.toUpperCase for builds without V8_INTL_SUPPORT: converts
// the receiver string with the isolate's to-upper mapping.
BUILTIN(StringPrototypeToUpperCase) {
  HandleScope scope(isolate);
  TO_THIS_STRING(this_string, "String.prototype.toUpperCase");
  auto* upper_mapping = isolate->runtime_state()->to_upper_mapping();
  return ConvertCase(this_string, isolate, upper_mapping);
}
422 #endif // !V8_INTL_SUPPORT
423
424 // ES6 #sec-string.prototype.raw
BUILTIN(StringRaw)425 BUILTIN(StringRaw) {
426 HandleScope scope(isolate);
427 Handle<Object> templ = args.atOrUndefined(isolate, 1);
428 const uint32_t argc = args.length();
429 Handle<String> raw_string =
430 isolate->factory()->NewStringFromAsciiChecked("raw");
431
432 Handle<Object> cooked;
433 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
434 Object::ToObject(isolate, templ));
435
436 Handle<Object> raw;
437 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
438 isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
439 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
440 Object::ToObject(isolate, raw));
441 Handle<Object> raw_len;
442 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
443 isolate, raw_len,
444 Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
445
446 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
447 Object::ToLength(isolate, raw_len));
448
449 IncrementalStringBuilder result_builder(isolate);
450 // Intentional spec violation: we ignore {length} values >= 2^32, because
451 // assuming non-empty chunks they would generate too-long strings anyway.
452 const double raw_len_number = raw_len->Number();
453 const uint32_t length = raw_len_number > std::numeric_limits<uint32_t>::max()
454 ? std::numeric_limits<uint32_t>::max()
455 : static_cast<uint32_t>(raw_len_number);
456 if (length > 0) {
457 Handle<Object> first_element;
458 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
459 Object::GetElement(isolate, raw, 0));
460
461 Handle<String> first_string;
462 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
463 isolate, first_string, Object::ToString(isolate, first_element));
464 result_builder.AppendString(first_string);
465
466 for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
467 if (arg_i < argc) {
468 Handle<String> argument_string;
469 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
470 isolate, argument_string,
471 Object::ToString(isolate, args.at(arg_i)));
472 result_builder.AppendString(argument_string);
473 }
474
475 Handle<Object> element;
476 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
477 Object::GetElement(isolate, raw, i));
478
479 Handle<String> element_string;
480 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
481 Object::ToString(isolate, element));
482 result_builder.AppendString(element_string);
483 }
484 }
485
486 RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
487 }
488
489 } // namespace internal
490 } // namespace v8
491