1 /*
2  * (C) 1999 Lars Knoll (knoll@kde.org)
3  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved.
4  * Copyright (C) 2007-2009 Torch Mobile, Inc.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public License
17  * along with this library; see the file COPYING.LIB.  If not, write to
18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  */
21 
22 #include "config.h"
23 #include "WTFString.h"
24 
25 #include <stdarg.h>
26 #include <wtf/ASCIICType.h>
27 #include <wtf/text/CString.h>
28 #include <wtf/StringExtras.h>
29 #include <wtf/Vector.h>
30 #include <wtf/dtoa.h>
31 #include <wtf/unicode/UTF8.h>
32 #include <wtf/unicode/Unicode.h>
33 
34 using namespace std;
35 
36 namespace WTF {
37 
38 using namespace Unicode;
39 using namespace std;
40 
41 // Construct a string with UTF-16 data.
String(const UChar * characters,unsigned length)42 String::String(const UChar* characters, unsigned length)
43     : m_impl(characters ? StringImpl::create(characters, length) : 0)
44 {
45 }
46 
47 // Construct a string with UTF-16 data, from a null-terminated source.
String(const UChar * str)48 String::String(const UChar* str)
49 {
50     if (!str)
51         return;
52 
53     size_t len = 0;
54     while (str[len] != UChar(0))
55         len++;
56 
57     if (len > numeric_limits<unsigned>::max())
58         CRASH();
59 
60     m_impl = StringImpl::create(str, len);
61 }
62 
63 // Construct a string with latin1 data.
String(const char * characters,unsigned length)64 String::String(const char* characters, unsigned length)
65     : m_impl(characters ? StringImpl::create(characters, length) : 0)
66 {
67 }
68 
69 // Construct a string with latin1 data, from a null-terminated source.
String(const char * characters)70 String::String(const char* characters)
71     : m_impl(characters ? StringImpl::create(characters) : 0)
72 {
73 }
74 
append(const String & str)75 void String::append(const String& str)
76 {
77     if (str.isEmpty())
78        return;
79 
80     // FIXME: This is extremely inefficient. So much so that we might want to take this
81     // out of String's API. We can make it better by optimizing the case where exactly
82     // one String is pointing at this StringImpl, but even then it's going to require a
83     // call to fastMalloc every single time.
84     if (str.m_impl) {
85         if (m_impl) {
86             UChar* data;
87             if (str.length() > numeric_limits<unsigned>::max() - m_impl->length())
88                 CRASH();
89             RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
90             memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
91             memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
92             m_impl = newImpl.release();
93         } else
94             m_impl = str.m_impl;
95     }
96 }
97 
append(char c)98 void String::append(char c)
99 {
100     // FIXME: This is extremely inefficient. So much so that we might want to take this
101     // out of String's API. We can make it better by optimizing the case where exactly
102     // one String is pointing at this StringImpl, but even then it's going to require a
103     // call to fastMalloc every single time.
104     if (m_impl) {
105         UChar* data;
106         if (m_impl->length() >= numeric_limits<unsigned>::max())
107             CRASH();
108         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
109         memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
110         data[m_impl->length()] = c;
111         m_impl = newImpl.release();
112     } else
113         m_impl = StringImpl::create(&c, 1);
114 }
115 
append(UChar c)116 void String::append(UChar c)
117 {
118     // FIXME: This is extremely inefficient. So much so that we might want to take this
119     // out of String's API. We can make it better by optimizing the case where exactly
120     // one String is pointing at this StringImpl, but even then it's going to require a
121     // call to fastMalloc every single time.
122     if (m_impl) {
123         UChar* data;
124         if (m_impl->length() >= numeric_limits<unsigned>::max())
125             CRASH();
126         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
127         memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
128         data[m_impl->length()] = c;
129         m_impl = newImpl.release();
130     } else
131         m_impl = StringImpl::create(&c, 1);
132 }
133 
// Concatenates two strings.
// If either operand is empty the other operand is returned unchanged, so no
// new buffer is built in that case.
String operator+(const String& a, const String& b)
{
    if (a.isEmpty())
        return b;
    if (b.isEmpty())
        return a;

    String joined(a);
    joined += b;
    return joined;
}
144 
// Concatenates a String with a C string by first wrapping |cs| in a String.
String operator+(const String& s, const char* cs)
{
    const String right(cs);
    return s + right;
}
149 
// Concatenates a C string with a String by first wrapping |cs| in a String.
String operator+(const char* cs, const String& s)
{
    const String left(cs);
    return left + s;
}
154 
codePointCompare(const String & a,const String & b)155 int codePointCompare(const String& a, const String& b)
156 {
157     return codePointCompare(a.impl(), b.impl());
158 }
159 
insert(const String & str,unsigned pos)160 void String::insert(const String& str, unsigned pos)
161 {
162     if (str.isEmpty()) {
163         if (str.isNull())
164             return;
165         if (isNull())
166             m_impl = str.impl();
167         return;
168     }
169     insert(str.characters(), str.length(), pos);
170 }
171 
append(const UChar * charactersToAppend,unsigned lengthToAppend)172 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
173 {
174     if (!m_impl) {
175         if (!charactersToAppend)
176             return;
177         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
178         return;
179     }
180 
181     if (!lengthToAppend)
182         return;
183 
184     ASSERT(charactersToAppend);
185     UChar* data;
186     if (lengthToAppend > numeric_limits<unsigned>::max() - length())
187         CRASH();
188     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
189     memcpy(data, characters(), length() * sizeof(UChar));
190     memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
191     m_impl = newImpl.release();
192 }
193 
insert(const UChar * charactersToInsert,unsigned lengthToInsert,unsigned position)194 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
195 {
196     if (position >= length()) {
197         append(charactersToInsert, lengthToInsert);
198         return;
199     }
200 
201     ASSERT(m_impl);
202 
203     if (!lengthToInsert)
204         return;
205 
206     ASSERT(charactersToInsert);
207     UChar* data;
208     if (lengthToInsert > numeric_limits<unsigned>::max() - length())
209         CRASH();
210     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data);
211     memcpy(data, characters(), position * sizeof(UChar));
212     memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
213     memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
214     m_impl = newImpl.release();
215 }
216 
characterStartingAt(unsigned i) const217 UChar32 String::characterStartingAt(unsigned i) const
218 {
219     if (!m_impl || i >= m_impl->length())
220         return 0;
221     return m_impl->characterStartingAt(i);
222 }
223 
truncate(unsigned position)224 void String::truncate(unsigned position)
225 {
226     if (position >= length())
227         return;
228     UChar* data;
229     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
230     memcpy(data, characters(), position * sizeof(UChar));
231     m_impl = newImpl.release();
232 }
233 
remove(unsigned position,int lengthToRemove)234 void String::remove(unsigned position, int lengthToRemove)
235 {
236     if (lengthToRemove <= 0)
237         return;
238     if (position >= length())
239         return;
240     if (static_cast<unsigned>(lengthToRemove) > length() - position)
241         lengthToRemove = length() - position;
242     UChar* data;
243     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
244     memcpy(data, characters(), position * sizeof(UChar));
245     memcpy(data + position, characters() + position + lengthToRemove,
246         (length() - lengthToRemove - position) * sizeof(UChar));
247     m_impl = newImpl.release();
248 }
249 
substring(unsigned pos,unsigned len) const250 String String::substring(unsigned pos, unsigned len) const
251 {
252     if (!m_impl)
253         return String();
254     return m_impl->substring(pos, len);
255 }
256 
substringSharingImpl(unsigned offset,unsigned length) const257 String String::substringSharingImpl(unsigned offset, unsigned length) const
258 {
259     // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
260 
261     unsigned stringLength = this->length();
262     offset = min(offset, stringLength);
263     length = min(length, stringLength - offset);
264 
265     if (!offset && length == stringLength)
266         return *this;
267     return String(StringImpl::create(m_impl, offset, length));
268 }
269 
lower() const270 String String::lower() const
271 {
272     if (!m_impl)
273         return String();
274     return m_impl->lower();
275 }
276 
upper() const277 String String::upper() const
278 {
279     if (!m_impl)
280         return String();
281     return m_impl->upper();
282 }
283 
stripWhiteSpace() const284 String String::stripWhiteSpace() const
285 {
286     if (!m_impl)
287         return String();
288     return m_impl->stripWhiteSpace();
289 }
290 
simplifyWhiteSpace() const291 String String::simplifyWhiteSpace() const
292 {
293     if (!m_impl)
294         return String();
295     return m_impl->simplifyWhiteSpace();
296 }
297 
removeCharacters(CharacterMatchFunctionPtr findMatch) const298 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
299 {
300     if (!m_impl)
301         return String();
302     return m_impl->removeCharacters(findMatch);
303 }
304 
foldCase() const305 String String::foldCase() const
306 {
307     if (!m_impl)
308         return String();
309     return m_impl->foldCase();
310 }
311 
percentage(int & result) const312 bool String::percentage(int& result) const
313 {
314     if (!m_impl || !m_impl->length())
315         return false;
316 
317     if ((*m_impl)[m_impl->length() - 1] != '%')
318        return false;
319 
320     result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
321     return true;
322 }
323 
charactersWithNullTermination()324 const UChar* String::charactersWithNullTermination()
325 {
326     if (!m_impl)
327         return 0;
328     if (m_impl->hasTerminatingNullCharacter())
329         return m_impl->characters();
330     m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
331     return m_impl->characters();
332 }
333 
format(const char * format,...)334 String String::format(const char *format, ...)
335 {
336 #if PLATFORM(QT)
337     // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
338     // https://bugs.webkit.org/show_bug.cgi?id=18994
339     va_list args;
340     va_start(args, format);
341 
342     QString buffer;
343     buffer.vsprintf(format, args);
344 
345     va_end(args);
346 
347     QByteArray ba = buffer.toUtf8();
348     return StringImpl::create(ba.constData(), ba.length());
349 
350 #elif OS(WINCE)
351     va_list args;
352     va_start(args, format);
353 
354     Vector<char, 256> buffer;
355 
356     int bufferSize = 256;
357     buffer.resize(bufferSize);
358     for (;;) {
359         int written = vsnprintf(buffer.data(), bufferSize, format, args);
360         va_end(args);
361 
362         if (written == 0)
363             return String("");
364         if (written > 0)
365             return StringImpl::create(buffer.data(), written);
366 
367         bufferSize <<= 1;
368         buffer.resize(bufferSize);
369         va_start(args, format);
370     }
371 
372 #else
373     va_list args;
374     va_start(args, format);
375 
376     Vector<char, 256> buffer;
377 
378     // Do the format once to get the length.
379 #if COMPILER(MSVC)
380     int result = _vscprintf(format, args);
381 #else
382     char ch;
383     int result = vsnprintf(&ch, 1, format, args);
384     // We need to call va_end() and then va_start() again here, as the
385     // contents of args is undefined after the call to vsnprintf
386     // according to http://man.cx/snprintf(3)
387     //
388     // Not calling va_end/va_start here happens to work on lots of
389     // systems, but fails e.g. on 64bit Linux.
390     va_end(args);
391     va_start(args, format);
392 #endif
393 
394     if (result == 0)
395         return String("");
396     if (result < 0)
397         return String();
398     unsigned len = result;
399     buffer.grow(len + 1);
400 
401     // Now do the formatting again, guaranteed to fit.
402     vsnprintf(buffer.data(), buffer.size(), format, args);
403 
404     va_end(args);
405 
406     return StringImpl::create(buffer.data(), len);
407 #endif
408 }
409 
number(short n)410 String String::number(short n)
411 {
412     return String::format("%hd", n);
413 }
414 
number(unsigned short n)415 String String::number(unsigned short n)
416 {
417     return String::format("%hu", n);
418 }
419 
number(int n)420 String String::number(int n)
421 {
422     return String::format("%d", n);
423 }
424 
number(unsigned n)425 String String::number(unsigned n)
426 {
427     return String::format("%u", n);
428 }
429 
number(long n)430 String String::number(long n)
431 {
432     return String::format("%ld", n);
433 }
434 
number(unsigned long n)435 String String::number(unsigned long n)
436 {
437     return String::format("%lu", n);
438 }
439 
number(long long n)440 String String::number(long long n)
441 {
442 #if OS(WINDOWS) && !PLATFORM(QT)
443     return String::format("%I64i", n);
444 #else
445     return String::format("%lli", n);
446 #endif
447 }
448 
number(unsigned long long n)449 String String::number(unsigned long long n)
450 {
451 #if OS(WINDOWS) && !PLATFORM(QT)
452     return String::format("%I64u", n);
453 #else
454     return String::format("%llu", n);
455 #endif
456 }
457 
number(double n)458 String String::number(double n)
459 {
460     return String::format("%.6lg", n);
461 }
462 
toIntStrict(bool * ok,int base) const463 int String::toIntStrict(bool* ok, int base) const
464 {
465     if (!m_impl) {
466         if (ok)
467             *ok = false;
468         return 0;
469     }
470     return m_impl->toIntStrict(ok, base);
471 }
472 
toUIntStrict(bool * ok,int base) const473 unsigned String::toUIntStrict(bool* ok, int base) const
474 {
475     if (!m_impl) {
476         if (ok)
477             *ok = false;
478         return 0;
479     }
480     return m_impl->toUIntStrict(ok, base);
481 }
482 
toInt64Strict(bool * ok,int base) const483 int64_t String::toInt64Strict(bool* ok, int base) const
484 {
485     if (!m_impl) {
486         if (ok)
487             *ok = false;
488         return 0;
489     }
490     return m_impl->toInt64Strict(ok, base);
491 }
492 
toUInt64Strict(bool * ok,int base) const493 uint64_t String::toUInt64Strict(bool* ok, int base) const
494 {
495     if (!m_impl) {
496         if (ok)
497             *ok = false;
498         return 0;
499     }
500     return m_impl->toUInt64Strict(ok, base);
501 }
502 
toIntPtrStrict(bool * ok,int base) const503 intptr_t String::toIntPtrStrict(bool* ok, int base) const
504 {
505     if (!m_impl) {
506         if (ok)
507             *ok = false;
508         return 0;
509     }
510     return m_impl->toIntPtrStrict(ok, base);
511 }
512 
513 
toInt(bool * ok) const514 int String::toInt(bool* ok) const
515 {
516     if (!m_impl) {
517         if (ok)
518             *ok = false;
519         return 0;
520     }
521     return m_impl->toInt(ok);
522 }
523 
toUInt(bool * ok) const524 unsigned String::toUInt(bool* ok) const
525 {
526     if (!m_impl) {
527         if (ok)
528             *ok = false;
529         return 0;
530     }
531     return m_impl->toUInt(ok);
532 }
533 
toInt64(bool * ok) const534 int64_t String::toInt64(bool* ok) const
535 {
536     if (!m_impl) {
537         if (ok)
538             *ok = false;
539         return 0;
540     }
541     return m_impl->toInt64(ok);
542 }
543 
toUInt64(bool * ok) const544 uint64_t String::toUInt64(bool* ok) const
545 {
546     if (!m_impl) {
547         if (ok)
548             *ok = false;
549         return 0;
550     }
551     return m_impl->toUInt64(ok);
552 }
553 
toIntPtr(bool * ok) const554 intptr_t String::toIntPtr(bool* ok) const
555 {
556     if (!m_impl) {
557         if (ok)
558             *ok = false;
559         return 0;
560     }
561     return m_impl->toIntPtr(ok);
562 }
563 
toDouble(bool * ok,bool * didReadNumber) const564 double String::toDouble(bool* ok, bool* didReadNumber) const
565 {
566     if (!m_impl) {
567         if (ok)
568             *ok = false;
569         if (didReadNumber)
570             *didReadNumber = false;
571         return 0.0;
572     }
573     return m_impl->toDouble(ok, didReadNumber);
574 }
575 
toFloat(bool * ok,bool * didReadNumber) const576 float String::toFloat(bool* ok, bool* didReadNumber) const
577 {
578     if (!m_impl) {
579         if (ok)
580             *ok = false;
581         if (didReadNumber)
582             *didReadNumber = false;
583         return 0.0f;
584     }
585     return m_impl->toFloat(ok, didReadNumber);
586 }
587 
threadsafeCopy() const588 String String::threadsafeCopy() const
589 {
590     if (!m_impl)
591         return String();
592     return m_impl->threadsafeCopy();
593 }
594 
crossThreadString() const595 String String::crossThreadString() const
596 {
597     if (!m_impl)
598         return String();
599     return m_impl->crossThreadString();
600 }
601 
split(const String & separator,bool allowEmptyEntries,Vector<String> & result) const602 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
603 {
604     result.clear();
605 
606     unsigned startPos = 0;
607     size_t endPos;
608     while ((endPos = find(separator, startPos)) != notFound) {
609         if (allowEmptyEntries || startPos != endPos)
610             result.append(substring(startPos, endPos - startPos));
611         startPos = endPos + separator.length();
612     }
613     if (allowEmptyEntries || startPos != length())
614         result.append(substring(startPos));
615 }
616 
split(const String & separator,Vector<String> & result) const617 void String::split(const String& separator, Vector<String>& result) const
618 {
619     split(separator, false, result);
620 }
621 
split(UChar separator,bool allowEmptyEntries,Vector<String> & result) const622 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
623 {
624     result.clear();
625 
626     unsigned startPos = 0;
627     size_t endPos;
628     while ((endPos = find(separator, startPos)) != notFound) {
629         if (allowEmptyEntries || startPos != endPos)
630             result.append(substring(startPos, endPos - startPos));
631         startPos = endPos + 1;
632     }
633     if (allowEmptyEntries || startPos != length())
634         result.append(substring(startPos));
635 }
636 
split(UChar separator,Vector<String> & result) const637 void String::split(UChar separator, Vector<String>& result) const
638 {
639     split(String(&separator, 1), false, result);
640 }
641 
ascii() const642 CString String::ascii() const
643 {
644     // Printable ASCII characters 32..127 and the null character are
645     // preserved, characters outside of this range are converted to '?'.
646 
647     unsigned length = this->length();
648     const UChar* characters = this->characters();
649 
650     char* characterBuffer;
651     CString result = CString::newUninitialized(length, characterBuffer);
652 
653     for (unsigned i = 0; i < length; ++i) {
654         UChar ch = characters[i];
655         characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
656     }
657 
658     return result;
659 }
660 
latin1() const661 CString String::latin1() const
662 {
663     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
664     // preserved, characters outside of this range are converted to '?'.
665 
666     unsigned length = this->length();
667     const UChar* characters = this->characters();
668 
669     char* characterBuffer;
670     CString result = CString::newUninitialized(length, characterBuffer);
671 
672     for (unsigned i = 0; i < length; ++i) {
673         UChar ch = characters[i];
674         characterBuffer[i] = ch > 0xff ? '?' : ch;
675     }
676 
677     return result;
678 }
679 
680 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
putUTF8Triple(char * & buffer,UChar ch)681 static inline void putUTF8Triple(char*& buffer, UChar ch)
682 {
683     ASSERT(ch >= 0x0800);
684     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
685     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
686     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
687 }
688 
utf8(bool strict) const689 CString String::utf8(bool strict) const
690 {
691     unsigned length = this->length();
692     const UChar* characters = this->characters();
693 
694     // Allocate a buffer big enough to hold all the characters
695     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
696     // Optimization ideas, if we find this function is hot:
697     //  * We could speculatively create a CStringBuffer to contain 'length'
698     //    characters, and resize if necessary (i.e. if the buffer contains
699     //    non-ascii characters). (Alternatively, scan the buffer first for
700     //    ascii characters, so we know this will be sufficient).
701     //  * We could allocate a CStringBuffer with an appropriate size to
702     //    have a good chance of being able to write the string into the
703     //    buffer without reallocing (say, 1.5 x length).
704     if (length > numeric_limits<unsigned>::max() / 3)
705         return CString();
706     Vector<char, 1024> bufferVector(length * 3);
707 
708     char* buffer = bufferVector.data();
709     ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
710     ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
711 
712     // Only produced from strict conversion.
713     if (result == sourceIllegal)
714         return CString();
715 
716     // Check for an unconverted high surrogate.
717     if (result == sourceExhausted) {
718         if (strict)
719             return CString();
720         // This should be one unpaired high surrogate. Treat it the same
721         // was as an unpaired high surrogate would have been handled in
722         // the middle of a string with non-strict conversion - which is
723         // to say, simply encode it to UTF-8.
724         ASSERT((characters + 1) == (this->characters() + length));
725         ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
726         // There should be room left, since one UChar hasn't been converted.
727         ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
728         putUTF8Triple(buffer, *characters);
729     }
730 
731     return CString(bufferVector.data(), buffer - bufferVector.data());
732 }
733 
fromUTF8(const char * stringStart,size_t length)734 String String::fromUTF8(const char* stringStart, size_t length)
735 {
736     if (length > numeric_limits<unsigned>::max())
737         CRASH();
738 
739     if (!stringStart)
740         return String();
741 
742     // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
743     // the right length, if there are any multi-byte sequences this buffer will be too large.
744     UChar* buffer;
745     String stringBuffer(StringImpl::createUninitialized(length, buffer));
746     UChar* bufferEnd = buffer + length;
747 
748     // Try converting into the buffer.
749     const char* stringCurrent = stringStart;
750     if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK)
751         return String();
752 
753     // stringBuffer is full (the input must have been all ascii) so just return it!
754     if (buffer == bufferEnd)
755         return stringBuffer;
756 
757     // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
758     unsigned utf16Length = buffer - stringBuffer.characters();
759     ASSERT(utf16Length < length);
760     return String(stringBuffer.characters(), utf16Length);
761 }
762 
fromUTF8(const char * string)763 String String::fromUTF8(const char* string)
764 {
765     if (!string)
766         return String();
767     return fromUTF8(string, strlen(string));
768 }
769 
fromUTF8WithLatin1Fallback(const char * string,size_t size)770 String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
771 {
772     String utf8 = fromUTF8(string, size);
773     if (!utf8)
774         return String(string, size);
775     return utf8;
776 }
777 
778 // String Operations
779 
isCharacterAllowedInBase(UChar c,int base)780 static bool isCharacterAllowedInBase(UChar c, int base)
781 {
782     if (c > 0x7F)
783         return false;
784     if (isASCIIDigit(c))
785         return c - '0' < base;
786     if (isASCIIAlpha(c)) {
787         if (base > 36)
788             base = 36;
789         return (c >= 'a' && c < 'a' + base - 10)
790             || (c >= 'A' && c < 'A' + base - 10);
791     }
792     return false;
793 }
794 
795 template <typename IntegralType>
toIntegralType(const UChar * data,size_t length,bool * ok,int base)796 static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
797 {
798     static const IntegralType integralMax = numeric_limits<IntegralType>::max();
799     static const bool isSigned = numeric_limits<IntegralType>::is_signed;
800     const IntegralType maxMultiplier = integralMax / base;
801 
802     IntegralType value = 0;
803     bool isOk = false;
804     bool isNegative = false;
805 
806     if (!data)
807         goto bye;
808 
809     // skip leading whitespace
810     while (length && isSpaceOrNewline(*data)) {
811         length--;
812         data++;
813     }
814 
815     if (isSigned && length && *data == '-') {
816         length--;
817         data++;
818         isNegative = true;
819     } else if (length && *data == '+') {
820         length--;
821         data++;
822     }
823 
824     if (!length || !isCharacterAllowedInBase(*data, base))
825         goto bye;
826 
827     while (length && isCharacterAllowedInBase(*data, base)) {
828         length--;
829         IntegralType digitValue;
830         UChar c = *data;
831         if (isASCIIDigit(c))
832             digitValue = c - '0';
833         else if (c >= 'a')
834             digitValue = c - 'a' + 10;
835         else
836             digitValue = c - 'A' + 10;
837 
838         if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
839             goto bye;
840 
841         value = base * value + digitValue;
842         data++;
843     }
844 
845 #if COMPILER(MSVC)
846 #pragma warning(push, 0)
847 #pragma warning(disable:4146)
848 #endif
849 
850     if (isNegative)
851         value = -value;
852 
853 #if COMPILER(MSVC)
854 #pragma warning(pop)
855 #endif
856 
857     // skip trailing space
858     while (length && isSpaceOrNewline(*data)) {
859         length--;
860         data++;
861     }
862 
863     if (!length)
864         isOk = true;
865 bye:
866     if (ok)
867         *ok = isOk;
868     return isOk ? value : 0;
869 }
870 
lengthOfCharactersAsInteger(const UChar * data,size_t length)871 static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
872 {
873     size_t i = 0;
874 
875     // Allow leading spaces.
876     for (; i != length; ++i) {
877         if (!isSpaceOrNewline(data[i]))
878             break;
879     }
880 
881     // Allow sign.
882     if (i != length && (data[i] == '+' || data[i] == '-'))
883         ++i;
884 
885     // Allow digits.
886     for (; i != length; ++i) {
887         if (!isASCIIDigit(data[i]))
888             break;
889     }
890 
891     return i;
892 }
893 
charactersToIntStrict(const UChar * data,size_t length,bool * ok,int base)894 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
895 {
896     return toIntegralType<int>(data, length, ok, base);
897 }
898 
charactersToUIntStrict(const UChar * data,size_t length,bool * ok,int base)899 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
900 {
901     return toIntegralType<unsigned>(data, length, ok, base);
902 }
903 
charactersToInt64Strict(const UChar * data,size_t length,bool * ok,int base)904 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
905 {
906     return toIntegralType<int64_t>(data, length, ok, base);
907 }
908 
charactersToUInt64Strict(const UChar * data,size_t length,bool * ok,int base)909 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
910 {
911     return toIntegralType<uint64_t>(data, length, ok, base);
912 }
913 
charactersToIntPtrStrict(const UChar * data,size_t length,bool * ok,int base)914 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
915 {
916     return toIntegralType<intptr_t>(data, length, ok, base);
917 }
918 
charactersToInt(const UChar * data,size_t length,bool * ok)919 int charactersToInt(const UChar* data, size_t length, bool* ok)
920 {
921     return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
922 }
923 
charactersToUInt(const UChar * data,size_t length,bool * ok)924 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
925 {
926     return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
927 }
928 
charactersToInt64(const UChar * data,size_t length,bool * ok)929 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
930 {
931     return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
932 }
933 
charactersToUInt64(const UChar * data,size_t length,bool * ok)934 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
935 {
936     return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
937 }
938 
charactersToIntPtr(const UChar * data,size_t length,bool * ok)939 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
940 {
941     return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
942 }
943 
charactersToDouble(const UChar * data,size_t length,bool * ok,bool * didReadNumber)944 double charactersToDouble(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
945 {
946     if (!length) {
947         if (ok)
948             *ok = false;
949         if (didReadNumber)
950             *didReadNumber = false;
951         return 0.0;
952     }
953 
954     Vector<char, 256> bytes(length + 1);
955     for (unsigned i = 0; i < length; ++i)
956         bytes[i] = data[i] < 0x7F ? data[i] : '?';
957     bytes[length] = '\0';
958     char* start = bytes.data();
959     char* end;
960     double val = WTF::strtod(start, &end);
961     if (ok)
962         *ok = (end == 0 || *end == '\0');
963     if (didReadNumber)
964         *didReadNumber = end - start;
965     return val;
966 }
967 
charactersToFloat(const UChar * data,size_t length,bool * ok,bool * didReadNumber)968 float charactersToFloat(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
969 {
970     // FIXME: This will return ok even when the string fits into a double but not a float.
971     return static_cast<float>(charactersToDouble(data, length, ok, didReadNumber));
972 }
973 
974 } // namespace WTF
975 
976 #ifndef NDEBUG
977 // For use in the debugger
978 String* string(const char*);
979 Vector<char> asciiDebug(StringImpl* impl);
980 Vector<char> asciiDebug(String& string);
981 
string(const char * s)982 String* string(const char* s)
983 {
984     // leaks memory!
985     return new String(s);
986 }
987 
asciiDebug(StringImpl * impl)988 Vector<char> asciiDebug(StringImpl* impl)
989 {
990     if (!impl)
991         return asciiDebug(String("[null]").impl());
992 
993     Vector<char> buffer;
994     unsigned length = impl->length();
995     const UChar* characters = impl->characters();
996 
997     buffer.resize(length + 1);
998     for (unsigned i = 0; i < length; ++i) {
999         UChar ch = characters[i];
1000         buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
1001     }
1002     buffer[length] = '\0';
1003 
1004     return buffer;
1005 }
1006 
asciiDebug(String & string)1007 Vector<char> asciiDebug(String& string)
1008 {
1009     return asciiDebug(string.impl());
1010 }
1011 
1012 #endif
1013