1 /*
2   ==============================================================================
3 
4    This file is part of the Water library.
5    Copyright (c) 2016 ROLI Ltd.
6    Copyright (C) 2017 Filipe Coelho <falktx@falktx.com>
7 
8    Permission is granted to use this software under the terms of the ISC license
9    http://www.isc.org/downloads/software-support-policy/isc-license/
10 
11    Permission to use, copy, modify, and/or distribute this software for any
12    purpose with or without fee is hereby granted, provided that the above
13    copyright notice and this permission notice appear in all copies.
14 
15    THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
16    TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17    FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
18    OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
19    USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
20    TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
21    OF THIS SOFTWARE.
22 
23   ==============================================================================
24 */
25 
26 #ifndef WATER_CHARACTERFUNCTIONS_H_INCLUDED
27 #define WATER_CHARACTERFUNCTIONS_H_INCLUDED
28 
29 #include "../memory/Memory.h"
30 
31 #include <algorithm>
32 #include <limits>
33 
34 namespace water {
35 
36 /** A platform-independent 32-bit unicode character type. */
37 typedef uint32 water_uchar;
38 
39 //==============================================================================
40 /**
41     A collection of functions for manipulating characters and character strings.
42 
43     Most of these methods are designed for internal use by the String and CharPointer
44     classes, but some of them may be useful to call directly.
45 
46     @see String, CharPointer_UTF8, CharPointer_UTF16, CharPointer_UTF32
47 */
48 class CharacterFunctions
49 {
50 public:
51     //==============================================================================
52     /** Converts a character to upper-case. */
53     static water_uchar toUpperCase (water_uchar character) noexcept;
54     /** Converts a character to lower-case. */
55     static water_uchar toLowerCase (water_uchar character) noexcept;
56 
57     /** Checks whether a unicode character is upper-case. */
58     static bool isUpperCase (water_uchar character) noexcept;
59     /** Checks whether a unicode character is lower-case. */
60     static bool isLowerCase (water_uchar character) noexcept;
61 
62     /** Checks whether a character is whitespace. */
63     static bool isWhitespace (char character) noexcept;
64     /** Checks whether a character is whitespace. */
65     static bool isWhitespace (water_uchar character) noexcept;
66 
67     /** Checks whether a character is a digit. */
68     static bool isDigit (char character) noexcept;
69     /** Checks whether a character is a digit. */
70     static bool isDigit (water_uchar character) noexcept;
71 
72     /** Checks whether a character is alphabetic. */
73     static bool isLetter (char character) noexcept;
74     /** Checks whether a character is alphabetic. */
75     static bool isLetter (water_uchar character) noexcept;
76 
77     /** Checks whether a character is alphabetic or numeric. */
78     static bool isLetterOrDigit (char character) noexcept;
79     /** Checks whether a character is alphabetic or numeric. */
80     static bool isLetterOrDigit (water_uchar character) noexcept;
81 
82     /** Checks whether a character is a printable character, i.e. alphabetic, numeric,
83         a punctuation character or a space.
84     */
85     static bool isPrintable (char character) noexcept;
86 
87     /** Checks whether a character is a printable character, i.e. alphabetic, numeric,
88         a punctuation character or a space.
89     */
90     static bool isPrintable (water_uchar character) noexcept;
91 
    /** Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit. */
93     static int getHexDigitValue (water_uchar digit) noexcept;
94 
95     /** Converts a byte of Windows 1252 codepage to unicode. */
96     static water_uchar getUnicodeCharFromWindows1252Codepage (uint8 windows1252Char) noexcept;
97 
98     //==============================================================================
99     /** Parses a character string to read a floating-point number.
100         Note that this will advance the pointer that is passed in, leaving it at
101         the end of the number.
102     */
103     template <typename CharPointerType>
readDoubleValue(CharPointerType & text)104     static double readDoubleValue (CharPointerType& text) noexcept
105     {
106         double result[3] = { 0 }, accumulator[2] = { 0 };
107         int exponentAdjustment[2] = { 0 }, exponentAccumulator[2] = { -1, -1 };
108         int exponent = 0, decPointIndex = 0, digit = 0;
109         int lastDigit = 0, numSignificantDigits = 0;
110         bool isNegative = false, digitsFound = false;
111         const int maxSignificantDigits = 15 + 2;
112 
113         text = text.findEndOfWhitespace();
114         water_uchar c = *text;
115 
116         switch (c)
117         {
118             case '-':   isNegative = true; // fall-through..
119             case '+':   c = *++text;
120         }
121 
122         switch (c)
123         {
124             case 'n':
125             case 'N':
126                 if ((text[1] == 'a' || text[1] == 'A') && (text[2] == 'n' || text[2] == 'N'))
127                     return std::numeric_limits<double>::quiet_NaN();
128                 break;
129 
130             case 'i':
131             case 'I':
132                 if ((text[1] == 'n' || text[1] == 'N') && (text[2] == 'f' || text[2] == 'F'))
133                     return std::numeric_limits<double>::infinity();
134                 break;
135         }
136 
137         for (;;)
138         {
139             if (text.isDigit())
140             {
141                 lastDigit = digit;
142                 digit = (int) text.getAndAdvance() - '0';
143                 digitsFound = true;
144 
145                 if (decPointIndex != 0)
146                     exponentAdjustment[1]++;
147 
148                 if (numSignificantDigits == 0 && digit == 0)
149                     continue;
150 
151                 if (++numSignificantDigits > maxSignificantDigits)
152                 {
153                     if (digit > 5)
154                         ++accumulator [decPointIndex];
155                     else if (digit == 5 && (lastDigit & 1) != 0)
156                         ++accumulator [decPointIndex];
157 
158                     if (decPointIndex > 0)
159                         exponentAdjustment[1]--;
160                     else
161                         exponentAdjustment[0]++;
162 
163                     while (text.isDigit())
164                     {
165                         ++text;
166                         if (decPointIndex == 0)
167                             exponentAdjustment[0]++;
168                     }
169                 }
170                 else
171                 {
172                     const double maxAccumulatorValue = (double) ((std::numeric_limits<unsigned int>::max() - 9) / 10);
173                     if (accumulator [decPointIndex] > maxAccumulatorValue)
174                     {
175                         result [decPointIndex] = mulexp10 (result [decPointIndex], exponentAccumulator [decPointIndex])
176                                                     + accumulator [decPointIndex];
177                         accumulator [decPointIndex] = 0;
178                         exponentAccumulator [decPointIndex] = 0;
179                     }
180 
181                     accumulator [decPointIndex] = accumulator[decPointIndex] * 10 + digit;
182                     exponentAccumulator [decPointIndex]++;
183                 }
184             }
185             else if (decPointIndex == 0 && *text == '.')
186             {
187                 ++text;
188                 decPointIndex = 1;
189 
190                 if (numSignificantDigits > maxSignificantDigits)
191                 {
192                     while (text.isDigit())
193                         ++text;
194                     break;
195                 }
196             }
197             else
198             {
199                 break;
200             }
201         }
202 
203         result[0] = mulexp10 (result[0], exponentAccumulator[0]) + accumulator[0];
204 
205         if (decPointIndex != 0)
206             result[1] = mulexp10 (result[1], exponentAccumulator[1]) + accumulator[1];
207 
208         c = *text;
209         if ((c == 'e' || c == 'E') && digitsFound)
210         {
211             bool negativeExponent = false;
212 
213             switch (*++text)
214             {
215                 case '-':   negativeExponent = true; // fall-through..
216                 case '+':   ++text;
217             }
218 
219             while (text.isDigit())
220                 exponent = (exponent * 10) + ((int) text.getAndAdvance() - '0');
221 
222             if (negativeExponent)
223                 exponent = -exponent;
224         }
225 
226         double r = mulexp10 (result[0], exponent + exponentAdjustment[0]);
227         if (decPointIndex != 0)
228             r += mulexp10 (result[1], exponent - exponentAdjustment[1]);
229 
230         return isNegative ? -r : r;
231     }
232 
233     /** Parses a character string, to read a floating-point value. */
234     template <typename CharPointerType>
getDoubleValue(CharPointerType text)235     static double getDoubleValue (CharPointerType text) noexcept
236     {
237         return readDoubleValue (text);
238     }
239 
240     //==============================================================================
241     /** Parses a character string, to read an integer value. */
242     template <typename IntType, typename CharPointerType>
getIntValue(const CharPointerType text)243     static IntType getIntValue (const CharPointerType text) noexcept
244     {
245         IntType v = 0;
246         CharPointerType s (text.findEndOfWhitespace());
247 
248         const bool isNeg = *s == '-';
249         if (isNeg)
250             ++s;
251 
252         for (;;)
253         {
254             const water_uchar c = s.getAndAdvance();
255 
256             if (c >= '0' && c <= '9')
257                 v = v * 10 + (IntType) (c - '0');
258             else
259                 break;
260         }
261 
262         return isNeg ? -v : v;
263     }
264 
265     template <typename ResultType>
266     struct HexParser
267     {
268         template <typename CharPointerType>
parseHexParser269         static ResultType parse (CharPointerType t) noexcept
270         {
271             ResultType result = 0;
272 
273             while (! t.isEmpty())
274             {
275                 const int hexValue = CharacterFunctions::getHexDigitValue (t.getAndAdvance());
276 
277                 if (hexValue >= 0)
278                     result = (result << 4) | hexValue;
279             }
280 
281             return result;
282         }
283     };
284 
285     //==============================================================================
286     /** Counts the number of characters in a given string, stopping if the count exceeds
287         a specified limit. */
288     template <typename CharPointerType>
lengthUpTo(CharPointerType text,const size_t maxCharsToCount)289     static size_t lengthUpTo (CharPointerType text, const size_t maxCharsToCount) noexcept
290     {
291         size_t len = 0;
292 
293         while (len < maxCharsToCount && text.getAndAdvance() != 0)
294             ++len;
295 
296         return len;
297     }
298 
299     /** Counts the number of characters in a given string, stopping if the count exceeds
300         a specified end-pointer. */
301     template <typename CharPointerType>
lengthUpTo(CharPointerType start,const CharPointerType end)302     static size_t lengthUpTo (CharPointerType start, const CharPointerType end) noexcept
303     {
304         size_t len = 0;
305 
306         while (start < end && start.getAndAdvance() != 0)
307             ++len;
308 
309         return len;
310     }
311 
312     /** Copies null-terminated characters from one string to another. */
313     template <typename DestCharPointerType, typename SrcCharPointerType>
copyAll(DestCharPointerType & dest,SrcCharPointerType src)314     static void copyAll (DestCharPointerType& dest, SrcCharPointerType src) noexcept
315     {
316         while (water_uchar c = src.getAndAdvance())
317             dest.write (c);
318 
319         dest.writeNull();
320     }
321 
322     /** Copies characters from one string to another, up to a null terminator
323         or a given byte size limit. */
324     template <typename DestCharPointerType, typename SrcCharPointerType>
copyWithDestByteLimit(DestCharPointerType & dest,SrcCharPointerType src,size_t maxBytesToWrite)325     static size_t copyWithDestByteLimit (DestCharPointerType& dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
326     {
327         typename DestCharPointerType::CharType const* const startAddress = dest.getAddress();
328         ssize_t maxBytes = (ssize_t) maxBytesToWrite;
329         maxBytes -= sizeof (typename DestCharPointerType::CharType); // (allow for a terminating null)
330 
331         for (;;)
332         {
333             const water_uchar c = src.getAndAdvance();
334             const size_t bytesNeeded = DestCharPointerType::getBytesRequiredFor (c);
335 
336             maxBytes -= bytesNeeded;
337             if (c == 0 || maxBytes < 0)
338                 break;
339 
340             dest.write (c);
341         }
342 
343         dest.writeNull();
344 
345         return (size_t) getAddressDifference (dest.getAddress(), startAddress)
346                  + sizeof (typename DestCharPointerType::CharType);
347     }
348 
349     /** Copies characters from one string to another, up to a null terminator
350         or a given maximum number of characters. */
351     template <typename DestCharPointerType, typename SrcCharPointerType>
copyWithCharLimit(DestCharPointerType & dest,SrcCharPointerType src,int maxChars)352     static void copyWithCharLimit (DestCharPointerType& dest, SrcCharPointerType src, int maxChars) noexcept
353     {
354         while (--maxChars > 0)
355         {
356             const water_uchar c = src.getAndAdvance();
357             if (c == 0)
358                 break;
359 
360             dest.write (c);
361         }
362 
363         dest.writeNull();
364     }
365 
366     /** Compares two characters. */
compare(water_uchar char1,water_uchar char2)367     static inline int compare (water_uchar char1, water_uchar char2) noexcept
368     {
369         if (int diff = static_cast<int> (char1) - static_cast<int> (char2))
370             return diff < 0 ? -1 : 1;
371 
372         return 0;
373     }
374 
375     /** Compares two null-terminated character strings. */
376     template <typename CharPointerType1, typename CharPointerType2>
compare(CharPointerType1 s1,CharPointerType2 s2)377     static int compare (CharPointerType1 s1, CharPointerType2 s2) noexcept
378     {
379         for (;;)
380         {
381             const water_uchar c1 = s1.getAndAdvance();
382 
383             if (int diff = compare (c1, s2.getAndAdvance()))
384                 return diff;
385 
386             if (c1 == 0)
387                 break;
388         }
389 
390         return 0;
391     }
392 
393     /** Compares two null-terminated character strings, up to a given number of characters. */
394     template <typename CharPointerType1, typename CharPointerType2>
compareUpTo(CharPointerType1 s1,CharPointerType2 s2,int maxChars)395     static int compareUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
396     {
397         while (--maxChars >= 0)
398         {
399             const water_uchar c1 = s1.getAndAdvance();
400 
401             if (int diff = compare (c1, s2.getAndAdvance()))
402                 return diff;
403 
404             if (c1 == 0)
405                 break;
406         }
407 
408         return 0;
409     }
410 
411     /** Compares two characters, using a case-independant match. */
compareIgnoreCase(water_uchar char1,water_uchar char2)412     static inline int compareIgnoreCase (water_uchar char1, water_uchar char2) noexcept
413     {
414         return char1 != char2 ? compare (toUpperCase (char1), toUpperCase (char2)) : 0;
415     }
416 
417     /** Compares two null-terminated character strings, using a case-independant match. */
418     template <typename CharPointerType1, typename CharPointerType2>
compareIgnoreCase(CharPointerType1 s1,CharPointerType2 s2)419     static int compareIgnoreCase (CharPointerType1 s1, CharPointerType2 s2) noexcept
420     {
421         for (;;)
422         {
423             const water_uchar c1 = s1.getAndAdvance();
424 
425             if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
426                 return diff;
427 
428             if (c1 == 0)
429                 break;
430         }
431 
432         return 0;
433     }
434 
435     /** Compares two null-terminated character strings, using a case-independent match. */
436     template <typename CharPointerType1, typename CharPointerType2>
compareIgnoreCaseUpTo(CharPointerType1 s1,CharPointerType2 s2,int maxChars)437     static int compareIgnoreCaseUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
438     {
439         while (--maxChars >= 0)
440         {
441             const water_uchar c1 = s1.getAndAdvance();
442 
443             if (int diff = compareIgnoreCase (c1, s2.getAndAdvance()))
444                 return diff;
445 
446             if (c1 == 0)
447                 break;
448         }
449 
450         return 0;
451     }
452 
453     /** Finds the character index of a given substring in another string.
454         Returns -1 if the substring is not found.
455     */
456     template <typename CharPointerType1, typename CharPointerType2>
indexOf(CharPointerType1 textToSearch,const CharPointerType2 substringToLookFor)457     static int indexOf (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
458     {
459         int index = 0;
460         const int substringLength = (int) substringToLookFor.length();
461 
462         for (;;)
463         {
464             if (textToSearch.compareUpTo (substringToLookFor, substringLength) == 0)
465                 return index;
466 
467             if (textToSearch.getAndAdvance() == 0)
468                 return -1;
469 
470             ++index;
471         }
472     }
473 
474     /** Returns a pointer to the first occurrence of a substring in a string.
475         If the substring is not found, this will return a pointer to the string's
476         null terminator.
477     */
478     template <typename CharPointerType1, typename CharPointerType2>
find(CharPointerType1 textToSearch,const CharPointerType2 substringToLookFor)479     static CharPointerType1 find (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
480     {
481         const int substringLength = (int) substringToLookFor.length();
482 
483         while (textToSearch.compareUpTo (substringToLookFor, substringLength) != 0
484                  && ! textToSearch.isEmpty())
485             ++textToSearch;
486 
487         return textToSearch;
488     }
489 
490     /** Returns a pointer to the first occurrence of a substring in a string.
491         If the substring is not found, this will return a pointer to the string's
492         null terminator.
493     */
494     template <typename CharPointerType>
find(CharPointerType textToSearch,const water_uchar charToLookFor)495     static CharPointerType find (CharPointerType textToSearch, const water_uchar charToLookFor) noexcept
496     {
497         for (;; ++textToSearch)
498         {
499             const water_uchar c = *textToSearch;
500 
501             if (c == charToLookFor || c == 0)
502                 break;
503         }
504 
505         return textToSearch;
506     }
507 
508     /** Finds the character index of a given substring in another string, using
509         a case-independent match.
510         Returns -1 if the substring is not found.
511     */
512     template <typename CharPointerType1, typename CharPointerType2>
indexOfIgnoreCase(CharPointerType1 haystack,const CharPointerType2 needle)513     static int indexOfIgnoreCase (CharPointerType1 haystack, const CharPointerType2 needle) noexcept
514     {
515         int index = 0;
516         const int needleLength = (int) needle.length();
517 
518         for (;;)
519         {
520             if (haystack.compareIgnoreCaseUpTo (needle, needleLength) == 0)
521                 return index;
522 
523             if (haystack.getAndAdvance() == 0)
524                 return -1;
525 
526             ++index;
527         }
528     }
529 
530     /** Finds the character index of a given character in another string.
531         Returns -1 if the character is not found.
532     */
533     template <typename Type>
indexOfChar(Type text,const water_uchar charToFind)534     static int indexOfChar (Type text, const water_uchar charToFind) noexcept
535     {
536         int i = 0;
537 
538         while (! text.isEmpty())
539         {
540             if (text.getAndAdvance() == charToFind)
541                 return i;
542 
543             ++i;
544         }
545 
546         return -1;
547     }
548 
549     /** Finds the character index of a given character in another string, using
550         a case-independent match.
551         Returns -1 if the character is not found.
552     */
553     template <typename Type>
indexOfCharIgnoreCase(Type text,water_uchar charToFind)554     static int indexOfCharIgnoreCase (Type text, water_uchar charToFind) noexcept
555     {
556         charToFind = CharacterFunctions::toLowerCase (charToFind);
557         int i = 0;
558 
559         while (! text.isEmpty())
560         {
561             if (text.toLowerCase() == charToFind)
562                 return i;
563 
564             ++text;
565             ++i;
566         }
567 
568         return -1;
569     }
570 
571     /** Returns a pointer to the first non-whitespace character in a string.
572         If the string contains only whitespace, this will return a pointer
573         to its null terminator.
574     */
575     template <typename Type>
findEndOfWhitespace(Type text)576     static Type findEndOfWhitespace (Type text) noexcept
577     {
578         while (text.isWhitespace())
579             ++text;
580 
581         return text;
582     }
583 
584     /** Returns a pointer to the first character in the string which is found in
585         the breakCharacters string.
586     */
587     template <typename Type, typename BreakType>
findEndOfToken(Type text,const BreakType breakCharacters,const Type quoteCharacters)588     static Type findEndOfToken (Type text, const BreakType breakCharacters, const Type quoteCharacters)
589     {
590         water_uchar currentQuoteChar = 0;
591 
592         while (! text.isEmpty())
593         {
594             const water_uchar c = text.getAndAdvance();
595 
596             if (currentQuoteChar == 0 && breakCharacters.indexOf (c) >= 0)
597             {
598                 --text;
599                 break;
600             }
601 
602             if (quoteCharacters.indexOf (c) >= 0)
603             {
604                 if (currentQuoteChar == 0)
605                     currentQuoteChar = c;
606                 else if (currentQuoteChar == c)
607                     currentQuoteChar = 0;
608             }
609         }
610 
611         return text;
612     }
613 
614 private:
615     static double mulexp10 (const double value, int exponent) noexcept;
616 };
617 
618 }
619 
620 #endif // WATER_CHARACTERFUNCTIONS_H_INCLUDED
621