1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #ifndef nsUnicharUtils_h__
7 #define nsUnicharUtils_h__
8 
9 #include "nsString.h"
10 
/*
 * IS_CJ_CHAR(u) is true when u falls in one of four inclusive ranges:
 *   0x2e80..0x312f, 0x3190..0xabff, 0xf900..0xfaff, 0xff00..0xffef
 *
 * The ranges skip these two Hangul blocks:
 *   0x3131..0x318e => Hangul Compatibility Jamo
 *   0xac00..0xd7a3 => Hangul Syllables
 *
 * The argument is expanded several times; pass an expression without side
 * effects.
 */
#define IS_CJ_CHAR(u)                    \
  (((u) >= 0x2e80u && (u) <= 0x312fu) || \
   ((u) >= 0x3190u && (u) <= 0xabffu) || \
   ((u) >= 0xf900u && (u) <= 0xfaffu) || \
   ((u) >= 0xff00u && (u) <= 0xffefu))
16 
/* True when u equals 0x200B (U+200B ZERO WIDTH SPACE). */
#define IS_ZERO_WIDTH_SPACE(u) (0x200B == (u))
18 
/*
 * ASCII classification helpers.  Several of these expand their argument more
 * than once, so pass an expression without side effects.
 */

/* True when u is in 0x00..0x7f.  A bit test is used instead of `(u) < 0x80`
 * so that negative values (for example a signed char holding a byte >= 0x80)
 * are not reported as ASCII.  Non-negative inputs behave exactly as before. */
#define IS_ASCII(u) (((u) | 0x7F) == 0x7F)
/* True when u is in 'A'..'Z'. */
#define IS_ASCII_UPPER(u) (('A' <= (u)) && ((u) <= 'Z'))
/* True when u is in 'a'..'z'. */
#define IS_ASCII_LOWER(u) (('a' <= (u)) && ((u) <= 'z'))
/* True when u is an ASCII letter of either case. */
#define IS_ASCII_ALPHA(u) (IS_ASCII_UPPER(u) || IS_ASCII_LOWER(u))
/* True only for the space character ' ' (0x20); tabs and newlines do not
 * match. */
#define IS_ASCII_SPACE(u) (' ' == (u))
24 
/**
 * Case conversion over whole nsAString values.  Only the declarations live in
 * this header.
 *
 * Single-argument overloads take a mutable nsAString& and return nothing.
 * NOTE(review): presumably they convert aString in place -- confirm against
 * the implementation.
 * NOTE(review): judging by the name, ToLowerCaseASCII presumably only affects
 * ASCII letters -- confirm.
 */
void ToLowerCase(nsAString& aString);
void ToLowerCaseASCII(nsAString& aString);
void ToUpperCase(nsAString& aString);

/**
 * Two-argument overloads: aSource is const, aDest is the mutable output
 * parameter.
 * NOTE(review): whether aSource and aDest may refer to the same string is not
 * stated here -- check the implementation before relying on it.
 */
void ToLowerCase(const nsAString& aSource, nsAString& aDest);
void ToLowerCaseASCII(const nsAString& aSource, nsAString& aDest);
void ToUpperCase(const nsAString& aSource, nsAString& aDest);
32 
/**
 * Single-value case mappings: each takes a uint32_t and returns a uint32_t.
 * IsUpperCase()/IsLowerCase() in this header compare the result against the
 * input, so a value with no mapping is expected to come back unchanged.
 * NOTE(review): aChar is presumably a Unicode code point -- confirm.
 */
uint32_t ToLowerCase(uint32_t aChar);
uint32_t ToUpperCase(uint32_t aChar);
uint32_t ToTitleCase(uint32_t aChar);
36 
/**
 * Raw-buffer overloads: read from the const char16_t buffer aIn and write to
 * the char16_t buffer aOut.
 * NOTE(review): aLen presumably counts char16_t code units and applies to both
 * buffers; whether aIn and aOut may overlap is not stated here -- confirm.
 */
void ToLowerCase(const char16_t* aIn, char16_t* aOut, size_t aLen);
void ToLowerCaseASCII(const char16_t* aIn, char16_t* aOut, size_t aLen);
void ToUpperCase(const char16_t* aIn, char16_t* aOut, size_t aLen);
40 
/**
 * Single-character ASCII lower-casing, overloaded for char, char16_t and
 * char32_t; each returns the same character type it was given.
 * NOTE(review): presumably non-ASCII and non-letter inputs are returned
 * unchanged -- confirm against the implementation.
 */
char ToLowerCaseASCII(const char aChar);
char16_t ToLowerCaseASCII(const char16_t aChar);
char32_t ToLowerCaseASCII(const char32_t aChar);

/**
 * Single-character ASCII upper-casing; same overload set as
 * ToLowerCaseASCII above.
 */
char ToUpperCaseASCII(const char aChar);
char16_t ToUpperCaseASCII(const char16_t aChar);
char32_t ToUpperCaseASCII(const char32_t aChar);
48 
IsUpperCase(uint32_t c)49 inline bool IsUpperCase(uint32_t c) { return ToLowerCase(c) != c; }
50 
IsLowerCase(uint32_t c)51 inline bool IsLowerCase(uint32_t c) { return ToUpperCase(c) != c; }
52 
53 #ifdef MOZILLA_INTERNAL_API
54 
/**
 * Case-folding entry points for a single uint32_t value, a whole nsAString,
 * and a raw char16_t buffer.  Declared only when MOZILLA_INTERNAL_API is
 * defined.
 * NOTE(review): the nsAString overload presumably folds in place, and aLen
 * presumably counts char16_t code units -- confirm.
 */
uint32_t ToFoldedCase(uint32_t aChar);
void ToFoldedCase(nsAString& aString);
void ToFoldedCase(const char16_t* aIn, char16_t* aOut, size_t aLen);

/**
 * "Naked" mapping for a single uint32_t value or a whole nsAString.
 * NOTE(review): presumably this strips diacritics, which would match the
 * "(naked) lower-cased codepoints" wording on CaseInsensitiveUTF8CharsEqual --
 * confirm against the implementation.
 */
uint32_t ToNaked(uint32_t aChar);
void ToNaked(nsAString& aString);
61 
/**
 * Comparator over two char16_t buffers.  Elsewhere in this header it is
 * passed as the comparator argument to Equals() and FindInReadable().
 * NOTE(review): the parameters are unnamed; presumably they are (left, right,
 * leftLength, rightLength), with a negative/zero/positive result as with
 * other string comparators -- confirm.
 */
int32_t nsCaseInsensitiveStringComparator(const char16_t*, const char16_t*,
                                          size_t, size_t);

/**
 * Counterpart of nsCaseInsensitiveStringComparator for char buffers.
 * NOTE(review): per the name the buffers hold UTF-8 and the size_t arguments
 * are presumably byte lengths -- confirm.
 */
int32_t nsCaseInsensitiveUTF8StringComparator(const char*, const char*, size_t,
                                              size_t);
67 
68 class nsCaseInsensitiveStringArrayComparator {
69  public:
70   template <class A, class B>
Equals(const A & a,const B & b)71   bool Equals(const A& a, const B& b) const {
72     return a.Equals(b, nsCaseInsensitiveStringComparator);
73   }
74 };
75 
/**
 * Comparator over two char16_t buffers with the same signature as
 * nsCaseInsensitiveStringComparator.
 * NOTE(review): per the name, presumably only ASCII letters are treated
 * case-insensitively -- confirm against the implementation.
 */
int32_t nsASCIICaseInsensitiveStringComparator(const char16_t*, const char16_t*,
                                               size_t, size_t);
78 
CaseInsensitiveFindInReadable(const nsAString & aPattern,nsAString::const_iterator & aSearchStart,nsAString::const_iterator & aSearchEnd)79 inline bool CaseInsensitiveFindInReadable(
80     const nsAString& aPattern, nsAString::const_iterator& aSearchStart,
81     nsAString::const_iterator& aSearchEnd) {
82   return FindInReadable(aPattern, aSearchStart, aSearchEnd,
83                         nsCaseInsensitiveStringComparator);
84 }
85 
CaseInsensitiveFindInReadable(const nsAString & aPattern,const nsAString & aHay)86 inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
87                                           const nsAString& aHay) {
88   nsAString::const_iterator searchBegin, searchEnd;
89   return FindInReadable(aPattern, aHay.BeginReading(searchBegin),
90                         aHay.EndReading(searchEnd),
91                         nsCaseInsensitiveStringComparator);
92 }
93 
94 #endif  // MOZILLA_INTERNAL_API
95 
/**
 * Case-insensitive comparison of two char16_t buffers that share a single
 * length argument.
 * NOTE(review): len presumably counts char16_t code units, and the result is
 * presumably negative/zero/positive -- confirm.
 */
int32_t CaseInsensitiveCompare(const char16_t* a, const char16_t* b,
                               size_t len);

/**
 * Case-insensitive comparison of two char buffers, each with its own length
 * in bytes (aLeftBytes, aRightBytes).
 * NOTE(review): the buffers presumably hold UTF-8 -- confirm.
 */
int32_t CaseInsensitiveCompare(const char* aLeft, const char* aRight,
                               size_t aLeftBytes, size_t aRightBytes);
101 
/**
 * Calculates the lower-case of the codepoint of the UTF8 sequence starting at
 * aStr.  Sets aNext to the byte following the end of the sequence.
 *
 * If the sequence is invalid, or if computing the codepoint would take us off
 * the end of the string (as marked by aEnd), returns -1 and does not set
 * aNext.  Because the return type is uint32_t, callers see that sentinel as
 * uint32_t(-1) and must compare against it as an unsigned value.
 *
 * Note that this function doesn't check that aStr < aEnd -- it assumes
 * you've done that already.
 */
uint32_t GetLowerUTF8Codepoint(const char* aStr, const char* aEnd,
                               const char** aNext);
113 
/**
 * This function determines whether the UTF-8 sequence pointed to by aLeft is
 * case insensitively equal to the UTF-8 sequence pointed to by aRight (or
 * optionally, case and diacritic insensitively equal), as defined by having
 * matching (naked) lower-cased codepoints.
 *
 * aLeftEnd marks the first memory location past aLeft that is not part of
 * aLeft; aRightEnd similarly marks the end of aRight.
 *
 * The function assumes that aLeft < aLeftEnd and aRight < aRightEnd.
 *
 * The function stores the addresses of the next characters in the sequence
 * into aLeftNext and aRightNext.  It's up to the caller to make sure that the
 * returned pointers are valid -- i.e. the function may return aLeftNext >=
 * aLeftEnd or aRightNext >= aRightEnd.
 *
 * If the function encounters invalid text, it sets aErr to true and returns
 * false, possibly leaving aLeftNext and aRightNext uninitialized.  If the
 * function returns true, aErr is guaranteed to be false and both aLeftNext and
 * aRightNext are guaranteed to be initialized.
 *
 * aMatchDiacritics defaults to true, in which case only case differences are
 * ignored.  If aMatchDiacritics is false, the comparison is neither
 * case-sensitive nor diacritic-sensitive.
 *
 * Parameter summary:
 *   aLeft, aRight          start of each UTF-8 sequence (input)
 *   aLeftEnd, aRightEnd    one-past-the-end of each buffer (input)
 *   aLeftNext, aRightNext  receive the position after the compared
 *                          character (output)
 *   aErr                   receives the error flag (output)
 *   aMatchDiacritics       whether diacritics must match (input)
 */
bool CaseInsensitiveUTF8CharsEqual(const char* aLeft, const char* aRight,
                                   const char* aLeftEnd, const char* aRightEnd,
                                   const char** aLeftNext,
                                   const char** aRightNext, bool* aErr,
                                   bool aMatchDiacritics = true);
143 
namespace mozilla {

/**
 * Hash a UTF8 string as though it were a UTF16 string.
 *
 * The value returned is the same as if we converted the string to UTF16 and
 * then ran HashString() on the result.
 *
 * aUTF8 points at the start of the UTF-8 data and |aLength| is its length in
 * bytes.
 *
 * aErr is an out-parameter for an error flag.
 * NOTE(review): presumably it is set to true when aUTF8 is not valid UTF-8,
 * and the return value is then not meaningful -- confirm against the
 * implementation.
 */
uint32_t HashUTF8AsUTF16(const char* aUTF8, size_t aLength, bool* aErr);

/**
 * Predicate over a single uint32_t value; the definition is not in this
 * header.
 * NOTE(review): judging by the name, presumably reports whether a segment
 * break adjacent to character u should be skipped (removed) during
 * white-space processing -- confirm against the implementation.
 */
bool IsSegmentBreakSkipChar(uint32_t u);

}  // namespace mozilla
159 
160 #endif /* nsUnicharUtils_h__ */
161