1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /*
8  * A class which represents a fragment of text (eg inside a text
9  * node); if only codepoints below 256 are used, the text is stored as
10  * a char*; otherwise the text is stored as a char16_t*
11  */
12 
13 #ifndef nsTextFragment_h___
14 #define nsTextFragment_h___
15 
16 #include "mozilla/Attributes.h"
17 #include "mozilla/MemoryReporting.h"
18 
19 #include "nsCharTraits.h"
20 #include "nsString.h"
21 #include "nsStringBuffer.h"
22 #include "nsReadableUtils.h"
23 #include "nsISupportsImpl.h"
24 
25 // XXX should this normalize the code to keep a \u0000 at the end?
26 
27 // XXX nsTextFragmentPool?
28 
29 /**
30  * A fragment of text. If mIs2b is 1 then the m2b pointer is valid
31  * otherwise the m1b pointer is valid. If m1b is used then each byte
32  * of data represents a single ucs2 character with the high byte being
33  * zero.
34  *
35  * This class does not have a virtual destructor therefore it is not
36  * meant to be subclassed.
37  */
38 class nsTextFragment final {
39  public:
40   static nsresult Init();
41   static void Shutdown();
42 
43   /**
44    * Default constructor. Initialize the fragment to be empty.
45    */
nsTextFragment()46   nsTextFragment() : m1b(nullptr), mAllBits(0) {
47     MOZ_COUNT_CTOR(nsTextFragment);
48     NS_ASSERTION(sizeof(FragmentBits) == 4, "Bad field packing!");
49   }
50 
51   ~nsTextFragment();
52 
53   /**
54    * Change the contents of this fragment to be a copy of the
55    * the argument fragment, or to "" if unable to allocate enough memory.
56    */
57   nsTextFragment& operator=(const nsTextFragment& aOther);
58 
59   /**
60    * Return true if this fragment is represented by char16_t data
61    */
Is2b()62   bool Is2b() const { return mState.mIs2b; }
63 
64   /**
65    * Return true if this fragment contains Bidi text
66    * For performance reasons this flag is only set if explicitely requested (by
67    * setting the aUpdateBidi argument on SetTo or Append to true).
68    */
IsBidi()69   bool IsBidi() const { return mState.mIsBidi; }
70 
71   /**
72    * Get a pointer to constant char16_t data.
73    */
Get2b()74   const char16_t* Get2b() const {
75     MOZ_ASSERT(Is2b(), "not 2b text");
76     return static_cast<char16_t*>(m2b->Data());
77   }
78 
79   /**
80    * Get a pointer to constant char data.
81    */
Get1b()82   const char* Get1b() const {
83     NS_ASSERTION(!Is2b(), "not 1b text");
84     return (const char*)m1b;
85   }
86 
87   /**
88    * Get the length of the fragment. The length is the number of logical
89    * characters, not the number of bytes to store the characters.
90    */
GetLength()91   uint32_t GetLength() const { return mState.mLength; }
92 
93 #define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF))
94 
CanGrowBy(size_t n)95   bool CanGrowBy(size_t n) const {
96     return n < (1 << 29) && mState.mLength + n < (1 << 29);
97   }
98 
99   /**
100    * Change the contents of this fragment to be a copy of the given
101    * buffer. If aUpdateBidi is true, contents of the fragment will be scanned,
102    * and mState.mIsBidi will be turned on if it includes any Bidi characters.
103    * If aForce2b is true, aBuffer will be stored as char16_t as is.  Then,
104    * you can access the value faster but may waste memory if all characters
105    * are less than U+0100.
106    */
107   bool SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi,
108              bool aForce2b);
109 
SetTo(const nsString & aString,bool aUpdateBidi,bool aForce2b)110   bool SetTo(const nsString& aString, bool aUpdateBidi, bool aForce2b) {
111     if (MOZ_UNLIKELY(aString.Length() > NS_MAX_TEXT_FRAGMENT_LENGTH)) {
112       return false;
113     }
114     ReleaseText();
115     if (aForce2b && !aUpdateBidi) {
116       nsStringBuffer* buffer = nsStringBuffer::FromString(aString);
117       if (buffer) {
118         NS_ADDREF(m2b = buffer);
119         mState.mInHeap = true;
120         mState.mIs2b = true;
121         mState.mLength = aString.Length();
122         return true;
123       }
124     }
125 
126     return SetTo(aString.get(), aString.Length(), aUpdateBidi, aForce2b);
127   }
128 
129   /**
130    * Append aData to the end of this fragment. If aUpdateBidi is true, contents
131    * of the fragment will be scanned, and mState.mIsBidi will be turned on if
132    * it includes any Bidi characters.
133    * If aForce2b is true, the string will be stored as char16_t as is.  Then,
134    * you can access the value faster but may waste memory if all characters
135    * are less than U+0100.
136    */
137   bool Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi,
138               bool aForce2b);
139 
140   /**
141    * Append the contents of this string fragment to aString
142    */
AppendTo(nsAString & aString)143   void AppendTo(nsAString& aString) const {
144     if (!AppendTo(aString, mozilla::fallible)) {
145       aString.AllocFailed(aString.Length() + GetLength());
146     }
147   }
148 
149   /**
150    * Append the contents of this string fragment to aString
151    * @return false if an out of memory condition is detected, true otherwise
152    */
AppendTo(nsAString & aString,const mozilla::fallible_t & aFallible)153   [[nodiscard]] bool AppendTo(nsAString& aString,
154                               const mozilla::fallible_t& aFallible) const {
155     if (mState.mIs2b) {
156       if (aString.IsEmpty()) {
157         m2b->ToString(mState.mLength, aString);
158         return true;
159       }
160       bool ok = aString.Append(Get2b(), mState.mLength, aFallible);
161       if (!ok) {
162         return false;
163       }
164 
165       return true;
166     } else {
167       return AppendASCIItoUTF16(Substring(m1b, mState.mLength), aString,
168                                 aFallible);
169     }
170   }
171 
172   /**
173    * Append a substring of the contents of this string fragment to aString.
174    * @param aOffset where to start the substring in this text fragment
175    * @param aLength the length of the substring
176    */
AppendTo(nsAString & aString,int32_t aOffset,int32_t aLength)177   void AppendTo(nsAString& aString, int32_t aOffset, int32_t aLength) const {
178     if (!AppendTo(aString, aOffset, aLength, mozilla::fallible)) {
179       aString.AllocFailed(aString.Length() + aLength);
180     }
181   }
182 
183   /**
184    * Append a substring of the contents of this string fragment to aString.
185    * @param aString the string in which to append
186    * @param aOffset where to start the substring in this text fragment
187    * @param aLength the length of the substring
188    * @return false if an out of memory condition is detected, true otherwise
189    */
AppendTo(nsAString & aString,int32_t aOffset,int32_t aLength,const mozilla::fallible_t & aFallible)190   [[nodiscard]] bool AppendTo(nsAString& aString, int32_t aOffset,
191                               int32_t aLength,
192                               const mozilla::fallible_t& aFallible) const {
193     if (mState.mIs2b) {
194       bool ok = aString.Append(Get2b() + aOffset, aLength, aFallible);
195       if (!ok) {
196         return false;
197       }
198 
199       return true;
200     } else {
201       return AppendASCIItoUTF16(Substring(m1b + aOffset, aLength), aString,
202                                 aFallible);
203     }
204   }
205 
206   /**
207    * Make a copy of the fragments contents starting at offset for
208    * count characters. The offset and count will be adjusted to
209    * lie within the fragments data. The fragments data is converted if
210    * necessary.
211    */
212   void CopyTo(char16_t* aDest, int32_t aOffset, int32_t aCount);
213 
214   /**
215    * Return the character in the text-fragment at the given
216    * index. This always returns a char16_t.
217    */
CharAt(int32_t aIndex)218   char16_t CharAt(int32_t aIndex) const {
219     MOZ_ASSERT(uint32_t(aIndex) < mState.mLength, "bad index");
220     return mState.mIs2b ? Get2b()[aIndex]
221                         : static_cast<unsigned char>(m1b[aIndex]);
222   }
223 
224   /**
225    * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at
226    * aIndex is high surrogate and it's followed by low surrogate.
227    */
IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex)228   inline bool IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex) const {
229     MOZ_ASSERT(aIndex >= 0);
230     MOZ_ASSERT(aIndex < mState.mLength);
231     if (!mState.mIs2b || aIndex + 1 >= mState.mLength) {
232       return false;
233     }
234     return NS_IS_SURROGATE_PAIR(Get2b()[aIndex], Get2b()[aIndex + 1]);
235   }
236 
237   /**
238    * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at
239    * aIndex is low surrogate and it follows high surrogate.
240    */
IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex)241   inline bool IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex) const {
242     MOZ_ASSERT(aIndex >= 0);
243     MOZ_ASSERT(aIndex < mState.mLength);
244     if (!mState.mIs2b || aIndex <= 0) {
245       return false;
246     }
247     return NS_IS_SURROGATE_PAIR(Get2b()[aIndex - 1], Get2b()[aIndex]);
248   }
249 
250   /**
251    * ScalarValueAt() returns a Unicode scalar value at aIndex.  If the character
252    * at aIndex is a high surrogate followed by low surrogate, returns character
253    * code for the pair.  If the index is low surrogate, or a high surrogate but
254    * not in a pair, returns 0.
255    */
ScalarValueAt(int32_t aIndex)256   inline char32_t ScalarValueAt(int32_t aIndex) const {
257     MOZ_ASSERT(aIndex >= 0);
258     MOZ_ASSERT(aIndex < mState.mLength);
259     if (!mState.mIs2b) {
260       return static_cast<unsigned char>(m1b[aIndex]);
261     }
262     char16_t ch = Get2b()[aIndex];
263     if (!IS_SURROGATE(ch)) {
264       return ch;
265     }
266     if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) {
267       char16_t nextCh = Get2b()[aIndex + 1];
268       if (NS_IS_LOW_SURROGATE(nextCh)) {
269         return SURROGATE_TO_UCS4(ch, nextCh);
270       }
271     }
272     return 0;
273   }
274 
SetBidi(bool aBidi)275   void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; }
276 
277   struct FragmentBits {
278     // uint32_t to ensure that the values are unsigned, because we
279     // want 0/1, not 0/-1!
280     // Making these bool causes Windows to not actually pack them,
281     // which causes crashes because we assume this structure is no more than
282     // 32 bits!
283     uint32_t mInHeap : 1;
284     uint32_t mIs2b : 1;
285     uint32_t mIsBidi : 1;
286     // Note that when you change the bits of mLength, you also need to change
287     // NS_MAX_TEXT_FRAGMENT_LENGTH.
288     uint32_t mLength : 29;
289   };
290 
291   size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const;
292 
293   /**
294    * Check whether the text in this fragment is the same as the text in the
295    * other fragment.
296    */
297   [[nodiscard]] bool TextEquals(const nsTextFragment& aOther) const;
298 
299  private:
300   void ReleaseText();
301 
302   /**
303    * Scan the contents of the fragment and turn on mState.mIsBidi if it
304    * includes any Bidi characters.
305    */
306   void UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength);
307 
308   union {
309     nsStringBuffer* m2b;
310     const char* m1b;  // This is const since it can point to shared data
311   };
312 
313   union {
314     uint32_t mAllBits;
315     FragmentBits mState;
316   };
317 };
318 
319 #endif /* nsTextFragment_h___ */
320