1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 /* 8 * A class which represents a fragment of text (eg inside a text 9 * node); if only codepoints below 256 are used, the text is stored as 10 * a char*; otherwise the text is stored as a char16_t* 11 */ 12 13 #ifndef nsTextFragment_h___ 14 #define nsTextFragment_h___ 15 16 #include "mozilla/Attributes.h" 17 #include "mozilla/MemoryReporting.h" 18 19 #include "nsCharTraits.h" 20 #include "nsString.h" 21 #include "nsStringBuffer.h" 22 #include "nsReadableUtils.h" 23 #include "nsISupportsImpl.h" 24 25 // XXX should this normalize the code to keep a \u0000 at the end? 26 27 // XXX nsTextFragmentPool? 28 29 /** 30 * A fragment of text. If mIs2b is 1 then the m2b pointer is valid 31 * otherwise the m1b pointer is valid. If m1b is used then each byte 32 * of data represents a single ucs2 character with the high byte being 33 * zero. 34 * 35 * This class does not have a virtual destructor therefore it is not 36 * meant to be subclassed. 37 */ 38 class nsTextFragment final { 39 public: 40 static nsresult Init(); 41 static void Shutdown(); 42 43 /** 44 * Default constructor. Initialize the fragment to be empty. 45 */ nsTextFragment()46 nsTextFragment() : m1b(nullptr), mAllBits(0) { 47 MOZ_COUNT_CTOR(nsTextFragment); 48 NS_ASSERTION(sizeof(FragmentBits) == 4, "Bad field packing!"); 49 } 50 51 ~nsTextFragment(); 52 53 /** 54 * Change the contents of this fragment to be a copy of the 55 * the argument fragment, or to "" if unable to allocate enough memory. 56 */ 57 nsTextFragment& operator=(const nsTextFragment& aOther); 58 59 /** 60 * Return true if this fragment is represented by char16_t data 61 */ Is2b()62 bool Is2b() const { return mState.mIs2b; } 63 64 /** 65 * Return true if this fragment contains Bidi text 66 * For performance reasons this flag is only set if explicitely requested (by 67 * setting the aUpdateBidi argument on SetTo or Append to true). 68 */ IsBidi()69 bool IsBidi() const { return mState.mIsBidi; } 70 71 /** 72 * Get a pointer to constant char16_t data. 73 */ Get2b()74 const char16_t* Get2b() const { 75 MOZ_ASSERT(Is2b(), "not 2b text"); 76 return static_cast<char16_t*>(m2b->Data()); 77 } 78 79 /** 80 * Get a pointer to constant char data. 81 */ Get1b()82 const char* Get1b() const { 83 NS_ASSERTION(!Is2b(), "not 1b text"); 84 return (const char*)m1b; 85 } 86 87 /** 88 * Get the length of the fragment. The length is the number of logical 89 * characters, not the number of bytes to store the characters. 90 */ GetLength()91 uint32_t GetLength() const { return mState.mLength; } 92 93 #define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF)) 94 CanGrowBy(size_t n)95 bool CanGrowBy(size_t n) const { 96 return n < (1 << 29) && mState.mLength + n < (1 << 29); 97 } 98 99 /** 100 * Change the contents of this fragment to be a copy of the given 101 * buffer. If aUpdateBidi is true, contents of the fragment will be scanned, 102 * and mState.mIsBidi will be turned on if it includes any Bidi characters. 103 * If aForce2b is true, aBuffer will be stored as char16_t as is. Then, 104 * you can access the value faster but may waste memory if all characters 105 * are less than U+0100. 106 */ 107 bool SetTo(const char16_t* aBuffer, int32_t aLength, bool aUpdateBidi, 108 bool aForce2b); 109 SetTo(const nsString & aString,bool aUpdateBidi,bool aForce2b)110 bool SetTo(const nsString& aString, bool aUpdateBidi, bool aForce2b) { 111 if (MOZ_UNLIKELY(aString.Length() > NS_MAX_TEXT_FRAGMENT_LENGTH)) { 112 return false; 113 } 114 ReleaseText(); 115 if (aForce2b && !aUpdateBidi) { 116 nsStringBuffer* buffer = nsStringBuffer::FromString(aString); 117 if (buffer) { 118 NS_ADDREF(m2b = buffer); 119 mState.mInHeap = true; 120 mState.mIs2b = true; 121 mState.mLength = aString.Length(); 122 return true; 123 } 124 } 125 126 return SetTo(aString.get(), aString.Length(), aUpdateBidi, aForce2b); 127 } 128 129 /** 130 * Append aData to the end of this fragment. If aUpdateBidi is true, contents 131 * of the fragment will be scanned, and mState.mIsBidi will be turned on if 132 * it includes any Bidi characters. 133 * If aForce2b is true, the string will be stored as char16_t as is. Then, 134 * you can access the value faster but may waste memory if all characters 135 * are less than U+0100. 136 */ 137 bool Append(const char16_t* aBuffer, uint32_t aLength, bool aUpdateBidi, 138 bool aForce2b); 139 140 /** 141 * Append the contents of this string fragment to aString 142 */ AppendTo(nsAString & aString)143 void AppendTo(nsAString& aString) const { 144 if (!AppendTo(aString, mozilla::fallible)) { 145 aString.AllocFailed(aString.Length() + GetLength()); 146 } 147 } 148 149 /** 150 * Append the contents of this string fragment to aString 151 * @return false if an out of memory condition is detected, true otherwise 152 */ AppendTo(nsAString & aString,const mozilla::fallible_t & aFallible)153 [[nodiscard]] bool AppendTo(nsAString& aString, 154 const mozilla::fallible_t& aFallible) const { 155 if (mState.mIs2b) { 156 if (aString.IsEmpty()) { 157 m2b->ToString(mState.mLength, aString); 158 return true; 159 } 160 bool ok = aString.Append(Get2b(), mState.mLength, aFallible); 161 if (!ok) { 162 return false; 163 } 164 165 return true; 166 } else { 167 return AppendASCIItoUTF16(Substring(m1b, mState.mLength), aString, 168 aFallible); 169 } 170 } 171 172 /** 173 * Append a substring of the contents of this string fragment to aString. 174 * @param aOffset where to start the substring in this text fragment 175 * @param aLength the length of the substring 176 */ AppendTo(nsAString & aString,int32_t aOffset,int32_t aLength)177 void AppendTo(nsAString& aString, int32_t aOffset, int32_t aLength) const { 178 if (!AppendTo(aString, aOffset, aLength, mozilla::fallible)) { 179 aString.AllocFailed(aString.Length() + aLength); 180 } 181 } 182 183 /** 184 * Append a substring of the contents of this string fragment to aString. 185 * @param aString the string in which to append 186 * @param aOffset where to start the substring in this text fragment 187 * @param aLength the length of the substring 188 * @return false if an out of memory condition is detected, true otherwise 189 */ AppendTo(nsAString & aString,int32_t aOffset,int32_t aLength,const mozilla::fallible_t & aFallible)190 [[nodiscard]] bool AppendTo(nsAString& aString, int32_t aOffset, 191 int32_t aLength, 192 const mozilla::fallible_t& aFallible) const { 193 if (mState.mIs2b) { 194 bool ok = aString.Append(Get2b() + aOffset, aLength, aFallible); 195 if (!ok) { 196 return false; 197 } 198 199 return true; 200 } else { 201 return AppendASCIItoUTF16(Substring(m1b + aOffset, aLength), aString, 202 aFallible); 203 } 204 } 205 206 /** 207 * Make a copy of the fragments contents starting at offset for 208 * count characters. The offset and count will be adjusted to 209 * lie within the fragments data. The fragments data is converted if 210 * necessary. 211 */ 212 void CopyTo(char16_t* aDest, int32_t aOffset, int32_t aCount); 213 214 /** 215 * Return the character in the text-fragment at the given 216 * index. This always returns a char16_t. 217 */ CharAt(int32_t aIndex)218 char16_t CharAt(int32_t aIndex) const { 219 MOZ_ASSERT(uint32_t(aIndex) < mState.mLength, "bad index"); 220 return mState.mIs2b ? Get2b()[aIndex] 221 : static_cast<unsigned char>(m1b[aIndex]); 222 } 223 224 /** 225 * IsHighSurrogateFollowedByLowSurrogateAt() returns true if character at 226 * aIndex is high surrogate and it's followed by low surrogate. 227 */ IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex)228 inline bool IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex) const { 229 MOZ_ASSERT(aIndex >= 0); 230 MOZ_ASSERT(aIndex < mState.mLength); 231 if (!mState.mIs2b || aIndex + 1 >= mState.mLength) { 232 return false; 233 } 234 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex], Get2b()[aIndex + 1]); 235 } 236 237 /** 238 * IsLowSurrogateFollowingHighSurrogateAt() returns true if character at 239 * aIndex is low surrogate and it follows high surrogate. 240 */ IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex)241 inline bool IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex) const { 242 MOZ_ASSERT(aIndex >= 0); 243 MOZ_ASSERT(aIndex < mState.mLength); 244 if (!mState.mIs2b || aIndex <= 0) { 245 return false; 246 } 247 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex - 1], Get2b()[aIndex]); 248 } 249 250 /** 251 * ScalarValueAt() returns a Unicode scalar value at aIndex. If the character 252 * at aIndex is a high surrogate followed by low surrogate, returns character 253 * code for the pair. If the index is low surrogate, or a high surrogate but 254 * not in a pair, returns 0. 255 */ ScalarValueAt(int32_t aIndex)256 inline char32_t ScalarValueAt(int32_t aIndex) const { 257 MOZ_ASSERT(aIndex >= 0); 258 MOZ_ASSERT(aIndex < mState.mLength); 259 if (!mState.mIs2b) { 260 return static_cast<unsigned char>(m1b[aIndex]); 261 } 262 char16_t ch = Get2b()[aIndex]; 263 if (!IS_SURROGATE(ch)) { 264 return ch; 265 } 266 if (aIndex + 1 < mState.mLength && NS_IS_HIGH_SURROGATE(ch)) { 267 char16_t nextCh = Get2b()[aIndex + 1]; 268 if (NS_IS_LOW_SURROGATE(nextCh)) { 269 return SURROGATE_TO_UCS4(ch, nextCh); 270 } 271 } 272 return 0; 273 } 274 SetBidi(bool aBidi)275 void SetBidi(bool aBidi) { mState.mIsBidi = aBidi; } 276 277 struct FragmentBits { 278 // uint32_t to ensure that the values are unsigned, because we 279 // want 0/1, not 0/-1! 280 // Making these bool causes Windows to not actually pack them, 281 // which causes crashes because we assume this structure is no more than 282 // 32 bits! 283 uint32_t mInHeap : 1; 284 uint32_t mIs2b : 1; 285 uint32_t mIsBidi : 1; 286 // Note that when you change the bits of mLength, you also need to change 287 // NS_MAX_TEXT_FRAGMENT_LENGTH. 288 uint32_t mLength : 29; 289 }; 290 291 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf) const; 292 293 /** 294 * Check whether the text in this fragment is the same as the text in the 295 * other fragment. 296 */ 297 [[nodiscard]] bool TextEquals(const nsTextFragment& aOther) const; 298 299 private: 300 void ReleaseText(); 301 302 /** 303 * Scan the contents of the fragment and turn on mState.mIsBidi if it 304 * includes any Bidi characters. 305 */ 306 void UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength); 307 308 union { 309 nsStringBuffer* m2b; 310 const char* m1b; // This is const since it can point to shared data 311 }; 312 313 union { 314 uint32_t mAllBits; 315 FragmentBits mState; 316 }; 317 }; 318 319 #endif /* nsTextFragment_h___ */ 320