1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
9
10 #include "nsString.h"
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
13 #include <algorithm>
14
15 /**
16 * NOTE: nsScannerString (and the other classes defined in this file) are
17 * not related to nsAString or any of the other xpcom/string classes.
18 *
19 * nsScannerString is based on the nsSlidingString implementation that used
20 * to live in xpcom/string. Now that nsAString is limited to representing
21 * only single fragment strings, nsSlidingString can no longer be used.
22 *
23 * An advantage to this design is that it does not employ any virtual
24 * functions.
25 *
26 * This file uses SCC-style indenting in deference to the nsSlidingString
27 * code from which this code is derived ;-)
28 */
29
30 class nsScannerIterator;
31 class nsScannerSubstring;
32 class nsScannerString;
33
34 /**
35 * nsScannerBufferList
36 *
37 * This class maintains a list of heap-allocated Buffer objects. The buffers
38 * are maintained in a circular linked list. Each buffer has a usage count
39 * that is decremented by the owning nsScannerSubstring.
40 *
41 * The buffer list itself is reference counted. This allows the buffer list
42 * to be shared by multiple nsScannerSubstring objects. The reference
43 * counting is not threadsafe, which is not at all a requirement.
44 *
45 * When a nsScannerSubstring releases its reference to a buffer list, it
46 * decrements the usage count of the first buffer in the buffer list that it
47 * was referencing. It informs the buffer list that it can discard buffers
48 * starting at that prefix. The buffer list will do so if the usage count of
49 * that buffer is 0 and if it is the first buffer in the list. It will
50 * continue to prune buffers starting from the front of the buffer list until
51 * it finds a buffer that has a usage count that is non-zero.
52 */
53 class nsScannerBufferList {
54 public:
55 /**
56 * Buffer objects are directly followed by a data segment. The start
57 * of the data segment is determined by increment the |this| pointer
58 * by 1 unit.
59 */
60 class Buffer : public mozilla::LinkedListElement<Buffer> {
61 public:
IncrementUsageCount()62 void IncrementUsageCount() { ++mUsageCount; }
DecrementUsageCount()63 void DecrementUsageCount() { --mUsageCount; }
64
IsInUse()65 bool IsInUse() const { return mUsageCount != 0; }
66
DataStart()67 const char16_t* DataStart() const { return (const char16_t*)(this + 1); }
DataStart()68 char16_t* DataStart() { return (char16_t*)(this + 1); }
69
DataEnd()70 const char16_t* DataEnd() const { return mDataEnd; }
DataEnd()71 char16_t* DataEnd() { return mDataEnd; }
72
Next()73 const Buffer* Next() const { return getNext(); }
Next()74 Buffer* Next() { return getNext(); }
75
Prev()76 const Buffer* Prev() const { return getPrevious(); }
Prev()77 Buffer* Prev() { return getPrevious(); }
78
DataLength()79 uint32_t DataLength() const { return mDataEnd - DataStart(); }
SetDataLength(uint32_t len)80 void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
81
82 private:
83 friend class nsScannerBufferList;
84
85 int32_t mUsageCount;
86 char16_t* mDataEnd;
87 };
88
89 /**
90 * Position objects serve as lightweight pointers into a buffer list.
91 * The mPosition member must be contained with mBuffer->DataStart()
92 * and mBuffer->DataEnd().
93 */
94 class Position {
95 public:
Position()96 Position() : mBuffer(nullptr), mPosition(nullptr) {}
97
Position(Buffer * buffer,char16_t * position)98 Position(Buffer* buffer, char16_t* position)
99 : mBuffer(buffer), mPosition(position) {}
100
101 inline explicit Position(const nsScannerIterator& aIter);
102
103 inline Position& operator=(const nsScannerIterator& aIter);
104
105 static size_t Distance(const Position& p1, const Position& p2);
106
107 Buffer* mBuffer;
108 char16_t* mPosition;
109 };
110
111 static Buffer* AllocBufferFromString(const nsAString&);
112 static Buffer* AllocBuffer(uint32_t capacity); // capacity = number of chars
113
nsScannerBufferList(Buffer * buf)114 explicit nsScannerBufferList(Buffer* buf) : mRefCnt(0) {
115 mBuffers.insertBack(buf);
116 }
117
AddRef()118 void AddRef() { ++mRefCnt; }
Release()119 void Release() {
120 if (--mRefCnt == 0) delete this;
121 }
122
Append(Buffer * buf)123 void Append(Buffer* buf) { mBuffers.insertBack(buf); }
InsertAfter(Buffer * buf,Buffer * prev)124 void InsertAfter(Buffer* buf, Buffer* prev) { prev->setNext(buf); }
125 void SplitBuffer(const Position&);
126 void DiscardUnreferencedPrefix(Buffer*);
127
Head()128 Buffer* Head() { return mBuffers.getFirst(); }
Head()129 const Buffer* Head() const { return mBuffers.getFirst(); }
130
Tail()131 Buffer* Tail() { return mBuffers.getLast(); }
Tail()132 const Buffer* Tail() const { return mBuffers.getLast(); }
133
134 private:
135 friend class nsScannerSubstring;
136
~nsScannerBufferList()137 ~nsScannerBufferList() { ReleaseAll(); }
138 void ReleaseAll();
139
140 int32_t mRefCnt;
141 mozilla::LinkedList<Buffer> mBuffers;
142 };
143
144 /**
145 * nsScannerFragment represents a "slice" of a Buffer object.
146 */
147 struct nsScannerFragment {
148 typedef nsScannerBufferList::Buffer Buffer;
149
150 const Buffer* mBuffer;
151 const char16_t* mFragmentStart;
152 const char16_t* mFragmentEnd;
153 };
154
155 /**
156 * nsScannerSubstring is the base class for nsScannerString. It provides
157 * access to iterators and methods to bind the substring to another
158 * substring or nsAString instance.
159 *
160 * This class owns the buffer list.
161 */
162 class nsScannerSubstring {
163 public:
164 typedef nsScannerBufferList::Buffer Buffer;
165 typedef nsScannerBufferList::Position Position;
166 typedef uint32_t size_type;
167
168 nsScannerSubstring();
169 explicit nsScannerSubstring(const nsAString& s);
170
171 ~nsScannerSubstring();
172
173 nsScannerIterator& BeginReading(nsScannerIterator& iter) const;
174 nsScannerIterator& EndReading(nsScannerIterator& iter) const;
175
Length()176 size_type Length() const { return mLength; }
177
178 int32_t CountChar(char16_t) const;
179
180 void Rebind(const nsScannerSubstring&, const nsScannerIterator&,
181 const nsScannerIterator&);
182 void Rebind(const nsAString&);
183
184 const nsAString& AsString() const;
185
186 bool GetNextFragment(nsScannerFragment&) const;
187 bool GetPrevFragment(nsScannerFragment&) const;
188
AllocBufferFromString(const nsAString & aStr)189 static inline Buffer* AllocBufferFromString(const nsAString& aStr) {
190 return nsScannerBufferList::AllocBufferFromString(aStr);
191 }
AllocBuffer(size_type aCapacity)192 static inline Buffer* AllocBuffer(size_type aCapacity) {
193 return nsScannerBufferList::AllocBuffer(aCapacity);
194 }
195
196 protected:
acquire_ownership_of_buffer_list()197 void acquire_ownership_of_buffer_list() const {
198 mBufferList->AddRef();
199 mStart.mBuffer->IncrementUsageCount();
200 }
201
release_ownership_of_buffer_list()202 void release_ownership_of_buffer_list() {
203 if (mBufferList) {
204 mStart.mBuffer->DecrementUsageCount();
205 mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
206 mBufferList->Release();
207 }
208 }
209
init_range_from_buffer_list()210 void init_range_from_buffer_list() {
211 mStart.mBuffer = mBufferList->Head();
212 mStart.mPosition = mStart.mBuffer->DataStart();
213
214 mEnd.mBuffer = mBufferList->Tail();
215 mEnd.mPosition = mEnd.mBuffer->DataEnd();
216
217 mLength = Position::Distance(mStart, mEnd);
218 }
219
220 Position mStart;
221 Position mEnd;
222 nsScannerBufferList* mBufferList;
223 size_type mLength;
224
225 // these fields are used to implement AsString
226 nsDependentSubstring mFlattenedRep;
227 bool mIsDirty;
228
229 friend class nsScannerSharedSubstring;
230 };
231
232 /**
233 * nsScannerString provides methods to grow and modify a buffer list.
234 */
235 class nsScannerString : public nsScannerSubstring {
236 public:
237 explicit nsScannerString(Buffer*);
238
239 // you are giving ownership to the string, it takes and keeps your
240 // buffer, deleting it when done.
241 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
242 // for use with this function.
243 void AppendBuffer(Buffer*);
244
245 void DiscardPrefix(const nsScannerIterator&);
246 // any other way you want to do this?
247
248 void UngetReadable(const nsAString& aReadable,
249 const nsScannerIterator& aCurrentPosition);
250 };
251
252 /**
253 * nsScannerSharedSubstring implements copy-on-write semantics for
254 * nsScannerSubstring. When you call .writable(), it will copy the data
255 * and return a mutable string object. This class also manages releasing
256 * the reference to the scanner buffer when it is no longer needed.
257 */
258
259 class nsScannerSharedSubstring {
260 public:
nsScannerSharedSubstring()261 nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {}
262
~nsScannerSharedSubstring()263 ~nsScannerSharedSubstring() {
264 if (mBufferList) ReleaseBuffer();
265 }
266
267 // Acquire a copy-on-write reference to the given substring.
268 void Rebind(const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
269
270 // Get a mutable reference to this string
writable()271 nsAString& writable() {
272 if (mBufferList) MakeMutable();
273
274 return mString;
275 }
276
277 // Get a const reference to this string
str()278 const nsAString& str() const { return mString; }
279
280 private:
281 typedef nsScannerBufferList::Buffer Buffer;
282
283 void ReleaseBuffer();
284 void MakeMutable();
285
286 nsDependentSubstring mString;
287 Buffer* mBuffer;
288 nsScannerBufferList* mBufferList;
289 };
290
291 /**
292 * nsScannerIterator works just like nsReadingIterator<CharT> except that
293 * it knows how to iterate over a list of scanner buffers.
294 */
295 class nsScannerIterator {
296 public:
297 typedef nsScannerIterator self_type;
298 typedef ptrdiff_t difference_type;
299 typedef char16_t value_type;
300 typedef const char16_t* pointer;
301 typedef const char16_t& reference;
302 typedef nsScannerSubstring::Buffer Buffer;
303
304 protected:
305 nsScannerFragment mFragment;
306 const char16_t* mPosition;
307 const nsScannerSubstring* mOwner;
308
309 friend class nsScannerSubstring;
310 friend class nsScannerSharedSubstring;
311
312 public:
313 // nsScannerIterator(); // auto-generate
314 // default constructor is OK nsScannerIterator( const nsScannerIterator& ); //
315 // auto-generated copy-constructor OK nsScannerIterator& operator=( const
316 // nsScannerIterator& ); // auto-generated copy-assignment operator OK
317
318 inline void normalize_forward();
319 inline void normalize_backward();
320
get()321 pointer get() const { return mPosition; }
322
323 char16_t operator*() const { return *get(); }
324
fragment()325 const nsScannerFragment& fragment() const { return mFragment; }
326
buffer()327 const Buffer* buffer() const { return mFragment.mBuffer; }
328
329 self_type& operator++() {
330 ++mPosition;
331 normalize_forward();
332 return *this;
333 }
334
335 self_type operator++(int) {
336 self_type result(*this);
337 ++mPosition;
338 normalize_forward();
339 return result;
340 }
341
342 self_type& operator--() {
343 normalize_backward();
344 --mPosition;
345 return *this;
346 }
347
348 self_type operator--(int) {
349 self_type result(*this);
350 normalize_backward();
351 --mPosition;
352 return result;
353 }
354
size_forward()355 difference_type size_forward() const {
356 return mFragment.mFragmentEnd - mPosition;
357 }
358
size_backward()359 difference_type size_backward() const {
360 return mPosition - mFragment.mFragmentStart;
361 }
362
advance(difference_type n)363 self_type& advance(difference_type n) {
364 while (n > 0) {
365 difference_type one_hop = std::min(n, size_forward());
366
367 NS_ASSERTION(one_hop > 0,
368 "Infinite loop: can't advance a reading iterator beyond the "
369 "end of a string");
370 // perhaps I should |break| if |!one_hop|?
371
372 mPosition += one_hop;
373 normalize_forward();
374 n -= one_hop;
375 }
376
377 while (n < 0) {
378 normalize_backward();
379 difference_type one_hop = std::max(n, -size_backward());
380
381 NS_ASSERTION(one_hop < 0,
382 "Infinite loop: can't advance (backward) a reading iterator "
383 "beyond the end of a string");
384 // perhaps I should |break| if |!one_hop|?
385
386 mPosition += one_hop;
387 n -= one_hop;
388 }
389
390 return *this;
391 }
392 };
393
SameFragment(const nsScannerIterator & a,const nsScannerIterator & b)394 inline bool SameFragment(const nsScannerIterator& a,
395 const nsScannerIterator& b) {
396 return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
397 }
398
399 /**
400 * this class is needed in order to make use of the methods in nsAlgorithm.h
401 */
402 template <>
403 struct nsCharSourceTraits<nsScannerIterator> {
404 typedef nsScannerIterator::difference_type difference_type;
405
406 static uint32_t readable_distance(const nsScannerIterator& first,
407 const nsScannerIterator& last) {
408 return uint32_t(SameFragment(first, last) ? last.get() - first.get()
409 : first.size_forward());
410 }
411
412 static const nsScannerIterator::value_type* read(
413 const nsScannerIterator& iter) {
414 return iter.get();
415 }
416
417 static void advance(nsScannerIterator& s, difference_type n) { s.advance(n); }
418 };
419
420 /**
421 * inline methods follow
422 */
423
424 inline void nsScannerIterator::normalize_forward() {
425 while (mPosition == mFragment.mFragmentEnd &&
426 mOwner->GetNextFragment(mFragment))
427 mPosition = mFragment.mFragmentStart;
428 }
429
430 inline void nsScannerIterator::normalize_backward() {
431 while (mPosition == mFragment.mFragmentStart &&
432 mOwner->GetPrevFragment(mFragment))
433 mPosition = mFragment.mFragmentEnd;
434 }
435
436 inline bool operator==(const nsScannerIterator& lhs,
437 const nsScannerIterator& rhs) {
438 return lhs.get() == rhs.get();
439 }
440
441 inline bool operator!=(const nsScannerIterator& lhs,
442 const nsScannerIterator& rhs) {
443 return lhs.get() != rhs.get();
444 }
445
446 inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
447 : mBuffer(const_cast<Buffer*>(aIter.buffer())),
448 mPosition(const_cast<char16_t*>(aIter.get())) {}
449
450 inline nsScannerBufferList::Position& nsScannerBufferList::Position::operator=(
451 const nsScannerIterator& aIter) {
452 mBuffer = const_cast<Buffer*>(aIter.buffer());
453 mPosition = const_cast<char16_t*>(aIter.get());
454 return *this;
455 }
456
457 /**
458 * scanner string utils
459 *
460 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
461 * Here we provide only the methods that the htmlparser module needs.
462 */
463
464 inline size_t Distance(const nsScannerIterator& aStart,
465 const nsScannerIterator& aEnd) {
466 typedef nsScannerBufferList::Position Position;
467 return Position::Distance(Position(aStart), Position(aEnd));
468 }
469
470 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart,
471 const nsScannerIterator& aSrcEnd, nsAString& aDest);
472
473 inline bool CopyUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) {
474 nsScannerIterator begin, end;
475 return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
476 }
477
478 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
479 const nsScannerIterator& aSrcEnd, nsAString& aDest);
480
481 inline bool AppendUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) {
482 nsScannerIterator begin, end;
483 return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
484 }
485
486 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
487 const nsScannerIterator& aSrcEnd,
488 nsScannerSharedSubstring& aDest);
489
490 bool FindCharInReadable(char16_t aChar, nsScannerIterator& aStart,
491 const nsScannerIterator& aEnd);
492
493 bool FindInReadable(const nsAString& aPattern, nsScannerIterator& aStart,
494 nsScannerIterator& aEnd,
495 nsStringComparator = nsTDefaultStringComparator);
496
497 bool RFindInReadable(const nsAString& aPattern, nsScannerIterator& aStart,
498 nsScannerIterator& aEnd,
499 nsStringComparator = nsTDefaultStringComparator);
500
501 inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
502 nsScannerIterator& aStart,
503 nsScannerIterator& aEnd) {
504 return FindInReadable(aPattern, aStart, aEnd,
505 nsCaseInsensitiveStringComparator);
506 }
507
508 #endif // !defined(nsScannerString_h___)
509