1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
9 
10 #include "nsString.h"
11 #include "nsUnicharUtils.h"  // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
13 #include <algorithm>
14 
15 /**
16  * NOTE: nsScannerString (and the other classes defined in this file) are
17  * not related to nsAString or any of the other xpcom/string classes.
18  *
19  * nsScannerString is based on the nsSlidingString implementation that used
20  * to live in xpcom/string.  Now that nsAString is limited to representing
21  * only single fragment strings, nsSlidingString can no longer be used.
22  *
23  * An advantage to this design is that it does not employ any virtual
24  * functions.
25  *
26  * This file uses SCC-style indenting in deference to the nsSlidingString
27  * code from which this code is derived ;-)
28  */
29 
30 class nsScannerIterator;
31 class nsScannerSubstring;
32 class nsScannerString;
33 
34 /**
35  * nsScannerBufferList
36  *
37  * This class maintains a list of heap-allocated Buffer objects.  The buffers
38  * are maintained in a circular linked list.  Each buffer has a usage count
39  * that is decremented by the owning nsScannerSubstring.
40  *
41  * The buffer list itself is reference counted.  This allows the buffer list
42  * to be shared by multiple nsScannerSubstring objects.  The reference
43  * counting is not threadsafe, which is not at all a requirement.
44  *
45  * When a nsScannerSubstring releases its reference to a buffer list, it
46  * decrements the usage count of the first buffer in the buffer list that it
47  * was referencing.  It informs the buffer list that it can discard buffers
48  * starting at that prefix.  The buffer list will do so if the usage count of
49  * that buffer is 0 and if it is the first buffer in the list.  It will
50  * continue to prune buffers starting from the front of the buffer list until
51  * it finds a buffer that has a usage count that is non-zero.
52  */
53 class nsScannerBufferList {
54  public:
55   /**
56    * Buffer objects are directly followed by a data segment.  The start
57    * of the data segment is determined by increment the |this| pointer
58    * by 1 unit.
59    */
60   class Buffer : public mozilla::LinkedListElement<Buffer> {
61    public:
IncrementUsageCount()62     void IncrementUsageCount() { ++mUsageCount; }
DecrementUsageCount()63     void DecrementUsageCount() { --mUsageCount; }
64 
IsInUse()65     bool IsInUse() const { return mUsageCount != 0; }
66 
DataStart()67     const char16_t* DataStart() const { return (const char16_t*)(this + 1); }
DataStart()68     char16_t* DataStart() { return (char16_t*)(this + 1); }
69 
DataEnd()70     const char16_t* DataEnd() const { return mDataEnd; }
DataEnd()71     char16_t* DataEnd() { return mDataEnd; }
72 
Next()73     const Buffer* Next() const { return getNext(); }
Next()74     Buffer* Next() { return getNext(); }
75 
Prev()76     const Buffer* Prev() const { return getPrevious(); }
Prev()77     Buffer* Prev() { return getPrevious(); }
78 
DataLength()79     uint32_t DataLength() const { return mDataEnd - DataStart(); }
SetDataLength(uint32_t len)80     void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
81 
82    private:
83     friend class nsScannerBufferList;
84 
85     int32_t mUsageCount;
86     char16_t* mDataEnd;
87   };
88 
89   /**
90    * Position objects serve as lightweight pointers into a buffer list.
91    * The mPosition member must be contained with mBuffer->DataStart()
92    * and mBuffer->DataEnd().
93    */
94   class Position {
95    public:
Position()96     Position() : mBuffer(nullptr), mPosition(nullptr) {}
97 
Position(Buffer * buffer,char16_t * position)98     Position(Buffer* buffer, char16_t* position)
99         : mBuffer(buffer), mPosition(position) {}
100 
101     inline explicit Position(const nsScannerIterator& aIter);
102 
103     inline Position& operator=(const nsScannerIterator& aIter);
104 
105     static size_t Distance(const Position& p1, const Position& p2);
106 
107     Buffer* mBuffer;
108     char16_t* mPosition;
109   };
110 
111   static Buffer* AllocBufferFromString(const nsAString&);
112   static Buffer* AllocBuffer(uint32_t capacity);  // capacity = number of chars
113 
nsScannerBufferList(Buffer * buf)114   explicit nsScannerBufferList(Buffer* buf) : mRefCnt(0) {
115     mBuffers.insertBack(buf);
116   }
117 
AddRef()118   void AddRef() { ++mRefCnt; }
Release()119   void Release() {
120     if (--mRefCnt == 0) delete this;
121   }
122 
Append(Buffer * buf)123   void Append(Buffer* buf) { mBuffers.insertBack(buf); }
InsertAfter(Buffer * buf,Buffer * prev)124   void InsertAfter(Buffer* buf, Buffer* prev) { prev->setNext(buf); }
125   void SplitBuffer(const Position&);
126   void DiscardUnreferencedPrefix(Buffer*);
127 
Head()128   Buffer* Head() { return mBuffers.getFirst(); }
Head()129   const Buffer* Head() const { return mBuffers.getFirst(); }
130 
Tail()131   Buffer* Tail() { return mBuffers.getLast(); }
Tail()132   const Buffer* Tail() const { return mBuffers.getLast(); }
133 
134  private:
135   friend class nsScannerSubstring;
136 
~nsScannerBufferList()137   ~nsScannerBufferList() { ReleaseAll(); }
138   void ReleaseAll();
139 
140   int32_t mRefCnt;
141   mozilla::LinkedList<Buffer> mBuffers;
142 };
143 
144 /**
145  * nsScannerFragment represents a "slice" of a Buffer object.
146  */
147 struct nsScannerFragment {
148   typedef nsScannerBufferList::Buffer Buffer;
149 
150   const Buffer* mBuffer;
151   const char16_t* mFragmentStart;
152   const char16_t* mFragmentEnd;
153 };
154 
155 /**
156  * nsScannerSubstring is the base class for nsScannerString.  It provides
157  * access to iterators and methods to bind the substring to another
158  * substring or nsAString instance.
159  *
160  * This class owns the buffer list.
161  */
162 class nsScannerSubstring {
163  public:
164   typedef nsScannerBufferList::Buffer Buffer;
165   typedef nsScannerBufferList::Position Position;
166   typedef uint32_t size_type;
167 
168   nsScannerSubstring();
169   explicit nsScannerSubstring(const nsAString& s);
170 
171   ~nsScannerSubstring();
172 
173   nsScannerIterator& BeginReading(nsScannerIterator& iter) const;
174   nsScannerIterator& EndReading(nsScannerIterator& iter) const;
175 
Length()176   size_type Length() const { return mLength; }
177 
178   int32_t CountChar(char16_t) const;
179 
180   void Rebind(const nsScannerSubstring&, const nsScannerIterator&,
181               const nsScannerIterator&);
182   void Rebind(const nsAString&);
183 
184   const nsAString& AsString() const;
185 
186   bool GetNextFragment(nsScannerFragment&) const;
187   bool GetPrevFragment(nsScannerFragment&) const;
188 
AllocBufferFromString(const nsAString & aStr)189   static inline Buffer* AllocBufferFromString(const nsAString& aStr) {
190     return nsScannerBufferList::AllocBufferFromString(aStr);
191   }
AllocBuffer(size_type aCapacity)192   static inline Buffer* AllocBuffer(size_type aCapacity) {
193     return nsScannerBufferList::AllocBuffer(aCapacity);
194   }
195 
196  protected:
acquire_ownership_of_buffer_list()197   void acquire_ownership_of_buffer_list() const {
198     mBufferList->AddRef();
199     mStart.mBuffer->IncrementUsageCount();
200   }
201 
release_ownership_of_buffer_list()202   void release_ownership_of_buffer_list() {
203     if (mBufferList) {
204       mStart.mBuffer->DecrementUsageCount();
205       mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
206       mBufferList->Release();
207     }
208   }
209 
init_range_from_buffer_list()210   void init_range_from_buffer_list() {
211     mStart.mBuffer = mBufferList->Head();
212     mStart.mPosition = mStart.mBuffer->DataStart();
213 
214     mEnd.mBuffer = mBufferList->Tail();
215     mEnd.mPosition = mEnd.mBuffer->DataEnd();
216 
217     mLength = Position::Distance(mStart, mEnd);
218   }
219 
220   Position mStart;
221   Position mEnd;
222   nsScannerBufferList* mBufferList;
223   size_type mLength;
224 
225   // these fields are used to implement AsString
226   nsDependentSubstring mFlattenedRep;
227   bool mIsDirty;
228 
229   friend class nsScannerSharedSubstring;
230 };
231 
232 /**
233  * nsScannerString provides methods to grow and modify a buffer list.
234  */
235 class nsScannerString : public nsScannerSubstring {
236  public:
237   explicit nsScannerString(Buffer*);
238 
239   // you are giving ownership to the string, it takes and keeps your
240   // buffer, deleting it when done.
241   // Use AllocBuffer or AllocBufferFromString to create a Buffer object
242   // for use with this function.
243   void AppendBuffer(Buffer*);
244 
245   void DiscardPrefix(const nsScannerIterator&);
246   // any other way you want to do this?
247 
248   void UngetReadable(const nsAString& aReadable,
249                      const nsScannerIterator& aCurrentPosition);
250 };
251 
252 /**
253  * nsScannerSharedSubstring implements copy-on-write semantics for
254  * nsScannerSubstring.  When you call .writable(), it will copy the data
255  * and return a mutable string object.  This class also manages releasing
256  * the reference to the scanner buffer when it is no longer needed.
257  */
258 
259 class nsScannerSharedSubstring {
260  public:
nsScannerSharedSubstring()261   nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {}
262 
~nsScannerSharedSubstring()263   ~nsScannerSharedSubstring() {
264     if (mBufferList) ReleaseBuffer();
265   }
266 
267   // Acquire a copy-on-write reference to the given substring.
268   void Rebind(const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
269 
270   // Get a mutable reference to this string
writable()271   nsAString& writable() {
272     if (mBufferList) MakeMutable();
273 
274     return mString;
275   }
276 
277   // Get a const reference to this string
str()278   const nsAString& str() const { return mString; }
279 
280  private:
281   typedef nsScannerBufferList::Buffer Buffer;
282 
283   void ReleaseBuffer();
284   void MakeMutable();
285 
286   nsDependentSubstring mString;
287   Buffer* mBuffer;
288   nsScannerBufferList* mBufferList;
289 };
290 
291 /**
292  * nsScannerIterator works just like nsReadingIterator<CharT> except that
293  * it knows how to iterate over a list of scanner buffers.
294  */
295 class nsScannerIterator {
296  public:
297   typedef nsScannerIterator self_type;
298   typedef ptrdiff_t difference_type;
299   typedef char16_t value_type;
300   typedef const char16_t* pointer;
301   typedef const char16_t& reference;
302   typedef nsScannerSubstring::Buffer Buffer;
303 
304  protected:
305   nsScannerFragment mFragment;
306   const char16_t* mPosition;
307   const nsScannerSubstring* mOwner;
308 
309   friend class nsScannerSubstring;
310   friend class nsScannerSharedSubstring;
311 
312  public:
313   // nsScannerIterator();                                       // auto-generate
314   // default constructor is OK nsScannerIterator( const nsScannerIterator& ); //
315   // auto-generated copy-constructor OK nsScannerIterator& operator=( const
316   // nsScannerIterator& );  // auto-generated copy-assignment operator OK
317 
318   inline void normalize_forward();
319   inline void normalize_backward();
320 
get()321   pointer get() const { return mPosition; }
322 
323   char16_t operator*() const { return *get(); }
324 
fragment()325   const nsScannerFragment& fragment() const { return mFragment; }
326 
buffer()327   const Buffer* buffer() const { return mFragment.mBuffer; }
328 
329   self_type& operator++() {
330     ++mPosition;
331     normalize_forward();
332     return *this;
333   }
334 
335   self_type operator++(int) {
336     self_type result(*this);
337     ++mPosition;
338     normalize_forward();
339     return result;
340   }
341 
342   self_type& operator--() {
343     normalize_backward();
344     --mPosition;
345     return *this;
346   }
347 
348   self_type operator--(int) {
349     self_type result(*this);
350     normalize_backward();
351     --mPosition;
352     return result;
353   }
354 
size_forward()355   difference_type size_forward() const {
356     return mFragment.mFragmentEnd - mPosition;
357   }
358 
size_backward()359   difference_type size_backward() const {
360     return mPosition - mFragment.mFragmentStart;
361   }
362 
advance(difference_type n)363   self_type& advance(difference_type n) {
364     while (n > 0) {
365       difference_type one_hop = std::min(n, size_forward());
366 
367       NS_ASSERTION(one_hop > 0,
368                    "Infinite loop: can't advance a reading iterator beyond the "
369                    "end of a string");
370       // perhaps I should |break| if |!one_hop|?
371 
372       mPosition += one_hop;
373       normalize_forward();
374       n -= one_hop;
375     }
376 
377     while (n < 0) {
378       normalize_backward();
379       difference_type one_hop = std::max(n, -size_backward());
380 
381       NS_ASSERTION(one_hop < 0,
382                    "Infinite loop: can't advance (backward) a reading iterator "
383                    "beyond the end of a string");
384       // perhaps I should |break| if |!one_hop|?
385 
386       mPosition += one_hop;
387       n -= one_hop;
388     }
389 
390     return *this;
391   }
392 };
393 
SameFragment(const nsScannerIterator & a,const nsScannerIterator & b)394 inline bool SameFragment(const nsScannerIterator& a,
395                          const nsScannerIterator& b) {
396   return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
397 }
398 
399 /**
400  * this class is needed in order to make use of the methods in nsAlgorithm.h
401  */
402 template <>
403 struct nsCharSourceTraits<nsScannerIterator> {
404   typedef nsScannerIterator::difference_type difference_type;
405 
406   static uint32_t readable_distance(const nsScannerIterator& first,
407                                     const nsScannerIterator& last) {
408     return uint32_t(SameFragment(first, last) ? last.get() - first.get()
409                                               : first.size_forward());
410   }
411 
412   static const nsScannerIterator::value_type* read(
413       const nsScannerIterator& iter) {
414     return iter.get();
415   }
416 
417   static void advance(nsScannerIterator& s, difference_type n) { s.advance(n); }
418 };
419 
420 /**
421  * inline methods follow
422  */
423 
424 inline void nsScannerIterator::normalize_forward() {
425   while (mPosition == mFragment.mFragmentEnd &&
426          mOwner->GetNextFragment(mFragment))
427     mPosition = mFragment.mFragmentStart;
428 }
429 
430 inline void nsScannerIterator::normalize_backward() {
431   while (mPosition == mFragment.mFragmentStart &&
432          mOwner->GetPrevFragment(mFragment))
433     mPosition = mFragment.mFragmentEnd;
434 }
435 
436 inline bool operator==(const nsScannerIterator& lhs,
437                        const nsScannerIterator& rhs) {
438   return lhs.get() == rhs.get();
439 }
440 
441 inline bool operator!=(const nsScannerIterator& lhs,
442                        const nsScannerIterator& rhs) {
443   return lhs.get() != rhs.get();
444 }
445 
446 inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
447     : mBuffer(const_cast<Buffer*>(aIter.buffer())),
448       mPosition(const_cast<char16_t*>(aIter.get())) {}
449 
450 inline nsScannerBufferList::Position& nsScannerBufferList::Position::operator=(
451     const nsScannerIterator& aIter) {
452   mBuffer = const_cast<Buffer*>(aIter.buffer());
453   mPosition = const_cast<char16_t*>(aIter.get());
454   return *this;
455 }
456 
457 /**
458  * scanner string utils
459  *
460  * These methods mimic the API provided by nsReadableUtils in xpcom/string.
461  * Here we provide only the methods that the htmlparser module needs.
462  */
463 
464 inline size_t Distance(const nsScannerIterator& aStart,
465                        const nsScannerIterator& aEnd) {
466   typedef nsScannerBufferList::Position Position;
467   return Position::Distance(Position(aStart), Position(aEnd));
468 }
469 
470 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart,
471                    const nsScannerIterator& aSrcEnd, nsAString& aDest);
472 
473 inline bool CopyUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) {
474   nsScannerIterator begin, end;
475   return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
476 }
477 
478 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
479                      const nsScannerIterator& aSrcEnd, nsAString& aDest);
480 
481 inline bool AppendUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) {
482   nsScannerIterator begin, end;
483   return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
484 }
485 
486 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
487                      const nsScannerIterator& aSrcEnd,
488                      nsScannerSharedSubstring& aDest);
489 
490 bool FindCharInReadable(char16_t aChar, nsScannerIterator& aStart,
491                         const nsScannerIterator& aEnd);
492 
493 bool FindInReadable(const nsAString& aPattern, nsScannerIterator& aStart,
494                     nsScannerIterator& aEnd,
495                     nsStringComparator = nsTDefaultStringComparator);
496 
497 bool RFindInReadable(const nsAString& aPattern, nsScannerIterator& aStart,
498                      nsScannerIterator& aEnd,
499                      nsStringComparator = nsTDefaultStringComparator);
500 
501 inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
502                                           nsScannerIterator& aStart,
503                                           nsScannerIterator& aEnd) {
504   return FindInReadable(aPattern, aStart, aEnd,
505                         nsCaseInsensitiveStringComparator);
506 }
507 
508 #endif  // !defined(nsScannerString_h___)
509