1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
9 
10 #include "nsString.h"
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
13 #include <algorithm>
14 
15 
16   /**
17    * NOTE: nsScannerString (and the other classes defined in this file) are
18    * not related to nsAString or any of the other xpcom/string classes.
19    *
20    * nsScannerString is based on the nsSlidingString implementation that used
21    * to live in xpcom/string.  Now that nsAString is limited to representing
22    * only single fragment strings, nsSlidingString can no longer be used.
23    *
24    * An advantage to this design is that it does not employ any virtual
25    * functions.
26    *
27    * This file uses SCC-style indenting in deference to the nsSlidingString
28    * code from which this code is derived ;-)
29    */
30 
31 class nsScannerIterator;
32 class nsScannerSubstring;
33 class nsScannerString;
34 
35 
36   /**
37    * nsScannerBufferList
38    *
39    * This class maintains a list of heap-allocated Buffer objects.  The buffers
40    * are maintained in a circular linked list.  Each buffer has a usage count
41    * that is decremented by the owning nsScannerSubstring.
42    *
43    * The buffer list itself is reference counted.  This allows the buffer list
44    * to be shared by multiple nsScannerSubstring objects.  The reference
45    * counting is not threadsafe, which is not at all a requirement.
46    *
47    * When a nsScannerSubstring releases its reference to a buffer list, it
48    * decrements the usage count of the first buffer in the buffer list that it
49    * was referencing.  It informs the buffer list that it can discard buffers
50    * starting at that prefix.  The buffer list will do so if the usage count of
51    * that buffer is 0 and if it is the first buffer in the list.  It will
52    * continue to prune buffers starting from the front of the buffer list until
53    * it finds a buffer that has a usage count that is non-zero.
54    */
55 class nsScannerBufferList
56   {
57     public:
58 
59         /**
60          * Buffer objects are directly followed by a data segment.  The start
61          * of the data segment is determined by increment the |this| pointer
62          * by 1 unit.
63          */
64       class Buffer : public mozilla::LinkedListElement<Buffer>
65         {
66           public:
67 
IncrementUsageCount()68             void IncrementUsageCount() { ++mUsageCount; }
DecrementUsageCount()69             void DecrementUsageCount() { --mUsageCount; }
70 
IsInUse()71             bool IsInUse() const { return mUsageCount != 0; }
72 
DataStart()73             const char16_t* DataStart() const { return (const char16_t*) (this+1); }
DataStart()74                   char16_t* DataStart()       { return (      char16_t*) (this+1); }
75 
DataEnd()76             const char16_t* DataEnd() const { return mDataEnd; }
DataEnd()77                   char16_t* DataEnd()       { return mDataEnd; }
78 
Next()79             const Buffer* Next() const { return getNext(); }
Next()80                   Buffer* Next()       { return getNext(); }
81 
Prev()82             const Buffer* Prev() const { return getPrevious(); }
Prev()83                   Buffer* Prev()       { return getPrevious(); }
84 
DataLength()85             uint32_t DataLength() const { return mDataEnd - DataStart(); }
SetDataLength(uint32_t len)86             void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
87 
88           private:
89 
90             friend class nsScannerBufferList;
91 
92             int32_t    mUsageCount;
93             char16_t* mDataEnd;
94         };
95 
96         /**
97          * Position objects serve as lightweight pointers into a buffer list.
98          * The mPosition member must be contained with mBuffer->DataStart()
99          * and mBuffer->DataEnd().
100          */
101       class Position
102         {
103           public:
104 
Position()105             Position() {}
106 
Position(Buffer * buffer,char16_t * position)107             Position( Buffer* buffer, char16_t* position )
108               : mBuffer(buffer)
109               , mPosition(position)
110               {}
111 
112             inline
113             explicit Position( const nsScannerIterator& aIter );
114 
115             inline
116             Position& operator=( const nsScannerIterator& aIter );
117 
118             static size_t Distance( const Position& p1, const Position& p2 );
119 
120             Buffer*    mBuffer;
121             char16_t* mPosition;
122         };
123 
124       static Buffer* AllocBufferFromString( const nsAString& );
125       static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
126 
nsScannerBufferList(Buffer * buf)127       explicit nsScannerBufferList( Buffer* buf )
128         : mRefCnt(0)
129         {
130           mBuffers.insertBack(buf);
131         }
132 
AddRef()133       void  AddRef()  { ++mRefCnt; }
Release()134       void  Release() { if (--mRefCnt == 0) delete this; }
135 
Append(Buffer * buf)136       void  Append( Buffer* buf ) { mBuffers.insertBack(buf); }
InsertAfter(Buffer * buf,Buffer * prev)137       void  InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
138       void  SplitBuffer( const Position& );
139       void  DiscardUnreferencedPrefix( Buffer* );
140 
Head()141             Buffer* Head()       { return mBuffers.getFirst(); }
Head()142       const Buffer* Head() const { return mBuffers.getFirst(); }
143 
Tail()144             Buffer* Tail()       { return mBuffers.getLast(); }
Tail()145       const Buffer* Tail() const { return mBuffers.getLast(); }
146 
147     private:
148 
149       friend class nsScannerSubstring;
150 
~nsScannerBufferList()151       ~nsScannerBufferList() { ReleaseAll(); }
152       void ReleaseAll();
153 
154       int32_t mRefCnt;
155       mozilla::LinkedList<Buffer> mBuffers;
156   };
157 
158 
159   /**
160    * nsScannerFragment represents a "slice" of a Buffer object.
161    */
162 struct nsScannerFragment
163   {
164     typedef nsScannerBufferList::Buffer Buffer;
165 
166     const Buffer*    mBuffer;
167     const char16_t* mFragmentStart;
168     const char16_t* mFragmentEnd;
169   };
170 
171 
172   /**
173    * nsScannerSubstring is the base class for nsScannerString.  It provides
174    * access to iterators and methods to bind the substring to another
175    * substring or nsAString instance.
176    *
177    * This class owns the buffer list.
178    */
179 class nsScannerSubstring
180   {
181     public:
182       typedef nsScannerBufferList::Buffer      Buffer;
183       typedef nsScannerBufferList::Position    Position;
184       typedef uint32_t                         size_type;
185 
186       nsScannerSubstring();
187       explicit nsScannerSubstring( const nsAString& s );
188 
189       ~nsScannerSubstring();
190 
191       nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
192       nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
193 
Length()194       size_type Length() const { return mLength; }
195 
196       int32_t CountChar( char16_t ) const;
197 
198       void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
199       void Rebind( const nsAString& );
200 
201       const nsSubstring& AsString() const;
202 
203       bool GetNextFragment( nsScannerFragment& ) const;
204       bool GetPrevFragment( nsScannerFragment& ) const;
205 
AllocBufferFromString(const nsAString & aStr)206       static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
AllocBuffer(size_type aCapacity)207       static inline Buffer* AllocBuffer( size_type aCapacity )             { return nsScannerBufferList::AllocBuffer(aCapacity); }
208 
209     protected:
210 
acquire_ownership_of_buffer_list()211       void acquire_ownership_of_buffer_list() const
212         {
213           mBufferList->AddRef();
214           mStart.mBuffer->IncrementUsageCount();
215         }
216 
release_ownership_of_buffer_list()217       void release_ownership_of_buffer_list()
218         {
219           if (mBufferList)
220             {
221               mStart.mBuffer->DecrementUsageCount();
222               mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
223               mBufferList->Release();
224             }
225         }
226 
init_range_from_buffer_list()227       void init_range_from_buffer_list()
228         {
229           mStart.mBuffer = mBufferList->Head();
230           mStart.mPosition = mStart.mBuffer->DataStart();
231 
232           mEnd.mBuffer = mBufferList->Tail();
233           mEnd.mPosition = mEnd.mBuffer->DataEnd();
234 
235           mLength = Position::Distance(mStart, mEnd);
236         }
237 
238       Position             mStart;
239       Position             mEnd;
240       nsScannerBufferList *mBufferList;
241       size_type            mLength;
242 
243       // these fields are used to implement AsString
244       nsDependentSubstring mFlattenedRep;
245       bool                 mIsDirty;
246 
247       friend class nsScannerSharedSubstring;
248   };
249 
250 
251   /**
252    * nsScannerString provides methods to grow and modify a buffer list.
253    */
254 class nsScannerString : public nsScannerSubstring
255   {
256     public:
257 
258       explicit nsScannerString( Buffer* );
259 
260         // you are giving ownership to the string, it takes and keeps your
261         // buffer, deleting it when done.
262         // Use AllocBuffer or AllocBufferFromString to create a Buffer object
263         // for use with this function.
264       void AppendBuffer( Buffer* );
265 
266       void DiscardPrefix( const nsScannerIterator& );
267         // any other way you want to do this?
268 
269       void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
270   };
271 
272 
273   /**
274    * nsScannerSharedSubstring implements copy-on-write semantics for
275    * nsScannerSubstring.  When you call .writable(), it will copy the data
276    * and return a mutable string object.  This class also manages releasing
277    * the reference to the scanner buffer when it is no longer needed.
278    */
279 
280 class nsScannerSharedSubstring
281   {
282     public:
nsScannerSharedSubstring()283       nsScannerSharedSubstring()
284         : mBuffer(nullptr), mBufferList(nullptr) { }
285 
~nsScannerSharedSubstring()286       ~nsScannerSharedSubstring()
287         {
288           if (mBufferList)
289             ReleaseBuffer();
290         }
291 
292         // Acquire a copy-on-write reference to the given substring.
293       void Rebind(const nsScannerIterator& aStart,
294                               const nsScannerIterator& aEnd);
295 
296        // Get a mutable reference to this string
writable()297       nsSubstring& writable()
298         {
299           if (mBufferList)
300             MakeMutable();
301 
302           return mString;
303         }
304 
305         // Get a const reference to this string
str()306       const nsSubstring& str() const { return mString; }
307 
308     private:
309       typedef nsScannerBufferList::Buffer Buffer;
310 
311       void ReleaseBuffer();
312       void MakeMutable();
313 
314       nsDependentSubstring  mString;
315       Buffer               *mBuffer;
316       nsScannerBufferList  *mBufferList;
317   };
318 
319   /**
320    * nsScannerIterator works just like nsReadingIterator<CharT> except that
321    * it knows how to iterate over a list of scanner buffers.
322    */
323 class nsScannerIterator
324   {
325     public:
326       typedef nsScannerIterator             self_type;
327       typedef ptrdiff_t                     difference_type;
328       typedef char16_t                     value_type;
329       typedef const char16_t*              pointer;
330       typedef const char16_t&              reference;
331       typedef nsScannerSubstring::Buffer    Buffer;
332 
333     protected:
334 
335       nsScannerFragment         mFragment;
336       const char16_t*          mPosition;
337       const nsScannerSubstring* mOwner;
338 
339       friend class nsScannerSubstring;
340       friend class nsScannerSharedSubstring;
341 
342     public:
nsScannerIterator()343       nsScannerIterator() {}
344       // nsScannerIterator( const nsScannerIterator& );             // auto-generated copy-constructor OK
345       // nsScannerIterator& operator=( const nsScannerIterator& );  // auto-generated copy-assignment operator OK
346 
347       inline void normalize_forward();
348       inline void normalize_backward();
349 
get()350       pointer get() const
351         {
352           return mPosition;
353         }
354 
355       char16_t operator*() const
356         {
357           return *get();
358         }
359 
fragment()360       const nsScannerFragment& fragment() const
361         {
362           return mFragment;
363         }
364 
buffer()365       const Buffer* buffer() const
366         {
367           return mFragment.mBuffer;
368         }
369 
370       self_type& operator++()
371         {
372           ++mPosition;
373           normalize_forward();
374           return *this;
375         }
376 
377       self_type operator++( int )
378         {
379           self_type result(*this);
380           ++mPosition;
381           normalize_forward();
382           return result;
383         }
384 
385       self_type& operator--()
386         {
387           normalize_backward();
388           --mPosition;
389           return *this;
390         }
391 
392       self_type operator--( int )
393         {
394           self_type result(*this);
395           normalize_backward();
396           --mPosition;
397           return result;
398         }
399 
size_forward()400       difference_type size_forward() const
401         {
402           return mFragment.mFragmentEnd - mPosition;
403         }
404 
size_backward()405       difference_type size_backward() const
406         {
407           return mPosition - mFragment.mFragmentStart;
408         }
409 
advance(difference_type n)410       self_type& advance( difference_type n )
411         {
412           while ( n > 0 )
413             {
414               difference_type one_hop = std::min(n, size_forward());
415 
416               NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
417                 // perhaps I should |break| if |!one_hop|?
418 
419               mPosition += one_hop;
420               normalize_forward();
421               n -= one_hop;
422             }
423 
424           while ( n < 0 )
425             {
426               normalize_backward();
427               difference_type one_hop = std::max(n, -size_backward());
428 
429               NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
430                 // perhaps I should |break| if |!one_hop|?
431 
432               mPosition += one_hop;
433               n -= one_hop;
434             }
435 
436           return *this;
437         }
438   };
439 
440 
441 inline
442 bool
SameFragment(const nsScannerIterator & a,const nsScannerIterator & b)443 SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
444   {
445     return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
446   }
447 
448 
449   /**
450    * this class is needed in order to make use of the methods in nsAlgorithm.h
451    */
452 template <>
453 struct nsCharSourceTraits<nsScannerIterator>
454   {
455     typedef nsScannerIterator::difference_type difference_type;
456 
457     static
458     uint32_t
459     readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
460       {
461         return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
462       }
463 
464     static
465     const nsScannerIterator::value_type*
466     read( const nsScannerIterator& iter )
467       {
468         return iter.get();
469       }
470 
471     static
472     void
473     advance( nsScannerIterator& s, difference_type n )
474       {
475         s.advance(n);
476       }
477   };
478 
479 
480   /**
481    * inline methods follow
482    */
483 
484 inline
485 void
486 nsScannerIterator::normalize_forward()
487   {
488     while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
489       mPosition = mFragment.mFragmentStart;
490   }
491 
492 inline
493 void
494 nsScannerIterator::normalize_backward()
495   {
496     while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
497       mPosition = mFragment.mFragmentEnd;
498   }
499 
500 inline
501 bool
502 operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
503   {
504     return lhs.get() == rhs.get();
505   }
506 
507 inline
508 bool
509 operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
510   {
511     return lhs.get() != rhs.get();
512   }
513 
514 
515 inline
516 nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
517   : mBuffer(const_cast<Buffer*>(aIter.buffer()))
518   , mPosition(const_cast<char16_t*>(aIter.get()))
519   {}
520 
521 inline
522 nsScannerBufferList::Position&
523 nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
524   {
525     mBuffer   = const_cast<Buffer*>(aIter.buffer());
526     mPosition = const_cast<char16_t*>(aIter.get());
527     return *this;
528   }
529 
530 
531   /**
532    * scanner string utils
533    *
534    * These methods mimic the API provided by nsReadableUtils in xpcom/string.
535    * Here we provide only the methods that the htmlparser module needs.
536    */
537 
538 inline
539 size_t
540 Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
541   {
542     typedef nsScannerBufferList::Position Position;
543     return Position::Distance(Position(aStart), Position(aEnd));
544   }
545 
546 bool
547 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
548                const nsScannerIterator& aSrcEnd,
549                nsAString& aDest );
550 
551 inline
552 bool
553 CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
554   {
555     nsScannerIterator begin, end;
556     return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
557   }
558 
559 bool
560 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
561                  const nsScannerIterator& aSrcEnd,
562                  nsAString& aDest );
563 
564 inline
565 bool
566 AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
567   {
568     nsScannerIterator begin, end;
569     return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
570   }
571 
572 bool
573 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
574                  const nsScannerIterator& aSrcEnd,
575                  nsScannerSharedSubstring& aDest );
576 
577 bool
578 FindCharInReadable( char16_t aChar,
579                     nsScannerIterator& aStart,
580                     const nsScannerIterator& aEnd );
581 
582 bool
583 FindInReadable( const nsAString& aPattern,
584                 nsScannerIterator& aStart,
585                 nsScannerIterator& aEnd,
586                 const nsStringComparator& = nsDefaultStringComparator() );
587 
588 bool
589 RFindInReadable( const nsAString& aPattern,
590                  nsScannerIterator& aStart,
591                  nsScannerIterator& aEnd,
592                  const nsStringComparator& = nsDefaultStringComparator() );
593 
594 inline
595 bool
596 CaseInsensitiveFindInReadable( const nsAString& aPattern,
597                                nsScannerIterator& aStart,
598                                nsScannerIterator& aEnd )
599   {
600     return FindInReadable(aPattern, aStart, aEnd,
601                           nsCaseInsensitiveStringComparator());
602   }
603 
604 #endif // !defined(nsScannerString_h___)
605