1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
9
10 #include "nsString.h"
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
13 #include <algorithm>
14
15
16 /**
17 * NOTE: nsScannerString (and the other classes defined in this file) are
18 * not related to nsAString or any of the other xpcom/string classes.
19 *
20 * nsScannerString is based on the nsSlidingString implementation that used
21 * to live in xpcom/string. Now that nsAString is limited to representing
22 * only single fragment strings, nsSlidingString can no longer be used.
23 *
24 * An advantage to this design is that it does not employ any virtual
25 * functions.
26 *
27 * This file uses SCC-style indenting in deference to the nsSlidingString
28 * code from which this code is derived ;-)
29 */
30
31 class nsScannerIterator;
32 class nsScannerSubstring;
33 class nsScannerString;
34
35
36 /**
37 * nsScannerBufferList
38 *
39 * This class maintains a list of heap-allocated Buffer objects. The buffers
40 * are maintained in a circular linked list. Each buffer has a usage count
41 * that is decremented by the owning nsScannerSubstring.
42 *
43 * The buffer list itself is reference counted. This allows the buffer list
44 * to be shared by multiple nsScannerSubstring objects. The reference
45 * counting is not threadsafe, which is not at all a requirement.
46 *
47 * When a nsScannerSubstring releases its reference to a buffer list, it
48 * decrements the usage count of the first buffer in the buffer list that it
49 * was referencing. It informs the buffer list that it can discard buffers
50 * starting at that prefix. The buffer list will do so if the usage count of
51 * that buffer is 0 and if it is the first buffer in the list. It will
52 * continue to prune buffers starting from the front of the buffer list until
53 * it finds a buffer that has a usage count that is non-zero.
54 */
55 class nsScannerBufferList
56 {
57 public:
58
59 /**
60 * Buffer objects are directly followed by a data segment. The start
61 * of the data segment is determined by increment the |this| pointer
62 * by 1 unit.
63 */
64 class Buffer : public mozilla::LinkedListElement<Buffer>
65 {
66 public:
67
IncrementUsageCount()68 void IncrementUsageCount() { ++mUsageCount; }
DecrementUsageCount()69 void DecrementUsageCount() { --mUsageCount; }
70
IsInUse()71 bool IsInUse() const { return mUsageCount != 0; }
72
DataStart()73 const char16_t* DataStart() const { return (const char16_t*) (this+1); }
DataStart()74 char16_t* DataStart() { return ( char16_t*) (this+1); }
75
DataEnd()76 const char16_t* DataEnd() const { return mDataEnd; }
DataEnd()77 char16_t* DataEnd() { return mDataEnd; }
78
Next()79 const Buffer* Next() const { return getNext(); }
Next()80 Buffer* Next() { return getNext(); }
81
Prev()82 const Buffer* Prev() const { return getPrevious(); }
Prev()83 Buffer* Prev() { return getPrevious(); }
84
DataLength()85 uint32_t DataLength() const { return mDataEnd - DataStart(); }
SetDataLength(uint32_t len)86 void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
87
88 private:
89
90 friend class nsScannerBufferList;
91
92 int32_t mUsageCount;
93 char16_t* mDataEnd;
94 };
95
96 /**
97 * Position objects serve as lightweight pointers into a buffer list.
98 * The mPosition member must be contained with mBuffer->DataStart()
99 * and mBuffer->DataEnd().
100 */
101 class Position
102 {
103 public:
104
Position()105 Position() {}
106
Position(Buffer * buffer,char16_t * position)107 Position( Buffer* buffer, char16_t* position )
108 : mBuffer(buffer)
109 , mPosition(position)
110 {}
111
112 inline
113 explicit Position( const nsScannerIterator& aIter );
114
115 inline
116 Position& operator=( const nsScannerIterator& aIter );
117
118 static size_t Distance( const Position& p1, const Position& p2 );
119
120 Buffer* mBuffer;
121 char16_t* mPosition;
122 };
123
124 static Buffer* AllocBufferFromString( const nsAString& );
125 static Buffer* AllocBuffer( uint32_t capacity ); // capacity = number of chars
126
nsScannerBufferList(Buffer * buf)127 explicit nsScannerBufferList( Buffer* buf )
128 : mRefCnt(0)
129 {
130 mBuffers.insertBack(buf);
131 }
132
AddRef()133 void AddRef() { ++mRefCnt; }
Release()134 void Release() { if (--mRefCnt == 0) delete this; }
135
Append(Buffer * buf)136 void Append( Buffer* buf ) { mBuffers.insertBack(buf); }
InsertAfter(Buffer * buf,Buffer * prev)137 void InsertAfter( Buffer* buf, Buffer* prev ) { prev->setNext(buf); }
138 void SplitBuffer( const Position& );
139 void DiscardUnreferencedPrefix( Buffer* );
140
Head()141 Buffer* Head() { return mBuffers.getFirst(); }
Head()142 const Buffer* Head() const { return mBuffers.getFirst(); }
143
Tail()144 Buffer* Tail() { return mBuffers.getLast(); }
Tail()145 const Buffer* Tail() const { return mBuffers.getLast(); }
146
147 private:
148
149 friend class nsScannerSubstring;
150
~nsScannerBufferList()151 ~nsScannerBufferList() { ReleaseAll(); }
152 void ReleaseAll();
153
154 int32_t mRefCnt;
155 mozilla::LinkedList<Buffer> mBuffers;
156 };
157
158
159 /**
160 * nsScannerFragment represents a "slice" of a Buffer object.
161 */
162 struct nsScannerFragment
163 {
164 typedef nsScannerBufferList::Buffer Buffer;
165
166 const Buffer* mBuffer;
167 const char16_t* mFragmentStart;
168 const char16_t* mFragmentEnd;
169 };
170
171
172 /**
173 * nsScannerSubstring is the base class for nsScannerString. It provides
174 * access to iterators and methods to bind the substring to another
175 * substring or nsAString instance.
176 *
177 * This class owns the buffer list.
178 */
179 class nsScannerSubstring
180 {
181 public:
182 typedef nsScannerBufferList::Buffer Buffer;
183 typedef nsScannerBufferList::Position Position;
184 typedef uint32_t size_type;
185
186 nsScannerSubstring();
187 explicit nsScannerSubstring( const nsAString& s );
188
189 ~nsScannerSubstring();
190
191 nsScannerIterator& BeginReading( nsScannerIterator& iter ) const;
192 nsScannerIterator& EndReading( nsScannerIterator& iter ) const;
193
Length()194 size_type Length() const { return mLength; }
195
196 int32_t CountChar( char16_t ) const;
197
198 void Rebind( const nsScannerSubstring&, const nsScannerIterator&, const nsScannerIterator& );
199 void Rebind( const nsAString& );
200
201 const nsSubstring& AsString() const;
202
203 bool GetNextFragment( nsScannerFragment& ) const;
204 bool GetPrevFragment( nsScannerFragment& ) const;
205
AllocBufferFromString(const nsAString & aStr)206 static inline Buffer* AllocBufferFromString( const nsAString& aStr ) { return nsScannerBufferList::AllocBufferFromString(aStr); }
AllocBuffer(size_type aCapacity)207 static inline Buffer* AllocBuffer( size_type aCapacity ) { return nsScannerBufferList::AllocBuffer(aCapacity); }
208
209 protected:
210
acquire_ownership_of_buffer_list()211 void acquire_ownership_of_buffer_list() const
212 {
213 mBufferList->AddRef();
214 mStart.mBuffer->IncrementUsageCount();
215 }
216
release_ownership_of_buffer_list()217 void release_ownership_of_buffer_list()
218 {
219 if (mBufferList)
220 {
221 mStart.mBuffer->DecrementUsageCount();
222 mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
223 mBufferList->Release();
224 }
225 }
226
init_range_from_buffer_list()227 void init_range_from_buffer_list()
228 {
229 mStart.mBuffer = mBufferList->Head();
230 mStart.mPosition = mStart.mBuffer->DataStart();
231
232 mEnd.mBuffer = mBufferList->Tail();
233 mEnd.mPosition = mEnd.mBuffer->DataEnd();
234
235 mLength = Position::Distance(mStart, mEnd);
236 }
237
238 Position mStart;
239 Position mEnd;
240 nsScannerBufferList *mBufferList;
241 size_type mLength;
242
243 // these fields are used to implement AsString
244 nsDependentSubstring mFlattenedRep;
245 bool mIsDirty;
246
247 friend class nsScannerSharedSubstring;
248 };
249
250
251 /**
252 * nsScannerString provides methods to grow and modify a buffer list.
253 */
254 class nsScannerString : public nsScannerSubstring
255 {
256 public:
257
258 explicit nsScannerString( Buffer* );
259
260 // you are giving ownership to the string, it takes and keeps your
261 // buffer, deleting it when done.
262 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
263 // for use with this function.
264 void AppendBuffer( Buffer* );
265
266 void DiscardPrefix( const nsScannerIterator& );
267 // any other way you want to do this?
268
269 void UngetReadable(const nsAString& aReadable, const nsScannerIterator& aCurrentPosition);
270 };
271
272
273 /**
274 * nsScannerSharedSubstring implements copy-on-write semantics for
275 * nsScannerSubstring. When you call .writable(), it will copy the data
276 * and return a mutable string object. This class also manages releasing
277 * the reference to the scanner buffer when it is no longer needed.
278 */
279
280 class nsScannerSharedSubstring
281 {
282 public:
nsScannerSharedSubstring()283 nsScannerSharedSubstring()
284 : mBuffer(nullptr), mBufferList(nullptr) { }
285
~nsScannerSharedSubstring()286 ~nsScannerSharedSubstring()
287 {
288 if (mBufferList)
289 ReleaseBuffer();
290 }
291
292 // Acquire a copy-on-write reference to the given substring.
293 void Rebind(const nsScannerIterator& aStart,
294 const nsScannerIterator& aEnd);
295
296 // Get a mutable reference to this string
writable()297 nsSubstring& writable()
298 {
299 if (mBufferList)
300 MakeMutable();
301
302 return mString;
303 }
304
305 // Get a const reference to this string
str()306 const nsSubstring& str() const { return mString; }
307
308 private:
309 typedef nsScannerBufferList::Buffer Buffer;
310
311 void ReleaseBuffer();
312 void MakeMutable();
313
314 nsDependentSubstring mString;
315 Buffer *mBuffer;
316 nsScannerBufferList *mBufferList;
317 };
318
319 /**
320 * nsScannerIterator works just like nsReadingIterator<CharT> except that
321 * it knows how to iterate over a list of scanner buffers.
322 */
323 class nsScannerIterator
324 {
325 public:
326 typedef nsScannerIterator self_type;
327 typedef ptrdiff_t difference_type;
328 typedef char16_t value_type;
329 typedef const char16_t* pointer;
330 typedef const char16_t& reference;
331 typedef nsScannerSubstring::Buffer Buffer;
332
333 protected:
334
335 nsScannerFragment mFragment;
336 const char16_t* mPosition;
337 const nsScannerSubstring* mOwner;
338
339 friend class nsScannerSubstring;
340 friend class nsScannerSharedSubstring;
341
342 public:
nsScannerIterator()343 nsScannerIterator() {}
344 // nsScannerIterator( const nsScannerIterator& ); // auto-generated copy-constructor OK
345 // nsScannerIterator& operator=( const nsScannerIterator& ); // auto-generated copy-assignment operator OK
346
347 inline void normalize_forward();
348 inline void normalize_backward();
349
get()350 pointer get() const
351 {
352 return mPosition;
353 }
354
355 char16_t operator*() const
356 {
357 return *get();
358 }
359
fragment()360 const nsScannerFragment& fragment() const
361 {
362 return mFragment;
363 }
364
buffer()365 const Buffer* buffer() const
366 {
367 return mFragment.mBuffer;
368 }
369
370 self_type& operator++()
371 {
372 ++mPosition;
373 normalize_forward();
374 return *this;
375 }
376
377 self_type operator++( int )
378 {
379 self_type result(*this);
380 ++mPosition;
381 normalize_forward();
382 return result;
383 }
384
385 self_type& operator--()
386 {
387 normalize_backward();
388 --mPosition;
389 return *this;
390 }
391
392 self_type operator--( int )
393 {
394 self_type result(*this);
395 normalize_backward();
396 --mPosition;
397 return result;
398 }
399
size_forward()400 difference_type size_forward() const
401 {
402 return mFragment.mFragmentEnd - mPosition;
403 }
404
size_backward()405 difference_type size_backward() const
406 {
407 return mPosition - mFragment.mFragmentStart;
408 }
409
advance(difference_type n)410 self_type& advance( difference_type n )
411 {
412 while ( n > 0 )
413 {
414 difference_type one_hop = std::min(n, size_forward());
415
416 NS_ASSERTION(one_hop>0, "Infinite loop: can't advance a reading iterator beyond the end of a string");
417 // perhaps I should |break| if |!one_hop|?
418
419 mPosition += one_hop;
420 normalize_forward();
421 n -= one_hop;
422 }
423
424 while ( n < 0 )
425 {
426 normalize_backward();
427 difference_type one_hop = std::max(n, -size_backward());
428
429 NS_ASSERTION(one_hop<0, "Infinite loop: can't advance (backward) a reading iterator beyond the end of a string");
430 // perhaps I should |break| if |!one_hop|?
431
432 mPosition += one_hop;
433 n -= one_hop;
434 }
435
436 return *this;
437 }
438 };
439
440
441 inline
442 bool
SameFragment(const nsScannerIterator & a,const nsScannerIterator & b)443 SameFragment( const nsScannerIterator& a, const nsScannerIterator& b )
444 {
445 return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
446 }
447
448
449 /**
450 * this class is needed in order to make use of the methods in nsAlgorithm.h
451 */
452 template <>
453 struct nsCharSourceTraits<nsScannerIterator>
454 {
455 typedef nsScannerIterator::difference_type difference_type;
456
457 static
458 uint32_t
459 readable_distance( const nsScannerIterator& first, const nsScannerIterator& last )
460 {
461 return uint32_t(SameFragment(first, last) ? last.get() - first.get() : first.size_forward());
462 }
463
464 static
465 const nsScannerIterator::value_type*
466 read( const nsScannerIterator& iter )
467 {
468 return iter.get();
469 }
470
471 static
472 void
473 advance( nsScannerIterator& s, difference_type n )
474 {
475 s.advance(n);
476 }
477 };
478
479
480 /**
481 * inline methods follow
482 */
483
484 inline
485 void
486 nsScannerIterator::normalize_forward()
487 {
488 while (mPosition == mFragment.mFragmentEnd && mOwner->GetNextFragment(mFragment))
489 mPosition = mFragment.mFragmentStart;
490 }
491
492 inline
493 void
494 nsScannerIterator::normalize_backward()
495 {
496 while (mPosition == mFragment.mFragmentStart && mOwner->GetPrevFragment(mFragment))
497 mPosition = mFragment.mFragmentEnd;
498 }
499
500 inline
501 bool
502 operator==( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
503 {
504 return lhs.get() == rhs.get();
505 }
506
507 inline
508 bool
509 operator!=( const nsScannerIterator& lhs, const nsScannerIterator& rhs )
510 {
511 return lhs.get() != rhs.get();
512 }
513
514
515 inline
516 nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
517 : mBuffer(const_cast<Buffer*>(aIter.buffer()))
518 , mPosition(const_cast<char16_t*>(aIter.get()))
519 {}
520
521 inline
522 nsScannerBufferList::Position&
523 nsScannerBufferList::Position::operator=(const nsScannerIterator& aIter)
524 {
525 mBuffer = const_cast<Buffer*>(aIter.buffer());
526 mPosition = const_cast<char16_t*>(aIter.get());
527 return *this;
528 }
529
530
531 /**
532 * scanner string utils
533 *
534 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
535 * Here we provide only the methods that the htmlparser module needs.
536 */
537
538 inline
539 size_t
540 Distance( const nsScannerIterator& aStart, const nsScannerIterator& aEnd )
541 {
542 typedef nsScannerBufferList::Position Position;
543 return Position::Distance(Position(aStart), Position(aEnd));
544 }
545
546 bool
547 CopyUnicodeTo( const nsScannerIterator& aSrcStart,
548 const nsScannerIterator& aSrcEnd,
549 nsAString& aDest );
550
551 inline
552 bool
553 CopyUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
554 {
555 nsScannerIterator begin, end;
556 return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
557 }
558
559 bool
560 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
561 const nsScannerIterator& aSrcEnd,
562 nsAString& aDest );
563
564 inline
565 bool
566 AppendUnicodeTo( const nsScannerSubstring& aSrc, nsAString& aDest )
567 {
568 nsScannerIterator begin, end;
569 return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
570 }
571
572 bool
573 AppendUnicodeTo( const nsScannerIterator& aSrcStart,
574 const nsScannerIterator& aSrcEnd,
575 nsScannerSharedSubstring& aDest );
576
577 bool
578 FindCharInReadable( char16_t aChar,
579 nsScannerIterator& aStart,
580 const nsScannerIterator& aEnd );
581
582 bool
583 FindInReadable( const nsAString& aPattern,
584 nsScannerIterator& aStart,
585 nsScannerIterator& aEnd,
586 const nsStringComparator& = nsDefaultStringComparator() );
587
588 bool
589 RFindInReadable( const nsAString& aPattern,
590 nsScannerIterator& aStart,
591 nsScannerIterator& aEnd,
592 const nsStringComparator& = nsDefaultStringComparator() );
593
594 inline
595 bool
596 CaseInsensitiveFindInReadable( const nsAString& aPattern,
597 nsScannerIterator& aStart,
598 nsScannerIterator& aEnd )
599 {
600 return FindInReadable(aPattern, aStart, aEnd,
601 nsCaseInsensitiveStringComparator());
602 }
603
604 #endif // !defined(nsScannerString_h___)
605