1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include <stdlib.h>
8 #include "nsScannerString.h"
9 #include "mozilla/CheckedInt.h"
10 
11 /**
12  * nsScannerBufferList
13  */
14 
15 #define MAX_CAPACITY \
16   ((UINT32_MAX / sizeof(char16_t)) - (sizeof(Buffer) + sizeof(char16_t)))
17 
AllocBufferFromString(const nsAString & aString)18 nsScannerBufferList::Buffer* nsScannerBufferList::AllocBufferFromString(
19     const nsAString& aString) {
20   uint32_t len = aString.Length();
21   Buffer* buf = AllocBuffer(len);
22 
23   if (buf) {
24     nsAString::const_iterator source;
25     aString.BeginReading(source);
26     nsCharTraits<char16_t>::copy(buf->DataStart(), source.get(), len);
27   }
28   return buf;
29 }
30 
AllocBuffer(uint32_t capacity)31 nsScannerBufferList::Buffer* nsScannerBufferList::AllocBuffer(
32     uint32_t capacity) {
33   if (capacity > MAX_CAPACITY) return nullptr;
34 
35   void* ptr = malloc(sizeof(Buffer) + (capacity + 1) * sizeof(char16_t));
36   if (!ptr) return nullptr;
37 
38   Buffer* buf = new (ptr) Buffer();
39 
40   buf->mUsageCount = 0;
41   buf->mDataEnd = buf->DataStart() + capacity;
42 
43   // XXX null terminate.  this shouldn't be required, but we do it because
44   // nsScanner erroneously thinks it can dereference DataEnd :-(
45   *buf->mDataEnd = char16_t(0);
46   return buf;
47 }
48 
ReleaseAll()49 void nsScannerBufferList::ReleaseAll() {
50   while (!mBuffers.isEmpty()) {
51     Buffer* node = mBuffers.popFirst();
52     // printf(">>> freeing buffer @%p\n", node);
53     free(node);
54   }
55 }
56 
SplitBuffer(const Position & pos)57 void nsScannerBufferList::SplitBuffer(const Position& pos) {
58   // splitting to the right keeps the work string and any extant token
59   // pointing to and holding a reference count on the same buffer.
60 
61   Buffer* bufferToSplit = pos.mBuffer;
62   NS_ASSERTION(bufferToSplit, "null pointer");
63 
64   uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
65   NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
66                    splitOffset <= bufferToSplit->DataLength(),
67                "split offset is outside buffer");
68 
69   uint32_t len = bufferToSplit->DataLength() - splitOffset;
70   Buffer* new_buffer = AllocBuffer(len);
71   if (new_buffer) {
72     nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
73                                  bufferToSplit->DataStart() + splitOffset, len);
74     InsertAfter(new_buffer, bufferToSplit);
75     bufferToSplit->SetDataLength(splitOffset);
76   }
77 }
78 
DiscardUnreferencedPrefix(Buffer * aBuf)79 void nsScannerBufferList::DiscardUnreferencedPrefix(Buffer* aBuf) {
80   if (aBuf == Head()) {
81     while (!mBuffers.isEmpty() && !Head()->IsInUse()) {
82       Buffer* buffer = Head();
83       buffer->remove();
84       free(buffer);
85     }
86   }
87 }
88 
Distance(const Position & aStart,const Position & aEnd)89 size_t nsScannerBufferList::Position::Distance(const Position& aStart,
90                                                const Position& aEnd) {
91   size_t result = 0;
92   if (aStart.mBuffer == aEnd.mBuffer) {
93     result = aEnd.mPosition - aStart.mPosition;
94   } else {
95     result = aStart.mBuffer->DataEnd() - aStart.mPosition;
96     for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
97       result += b->DataLength();
98     result += aEnd.mPosition - aEnd.mBuffer->DataStart();
99   }
100   return result;
101 }
102 
103 /**
104  * nsScannerSubstring
105  */
106 
nsScannerSubstring()107 nsScannerSubstring::nsScannerSubstring()
108     : mStart(nullptr, nullptr),
109       mEnd(nullptr, nullptr),
110       mBufferList(nullptr),
111       mLength(0),
112       mIsDirty(true) {}
113 
nsScannerSubstring(const nsAString & s)114 nsScannerSubstring::nsScannerSubstring(const nsAString& s)
115     : mBufferList(nullptr), mIsDirty(true) {
116   Rebind(s);
117 }
118 
~nsScannerSubstring()119 nsScannerSubstring::~nsScannerSubstring() {
120   release_ownership_of_buffer_list();
121 }
122 
CountChar(char16_t c) const123 int32_t nsScannerSubstring::CountChar(char16_t c) const {
124   /*
125     re-write this to use a counting sink
126    */
127 
128   size_type result = 0;
129   size_type lengthToExamine = Length();
130 
131   nsScannerIterator iter;
132   for (BeginReading(iter);;) {
133     int32_t lengthToExamineInThisFragment = iter.size_forward();
134     const char16_t* fromBegin = iter.get();
135     result += size_type(
136         NS_COUNT(fromBegin, fromBegin + lengthToExamineInThisFragment, c));
137     if (!(lengthToExamine -= lengthToExamineInThisFragment)) return result;
138     iter.advance(lengthToExamineInThisFragment);
139   }
140   // never reached; quiets warnings
141   return 0;
142 }
143 
Rebind(const nsScannerSubstring & aString,const nsScannerIterator & aStart,const nsScannerIterator & aEnd)144 void nsScannerSubstring::Rebind(const nsScannerSubstring& aString,
145                                 const nsScannerIterator& aStart,
146                                 const nsScannerIterator& aEnd) {
147   // allow for the case where &aString == this
148 
149   aString.acquire_ownership_of_buffer_list();
150   release_ownership_of_buffer_list();
151 
152   mStart = aStart;
153   mEnd = aEnd;
154   mBufferList = aString.mBufferList;
155   mLength = Distance(aStart, aEnd);
156   mIsDirty = true;
157 }
158 
Rebind(const nsAString & aString)159 void nsScannerSubstring::Rebind(const nsAString& aString) {
160   release_ownership_of_buffer_list();
161 
162   mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
163   mIsDirty = true;
164 
165   init_range_from_buffer_list();
166   acquire_ownership_of_buffer_list();
167 }
168 
AsString() const169 const nsAString& nsScannerSubstring::AsString() const {
170   if (mIsDirty) {
171     nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
172 
173     if (mStart.mBuffer == mEnd.mBuffer) {
174       // We only have a single fragment to deal with, so just return it
175       // as a substring.
176       mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
177     } else {
178       // Otherwise, we need to copy the data into a flattened buffer.
179       nsScannerIterator start, end;
180       CopyUnicodeTo(BeginReading(start), EndReading(end),
181                     mutable_this->mFlattenedRep);
182     }
183 
184     mutable_this->mIsDirty = false;
185   }
186 
187   return mFlattenedRep;
188 }
189 
BeginReading(nsScannerIterator & iter) const190 nsScannerIterator& nsScannerSubstring::BeginReading(
191     nsScannerIterator& iter) const {
192   iter.mOwner = this;
193 
194   iter.mFragment.mBuffer = mStart.mBuffer;
195   iter.mFragment.mFragmentStart = mStart.mPosition;
196   if (mStart.mBuffer == mEnd.mBuffer)
197     iter.mFragment.mFragmentEnd = mEnd.mPosition;
198   else
199     iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
200 
201   iter.mPosition = mStart.mPosition;
202   iter.normalize_forward();
203   return iter;
204 }
205 
EndReading(nsScannerIterator & iter) const206 nsScannerIterator& nsScannerSubstring::EndReading(
207     nsScannerIterator& iter) const {
208   iter.mOwner = this;
209 
210   iter.mFragment.mBuffer = mEnd.mBuffer;
211   iter.mFragment.mFragmentEnd = mEnd.mPosition;
212   if (mStart.mBuffer == mEnd.mBuffer)
213     iter.mFragment.mFragmentStart = mStart.mPosition;
214   else
215     iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
216 
217   iter.mPosition = mEnd.mPosition;
218   // must not |normalize_backward| as that would likely invalidate tests like
219   // |while ( first != last )|
220   return iter;
221 }
222 
GetNextFragment(nsScannerFragment & frag) const223 bool nsScannerSubstring::GetNextFragment(nsScannerFragment& frag) const {
224   // check to see if we are at the end of the buffer list
225   if (frag.mBuffer == mEnd.mBuffer) return false;
226 
227   frag.mBuffer = frag.mBuffer->getNext();
228 
229   if (frag.mBuffer == mStart.mBuffer)
230     frag.mFragmentStart = mStart.mPosition;
231   else
232     frag.mFragmentStart = frag.mBuffer->DataStart();
233 
234   if (frag.mBuffer == mEnd.mBuffer)
235     frag.mFragmentEnd = mEnd.mPosition;
236   else
237     frag.mFragmentEnd = frag.mBuffer->DataEnd();
238 
239   return true;
240 }
241 
GetPrevFragment(nsScannerFragment & frag) const242 bool nsScannerSubstring::GetPrevFragment(nsScannerFragment& frag) const {
243   // check to see if we are at the beginning of the buffer list
244   if (frag.mBuffer == mStart.mBuffer) return false;
245 
246   frag.mBuffer = frag.mBuffer->getPrevious();
247 
248   if (frag.mBuffer == mStart.mBuffer)
249     frag.mFragmentStart = mStart.mPosition;
250   else
251     frag.mFragmentStart = frag.mBuffer->DataStart();
252 
253   if (frag.mBuffer == mEnd.mBuffer)
254     frag.mFragmentEnd = mEnd.mPosition;
255   else
256     frag.mFragmentEnd = frag.mBuffer->DataEnd();
257 
258   return true;
259 }
260 
261 /**
262  * nsScannerString
263  */
264 
nsScannerString(Buffer * aBuf)265 nsScannerString::nsScannerString(Buffer* aBuf) {
266   mBufferList = new nsScannerBufferList(aBuf);
267 
268   init_range_from_buffer_list();
269   acquire_ownership_of_buffer_list();
270 }
271 
AppendBuffer(Buffer * aBuf)272 void nsScannerString::AppendBuffer(Buffer* aBuf) {
273   mBufferList->Append(aBuf);
274   mLength += aBuf->DataLength();
275 
276   mEnd.mBuffer = aBuf;
277   mEnd.mPosition = aBuf->DataEnd();
278 
279   mIsDirty = true;
280 }
281 
DiscardPrefix(const nsScannerIterator & aIter)282 void nsScannerString::DiscardPrefix(const nsScannerIterator& aIter) {
283   Position old_start(mStart);
284   mStart = aIter;
285   mLength -= Position::Distance(old_start, mStart);
286 
287   mStart.mBuffer->IncrementUsageCount();
288   old_start.mBuffer->DecrementUsageCount();
289 
290   mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
291 
292   mIsDirty = true;
293 }
294 
UngetReadable(const nsAString & aReadable,const nsScannerIterator & aInsertPoint)295 void nsScannerString::UngetReadable(const nsAString& aReadable,
296                                     const nsScannerIterator& aInsertPoint)
297 /*
298  * Warning: this routine manipulates the shared buffer list in an
299  * unexpected way.  The original design did not really allow for
300  * insertions, but this call promises that if called for a point after the
301  * end of all extant token strings, that no token string or the work string
302  * will be invalidated.
303  *
304  * This routine is protected because it is the responsibility of the
305  * derived class to keep those promises.
306  */
307 {
308   Position insertPos(aInsertPoint);
309 
310   mBufferList->SplitBuffer(insertPos);
311   // splitting to the right keeps the work string and any extant token
312   // pointing to and holding a reference count on the same buffer
313 
314   Buffer* new_buffer = AllocBufferFromString(aReadable);
315   // make a new buffer with all the data to insert...
316   // ALERT: we may have empty space to re-use in the split buffer,
317   // measure the cost of this and decide if we should do the work to fill
318   // it
319 
320   Buffer* buffer_to_split = insertPos.mBuffer;
321   mBufferList->InsertAfter(new_buffer, buffer_to_split);
322   mLength += aReadable.Length();
323 
324   mEnd.mBuffer = mBufferList->Tail();
325   mEnd.mPosition = mEnd.mBuffer->DataEnd();
326 
327   mIsDirty = true;
328 }
329 
330 /**
331  * nsScannerSharedSubstring
332  */
333 
Rebind(const nsScannerIterator & aStart,const nsScannerIterator & aEnd)334 void nsScannerSharedSubstring::Rebind(const nsScannerIterator& aStart,
335                                       const nsScannerIterator& aEnd) {
336   // If the start and end positions are inside the same buffer, we must
337   // acquire ownership of the buffer.  If not, we can optimize by not holding
338   // onto it.
339 
340   Buffer* buffer = const_cast<Buffer*>(aStart.buffer());
341   bool sameBuffer = buffer == aEnd.buffer();
342 
343   nsScannerBufferList* bufferList;
344 
345   if (sameBuffer) {
346     bufferList = aStart.mOwner->mBufferList;
347     bufferList->AddRef();
348     buffer->IncrementUsageCount();
349   }
350 
351   if (mBufferList) ReleaseBuffer();
352 
353   if (sameBuffer) {
354     mBuffer = buffer;
355     mBufferList = bufferList;
356     mString.Rebind(aStart.mPosition, aEnd.mPosition);
357   } else {
358     mBuffer = nullptr;
359     mBufferList = nullptr;
360     CopyUnicodeTo(aStart, aEnd, mString);
361   }
362 }
363 
ReleaseBuffer()364 void nsScannerSharedSubstring::ReleaseBuffer() {
365   NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
366   mBuffer->DecrementUsageCount();
367   mBufferList->DiscardUnreferencedPrefix(mBuffer);
368   mBufferList->Release();
369 }
370 
MakeMutable()371 void nsScannerSharedSubstring::MakeMutable() {
372   nsString temp(mString);  // this will force a copy of the data
373   mString.Assign(temp);    // mString will now share the just-allocated buffer
374 
375   ReleaseBuffer();
376 
377   mBuffer = nullptr;
378   mBufferList = nullptr;
379 }
380 
381 /**
382  * utils -- based on code from nsReadableUtils.cpp
383  */
384 
385 // private helper function
copy_multifragment_string(nsScannerIterator & first,const nsScannerIterator & last,nsAString::iterator & result)386 static inline nsAString::iterator& copy_multifragment_string(
387     nsScannerIterator& first, const nsScannerIterator& last,
388     nsAString::iterator& result) {
389   typedef nsCharSourceTraits<nsScannerIterator> source_traits;
390   typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
391 
392   while (first != last) {
393     uint32_t distance = source_traits::readable_distance(first, last);
394     sink_traits::write(result, source_traits::read(first), distance);
395     NS_ASSERTION(distance > 0,
396                  "|copy_multifragment_string| will never terminate");
397     source_traits::advance(first, distance);
398   }
399 
400   return result;
401 }
402 
CopyUnicodeTo(const nsScannerIterator & aSrcStart,const nsScannerIterator & aSrcEnd,nsAString & aDest)403 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart,
404                    const nsScannerIterator& aSrcEnd, nsAString& aDest) {
405   mozilla::CheckedInt<nsAString::size_type> distance(
406       Distance(aSrcStart, aSrcEnd));
407   if (!distance.isValid()) {
408     return false;  // overflow detected
409   }
410 
411   if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
412     aDest.Truncate();
413     return false;  // out of memory
414   }
415   auto writer = aDest.BeginWriting();
416   nsScannerIterator fromBegin(aSrcStart);
417 
418   copy_multifragment_string(fromBegin, aSrcEnd, writer);
419   return true;
420 }
421 
AppendUnicodeTo(const nsScannerIterator & aSrcStart,const nsScannerIterator & aSrcEnd,nsScannerSharedSubstring & aDest)422 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
423                      const nsScannerIterator& aSrcEnd,
424                      nsScannerSharedSubstring& aDest) {
425   // Check whether we can just create a dependent string.
426   if (aDest.str().IsEmpty()) {
427     // We can just make |aDest| point to the buffer.
428     // This will take care of copying if the buffer spans fragments.
429     aDest.Rebind(aSrcStart, aSrcEnd);
430     return true;
431   }
432   // The dest string is not empty, so it can't be a dependent substring.
433   return AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
434 }
435 
AppendUnicodeTo(const nsScannerIterator & aSrcStart,const nsScannerIterator & aSrcEnd,nsAString & aDest)436 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
437                      const nsScannerIterator& aSrcEnd, nsAString& aDest) {
438   const nsAString::size_type oldLength = aDest.Length();
439   mozilla::CheckedInt<nsAString::size_type> newLen(
440       Distance(aSrcStart, aSrcEnd));
441   newLen += oldLength;
442   if (!newLen.isValid()) {
443     return false;  // overflow detected
444   }
445 
446   if (!aDest.SetLength(newLen.value(), mozilla::fallible))
447     return false;  // out of memory
448   auto writer = aDest.BeginWriting();
449   std::advance(writer, oldLength);
450   nsScannerIterator fromBegin(aSrcStart);
451 
452   copy_multifragment_string(fromBegin, aSrcEnd, writer);
453   return true;
454 }
455 
FindCharInReadable(char16_t aChar,nsScannerIterator & aSearchStart,const nsScannerIterator & aSearchEnd)456 bool FindCharInReadable(char16_t aChar, nsScannerIterator& aSearchStart,
457                         const nsScannerIterator& aSearchEnd) {
458   while (aSearchStart != aSearchEnd) {
459     int32_t fragmentLength;
460     if (SameFragment(aSearchStart, aSearchEnd))
461       fragmentLength = aSearchEnd.get() - aSearchStart.get();
462     else
463       fragmentLength = aSearchStart.size_forward();
464 
465     const char16_t* charFoundAt =
466         nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
467     if (charFoundAt) {
468       aSearchStart.advance(charFoundAt - aSearchStart.get());
469       return true;
470     }
471 
472     aSearchStart.advance(fragmentLength);
473   }
474 
475   return false;
476 }
477 
FindInReadable(const nsAString & aPattern,nsScannerIterator & aSearchStart,nsScannerIterator & aSearchEnd,const nsStringComparator compare)478 bool FindInReadable(const nsAString& aPattern, nsScannerIterator& aSearchStart,
479                     nsScannerIterator& aSearchEnd,
480                     const nsStringComparator compare) {
481   bool found_it = false;
482 
483   // only bother searching at all if we're given a non-empty range to search
484   if (aSearchStart != aSearchEnd) {
485     nsAString::const_iterator aPatternStart, aPatternEnd;
486     aPattern.BeginReading(aPatternStart);
487     aPattern.EndReading(aPatternEnd);
488 
489     // outer loop keeps searching till we find it or run out of string to search
490     while (!found_it) {
491       // fast inner loop (that's what it's called, not what it is) looks for a
492       // potential match
493       while (aSearchStart != aSearchEnd &&
494              compare(aPatternStart.get(), aSearchStart.get(), 1, 1))
495         ++aSearchStart;
496 
497       // if we broke out of the `fast' loop because we're out of string ...
498       // we're done: no match
499       if (aSearchStart == aSearchEnd) break;
500 
501       // otherwise, we're at a potential match, let's see if we really hit one
502       nsAString::const_iterator testPattern(aPatternStart);
503       nsScannerIterator testSearch(aSearchStart);
504 
505       // slow inner loop verifies the potential match (found by the `fast' loop)
506       // at the current position
507       for (;;) {
508         // we already compared the first character in the outer loop,
509         //  so we'll advance before the next comparison
510         ++testPattern;
511         ++testSearch;
512 
513         // if we verified all the way to the end of the pattern, then we found
514         // it!
515         if (testPattern == aPatternEnd) {
516           found_it = true;
517           aSearchEnd = testSearch;  // return the exact found range through the
518                                     // parameters
519           break;
520         }
521 
522         // if we got to end of the string we're searching before we hit the end
523         // of the
524         //  pattern, we'll never find what we're looking for
525         if (testSearch == aSearchEnd) {
526           aSearchStart = aSearchEnd;
527           break;
528         }
529 
530         // else if we mismatched ... it's time to advance to the next search
531         // position
532         //  and get back into the `fast' loop
533         if (compare(testPattern.get(), testSearch.get(), 1, 1)) {
534           ++aSearchStart;
535           break;
536         }
537       }
538     }
539   }
540 
541   return found_it;
542 }
543 
544 /**
545  * This implementation is simple, but does too much work.
546  * It searches the entire string from left to right, and returns the last match
547  * found, if any. This implementation will be replaced when I get
548  * |reverse_iterator|s working.
549  */
RFindInReadable(const nsAString & aPattern,nsScannerIterator & aSearchStart,nsScannerIterator & aSearchEnd,const nsStringComparator aComparator)550 bool RFindInReadable(const nsAString& aPattern, nsScannerIterator& aSearchStart,
551                      nsScannerIterator& aSearchEnd,
552                      const nsStringComparator aComparator) {
553   bool found_it = false;
554 
555   nsScannerIterator savedSearchEnd(aSearchEnd);
556   nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
557 
558   while (searchStart != searchEnd) {
559     if (FindInReadable(aPattern, searchStart, searchEnd, aComparator)) {
560       found_it = true;
561 
562       // this is the best match so far, so remember it
563       aSearchStart = searchStart;
564       aSearchEnd = searchEnd;
565 
566       // ...and get ready to search some more
567       //  (it's tempting to set |searchStart=searchEnd| ... but that misses
568       //  overlapping patterns)
569       ++searchStart;
570       searchEnd = savedSearchEnd;
571     }
572   }
573 
574   // if we never found it, return an empty range
575   if (!found_it) aSearchStart = aSearchEnd;
576 
577   return found_it;
578 }
579