1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include <stdlib.h>
8 #include "nsScannerString.h"
9 #include "mozilla/CheckedInt.h"
10
11 /**
12 * nsScannerBufferList
13 */
14
15 #define MAX_CAPACITY \
16 ((UINT32_MAX / sizeof(char16_t)) - (sizeof(Buffer) + sizeof(char16_t)))
17
AllocBufferFromString(const nsAString & aString)18 nsScannerBufferList::Buffer* nsScannerBufferList::AllocBufferFromString(
19 const nsAString& aString) {
20 uint32_t len = aString.Length();
21 Buffer* buf = AllocBuffer(len);
22
23 if (buf) {
24 nsAString::const_iterator source;
25 aString.BeginReading(source);
26 nsCharTraits<char16_t>::copy(buf->DataStart(), source.get(), len);
27 }
28 return buf;
29 }
30
AllocBuffer(uint32_t capacity)31 nsScannerBufferList::Buffer* nsScannerBufferList::AllocBuffer(
32 uint32_t capacity) {
33 if (capacity > MAX_CAPACITY) return nullptr;
34
35 void* ptr = malloc(sizeof(Buffer) + (capacity + 1) * sizeof(char16_t));
36 if (!ptr) return nullptr;
37
38 Buffer* buf = new (ptr) Buffer();
39
40 buf->mUsageCount = 0;
41 buf->mDataEnd = buf->DataStart() + capacity;
42
43 // XXX null terminate. this shouldn't be required, but we do it because
44 // nsScanner erroneously thinks it can dereference DataEnd :-(
45 *buf->mDataEnd = char16_t(0);
46 return buf;
47 }
48
ReleaseAll()49 void nsScannerBufferList::ReleaseAll() {
50 while (!mBuffers.isEmpty()) {
51 Buffer* node = mBuffers.popFirst();
52 // printf(">>> freeing buffer @%p\n", node);
53 free(node);
54 }
55 }
56
SplitBuffer(const Position & pos)57 void nsScannerBufferList::SplitBuffer(const Position& pos) {
58 // splitting to the right keeps the work string and any extant token
59 // pointing to and holding a reference count on the same buffer.
60
61 Buffer* bufferToSplit = pos.mBuffer;
62 NS_ASSERTION(bufferToSplit, "null pointer");
63
64 uint32_t splitOffset = pos.mPosition - bufferToSplit->DataStart();
65 NS_ASSERTION(pos.mPosition >= bufferToSplit->DataStart() &&
66 splitOffset <= bufferToSplit->DataLength(),
67 "split offset is outside buffer");
68
69 uint32_t len = bufferToSplit->DataLength() - splitOffset;
70 Buffer* new_buffer = AllocBuffer(len);
71 if (new_buffer) {
72 nsCharTraits<char16_t>::copy(new_buffer->DataStart(),
73 bufferToSplit->DataStart() + splitOffset, len);
74 InsertAfter(new_buffer, bufferToSplit);
75 bufferToSplit->SetDataLength(splitOffset);
76 }
77 }
78
DiscardUnreferencedPrefix(Buffer * aBuf)79 void nsScannerBufferList::DiscardUnreferencedPrefix(Buffer* aBuf) {
80 if (aBuf == Head()) {
81 while (!mBuffers.isEmpty() && !Head()->IsInUse()) {
82 Buffer* buffer = Head();
83 buffer->remove();
84 free(buffer);
85 }
86 }
87 }
88
Distance(const Position & aStart,const Position & aEnd)89 size_t nsScannerBufferList::Position::Distance(const Position& aStart,
90 const Position& aEnd) {
91 size_t result = 0;
92 if (aStart.mBuffer == aEnd.mBuffer) {
93 result = aEnd.mPosition - aStart.mPosition;
94 } else {
95 result = aStart.mBuffer->DataEnd() - aStart.mPosition;
96 for (Buffer* b = aStart.mBuffer->Next(); b != aEnd.mBuffer; b = b->Next())
97 result += b->DataLength();
98 result += aEnd.mPosition - aEnd.mBuffer->DataStart();
99 }
100 return result;
101 }
102
103 /**
104 * nsScannerSubstring
105 */
106
nsScannerSubstring()107 nsScannerSubstring::nsScannerSubstring()
108 : mStart(nullptr, nullptr),
109 mEnd(nullptr, nullptr),
110 mBufferList(nullptr),
111 mLength(0),
112 mIsDirty(true) {}
113
nsScannerSubstring(const nsAString & s)114 nsScannerSubstring::nsScannerSubstring(const nsAString& s)
115 : mBufferList(nullptr), mIsDirty(true) {
116 Rebind(s);
117 }
118
~nsScannerSubstring()119 nsScannerSubstring::~nsScannerSubstring() {
120 release_ownership_of_buffer_list();
121 }
122
CountChar(char16_t c) const123 int32_t nsScannerSubstring::CountChar(char16_t c) const {
124 /*
125 re-write this to use a counting sink
126 */
127
128 size_type result = 0;
129 size_type lengthToExamine = Length();
130
131 nsScannerIterator iter;
132 for (BeginReading(iter);;) {
133 int32_t lengthToExamineInThisFragment = iter.size_forward();
134 const char16_t* fromBegin = iter.get();
135 result += size_type(
136 NS_COUNT(fromBegin, fromBegin + lengthToExamineInThisFragment, c));
137 if (!(lengthToExamine -= lengthToExamineInThisFragment)) return result;
138 iter.advance(lengthToExamineInThisFragment);
139 }
140 // never reached; quiets warnings
141 return 0;
142 }
143
Rebind(const nsScannerSubstring & aString,const nsScannerIterator & aStart,const nsScannerIterator & aEnd)144 void nsScannerSubstring::Rebind(const nsScannerSubstring& aString,
145 const nsScannerIterator& aStart,
146 const nsScannerIterator& aEnd) {
147 // allow for the case where &aString == this
148
149 aString.acquire_ownership_of_buffer_list();
150 release_ownership_of_buffer_list();
151
152 mStart = aStart;
153 mEnd = aEnd;
154 mBufferList = aString.mBufferList;
155 mLength = Distance(aStart, aEnd);
156 mIsDirty = true;
157 }
158
Rebind(const nsAString & aString)159 void nsScannerSubstring::Rebind(const nsAString& aString) {
160 release_ownership_of_buffer_list();
161
162 mBufferList = new nsScannerBufferList(AllocBufferFromString(aString));
163 mIsDirty = true;
164
165 init_range_from_buffer_list();
166 acquire_ownership_of_buffer_list();
167 }
168
AsString() const169 const nsAString& nsScannerSubstring::AsString() const {
170 if (mIsDirty) {
171 nsScannerSubstring* mutable_this = const_cast<nsScannerSubstring*>(this);
172
173 if (mStart.mBuffer == mEnd.mBuffer) {
174 // We only have a single fragment to deal with, so just return it
175 // as a substring.
176 mutable_this->mFlattenedRep.Rebind(mStart.mPosition, mEnd.mPosition);
177 } else {
178 // Otherwise, we need to copy the data into a flattened buffer.
179 nsScannerIterator start, end;
180 CopyUnicodeTo(BeginReading(start), EndReading(end),
181 mutable_this->mFlattenedRep);
182 }
183
184 mutable_this->mIsDirty = false;
185 }
186
187 return mFlattenedRep;
188 }
189
BeginReading(nsScannerIterator & iter) const190 nsScannerIterator& nsScannerSubstring::BeginReading(
191 nsScannerIterator& iter) const {
192 iter.mOwner = this;
193
194 iter.mFragment.mBuffer = mStart.mBuffer;
195 iter.mFragment.mFragmentStart = mStart.mPosition;
196 if (mStart.mBuffer == mEnd.mBuffer)
197 iter.mFragment.mFragmentEnd = mEnd.mPosition;
198 else
199 iter.mFragment.mFragmentEnd = mStart.mBuffer->DataEnd();
200
201 iter.mPosition = mStart.mPosition;
202 iter.normalize_forward();
203 return iter;
204 }
205
EndReading(nsScannerIterator & iter) const206 nsScannerIterator& nsScannerSubstring::EndReading(
207 nsScannerIterator& iter) const {
208 iter.mOwner = this;
209
210 iter.mFragment.mBuffer = mEnd.mBuffer;
211 iter.mFragment.mFragmentEnd = mEnd.mPosition;
212 if (mStart.mBuffer == mEnd.mBuffer)
213 iter.mFragment.mFragmentStart = mStart.mPosition;
214 else
215 iter.mFragment.mFragmentStart = mEnd.mBuffer->DataStart();
216
217 iter.mPosition = mEnd.mPosition;
218 // must not |normalize_backward| as that would likely invalidate tests like
219 // |while ( first != last )|
220 return iter;
221 }
222
GetNextFragment(nsScannerFragment & frag) const223 bool nsScannerSubstring::GetNextFragment(nsScannerFragment& frag) const {
224 // check to see if we are at the end of the buffer list
225 if (frag.mBuffer == mEnd.mBuffer) return false;
226
227 frag.mBuffer = frag.mBuffer->getNext();
228
229 if (frag.mBuffer == mStart.mBuffer)
230 frag.mFragmentStart = mStart.mPosition;
231 else
232 frag.mFragmentStart = frag.mBuffer->DataStart();
233
234 if (frag.mBuffer == mEnd.mBuffer)
235 frag.mFragmentEnd = mEnd.mPosition;
236 else
237 frag.mFragmentEnd = frag.mBuffer->DataEnd();
238
239 return true;
240 }
241
GetPrevFragment(nsScannerFragment & frag) const242 bool nsScannerSubstring::GetPrevFragment(nsScannerFragment& frag) const {
243 // check to see if we are at the beginning of the buffer list
244 if (frag.mBuffer == mStart.mBuffer) return false;
245
246 frag.mBuffer = frag.mBuffer->getPrevious();
247
248 if (frag.mBuffer == mStart.mBuffer)
249 frag.mFragmentStart = mStart.mPosition;
250 else
251 frag.mFragmentStart = frag.mBuffer->DataStart();
252
253 if (frag.mBuffer == mEnd.mBuffer)
254 frag.mFragmentEnd = mEnd.mPosition;
255 else
256 frag.mFragmentEnd = frag.mBuffer->DataEnd();
257
258 return true;
259 }
260
261 /**
262 * nsScannerString
263 */
264
nsScannerString(Buffer * aBuf)265 nsScannerString::nsScannerString(Buffer* aBuf) {
266 mBufferList = new nsScannerBufferList(aBuf);
267
268 init_range_from_buffer_list();
269 acquire_ownership_of_buffer_list();
270 }
271
AppendBuffer(Buffer * aBuf)272 void nsScannerString::AppendBuffer(Buffer* aBuf) {
273 mBufferList->Append(aBuf);
274 mLength += aBuf->DataLength();
275
276 mEnd.mBuffer = aBuf;
277 mEnd.mPosition = aBuf->DataEnd();
278
279 mIsDirty = true;
280 }
281
DiscardPrefix(const nsScannerIterator & aIter)282 void nsScannerString::DiscardPrefix(const nsScannerIterator& aIter) {
283 Position old_start(mStart);
284 mStart = aIter;
285 mLength -= Position::Distance(old_start, mStart);
286
287 mStart.mBuffer->IncrementUsageCount();
288 old_start.mBuffer->DecrementUsageCount();
289
290 mBufferList->DiscardUnreferencedPrefix(old_start.mBuffer);
291
292 mIsDirty = true;
293 }
294
UngetReadable(const nsAString & aReadable,const nsScannerIterator & aInsertPoint)295 void nsScannerString::UngetReadable(const nsAString& aReadable,
296 const nsScannerIterator& aInsertPoint)
297 /*
298 * Warning: this routine manipulates the shared buffer list in an
299 * unexpected way. The original design did not really allow for
300 * insertions, but this call promises that if called for a point after the
301 * end of all extant token strings, that no token string or the work string
302 * will be invalidated.
303 *
304 * This routine is protected because it is the responsibility of the
305 * derived class to keep those promises.
306 */
307 {
308 Position insertPos(aInsertPoint);
309
310 mBufferList->SplitBuffer(insertPos);
311 // splitting to the right keeps the work string and any extant token
312 // pointing to and holding a reference count on the same buffer
313
314 Buffer* new_buffer = AllocBufferFromString(aReadable);
315 // make a new buffer with all the data to insert...
316 // ALERT: we may have empty space to re-use in the split buffer,
317 // measure the cost of this and decide if we should do the work to fill
318 // it
319
320 Buffer* buffer_to_split = insertPos.mBuffer;
321 mBufferList->InsertAfter(new_buffer, buffer_to_split);
322 mLength += aReadable.Length();
323
324 mEnd.mBuffer = mBufferList->Tail();
325 mEnd.mPosition = mEnd.mBuffer->DataEnd();
326
327 mIsDirty = true;
328 }
329
330 /**
331 * nsScannerSharedSubstring
332 */
333
Rebind(const nsScannerIterator & aStart,const nsScannerIterator & aEnd)334 void nsScannerSharedSubstring::Rebind(const nsScannerIterator& aStart,
335 const nsScannerIterator& aEnd) {
336 // If the start and end positions are inside the same buffer, we must
337 // acquire ownership of the buffer. If not, we can optimize by not holding
338 // onto it.
339
340 Buffer* buffer = const_cast<Buffer*>(aStart.buffer());
341 bool sameBuffer = buffer == aEnd.buffer();
342
343 nsScannerBufferList* bufferList;
344
345 if (sameBuffer) {
346 bufferList = aStart.mOwner->mBufferList;
347 bufferList->AddRef();
348 buffer->IncrementUsageCount();
349 }
350
351 if (mBufferList) ReleaseBuffer();
352
353 if (sameBuffer) {
354 mBuffer = buffer;
355 mBufferList = bufferList;
356 mString.Rebind(aStart.mPosition, aEnd.mPosition);
357 } else {
358 mBuffer = nullptr;
359 mBufferList = nullptr;
360 CopyUnicodeTo(aStart, aEnd, mString);
361 }
362 }
363
ReleaseBuffer()364 void nsScannerSharedSubstring::ReleaseBuffer() {
365 NS_ASSERTION(mBufferList, "Should only be called with non-null mBufferList");
366 mBuffer->DecrementUsageCount();
367 mBufferList->DiscardUnreferencedPrefix(mBuffer);
368 mBufferList->Release();
369 }
370
MakeMutable()371 void nsScannerSharedSubstring::MakeMutable() {
372 nsString temp(mString); // this will force a copy of the data
373 mString.Assign(temp); // mString will now share the just-allocated buffer
374
375 ReleaseBuffer();
376
377 mBuffer = nullptr;
378 mBufferList = nullptr;
379 }
380
381 /**
382 * utils -- based on code from nsReadableUtils.cpp
383 */
384
385 // private helper function
copy_multifragment_string(nsScannerIterator & first,const nsScannerIterator & last,nsAString::iterator & result)386 static inline nsAString::iterator& copy_multifragment_string(
387 nsScannerIterator& first, const nsScannerIterator& last,
388 nsAString::iterator& result) {
389 typedef nsCharSourceTraits<nsScannerIterator> source_traits;
390 typedef nsCharSinkTraits<nsAString::iterator> sink_traits;
391
392 while (first != last) {
393 uint32_t distance = source_traits::readable_distance(first, last);
394 sink_traits::write(result, source_traits::read(first), distance);
395 NS_ASSERTION(distance > 0,
396 "|copy_multifragment_string| will never terminate");
397 source_traits::advance(first, distance);
398 }
399
400 return result;
401 }
402
CopyUnicodeTo(const nsScannerIterator & aSrcStart,const nsScannerIterator & aSrcEnd,nsAString & aDest)403 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart,
404 const nsScannerIterator& aSrcEnd, nsAString& aDest) {
405 mozilla::CheckedInt<nsAString::size_type> distance(
406 Distance(aSrcStart, aSrcEnd));
407 if (!distance.isValid()) {
408 return false; // overflow detected
409 }
410
411 if (!aDest.SetLength(distance.value(), mozilla::fallible)) {
412 aDest.Truncate();
413 return false; // out of memory
414 }
415 auto writer = aDest.BeginWriting();
416 nsScannerIterator fromBegin(aSrcStart);
417
418 copy_multifragment_string(fromBegin, aSrcEnd, writer);
419 return true;
420 }
421
AppendUnicodeTo(const nsScannerIterator & aSrcStart,const nsScannerIterator & aSrcEnd,nsScannerSharedSubstring & aDest)422 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
423 const nsScannerIterator& aSrcEnd,
424 nsScannerSharedSubstring& aDest) {
425 // Check whether we can just create a dependent string.
426 if (aDest.str().IsEmpty()) {
427 // We can just make |aDest| point to the buffer.
428 // This will take care of copying if the buffer spans fragments.
429 aDest.Rebind(aSrcStart, aSrcEnd);
430 return true;
431 }
432 // The dest string is not empty, so it can't be a dependent substring.
433 return AppendUnicodeTo(aSrcStart, aSrcEnd, aDest.writable());
434 }
435
AppendUnicodeTo(const nsScannerIterator & aSrcStart,const nsScannerIterator & aSrcEnd,nsAString & aDest)436 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
437 const nsScannerIterator& aSrcEnd, nsAString& aDest) {
438 const nsAString::size_type oldLength = aDest.Length();
439 mozilla::CheckedInt<nsAString::size_type> newLen(
440 Distance(aSrcStart, aSrcEnd));
441 newLen += oldLength;
442 if (!newLen.isValid()) {
443 return false; // overflow detected
444 }
445
446 if (!aDest.SetLength(newLen.value(), mozilla::fallible))
447 return false; // out of memory
448 auto writer = aDest.BeginWriting();
449 std::advance(writer, oldLength);
450 nsScannerIterator fromBegin(aSrcStart);
451
452 copy_multifragment_string(fromBegin, aSrcEnd, writer);
453 return true;
454 }
455
FindCharInReadable(char16_t aChar,nsScannerIterator & aSearchStart,const nsScannerIterator & aSearchEnd)456 bool FindCharInReadable(char16_t aChar, nsScannerIterator& aSearchStart,
457 const nsScannerIterator& aSearchEnd) {
458 while (aSearchStart != aSearchEnd) {
459 int32_t fragmentLength;
460 if (SameFragment(aSearchStart, aSearchEnd))
461 fragmentLength = aSearchEnd.get() - aSearchStart.get();
462 else
463 fragmentLength = aSearchStart.size_forward();
464
465 const char16_t* charFoundAt =
466 nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
467 if (charFoundAt) {
468 aSearchStart.advance(charFoundAt - aSearchStart.get());
469 return true;
470 }
471
472 aSearchStart.advance(fragmentLength);
473 }
474
475 return false;
476 }
477
FindInReadable(const nsAString & aPattern,nsScannerIterator & aSearchStart,nsScannerIterator & aSearchEnd,const nsStringComparator compare)478 bool FindInReadable(const nsAString& aPattern, nsScannerIterator& aSearchStart,
479 nsScannerIterator& aSearchEnd,
480 const nsStringComparator compare) {
481 bool found_it = false;
482
483 // only bother searching at all if we're given a non-empty range to search
484 if (aSearchStart != aSearchEnd) {
485 nsAString::const_iterator aPatternStart, aPatternEnd;
486 aPattern.BeginReading(aPatternStart);
487 aPattern.EndReading(aPatternEnd);
488
489 // outer loop keeps searching till we find it or run out of string to search
490 while (!found_it) {
491 // fast inner loop (that's what it's called, not what it is) looks for a
492 // potential match
493 while (aSearchStart != aSearchEnd &&
494 compare(aPatternStart.get(), aSearchStart.get(), 1, 1))
495 ++aSearchStart;
496
497 // if we broke out of the `fast' loop because we're out of string ...
498 // we're done: no match
499 if (aSearchStart == aSearchEnd) break;
500
501 // otherwise, we're at a potential match, let's see if we really hit one
502 nsAString::const_iterator testPattern(aPatternStart);
503 nsScannerIterator testSearch(aSearchStart);
504
505 // slow inner loop verifies the potential match (found by the `fast' loop)
506 // at the current position
507 for (;;) {
508 // we already compared the first character in the outer loop,
509 // so we'll advance before the next comparison
510 ++testPattern;
511 ++testSearch;
512
513 // if we verified all the way to the end of the pattern, then we found
514 // it!
515 if (testPattern == aPatternEnd) {
516 found_it = true;
517 aSearchEnd = testSearch; // return the exact found range through the
518 // parameters
519 break;
520 }
521
522 // if we got to end of the string we're searching before we hit the end
523 // of the
524 // pattern, we'll never find what we're looking for
525 if (testSearch == aSearchEnd) {
526 aSearchStart = aSearchEnd;
527 break;
528 }
529
530 // else if we mismatched ... it's time to advance to the next search
531 // position
532 // and get back into the `fast' loop
533 if (compare(testPattern.get(), testSearch.get(), 1, 1)) {
534 ++aSearchStart;
535 break;
536 }
537 }
538 }
539 }
540
541 return found_it;
542 }
543
544 /**
545 * This implementation is simple, but does too much work.
546 * It searches the entire string from left to right, and returns the last match
547 * found, if any. This implementation will be replaced when I get
548 * |reverse_iterator|s working.
549 */
RFindInReadable(const nsAString & aPattern,nsScannerIterator & aSearchStart,nsScannerIterator & aSearchEnd,const nsStringComparator aComparator)550 bool RFindInReadable(const nsAString& aPattern, nsScannerIterator& aSearchStart,
551 nsScannerIterator& aSearchEnd,
552 const nsStringComparator aComparator) {
553 bool found_it = false;
554
555 nsScannerIterator savedSearchEnd(aSearchEnd);
556 nsScannerIterator searchStart(aSearchStart), searchEnd(aSearchEnd);
557
558 while (searchStart != searchEnd) {
559 if (FindInReadable(aPattern, searchStart, searchEnd, aComparator)) {
560 found_it = true;
561
562 // this is the best match so far, so remember it
563 aSearchStart = searchStart;
564 aSearchEnd = searchEnd;
565
566 // ...and get ready to search some more
567 // (it's tempting to set |searchStart=searchEnd| ... but that misses
568 // overlapping patterns)
569 ++searchStart;
570 searchEnd = savedSearchEnd;
571 }
572 }
573
574 // if we never found it, return an empty range
575 if (!found_it) aSearchStart = aSearchEnd;
576
577 return found_it;
578 }
579