1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 /**
8  * StreamingLexer is a lexing framework designed to make it simple to write
9  * image decoders without worrying about the details of how the data is arriving
10  * from the network.
11  */
12 
13 #ifndef mozilla_image_StreamingLexer_h
14 #define mozilla_image_StreamingLexer_h
15 
16 #include <algorithm>
17 #include <cstdint>
18 #include <utility>
19 
20 #include "SourceBuffer.h"
21 #include "mozilla/Assertions.h"
22 #include "mozilla/Attributes.h"
23 #include "mozilla/Maybe.h"
24 #include "mozilla/Variant.h"
25 #include "mozilla/Vector.h"
26 
27 namespace mozilla {
28 namespace image {
29 
/// Buffering behaviors for StreamingLexer transitions. A transition carries
/// one of these values to say how the data it requests should be delivered.
enum class BufferingStrategy {
  BUFFERED,   // Data will be buffered and processed in one chunk.
  UNBUFFERED  // Data will be processed as it arrives, in multiple chunks.
};
35 
/// Control flow behaviors for StreamingLexer transitions. A transition carries
/// one of these values to say whether the lexer keeps going or hands control
/// back to the caller before running the next state.
enum class ControlFlowStrategy {
  CONTINUE,  // If there's enough data, proceed to the next state immediately.
  YIELD      // Yield to the caller before proceeding to the next state.
};
41 
/// Possible terminal states for the lexer. Once the lexer reaches one of
/// these, Lex() keeps returning it and delivers no more data.
enum class TerminalState { SUCCESS, FAILURE };
44 
/// Possible yield reasons for the lexer, i.e. the non-terminal results that
/// Lex() can hand back to its caller.
enum class Yield {
  NEED_MORE_DATA,   // The lexer cannot continue without more data.
  OUTPUT_AVAILABLE  // There is output available for the caller to consume.
};
50 
51 /// The result of a call to StreamingLexer::Lex().
52 typedef Variant<TerminalState, Yield> LexerResult;
53 
54 /**
55  * LexerTransition is a type used to give commands to the lexing framework.
56  * Code that uses StreamingLexer can create LexerTransition values using the
57  * static methods on Transition, and then return them to the lexing framework
58  * for execution.
59  */
60 template <typename State>
61 class LexerTransition {
62  public:
63   // This is implicit so that Terminate{Success,Failure}() can return a
64   // TerminalState and have it implicitly converted to a
65   // LexerTransition<State>, which avoids the need for a "<State>"
66   // qualification to the Terminate{Success,Failure}() callsite.
LexerTransition(TerminalState aFinalState)67   MOZ_IMPLICIT LexerTransition(TerminalState aFinalState)
68       : mNextState(aFinalState) {}
69 
NextStateIsTerminal()70   bool NextStateIsTerminal() const {
71     return mNextState.template is<TerminalState>();
72   }
73 
NextStateAsTerminal()74   TerminalState NextStateAsTerminal() const {
75     return mNextState.template as<TerminalState>();
76   }
77 
NextState()78   State NextState() const {
79     return mNextState.template as<NonTerminalState>().mState;
80   }
81 
UnbufferedState()82   State UnbufferedState() const {
83     return *mNextState.template as<NonTerminalState>().mUnbufferedState;
84   }
85 
Size()86   size_t Size() const {
87     return mNextState.template as<NonTerminalState>().mSize;
88   }
89 
Buffering()90   BufferingStrategy Buffering() const {
91     return mNextState.template as<NonTerminalState>().mBufferingStrategy;
92   }
93 
ControlFlow()94   ControlFlowStrategy ControlFlow() const {
95     return mNextState.template as<NonTerminalState>().mControlFlowStrategy;
96   }
97 
98  private:
99   friend struct Transition;
100 
LexerTransition(State aNextState,const Maybe<State> & aUnbufferedState,size_t aSize,BufferingStrategy aBufferingStrategy,ControlFlowStrategy aControlFlowStrategy)101   LexerTransition(State aNextState, const Maybe<State>& aUnbufferedState,
102                   size_t aSize, BufferingStrategy aBufferingStrategy,
103                   ControlFlowStrategy aControlFlowStrategy)
104       : mNextState(NonTerminalState(aNextState, aUnbufferedState, aSize,
105                                     aBufferingStrategy, aControlFlowStrategy)) {
106   }
107 
108   struct NonTerminalState {
109     State mState;
110     Maybe<State> mUnbufferedState;
111     size_t mSize;
112     BufferingStrategy mBufferingStrategy;
113     ControlFlowStrategy mControlFlowStrategy;
114 
NonTerminalStateNonTerminalState115     NonTerminalState(State aState, const Maybe<State>& aUnbufferedState,
116                      size_t aSize, BufferingStrategy aBufferingStrategy,
117                      ControlFlowStrategy aControlFlowStrategy)
118         : mState(aState),
119           mUnbufferedState(aUnbufferedState),
120           mSize(aSize),
121           mBufferingStrategy(aBufferingStrategy),
122           mControlFlowStrategy(aControlFlowStrategy) {
123       MOZ_ASSERT_IF(mBufferingStrategy == BufferingStrategy::UNBUFFERED,
124                     mUnbufferedState);
125       MOZ_ASSERT_IF(mUnbufferedState,
126                     mBufferingStrategy == BufferingStrategy::UNBUFFERED);
127     }
128   };
129 
130   Variant<NonTerminalState, TerminalState> mNextState;
131 };
132 
133 struct Transition {
134   /// Transition to @aNextState, buffering @aSize bytes of data.
135   template <typename State>
ToTransition136   static LexerTransition<State> To(const State& aNextState, size_t aSize) {
137     return LexerTransition<State>(aNextState, Nothing(), aSize,
138                                   BufferingStrategy::BUFFERED,
139                                   ControlFlowStrategy::CONTINUE);
140   }
141 
142   /// Yield to the caller, transitioning to @aNextState when Lex() is next
143   /// invoked. The same data that was delivered for the current state will be
144   /// delivered again.
145   template <typename State>
ToAfterYieldTransition146   static LexerTransition<State> ToAfterYield(const State& aNextState) {
147     return LexerTransition<State>(aNextState, Nothing(), 0,
148                                   BufferingStrategy::BUFFERED,
149                                   ControlFlowStrategy::YIELD);
150   }
151 
152   /**
153    * Transition to @aNextState via @aUnbufferedState, reading @aSize bytes of
154    * data unbuffered.
155    *
156    * The unbuffered data will be delivered in state @aUnbufferedState, which may
157    * be invoked repeatedly until all @aSize bytes have been delivered. Then,
158    * @aNextState will be invoked with no data. No state transitions are allowed
159    * from @aUnbufferedState except for transitions to a terminal state, so
160    * @aNextState will always be reached unless lexing terminates early.
161    */
162   template <typename State>
ToUnbufferedTransition163   static LexerTransition<State> ToUnbuffered(const State& aNextState,
164                                              const State& aUnbufferedState,
165                                              size_t aSize) {
166     return LexerTransition<State>(aNextState, Some(aUnbufferedState), aSize,
167                                   BufferingStrategy::UNBUFFERED,
168                                   ControlFlowStrategy::CONTINUE);
169   }
170 
171   /**
172    * Continue receiving unbuffered data. @aUnbufferedState should be the same
173    * state as the @aUnbufferedState specified in the preceding call to
174    * ToUnbuffered().
175    *
176    * This should be used during an unbuffered read initiated by ToUnbuffered().
177    */
178   template <typename State>
ContinueUnbufferedTransition179   static LexerTransition<State> ContinueUnbuffered(
180       const State& aUnbufferedState) {
181     return LexerTransition<State>(aUnbufferedState, Nothing(), 0,
182                                   BufferingStrategy::BUFFERED,
183                                   ControlFlowStrategy::CONTINUE);
184   }
185 
186   /**
187    * Continue receiving unbuffered data. @aUnbufferedState should be the same
188    * state as the @aUnbufferedState specified in the preceding call to
189    * ToUnbuffered(). @aSize indicates the amount of data that has already been
190    * consumed; the next state will receive the same data that was delivered to
191    * the current state, without the first @aSize bytes.
192    *
193    * This should be used during an unbuffered read initiated by ToUnbuffered().
194    */
195   template <typename State>
ContinueUnbufferedAfterYieldTransition196   static LexerTransition<State> ContinueUnbufferedAfterYield(
197       const State& aUnbufferedState, size_t aSize) {
198     return LexerTransition<State>(aUnbufferedState, Nothing(), aSize,
199                                   BufferingStrategy::BUFFERED,
200                                   ControlFlowStrategy::YIELD);
201   }
202 
203   /**
204    * Terminate lexing, ending up in terminal state SUCCESS. (The implicit
205    * LexerTransition constructor will convert the result to a LexerTransition
206    * as needed.)
207    *
208    * No more data will be delivered after this function is used.
209    */
TerminateSuccessTransition210   static TerminalState TerminateSuccess() { return TerminalState::SUCCESS; }
211 
212   /**
213    * Terminate lexing, ending up in terminal state FAILURE. (The implicit
214    * LexerTransition constructor will convert the result to a LexerTransition
215    * as needed.)
216    *
217    * No more data will be delivered after this function is used.
218    */
TerminateFailureTransition219   static TerminalState TerminateFailure() { return TerminalState::FAILURE; }
220 
221  private:
222   Transition();
223 };
224 
225 /**
226  * StreamingLexer is a lexing framework designed to make it simple to write
227  * image decoders without worrying about the details of how the data is arriving
228  * from the network.
229  *
230  * To use StreamingLexer:
231  *
232  *  - Create a State type. This should be an |enum class| listing all of the
233  *    states that you can be in while lexing the image format you're trying to
234  *    read.
235  *
236  *  - Add an instance of StreamingLexer<State> to your decoder class. Initialize
237  *    it with a Transition::To() the state that you want to start lexing in, and
238  *    a Transition::To() the state you'd like to use to handle truncated data.
239  *
240  *  - In your decoder's DoDecode() method, call Lex(), passing in the input
241  *    data and length that are passed to DoDecode(). You also need to pass
242  *    a lambda which dispatches to lexing code for each state based on the State
 *    value that's passed in. The lambda generally should just contain a
 *    |switch| statement that calls different methods for each State value. Each
245  *    method should return a LexerTransition<State>, which the lambda should
246  *    return in turn.
247  *
248  *  - Write the methods that actually implement lexing for your image format.
249  *    These methods should return either Transition::To(), to move on to another
250  *    state, or Transition::Terminate{Success,Failure}(), if lexing has
251  *    terminated in either success or failure. (There are also additional
252  *    transitions for unbuffered reads; see below.)
253  *
254  * That's the basics. The StreamingLexer will track your position in the input
255  * and buffer enough data so that your lexing methods can process everything in
256  * one pass. Lex() returns Yield::NEED_MORE_DATA if more data is needed, in
257  * which case you should just return from DoDecode(). If lexing reaches a
258  * terminal state, Lex() returns TerminalState::SUCCESS or
259  * TerminalState::FAILURE, and you can check which one to determine if lexing
260  * succeeded or failed and do any necessary cleanup.
261  *
262  * Sometimes, the input data is truncated. StreamingLexer will notify you when
263  * this happens by invoking the truncated data state you passed to the
264  * constructor. At this point you can attempt to recover and return
265  * TerminalState::SUCCESS or TerminalState::FAILURE, depending on whether you
266  * were successful. Note that you can't return anything other than a terminal
267  * state in this situation, since there's no more data to read. For the same
268  * reason, your truncated data state shouldn't require any data. (That is, the
269  * @aSize argument you pass to Transition::To() must be zero.) Violating these
270  * requirements will trigger assertions and an immediate transition to
271  * TerminalState::FAILURE.
272  *
273  * Some lexers may want to *avoid* buffering in some cases, and just process the
274  * data as it comes in. This is useful if, for example, you just want to skip
275  * over a large section of data; there's no point in buffering data you're just
276  * going to ignore.
277  *
278  * You can begin an unbuffered read with Transition::ToUnbuffered(). This works
279  * a little differently than Transition::To() in that you specify *two* states.
280  * The @aUnbufferedState argument specifies a state that will be called
281  * repeatedly with unbuffered data, as soon as it arrives. The implementation
282  * for that state should return either a transition to a terminal state, or a
283  * Transition::ContinueUnbuffered() to the same @aUnbufferedState. (From a
284  * technical perspective, it's not necessary to specify the state again, but
285  * it's helpful to human readers.) Once the amount of data requested in the
286  * original call to Transition::ToUnbuffered() has been delivered, Lex() will
287  * transition to the @aNextState state specified via Transition::ToUnbuffered().
288  * That state will be invoked with *no* data; it's just called to signal that
289  * the unbuffered read is over.
290  *
291  * It's sometimes useful for a lexer to provide incremental results, rather
292  * than simply running to completion and presenting all its output at once. For
293  * example, when decoding animated images, it may be useful to produce each
294  * frame incrementally. StreamingLexer supports this by allowing a lexer to
295  * yield.
296  *
297  * To yield back to the caller, a state implementation can simply return
298  * Transition::ToAfterYield(). ToAfterYield()'s @aNextState argument specifies
299  * the next state that the lexer should transition to, just like when using
300  * Transition::To(), but there are two differences. One is that Lex() will
301  * return to the caller before processing any more data when it encounters a
302  * yield transition. This provides an opportunity for the caller to interact
303  * with the lexer's intermediate results. The second difference is that
304  * @aNextState will be called with *the same data as the state that you returned
305  * Transition::ToAfterYield() from*. This allows a lexer to partially consume
306  * the data, return intermediate results, and then finish consuming the data
307  * when @aNextState is called.
308  *
309  * It's also possible to yield during an unbuffered read. Just return a
310  * Transition::ContinueUnbufferedAfterYield(). Just like with
311  * Transition::ContinueUnbuffered(), the @aUnbufferedState must be the same as
312  * the one originally passed to Transition::ToUnbuffered(). The second argument,
313  * @aSize, specifies the amount of data that the lexer has already consumed.
314  * When @aUnbufferedState is next invoked, it will get the same data that it
315  * received previously, except that the first @aSize bytes will be excluded.
316  * This makes it easy to consume unbuffered data incrementally.
317  *
 * XXX(seth): We should be able to get rid of the |State| stuff totally once bug
 * 1198451 lands, since we can then just return a function representing the next
 * state directly.
321  */
322 template <typename State, size_t InlineBufferSize = 16>
323 class StreamingLexer {
324  public:
StreamingLexer(const LexerTransition<State> & aStartState,const LexerTransition<State> & aTruncatedState)325   StreamingLexer(const LexerTransition<State>& aStartState,
326                  const LexerTransition<State>& aTruncatedState)
327       : mTransition(TerminalState::FAILURE),
328         mTruncatedTransition(aTruncatedState) {
329     if (!aStartState.NextStateIsTerminal() &&
330         aStartState.ControlFlow() == ControlFlowStrategy::YIELD) {
331       // Allowing a StreamingLexer to start in a yield state doesn't make sense
332       // semantically (since yield states are supposed to deliver the same data
333       // as previous states, and there's no previous state here), but more
334       // importantly, it's necessary to advance a SourceBufferIterator at least
335       // once before you can read from it, and adding the necessary checks to
336       // Lex() to avoid that issue has the potential to mask real bugs. So
337       // instead, it's better to forbid starting in a yield state.
338       MOZ_ASSERT_UNREACHABLE("Starting in a yield state");
339       return;
340     }
341 
342     if (!aTruncatedState.NextStateIsTerminal() &&
343         (aTruncatedState.ControlFlow() == ControlFlowStrategy::YIELD ||
344          aTruncatedState.Buffering() == BufferingStrategy::UNBUFFERED ||
345          aTruncatedState.Size() != 0)) {
346       // The truncated state can't receive any data because, by definition,
347       // there is no more data to receive. That means that yielding or an
348       // unbuffered read would not make sense, and that the state must require
349       // zero bytes.
350       MOZ_ASSERT_UNREACHABLE("Truncated state makes no sense");
351       return;
352     }
353 
354     SetTransition(aStartState);
355   }
356 
357   /**
358    * From the given SourceBufferIterator, aIterator, create a new iterator at
359    * the same position, with the given read limit, aReadLimit. The read limit
360    * applies after adjusting for the position. If the given iterator has been
361    * advanced, but required buffering inside StreamingLexer, the position
362    * of the cloned iterator will be at the beginning of buffered data; this
363    * should match the perspective of the caller.
364    */
Clone(SourceBufferIterator & aIterator,size_t aReadLimit)365   Maybe<SourceBufferIterator> Clone(SourceBufferIterator& aIterator,
366                                     size_t aReadLimit) const {
367     // In order to advance to the current position of the iterator from the
368     // perspective of the caller, we need to take into account if we are
369     // buffering data.
370     size_t pos = aIterator.Position();
371     if (!mBuffer.empty()) {
372       pos += aIterator.Length();
373       MOZ_ASSERT(pos > mBuffer.length());
374       pos -= mBuffer.length();
375     }
376 
377     size_t readLimit = aReadLimit;
378     if (aReadLimit != SIZE_MAX) {
379       readLimit += pos;
380     }
381 
382     SourceBufferIterator other = aIterator.Owner()->Iterator(readLimit);
383 
384     // Since the current iterator has already advanced to this point, we
385     // know that the state can only be READY or COMPLETE. That does not mean
386     // everything is stored in a single chunk, and may require multiple Advance
387     // calls to get where we want to be.
388     SourceBufferIterator::State state;
389     do {
390       state = other.Advance(pos);
391       if (state != SourceBufferIterator::READY) {
392         // The only way we should fail to advance over data we already seen is
393         // if we hit an error while inserting data into the buffer. This will
394         // cause an early exit.
395         MOZ_ASSERT(NS_FAILED(other.CompletionStatus()));
396         return Nothing();
397       }
398       MOZ_ASSERT(pos >= other.Length());
399       pos -= other.Length();
400     } while (pos > 0);
401 
402     // Force the data pointer to be where we expect it to be.
403     state = other.Advance(0);
404     if (state != SourceBufferIterator::READY) {
405       // The current position could be the end of the buffer, in which case
406       // there is no point cloning with no more data to read.
407       MOZ_ASSERT(state == SourceBufferIterator::COMPLETE);
408       return Nothing();
409     }
410     return Some(std::move(other));
411   }
412 
413   template <typename Func>
Lex(SourceBufferIterator & aIterator,IResumable * aOnResume,Func aFunc)414   LexerResult Lex(SourceBufferIterator& aIterator, IResumable* aOnResume,
415                   Func aFunc) {
416     if (mTransition.NextStateIsTerminal()) {
417       // We've already reached a terminal state. We never deliver any more data
418       // in this case; just return the terminal state again immediately.
419       return LexerResult(mTransition.NextStateAsTerminal());
420     }
421 
422     Maybe<LexerResult> result;
423 
424     // If the lexer requested a yield last time, we deliver the same data again
425     // before we read anything else from |aIterator|. Note that although to the
426     // callers of Lex(), Yield::NEED_MORE_DATA is just another type of yield,
427     // internally they're different in that we don't redeliver the same data in
428     // the Yield::NEED_MORE_DATA case, and |mYieldingToState| is not set. This
429     // means that for Yield::NEED_MORE_DATA, we go directly to the loop below.
430     if (mYieldingToState) {
431       result = mTransition.Buffering() == BufferingStrategy::UNBUFFERED
432                    ? UnbufferedReadAfterYield(aIterator, aFunc)
433                    : BufferedReadAfterYield(aIterator, aFunc);
434     }
435 
436     while (!result) {
437       MOZ_ASSERT_IF(mTransition.Buffering() == BufferingStrategy::UNBUFFERED,
438                     mUnbufferedState);
439 
440       // Figure out how much we need to read.
441       const size_t toRead =
442           mTransition.Buffering() == BufferingStrategy::UNBUFFERED
443               ? mUnbufferedState->mBytesRemaining
444               : mTransition.Size() - mBuffer.length();
445 
446       // Attempt to advance the iterator by |toRead| bytes.
447       switch (aIterator.AdvanceOrScheduleResume(toRead, aOnResume)) {
448         case SourceBufferIterator::WAITING:
449           // We can't continue because the rest of the data hasn't arrived from
450           // the network yet. We don't have to do anything special; the
451           // SourceBufferIterator will ensure that |aOnResume| gets called when
452           // more data is available.
453           result = Some(LexerResult(Yield::NEED_MORE_DATA));
454           break;
455 
456         case SourceBufferIterator::COMPLETE:
457           // The data is truncated; if not, the lexer would've reached a
458           // terminal state by now. We only get to
459           // SourceBufferIterator::COMPLETE after every byte of data has been
460           // delivered to the lexer.
461           result = Truncated(aIterator, aFunc);
462           break;
463 
464         case SourceBufferIterator::READY:
465           // Process the new data that became available.
466           MOZ_ASSERT(aIterator.Data());
467 
468           result = mTransition.Buffering() == BufferingStrategy::UNBUFFERED
469                        ? UnbufferedRead(aIterator, aFunc)
470                        : BufferedRead(aIterator, aFunc);
471           break;
472 
473         default:
474           MOZ_ASSERT_UNREACHABLE("Unknown SourceBufferIterator state");
475           result = SetTransition(Transition::TerminateFailure());
476       }
477     };
478 
479     return *result;
480   }
481 
482  private:
483   template <typename Func>
UnbufferedRead(SourceBufferIterator & aIterator,Func aFunc)484   Maybe<LexerResult> UnbufferedRead(SourceBufferIterator& aIterator,
485                                     Func aFunc) {
486     MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::UNBUFFERED);
487     MOZ_ASSERT(mUnbufferedState);
488     MOZ_ASSERT(!mYieldingToState);
489     MOZ_ASSERT(mBuffer.empty(),
490                "Buffered read at the same time as unbuffered read?");
491     MOZ_ASSERT(aIterator.Length() <= mUnbufferedState->mBytesRemaining,
492                "Read too much data during unbuffered read?");
493     MOZ_ASSERT(mUnbufferedState->mBytesConsumedInCurrentChunk == 0,
494                "Already consumed data in the current chunk, but not yielding?");
495 
496     if (mUnbufferedState->mBytesRemaining == 0) {
497       // We're done with the unbuffered read, so transition to the next state.
498       return SetTransition(aFunc(mTransition.NextState(), nullptr, 0));
499     }
500 
501     return ContinueUnbufferedRead(aIterator.Data(), aIterator.Length(),
502                                   aIterator.Length(), aFunc);
503   }
504 
505   template <typename Func>
UnbufferedReadAfterYield(SourceBufferIterator & aIterator,Func aFunc)506   Maybe<LexerResult> UnbufferedReadAfterYield(SourceBufferIterator& aIterator,
507                                               Func aFunc) {
508     MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::UNBUFFERED);
509     MOZ_ASSERT(mUnbufferedState);
510     MOZ_ASSERT(mYieldingToState);
511     MOZ_ASSERT(mBuffer.empty(),
512                "Buffered read at the same time as unbuffered read?");
513     MOZ_ASSERT(aIterator.Length() <= mUnbufferedState->mBytesRemaining,
514                "Read too much data during unbuffered read?");
515     MOZ_ASSERT(
516         mUnbufferedState->mBytesConsumedInCurrentChunk <= aIterator.Length(),
517         "Consumed more data than the current chunk holds?");
518     MOZ_ASSERT(mTransition.UnbufferedState() == *mYieldingToState);
519 
520     mYieldingToState = Nothing();
521 
522     if (mUnbufferedState->mBytesRemaining == 0) {
523       // We're done with the unbuffered read, so transition to the next state.
524       return SetTransition(aFunc(mTransition.NextState(), nullptr, 0));
525     }
526 
527     // Since we've yielded, we may have already consumed some data in this
528     // chunk. Make the necessary adjustments. (Note that the std::min call is
529     // just belt-and-suspenders to keep this code memory safe even if there's
530     // a bug somewhere.)
531     const size_t toSkip = std::min(
532         mUnbufferedState->mBytesConsumedInCurrentChunk, aIterator.Length());
533     const char* data = aIterator.Data() + toSkip;
534     const size_t length = aIterator.Length() - toSkip;
535 
536     // If |length| is zero, we've hit the end of the current chunk. This only
537     // happens if we yield right at the end of a chunk. Rather than call |aFunc|
538     // with a |length| of zero bytes (which seems potentially surprising to
539     // decoder authors), we go ahead and read more data.
540     if (length == 0) {
541       return FinishCurrentChunkOfUnbufferedRead(aIterator.Length());
542     }
543 
544     return ContinueUnbufferedRead(data, length, aIterator.Length(), aFunc);
545   }
546 
547   template <typename Func>
ContinueUnbufferedRead(const char * aData,size_t aLength,size_t aChunkLength,Func aFunc)548   Maybe<LexerResult> ContinueUnbufferedRead(const char* aData, size_t aLength,
549                                             size_t aChunkLength, Func aFunc) {
550     // Call aFunc with the unbuffered state to indicate that we're in the
551     // middle of an unbuffered read. We enforce that any state transition
552     // passed back to us is either a terminal state or takes us back to the
553     // unbuffered state.
554     LexerTransition<State> unbufferedTransition =
555         aFunc(mTransition.UnbufferedState(), aData, aLength);
556 
557     // If we reached a terminal state, we're done.
558     if (unbufferedTransition.NextStateIsTerminal()) {
559       return SetTransition(unbufferedTransition);
560     }
561 
562     MOZ_ASSERT(mTransition.UnbufferedState() ==
563                unbufferedTransition.NextState());
564 
565     // Perform bookkeeping.
566     if (unbufferedTransition.ControlFlow() == ControlFlowStrategy::YIELD) {
567       mUnbufferedState->mBytesConsumedInCurrentChunk +=
568           unbufferedTransition.Size();
569       return SetTransition(unbufferedTransition);
570     }
571 
572     MOZ_ASSERT(unbufferedTransition.Size() == 0);
573     return FinishCurrentChunkOfUnbufferedRead(aChunkLength);
574   }
575 
FinishCurrentChunkOfUnbufferedRead(size_t aChunkLength)576   Maybe<LexerResult> FinishCurrentChunkOfUnbufferedRead(size_t aChunkLength) {
577     // We've finished an unbuffered read of a chunk of length |aChunkLength|, so
578     // update |myBytesRemaining| to reflect that we're |aChunkLength| closer to
579     // the end of the unbuffered read. (The std::min call is just
580     // belt-and-suspenders to keep this code memory safe even if there's a bug
581     // somewhere.)
582     mUnbufferedState->mBytesRemaining -=
583         std::min(mUnbufferedState->mBytesRemaining, aChunkLength);
584 
585     // Since we're moving on to a new chunk, we can forget about the count of
586     // bytes consumed by yielding in the current chunk.
587     mUnbufferedState->mBytesConsumedInCurrentChunk = 0;
588 
589     return Nothing();  // Keep processing.
590   }
591 
592   template <typename Func>
BufferedRead(SourceBufferIterator & aIterator,Func aFunc)593   Maybe<LexerResult> BufferedRead(SourceBufferIterator& aIterator, Func aFunc) {
594     MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::BUFFERED);
595     MOZ_ASSERT(!mYieldingToState);
596     MOZ_ASSERT(!mUnbufferedState,
597                "Buffered read at the same time as unbuffered read?");
598     MOZ_ASSERT(mBuffer.length() < mTransition.Size() ||
599                    (mBuffer.length() == 0 && mTransition.Size() == 0),
600                "Buffered more than we needed?");
601 
602     // If we have all the data, we don't actually need to buffer anything.
603     if (mBuffer.empty() && aIterator.Length() == mTransition.Size()) {
604       return SetTransition(
605           aFunc(mTransition.NextState(), aIterator.Data(), aIterator.Length()));
606     }
607 
608     // We do need to buffer, so make sure the buffer has enough capacity. We
609     // deliberately wait until we know for sure we need to buffer to call
610     // reserve() since it could require memory allocation.
611     if (!mBuffer.reserve(mTransition.Size())) {
612       return SetTransition(Transition::TerminateFailure());
613     }
614 
615     // Append the new data we just got to the buffer.
616     if (!mBuffer.append(aIterator.Data(), aIterator.Length())) {
617       return SetTransition(Transition::TerminateFailure());
618     }
619 
620     if (mBuffer.length() != mTransition.Size()) {
621       return Nothing();  // Keep processing.
622     }
623 
624     // We've buffered everything, so transition to the next state.
625     return SetTransition(
626         aFunc(mTransition.NextState(), mBuffer.begin(), mBuffer.length()));
627   }
628 
629   template <typename Func>
BufferedReadAfterYield(SourceBufferIterator & aIterator,Func aFunc)630   Maybe<LexerResult> BufferedReadAfterYield(SourceBufferIterator& aIterator,
631                                             Func aFunc) {
632     MOZ_ASSERT(mTransition.Buffering() == BufferingStrategy::BUFFERED);
633     MOZ_ASSERT(mYieldingToState);
634     MOZ_ASSERT(!mUnbufferedState,
635                "Buffered read at the same time as unbuffered read?");
636     MOZ_ASSERT(mBuffer.length() <= mTransition.Size(),
637                "Buffered more than we needed?");
638 
639     State nextState = std::move(*mYieldingToState);
640 
641     // After a yield, we need to take the same data that we delivered to the
642     // last state, and deliver it again to the new state. We know that this is
643     // happening right at a state transition, and that the last state was a
644     // buffered read, so there are two cases:
645 
646     // 1. We got the data from the SourceBufferIterator directly.
647     if (mBuffer.empty() && aIterator.Length() == mTransition.Size()) {
648       return SetTransition(
649           aFunc(nextState, aIterator.Data(), aIterator.Length()));
650     }
651 
652     // 2. We got the data from the buffer.
653     if (mBuffer.length() == mTransition.Size()) {
654       return SetTransition(aFunc(nextState, mBuffer.begin(), mBuffer.length()));
655     }
656 
657     // Anything else indicates a bug.
658     MOZ_ASSERT_UNREACHABLE("Unexpected state encountered during yield");
659     return SetTransition(Transition::TerminateFailure());
660   }
661 
662   template <typename Func>
Truncated(SourceBufferIterator & aIterator,Func aFunc)663   Maybe<LexerResult> Truncated(SourceBufferIterator& aIterator, Func aFunc) {
664     // The data is truncated. Let the lexer clean up and decide which terminal
665     // state we should end up in.
666     LexerTransition<State> transition =
667         mTruncatedTransition.NextStateIsTerminal()
668             ? mTruncatedTransition
669             : aFunc(mTruncatedTransition.NextState(), nullptr, 0);
670 
671     if (!transition.NextStateIsTerminal()) {
672       MOZ_ASSERT_UNREACHABLE("Truncated state didn't lead to terminal state?");
673       return SetTransition(Transition::TerminateFailure());
674     }
675 
676     // If the SourceBuffer was completed with a failing state, we end in
677     // TerminalState::FAILURE no matter what. This only happens in exceptional
678     // situations like SourceBuffer itself encountering a failure due to OOM.
679     if (NS_FAILED(aIterator.CompletionStatus())) {
680       return SetTransition(Transition::TerminateFailure());
681     }
682 
683     return SetTransition(transition);
684   }
685 
SetTransition(const LexerTransition<State> & aTransition)686   Maybe<LexerResult> SetTransition(const LexerTransition<State>& aTransition) {
687     // There should be no transitions while we're buffering for a buffered read
688     // unless they're to terminal states. (The terminal state transitions would
689     // generally be triggered by error handling code.)
690     MOZ_ASSERT_IF(!mBuffer.empty(), aTransition.NextStateIsTerminal() ||
691                                         mBuffer.length() == mTransition.Size());
692 
693     // Similarly, the only transitions allowed in the middle of an unbuffered
694     // read are to a terminal state, or a yield to the same state. Otherwise, we
695     // should remain in the same state until the unbuffered read completes.
696     MOZ_ASSERT_IF(
697         mUnbufferedState,
698         aTransition.NextStateIsTerminal() ||
699             (aTransition.ControlFlow() == ControlFlowStrategy::YIELD &&
700              aTransition.NextState() == mTransition.UnbufferedState()) ||
701             mUnbufferedState->mBytesRemaining == 0);
702 
703     // If this transition is a yield, save the next state and return. We'll
704     // handle the rest when Lex() gets called again.
705     if (!aTransition.NextStateIsTerminal() &&
706         aTransition.ControlFlow() == ControlFlowStrategy::YIELD) {
707       mYieldingToState = Some(aTransition.NextState());
708       return Some(LexerResult(Yield::OUTPUT_AVAILABLE));
709     }
710 
711     // Update our transition.
712     mTransition = aTransition;
713 
714     // Get rid of anything left over from the previous state.
715     mBuffer.clear();
716     mYieldingToState = Nothing();
717     mUnbufferedState = Nothing();
718 
719     // If we reached a terminal state, let the caller know.
720     if (mTransition.NextStateIsTerminal()) {
721       return Some(LexerResult(mTransition.NextStateAsTerminal()));
722     }
723 
724     // If we're entering an unbuffered state, record how long we'll stay in it.
725     if (mTransition.Buffering() == BufferingStrategy::UNBUFFERED) {
726       mUnbufferedState.emplace(mTransition.Size());
727     }
728 
729     return Nothing();  // Keep processing.
730   }
731 
// Progress bookkeeping for an unbuffered read that is currently under way.
struct UnbufferedState {
  // @param aBytesRemaining Initial value for mBytesRemaining; SetTransition()
  //                        passes the size of the unbuffered transition.
  explicit UnbufferedState(size_t aBytesRemaining)
      : mBytesRemaining(aBytesRemaining) {}

  // Byte count still outstanding for this unbuffered read; SetTransition()
  // treats zero as "the read has completed".
  size_t mBytesRemaining;

  // Always starts at zero for a freshly entered unbuffered state.
  size_t mBytesConsumedInCurrentChunk = 0;
};
740 
// Accumulates data for a buffered read until mTransition.Size() bytes are
// available; cleared by SetTransition() on every non-yield transition.
Vector<char, InlineBufferSize> mBuffer;
// The transition currently in effect; replaced by SetTransition().
LexerTransition<State> mTransition;
// The transition consulted by Truncated() when the data ends prematurely.
const LexerTransition<State> mTruncatedTransition;
// Set by SetTransition() when a transition yields: the state to resume in.
// Reset to Nothing() by the next non-yield transition.
Maybe<State> mYieldingToState;
// Present only while the current transition is an unbuffered read; emplaced
// by SetTransition() with the transition's size.
Maybe<UnbufferedState> mUnbufferedState;
746 };
747 
748 }  // namespace image
749 }  // namespace mozilla
750 
751 #endif  // mozilla_image_StreamingLexer_h
752