1 /**********
2 This library is free software; you can redistribute it and/or modify it under
3 the terms of the GNU Lesser General Public License as published by the
4 Free Software Foundation; either version 3 of the License, or (at your
5 option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.)
6 
7 This library is distributed in the hope that it will be useful, but WITHOUT
8 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
10 more details.
11 
12 You should have received a copy of the GNU Lesser General Public License
13 along with this library; if not, write to the Free Software Foundation, Inc.,
14 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
15 **********/
16 // "liveMedia"
17 // Copyright (c) 1996-2020 Live Networks, Inc.  All rights reserved.
18 // A parser for a Matroska file.
19 // C++ header
20 
21 #ifndef _MATROSKA_FILE_PARSER_HH
22 
23 #ifndef _STREAM_PARSER_HH
24 #include "StreamParser.hh"
25 #endif
26 #ifndef _MATROSKA_FILE_HH
27 #include "MatroskaFile.hh"
28 #endif
29 #ifndef _EBML_NUMBER_HH
30 #include "EBMLNumber.hh"
31 #endif
32 
// The states that the Matroska file parser can be in.
// ("MatroskaFileParser::fCurrentParseState" holds one of these values.)
// The numeric values are written out for reference only; each one equals the value that the
// enumerator would otherwise receive implicitly from its position in the list.
// NOTE(review): the state names mirror the private parsing functions of "MatroskaFileParser"
// (e.g., "parseTrack()", "parseCues()", "parseBlock()"); presumably each state selects the
// corresponding function - confirm against the implementation file.
enum MatroskaParseState {
  PARSING_START_OF_FILE         = 0,
  LOOKING_FOR_TRACKS            = 1,
  PARSING_TRACK                 = 2,
  PARSING_CUES                  = 3,
  LOOKING_FOR_CLUSTER           = 4,
  LOOKING_FOR_BLOCK             = 5,
  PARSING_BLOCK                 = 6,
  DELIVERING_FRAME_WITHIN_BLOCK = 7,
  DELIVERING_FRAME_BYTES        = 8
};
45 
46 class MatroskaFileParser: public StreamParser {
47 public:
48   MatroskaFileParser(MatroskaFile& ourFile, FramedSource* inputSource,
49 		     FramedSource::onCloseFunc* onEndFunc, void* onEndClientData,
50 		     MatroskaDemux* ourDemux = NULL);
51   virtual ~MatroskaFileParser();
52 
53   void seekToTime(double& seekNPT);
54   void pause();
55 
56   // StreamParser 'client continue' function:
57   static void continueParsing(void* clientData, unsigned char* ptr, unsigned size, struct timeval presentationTime);
58   void continueParsing();
59 
60 private:
61   // Parsing functions:
62   Boolean parse();
63     // returns True iff we have finished parsing to the end of all 'Track' headers (on initialization)
64 
65   Boolean parseStartOfFile();
66   void lookForNextTrack();
67   Boolean parseTrack();
68   Boolean parseCues();
69 
70   void lookForNextBlock();
71   void parseBlock();
72   Boolean deliverFrameWithinBlock();
73   void deliverFrameBytes();
74 
75   void getCommonFrameBytes(MatroskaTrack* track, u_int8_t* to, unsigned numBytesToGet, unsigned numBytesToSkip);
76 
77   Boolean parseEBMLNumber(EBMLNumber& num);
78   Boolean parseEBMLIdAndSize(EBMLId& id, EBMLDataSize& size);
79   Boolean parseEBMLVal_unsigned64(EBMLDataSize& size, u_int64_t& result);
80   Boolean parseEBMLVal_unsigned(EBMLDataSize& size, unsigned& result);
81   Boolean parseEBMLVal_float(EBMLDataSize& size, float& result);
82   Boolean parseEBMLVal_string(EBMLDataSize& size, char*& result);
83     // Note: "result" is dynamically allocated; the caller must delete[] it later
84   Boolean parseEBMLVal_binary(EBMLDataSize& size, u_int8_t*& result);
85     // Note: "result" is dynamically allocated; the caller must delete[] it later
86   void skipHeader(EBMLDataSize const& size);
87   void skipRemainingHeaderBytes(Boolean isContinuation);
88 
89   void setParseState();
90 
91   void seekToFilePosition(u_int64_t offsetInFile);
92   void seekToEndOfFile();
93   void resetStateAfterSeeking(); // common code, called by both of the above
94 
95   void resetPresentationTimes();
96       // called after a seek or pause to ensure that presentation times continue to be
97       // aligned with 'wall clock' time
98 
99 private: // redefined virtual functions
100   virtual void restoreSavedParserState();
101 
102 private:
103   // General state for parsing:
104   MatroskaFile& fOurFile;
105   FramedSource* fInputSource;
106   FramedSource::onCloseFunc* fOnEndFunc;
107   void* fOnEndClientData;
108   MatroskaDemux* fOurDemux;
109   MatroskaParseState fCurrentParseState;
110   u_int64_t fCurOffsetInFile, fSavedCurOffsetInFile, fLimitOffsetInFile;
111 
112   // For skipping over (possibly large) headers:
113   u_int64_t fNumHeaderBytesToSkip;
114 
115   // For parsing 'Seek ID's:
116   EBMLId fLastSeekId;
117 
118   // Parameters of the most recently-parsed 'Cluster':
119   unsigned fClusterTimecode;
120 
121   // Parameters of the most recently-parsed 'Block':
122   unsigned fBlockSize;
123   unsigned fBlockTrackNumber;
124   short fBlockTimecode;
125   unsigned fNumFramesInBlock;
126   unsigned* fFrameSizesWithinBlock;
127 
128   // Parameters of the most recently-parsed frame within a 'Block':
129   double fPresentationTimeOffset;
130   unsigned fNextFrameNumberToDeliver;
131   unsigned fCurOffsetWithinFrame, fSavedCurOffsetWithinFrame; // used if track->haveSubframes()
132 
133   // Parameters of the (sub)frame that's currently being delivered:
134   u_int8_t* fCurFrameTo;
135   unsigned fCurFrameNumBytesToGet;
136   unsigned fCurFrameNumBytesToSkip;
137 };
138 
139 #endif
140