1 /**********
2 This library is free software; you can redistribute it and/or modify it under
3 the terms of the GNU Lesser General Public License as published by the
4 Free Software Foundation; either version 3 of the License, or (at your
5 option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.)
6 
7 This library is distributed in the hope that it will be useful, but WITHOUT
8 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9 FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
10 more details.
11 
12 You should have received a copy of the GNU Lesser General Public License
13 along with this library; if not, write to the Free Software Foundation, Inc.,
14 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
15 **********/
16 // "liveMedia"
17 // Copyright (c) 1996-2020 Live Networks, Inc.  All rights reserved.
18 // A filter that breaks up a H.264 or H.265 Video Elementary Stream into NAL units.
19 // Implementation
20 
21 #include "H264or5VideoStreamFramer.hh"
22 #include "MPEGVideoStreamParser.hh"
23 #include "BitVector.hh"
24 #include <GroupsockHelper.hh> // for "gettimeofday()"
25 
26 ////////// H264or5VideoStreamParser definition //////////
27 
28 class H264or5VideoStreamParser: public MPEGVideoStreamParser {
29 public:
30   H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource,
31 			   FramedSource* inputSource, Boolean includeStartCodeInOutput);
32   virtual ~H264or5VideoStreamParser();
33 
34 private: // redefined virtual functions:
35   virtual void flushInput();
36   virtual unsigned parse();
37 
38 private:
usingSource()39   H264or5VideoStreamFramer* usingSource() {
40     return (H264or5VideoStreamFramer*)fUsingSource;
41   }
42 
isVPS(u_int8_t nal_unit_type)43   Boolean isVPS(u_int8_t nal_unit_type) { return usingSource()->isVPS(nal_unit_type); }
isSPS(u_int8_t nal_unit_type)44   Boolean isSPS(u_int8_t nal_unit_type) { return usingSource()->isSPS(nal_unit_type); }
isPPS(u_int8_t nal_unit_type)45   Boolean isPPS(u_int8_t nal_unit_type) { return usingSource()->isPPS(nal_unit_type); }
isVCL(u_int8_t nal_unit_type)46   Boolean isVCL(u_int8_t nal_unit_type) { return usingSource()->isVCL(nal_unit_type); }
47   Boolean isSEI(u_int8_t nal_unit_type);
48   Boolean isEOF(u_int8_t nal_unit_type);
49   Boolean usuallyBeginsAccessUnit(u_int8_t nal_unit_type);
50 
51   void removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize);
52 
53   void analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale);
54   void analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale);
55   void profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1);
56   void analyze_vui_parameters(BitVector& bv, unsigned& num_units_in_tick, unsigned& time_scale);
57   void analyze_hrd_parameters(BitVector& bv);
58   void analyze_sei_data(u_int8_t nal_unit_type);
59   void analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload);
60 
61 private:
62   int fHNumber; // 264 or 265
63   unsigned fOutputStartCodeSize;
64   Boolean fHaveSeenFirstStartCode, fHaveSeenFirstByteOfNALUnit;
65   u_int8_t fFirstByteOfNALUnit;
66   double fParsedFrameRate;
67   // variables set & used in the specification:
68   unsigned cpb_removal_delay_length_minus1, dpb_output_delay_length_minus1;
69   Boolean CpbDpbDelaysPresentFlag, pic_struct_present_flag;
70   double DeltaTfiDivisor;
71 };
72 
73 
74 ////////// H264or5VideoStreamFramer implementation //////////
75 
76 H264or5VideoStreamFramer
H264or5VideoStreamFramer(int hNumber,UsageEnvironment & env,FramedSource * inputSource,Boolean createParser,Boolean includeStartCodeInOutput,Boolean insertAccessUnitDelimiters)77 ::H264or5VideoStreamFramer(int hNumber, UsageEnvironment& env, FramedSource* inputSource,
78 			   Boolean createParser,
79 			   Boolean includeStartCodeInOutput, Boolean insertAccessUnitDelimiters)
80   : MPEGVideoStreamFramer(env, inputSource),
81     fHNumber(hNumber), fIncludeStartCodeInOutput(includeStartCodeInOutput),
82     fInsertAccessUnitDelimiters(insertAccessUnitDelimiters),
83     fLastSeenVPS(NULL), fLastSeenVPSSize(0),
84     fLastSeenSPS(NULL), fLastSeenSPSSize(0),
85     fLastSeenPPS(NULL), fLastSeenPPSSize(0) {
86   fParser = createParser
87     ? new H264or5VideoStreamParser(hNumber, this, inputSource, includeStartCodeInOutput)
88     : NULL;
89   fFrameRate = 25.0; // We assume a frame rate of 25 fps, unless we learn otherwise (from parsing a VPS or SPS NAL unit)
90 }
91 
~H264or5VideoStreamFramer()92 H264or5VideoStreamFramer::~H264or5VideoStreamFramer() {
93   delete[] fLastSeenPPS;
94   delete[] fLastSeenSPS;
95   delete[] fLastSeenVPS;
96 }
97 
98 #define VPS_MAX_SIZE 1000 // larger than the largest possible VPS (Video Parameter Set) NAL unit
99 
saveCopyOfVPS(u_int8_t * from,unsigned size)100 void H264or5VideoStreamFramer::saveCopyOfVPS(u_int8_t* from, unsigned size) {
101   if (from == NULL) return;
102   delete[] fLastSeenVPS;
103   fLastSeenVPS = new u_int8_t[size];
104   memmove(fLastSeenVPS, from, size);
105 
106   fLastSeenVPSSize = size;
107 }
108 
109 #define SPS_MAX_SIZE 1000 // larger than the largest possible SPS (Sequence Parameter Set) NAL unit
110 
saveCopyOfSPS(u_int8_t * from,unsigned size)111 void H264or5VideoStreamFramer::saveCopyOfSPS(u_int8_t* from, unsigned size) {
112   if (from == NULL) return;
113   delete[] fLastSeenSPS;
114   fLastSeenSPS = new u_int8_t[size];
115   memmove(fLastSeenSPS, from, size);
116 
117   fLastSeenSPSSize = size;
118 }
119 
saveCopyOfPPS(u_int8_t * from,unsigned size)120 void H264or5VideoStreamFramer::saveCopyOfPPS(u_int8_t* from, unsigned size) {
121   if (from == NULL) return;
122   delete[] fLastSeenPPS;
123   fLastSeenPPS = new u_int8_t[size];
124   memmove(fLastSeenPPS, from, size);
125 
126   fLastSeenPPSSize = size;
127 }
128 
setPresentationTime()129 void H264or5VideoStreamFramer::setPresentationTime() {
130   if (fPresentationTimeBase.tv_sec == 0 && fPresentationTimeBase.tv_usec == 0) {
131     // Set to the current time:
132     gettimeofday(&fPresentationTimeBase, NULL);
133     fNextPresentationTime = fPresentationTimeBase;
134   }
135   fPresentationTime = fNextPresentationTime;
136 }
137 
isVPS(u_int8_t nal_unit_type)138 Boolean H264or5VideoStreamFramer::isVPS(u_int8_t nal_unit_type) {
139   // VPS NAL units occur in H.265 only:
140   return fHNumber == 265 && nal_unit_type == 32;
141 }
142 
isSPS(u_int8_t nal_unit_type)143 Boolean H264or5VideoStreamFramer::isSPS(u_int8_t nal_unit_type) {
144   return fHNumber == 264 ? nal_unit_type == 7 : nal_unit_type == 33;
145 }
146 
isPPS(u_int8_t nal_unit_type)147 Boolean H264or5VideoStreamFramer::isPPS(u_int8_t nal_unit_type) {
148   return fHNumber == 264 ? nal_unit_type == 8 : nal_unit_type == 34;
149 }
150 
isVCL(u_int8_t nal_unit_type)151 Boolean H264or5VideoStreamFramer::isVCL(u_int8_t nal_unit_type) {
152   return fHNumber == 264
153     ? (nal_unit_type <= 5 && nal_unit_type > 0)
154     : (nal_unit_type <= 31);
155 }
156 
doGetNextFrame()157 void H264or5VideoStreamFramer::doGetNextFrame() {
158   if (fInsertAccessUnitDelimiters && pictureEndMarker()) {
159     // Deliver an "access_unit_delimiter" NAL unit instead:
160     unsigned const startCodeSize = fIncludeStartCodeInOutput ? 4: 0;
161     unsigned const audNALSize = fHNumber == 264 ? 2 : 3;
162 
163     fFrameSize = startCodeSize + audNALSize;
164     if (fFrameSize > fMaxSize) { // there's no space
165       fNumTruncatedBytes = fFrameSize - fMaxSize;
166       fFrameSize = fMaxSize;
167       handleClosure();
168       return;
169     }
170 
171     if (fIncludeStartCodeInOutput) {
172       *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x01;
173     }
174     if (fHNumber == 264) {
175       *fTo++ = 9; // "Access unit delimiter" nal_unit_type
176       *fTo++ = 0xF0; // "primary_pic_type" (7); "rbsp_trailing_bits()"
177     } else { // H.265
178       *fTo++ = 35<<1; // "Access unit delimiter" nal_unit_type
179       *fTo++ = 0; // "nuh_layer_id" (0); "nuh_temporal_id_plus1" (0) (Is this correct??)
180       *fTo++ = 0x50; // "pic_type" (2); "rbsp_trailing_bits()" (Is this correct??)
181     }
182 
183     pictureEndMarker() = False; // for next time
184     afterGetting(this);
185   } else {
186     // Do the normal delivery of a NAL unit from the parser:
187     MPEGVideoStreamFramer::doGetNextFrame();
188   }
189 }
190 
191 
192 ////////// H264or5VideoStreamParser implementation //////////
193 
194 H264or5VideoStreamParser
H264or5VideoStreamParser(int hNumber,H264or5VideoStreamFramer * usingSource,FramedSource * inputSource,Boolean includeStartCodeInOutput)195 ::H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource,
196 			   FramedSource* inputSource, Boolean includeStartCodeInOutput)
197   : MPEGVideoStreamParser(usingSource, inputSource),
198     fHNumber(hNumber), fOutputStartCodeSize(includeStartCodeInOutput ? 4 : 0), fHaveSeenFirstStartCode(False), fHaveSeenFirstByteOfNALUnit(False), fParsedFrameRate(0.0),
199     cpb_removal_delay_length_minus1(23), dpb_output_delay_length_minus1(23),
200     CpbDpbDelaysPresentFlag(0), pic_struct_present_flag(0),
201     DeltaTfiDivisor(2.0) {
202 }
203 
~H264or5VideoStreamParser()204 H264or5VideoStreamParser::~H264or5VideoStreamParser() {
205 }
206 
207 #define PREFIX_SEI_NUT 39 // for H.265
208 #define SUFFIX_SEI_NUT 40 // for H.265
isSEI(u_int8_t nal_unit_type)209 Boolean H264or5VideoStreamParser::isSEI(u_int8_t nal_unit_type) {
210   return fHNumber == 264
211     ? nal_unit_type == 6
212     : (nal_unit_type == PREFIX_SEI_NUT || nal_unit_type == SUFFIX_SEI_NUT);
213 }
214 
isEOF(u_int8_t nal_unit_type)215 Boolean H264or5VideoStreamParser::isEOF(u_int8_t nal_unit_type) {
216   // "end of sequence" or "end of (bit)stream"
217   return fHNumber == 264
218     ? (nal_unit_type == 10 || nal_unit_type == 11)
219     : (nal_unit_type == 36 || nal_unit_type == 37);
220 }
221 
usuallyBeginsAccessUnit(u_int8_t nal_unit_type)222 Boolean H264or5VideoStreamParser::usuallyBeginsAccessUnit(u_int8_t nal_unit_type) {
223   return fHNumber == 264
224     ? (nal_unit_type >= 6 && nal_unit_type <= 9) || (nal_unit_type >= 14 && nal_unit_type <= 18)
225     : (nal_unit_type >= 32 && nal_unit_type <= 35) || (nal_unit_type == 39)
226     || (nal_unit_type >= 41 && nal_unit_type <= 44)
227     || (nal_unit_type >= 48 && nal_unit_type <= 55);
228 }
229 
230 void H264or5VideoStreamParser
removeEmulationBytes(u_int8_t * nalUnitCopy,unsigned maxSize,unsigned & nalUnitCopySize)231 ::removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize) {
232   u_int8_t const* nalUnitOrig = fStartOfFrame + fOutputStartCodeSize;
233   unsigned const numBytesInNALunit = fTo - nalUnitOrig;
234   nalUnitCopySize
235     = removeH264or5EmulationBytes(nalUnitCopy, maxSize, nalUnitOrig, numBytesInNALunit);
236 }
237 
238 #ifdef DEBUG
// Human-readable descriptions of the H.264 "nal_unit_type" values, indexed by type (0..31):
char const* nal_unit_type_description_h264[32] = {
  /*  0 */ "Unspecified",
  /*  1 */ "Coded slice of a non-IDR picture",
  /*  2 */ "Coded slice data partition A",
  /*  3 */ "Coded slice data partition B",
  /*  4 */ "Coded slice data partition C",
  /*  5 */ "Coded slice of an IDR picture",
  /*  6 */ "Supplemental enhancement information (SEI)",
  /*  7 */ "Sequence parameter set",
  /*  8 */ "Picture parameter set",
  /*  9 */ "Access unit delimiter",
  /* 10 */ "End of sequence",
  /* 11 */ "End of stream",
  /* 12 */ "Filler data",
  /* 13 */ "Sequence parameter set extension",
  /* 14 */ "Prefix NAL unit",
  /* 15 */ "Subset sequence parameter set",
  /* 16 */ "Reserved",
  /* 17 */ "Reserved",
  /* 18 */ "Reserved",
  /* 19 */ "Coded slice of an auxiliary coded picture without partitioning",
  /* 20 */ "Coded slice extension",
  /* 21 */ "Reserved",
  /* 22 */ "Reserved",
  /* 23 */ "Reserved",
  /* 24 */ "Unspecified",
  /* 25 */ "Unspecified",
  /* 26 */ "Unspecified",
  /* 27 */ "Unspecified",
  /* 28 */ "Unspecified",
  /* 29 */ "Unspecified",
  /* 30 */ "Unspecified",
  /* 31 */ "Unspecified"
};
// Human-readable descriptions of the H.265 "nal_unit_type" values, indexed by type (0..63):
char const* nal_unit_type_description_h265[64] = {
  /*  0 */ "Coded slice segment of a non-TSA, non-STSA trailing picture",
  /*  1 */ "Coded slice segment of a non-TSA, non-STSA trailing picture",
  /*  2 */ "Coded slice segment of a TSA picture",
  /*  3 */ "Coded slice segment of a TSA picture",
  /*  4 */ "Coded slice segment of a STSA picture",
  /*  5 */ "Coded slice segment of a STSA picture",
  /*  6 */ "Coded slice segment of a RADL picture",
  /*  7 */ "Coded slice segment of a RADL picture",
  /*  8 */ "Coded slice segment of a RASL picture",
  /*  9 */ "Coded slice segment of a RASL picture",
  /* 10 */ "Reserved",
  /* 11 */ "Reserved",
  /* 12 */ "Reserved",
  /* 13 */ "Reserved",
  /* 14 */ "Reserved",
  /* 15 */ "Reserved",
  /* 16 */ "Coded slice segment of a BLA picture",
  /* 17 */ "Coded slice segment of a BLA picture",
  /* 18 */ "Coded slice segment of a BLA picture",
  /* 19 */ "Coded slice segment of an IDR picture",
  /* 20 */ "Coded slice segment of an IDR picture",
  /* 21 */ "Coded slice segment of a CRA picture",
  /* 22 */ "Reserved",
  /* 23 */ "Reserved",
  /* 24 */ "Reserved",
  /* 25 */ "Reserved",
  /* 26 */ "Reserved",
  /* 27 */ "Reserved",
  /* 28 */ "Reserved",
  /* 29 */ "Reserved",
  /* 30 */ "Reserved",
  /* 31 */ "Reserved",
  /* 32 */ "Video parameter set",
  /* 33 */ "Sequence parameter set",
  /* 34 */ "Picture parameter set",
  /* 35 */ "Access unit delimiter",
  /* 36 */ "End of sequence",
  /* 37 */ "End of bitstream",
  /* 38 */ "Filler data",
  /* 39 */ "Supplemental enhancement information (SEI)",
  /* 40 */ "Supplemental enhancement information (SEI)",
  /* 41 */ "Reserved",
  /* 42 */ "Reserved",
  /* 43 */ "Reserved",
  /* 44 */ "Reserved",
  /* 45 */ "Reserved",
  /* 46 */ "Reserved",
  /* 47 */ "Reserved",
  /* 48 */ "Unspecified",
  /* 49 */ "Unspecified",
  /* 50 */ "Unspecified",
  /* 51 */ "Unspecified",
  /* 52 */ "Unspecified",
  /* 53 */ "Unspecified",
  /* 54 */ "Unspecified",
  /* 55 */ "Unspecified",
  /* 56 */ "Unspecified",
  /* 57 */ "Unspecified",
  /* 58 */ "Unspecified",
  /* 59 */ "Unspecified",
  /* 60 */ "Unspecified",
  /* 61 */ "Unspecified",
  /* 62 */ "Unspecified",
  /* 63 */ "Unspecified",
};
339 #endif
340 
// Debugging aids used by the bitstream-analysis code below.
// With DEBUG defined, "DEBUG_PRINT(x)" and "DEBUG_STR(x)" write to stderr, indented by the
// current number of tabs; "DEBUG_TAB" increases that indentation until the end of the
// enclosing scope.  Without DEBUG, all three produce no output.
#ifdef DEBUG
static unsigned numDebugTabs = 1;
#define DEBUG_PRINT_TABS for (unsigned _i = 0; _i < numDebugTabs; ++_i) fprintf(stderr, "\t")
#define DEBUG_PRINT(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s: %d\n", #x, x); } while (0)
#define DEBUG_STR(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s\n", x); } while (0)
// An object of this class adds one level of indentation for as long as it exists:
class DebugTab {
public:
  DebugTab() {
    ++numDebugTabs;
  }
  ~DebugTab() {
    --numDebugTabs;
  }
};
#define DEBUG_TAB DebugTab dummy
#else
#define DEBUG_PRINT(x) do {x = x;} while (0)
    // Note: the "x=x;" statement is intended to eliminate "unused variable" compiler warning messages
#define DEBUG_STR(x) do {} while (0)
#define DEBUG_TAB do {} while (0)
#endif
358 
profile_tier_level(BitVector & bv,unsigned max_sub_layers_minus1)359 void H264or5VideoStreamParser::profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1) {
360   bv.skipBits(96);
361 
362   unsigned i;
363   Boolean sub_layer_profile_present_flag[7], sub_layer_level_present_flag[7];
364   for (i = 0; i < max_sub_layers_minus1; ++i) {
365     sub_layer_profile_present_flag[i] = bv.get1BitBoolean();
366     sub_layer_level_present_flag[i] = bv.get1BitBoolean();
367   }
368   if (max_sub_layers_minus1 > 0) {
369     bv.skipBits(2*(8-max_sub_layers_minus1)); // reserved_zero_2bits
370   }
371   for (i = 0; i < max_sub_layers_minus1; ++i) {
372     if (sub_layer_profile_present_flag[i]) {
373       bv.skipBits(88);
374     }
375     if (sub_layer_level_present_flag[i]) {
376       bv.skipBits(8); // sub_layer_level_idc[i]
377     }
378   }
379 }
380 
381 void H264or5VideoStreamParser
analyze_vui_parameters(BitVector & bv,unsigned & num_units_in_tick,unsigned & time_scale)382 ::analyze_vui_parameters(BitVector& bv,
383 			 unsigned& num_units_in_tick, unsigned& time_scale) {
384   Boolean aspect_ratio_info_present_flag = bv.get1BitBoolean();
385   DEBUG_PRINT(aspect_ratio_info_present_flag);
386   if (aspect_ratio_info_present_flag) {
387     DEBUG_TAB;
388     unsigned aspect_ratio_idc = bv.getBits(8);
389     DEBUG_PRINT(aspect_ratio_idc);
390     if (aspect_ratio_idc == 255/*Extended_SAR*/) {
391       bv.skipBits(32); // sar_width; sar_height
392     }
393   }
394   Boolean overscan_info_present_flag = bv.get1BitBoolean();
395   DEBUG_PRINT(overscan_info_present_flag);
396   if (overscan_info_present_flag) {
397     bv.skipBits(1); // overscan_appropriate_flag
398   }
399   Boolean video_signal_type_present_flag = bv.get1BitBoolean();
400   DEBUG_PRINT(video_signal_type_present_flag);
401   if (video_signal_type_present_flag) {
402     DEBUG_TAB;
403     bv.skipBits(4); // video_format; video_full_range_flag
404     Boolean colour_description_present_flag = bv.get1BitBoolean();
405     DEBUG_PRINT(colour_description_present_flag);
406     if (colour_description_present_flag) {
407       bv.skipBits(24); // colour_primaries; transfer_characteristics; matrix_coefficients
408     }
409   }
410   Boolean chroma_loc_info_present_flag = bv.get1BitBoolean();
411   DEBUG_PRINT(chroma_loc_info_present_flag);
412   if (chroma_loc_info_present_flag) {
413     (void)bv.get_expGolomb(); // chroma_sample_loc_type_top_field
414     (void)bv.get_expGolomb(); // chroma_sample_loc_type_bottom_field
415   }
416   if (fHNumber == 265) {
417     bv.skipBits(2); // neutral_chroma_indication_flag, field_seq_flag
418     Boolean frame_field_info_present_flag = bv.get1BitBoolean();
419     DEBUG_PRINT(frame_field_info_present_flag);
420     pic_struct_present_flag = frame_field_info_present_flag; // hack to make H.265 like H.264
421     Boolean default_display_window_flag = bv.get1BitBoolean();
422     DEBUG_PRINT(default_display_window_flag);
423     if (default_display_window_flag) {
424       (void)bv.get_expGolomb(); // def_disp_win_left_offset
425       (void)bv.get_expGolomb(); // def_disp_win_right_offset
426       (void)bv.get_expGolomb(); // def_disp_win_top_offset
427       (void)bv.get_expGolomb(); // def_disp_win_bottom_offset
428     }
429   }
430   Boolean timing_info_present_flag = bv.get1BitBoolean();
431   DEBUG_PRINT(timing_info_present_flag);
432   if (timing_info_present_flag) {
433     DEBUG_TAB;
434     num_units_in_tick = bv.getBits(32);
435     DEBUG_PRINT(num_units_in_tick);
436     time_scale = bv.getBits(32);
437     DEBUG_PRINT(time_scale);
438     if (fHNumber == 264) {
439       Boolean fixed_frame_rate_flag = bv.get1BitBoolean();
440       DEBUG_PRINT(fixed_frame_rate_flag);
441     } else { // 265
442       Boolean vui_poc_proportional_to_timing_flag = bv.get1BitBoolean();
443       DEBUG_PRINT(vui_poc_proportional_to_timing_flag);
444       if (vui_poc_proportional_to_timing_flag) {
445 	unsigned vui_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb();
446 	DEBUG_PRINT(vui_num_ticks_poc_diff_one_minus1);
447       }
448       return; // For H.265, don't bother parsing any more of this #####
449     }
450   }
451   // The following is H.264 only: #####
452   Boolean nal_hrd_parameters_present_flag = bv.get1BitBoolean();
453   DEBUG_PRINT(nal_hrd_parameters_present_flag);
454   if (nal_hrd_parameters_present_flag) analyze_hrd_parameters(bv);
455   Boolean vcl_hrd_parameters_present_flag = bv.get1BitBoolean();
456   DEBUG_PRINT(vcl_hrd_parameters_present_flag);
457   if (vcl_hrd_parameters_present_flag) analyze_hrd_parameters(bv);
458   CpbDpbDelaysPresentFlag = nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag;
459   if (CpbDpbDelaysPresentFlag) {
460     bv.skipBits(1); // low_delay_hrd_flag
461   }
462   pic_struct_present_flag = bv.get1BitBoolean();
463   DEBUG_PRINT(pic_struct_present_flag);
464 }
465 
analyze_hrd_parameters(BitVector & bv)466 void H264or5VideoStreamParser::analyze_hrd_parameters(BitVector& bv) {
467   DEBUG_TAB;
468   unsigned cpb_cnt_minus1 = bv.get_expGolomb();
469   DEBUG_PRINT(cpb_cnt_minus1);
470   unsigned bit_rate_scale = bv.getBits(4);
471   DEBUG_PRINT(bit_rate_scale);
472   unsigned cpb_size_scale = bv.getBits(4);
473   DEBUG_PRINT(cpb_size_scale);
474   for (unsigned SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; ++SchedSelIdx) {
475     DEBUG_TAB;
476     DEBUG_PRINT(SchedSelIdx);
477     unsigned bit_rate_value_minus1 = bv.get_expGolomb();
478     DEBUG_PRINT(bit_rate_value_minus1);
479     unsigned cpb_size_value_minus1 = bv.get_expGolomb();
480     DEBUG_PRINT(cpb_size_value_minus1);
481     Boolean cbr_flag = bv.get1BitBoolean();
482     DEBUG_PRINT(cbr_flag);
483   }
484   unsigned initial_cpb_removal_delay_length_minus1 = bv.getBits(5);
485   DEBUG_PRINT(initial_cpb_removal_delay_length_minus1);
486   cpb_removal_delay_length_minus1 = bv.getBits(5);
487   DEBUG_PRINT(cpb_removal_delay_length_minus1);
488   dpb_output_delay_length_minus1 = bv.getBits(5);
489   DEBUG_PRINT(dpb_output_delay_length_minus1);
490   unsigned time_offset_length = bv.getBits(5);
491   DEBUG_PRINT(time_offset_length);
492 }
493 
494 void H264or5VideoStreamParser
analyze_video_parameter_set_data(unsigned & num_units_in_tick,unsigned & time_scale)495 ::analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) {
496   num_units_in_tick = time_scale = 0; // default values
497 
498   // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
499   u_int8_t vps[VPS_MAX_SIZE];
500   unsigned vpsSize;
501   removeEmulationBytes(vps, sizeof vps, vpsSize);
502 
503   BitVector bv(vps, 0, 8*vpsSize);
504 
505   // Assert: fHNumber == 265 (because this function is called only when parsing H.265)
506   unsigned i;
507 
508   bv.skipBits(28); // nal_unit_header, vps_video_parameter_set_id, vps_reserved_three_2bits, vps_max_layers_minus1
509   unsigned vps_max_sub_layers_minus1 = bv.getBits(3);
510   DEBUG_PRINT(vps_max_sub_layers_minus1);
511   bv.skipBits(17); // vps_temporal_id_nesting_flag, vps_reserved_0xffff_16bits
512   profile_tier_level(bv, vps_max_sub_layers_minus1);
513   Boolean vps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean();
514   DEBUG_PRINT(vps_sub_layer_ordering_info_present_flag);
515   for (i = vps_sub_layer_ordering_info_present_flag ? 0 : vps_max_sub_layers_minus1;
516        i <= vps_max_sub_layers_minus1; ++i) {
517     (void)bv.get_expGolomb(); // vps_max_dec_pic_buffering_minus1[i]
518     (void)bv.get_expGolomb(); // vps_max_num_reorder_pics[i]
519     (void)bv.get_expGolomb(); // vps_max_latency_increase_plus1[i]
520   }
521   unsigned vps_max_layer_id = bv.getBits(6);
522   DEBUG_PRINT(vps_max_layer_id);
523   unsigned vps_num_layer_sets_minus1 = bv.get_expGolomb();
524   DEBUG_PRINT(vps_num_layer_sets_minus1);
525   for (i = 1; i <= vps_num_layer_sets_minus1; ++i) {
526     bv.skipBits(vps_max_layer_id+1); // layer_id_included_flag[i][0..vps_max_layer_id]
527   }
528   Boolean vps_timing_info_present_flag = bv.get1BitBoolean();
529   DEBUG_PRINT(vps_timing_info_present_flag);
530   if (vps_timing_info_present_flag) {
531     DEBUG_TAB;
532     num_units_in_tick = bv.getBits(32);
533     DEBUG_PRINT(num_units_in_tick);
534     time_scale = bv.getBits(32);
535     DEBUG_PRINT(time_scale);
536     Boolean vps_poc_proportional_to_timing_flag = bv.get1BitBoolean();
537     DEBUG_PRINT(vps_poc_proportional_to_timing_flag);
538     if (vps_poc_proportional_to_timing_flag) {
539       unsigned vps_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb();
540       DEBUG_PRINT(vps_num_ticks_poc_diff_one_minus1);
541     }
542   }
543   Boolean vps_extension_flag = bv.get1BitBoolean();
544   DEBUG_PRINT(vps_extension_flag);
545 }
546 
547 void H264or5VideoStreamParser
analyze_seq_parameter_set_data(unsigned & num_units_in_tick,unsigned & time_scale)548 ::analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) {
549   num_units_in_tick = time_scale = 0; // default values
550 
551   // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
552   u_int8_t sps[SPS_MAX_SIZE];
553   unsigned spsSize;
554   removeEmulationBytes(sps, sizeof sps, spsSize);
555 
556   BitVector bv(sps, 0, 8*spsSize);
557 
558   if (fHNumber == 264) {
559     bv.skipBits(8); // forbidden_zero_bit; nal_ref_idc; nal_unit_type
560     unsigned profile_idc = bv.getBits(8);
561     DEBUG_PRINT(profile_idc);
562     unsigned constraint_setN_flag = bv.getBits(8); // also "reserved_zero_2bits" at end
563     DEBUG_PRINT(constraint_setN_flag);
564     unsigned level_idc = bv.getBits(8);
565     DEBUG_PRINT(level_idc);
566     unsigned seq_parameter_set_id = bv.get_expGolomb();
567     DEBUG_PRINT(seq_parameter_set_id);
568     if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ) {
569       DEBUG_TAB;
570       unsigned chroma_format_idc = bv.get_expGolomb();
571       DEBUG_PRINT(chroma_format_idc);
572       if (chroma_format_idc == 3) {
573 	DEBUG_TAB;
574 	Boolean separate_colour_plane_flag = bv.get1BitBoolean();
575 	DEBUG_PRINT(separate_colour_plane_flag);
576       }
577       (void)bv.get_expGolomb(); // bit_depth_luma_minus8
578       (void)bv.get_expGolomb(); // bit_depth_chroma_minus8
579       bv.skipBits(1); // qpprime_y_zero_transform_bypass_flag
580       Boolean seq_scaling_matrix_present_flag = bv.get1BitBoolean();
581       DEBUG_PRINT(seq_scaling_matrix_present_flag);
582       if (seq_scaling_matrix_present_flag) {
583 	for (int i = 0; i < ((chroma_format_idc != 3) ? 8 : 12); ++i) {
584 	  DEBUG_TAB;
585 	  DEBUG_PRINT(i);
586 	  Boolean seq_scaling_list_present_flag = bv.get1BitBoolean();
587 	  DEBUG_PRINT(seq_scaling_list_present_flag);
588 	  if (seq_scaling_list_present_flag) {
589 	    DEBUG_TAB;
590 	    unsigned sizeOfScalingList = i < 6 ? 16 : 64;
591 	    unsigned lastScale = 8;
592 	    unsigned nextScale = 8;
593 	    for (unsigned j = 0; j < sizeOfScalingList; ++j) {
594 	      DEBUG_TAB;
595 	      DEBUG_PRINT(j);
596 	      DEBUG_PRINT(nextScale);
597 	      if (nextScale != 0) {
598 		DEBUG_TAB;
599 		int delta_scale = bv.get_expGolombSigned();
600 		DEBUG_PRINT(delta_scale);
601 		nextScale = (lastScale + delta_scale + 256) % 256;
602 	      }
603 	      lastScale = (nextScale == 0) ? lastScale : nextScale;
604 	      DEBUG_PRINT(lastScale);
605 	    }
606 	  }
607 	}
608       }
609     }
610     unsigned log2_max_frame_num_minus4 = bv.get_expGolomb();
611     DEBUG_PRINT(log2_max_frame_num_minus4);
612     unsigned pic_order_cnt_type = bv.get_expGolomb();
613     DEBUG_PRINT(pic_order_cnt_type);
614     if (pic_order_cnt_type == 0) {
615       DEBUG_TAB;
616       unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb();
617       DEBUG_PRINT(log2_max_pic_order_cnt_lsb_minus4);
618     } else if (pic_order_cnt_type == 1) {
619       DEBUG_TAB;
620       bv.skipBits(1); // delta_pic_order_always_zero_flag
621       (void)bv.get_expGolombSigned(); // offset_for_non_ref_pic
622       (void)bv.get_expGolombSigned(); // offset_for_top_to_bottom_field
623       unsigned num_ref_frames_in_pic_order_cnt_cycle = bv.get_expGolomb();
624       DEBUG_PRINT(num_ref_frames_in_pic_order_cnt_cycle);
625       for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
626 	(void)bv.get_expGolombSigned(); // offset_for_ref_frame[i]
627       }
628     }
629     unsigned max_num_ref_frames = bv.get_expGolomb();
630     DEBUG_PRINT(max_num_ref_frames);
631     Boolean gaps_in_frame_num_value_allowed_flag = bv.get1BitBoolean();
632     DEBUG_PRINT(gaps_in_frame_num_value_allowed_flag);
633     unsigned pic_width_in_mbs_minus1 = bv.get_expGolomb();
634     DEBUG_PRINT(pic_width_in_mbs_minus1);
635     unsigned pic_height_in_map_units_minus1 = bv.get_expGolomb();
636     DEBUG_PRINT(pic_height_in_map_units_minus1);
637     Boolean frame_mbs_only_flag = bv.get1BitBoolean();
638     DEBUG_PRINT(frame_mbs_only_flag);
639     if (!frame_mbs_only_flag) {
640       bv.skipBits(1); // mb_adaptive_frame_field_flag
641     }
642     bv.skipBits(1); // direct_8x8_inference_flag
643     Boolean frame_cropping_flag = bv.get1BitBoolean();
644     DEBUG_PRINT(frame_cropping_flag);
645     if (frame_cropping_flag) {
646       (void)bv.get_expGolomb(); // frame_crop_left_offset
647       (void)bv.get_expGolomb(); // frame_crop_right_offset
648       (void)bv.get_expGolomb(); // frame_crop_top_offset
649       (void)bv.get_expGolomb(); // frame_crop_bottom_offset
650     }
651     Boolean vui_parameters_present_flag = bv.get1BitBoolean();
652     DEBUG_PRINT(vui_parameters_present_flag);
653     if (vui_parameters_present_flag) {
654       DEBUG_TAB;
655       analyze_vui_parameters(bv, num_units_in_tick, time_scale);
656     }
657   } else { // 265
658     unsigned i;
659 
660     bv.skipBits(16); // nal_unit_header
661     bv.skipBits(4); // sps_video_parameter_set_id
662     unsigned sps_max_sub_layers_minus1 = bv.getBits(3);
663     DEBUG_PRINT(sps_max_sub_layers_minus1);
664     bv.skipBits(1); // sps_temporal_id_nesting_flag
665     profile_tier_level(bv, sps_max_sub_layers_minus1);
666     (void)bv.get_expGolomb(); // sps_seq_parameter_set_id
667     unsigned chroma_format_idc = bv.get_expGolomb();
668     DEBUG_PRINT(chroma_format_idc);
669     if (chroma_format_idc == 3) bv.skipBits(1); // separate_colour_plane_flag
670     unsigned pic_width_in_luma_samples = bv.get_expGolomb();
671     DEBUG_PRINT(pic_width_in_luma_samples);
672     unsigned pic_height_in_luma_samples = bv.get_expGolomb();
673     DEBUG_PRINT(pic_height_in_luma_samples);
674     Boolean conformance_window_flag = bv.get1BitBoolean();
675     DEBUG_PRINT(conformance_window_flag);
676     if (conformance_window_flag) {
677       DEBUG_TAB;
678       unsigned conf_win_left_offset = bv.get_expGolomb();
679       DEBUG_PRINT(conf_win_left_offset);
680       unsigned conf_win_right_offset = bv.get_expGolomb();
681       DEBUG_PRINT(conf_win_right_offset);
682       unsigned conf_win_top_offset = bv.get_expGolomb();
683       DEBUG_PRINT(conf_win_top_offset);
684       unsigned conf_win_bottom_offset = bv.get_expGolomb();
685       DEBUG_PRINT(conf_win_bottom_offset);
686     }
687     (void)bv.get_expGolomb(); // bit_depth_luma_minus8
688     (void)bv.get_expGolomb(); // bit_depth_chroma_minus8
689     unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb();
690     Boolean sps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean();
691     DEBUG_PRINT(sps_sub_layer_ordering_info_present_flag);
692     for (i = (sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1);
693 	 i <= sps_max_sub_layers_minus1; ++i) {
694       (void)bv.get_expGolomb(); // sps_max_dec_pic_buffering_minus1[i]
695       (void)bv.get_expGolomb(); // sps_max_num_reorder_pics[i]
696       (void)bv.get_expGolomb(); // sps_max_latency_increase[i]
697     }
698     (void)bv.get_expGolomb(); // log2_min_luma_coding_block_size_minus3
699     (void)bv.get_expGolomb(); // log2_diff_max_min_luma_coding_block_size
700     (void)bv.get_expGolomb(); // log2_min_transform_block_size_minus2
701     (void)bv.get_expGolomb(); // log2_diff_max_min_transform_block_size
702     (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_inter
703     (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_intra
704     Boolean scaling_list_enabled_flag = bv.get1BitBoolean();
705     DEBUG_PRINT(scaling_list_enabled_flag);
706     if (scaling_list_enabled_flag) {
707       DEBUG_TAB;
708       Boolean sps_scaling_list_data_present_flag = bv.get1BitBoolean();
709       DEBUG_PRINT(sps_scaling_list_data_present_flag);
710       if (sps_scaling_list_data_present_flag) {
711 	// scaling_list_data()
712 	DEBUG_TAB;
713 	for (unsigned sizeId = 0; sizeId < 4; ++sizeId) {
714 	  DEBUG_PRINT(sizeId);
715 	  for (unsigned matrixId = 0; matrixId < (sizeId == 3 ? 2 : 6); ++matrixId) {
716 	    DEBUG_TAB;
717 	    DEBUG_PRINT(matrixId);
718 	    Boolean scaling_list_pred_mode_flag = bv.get1BitBoolean();
719 	    DEBUG_PRINT(scaling_list_pred_mode_flag);
720 	    if (!scaling_list_pred_mode_flag) {
721 	      (void)bv.get_expGolomb(); // scaling_list_pred_matrix_id_delta[sizeId][matrixId]
722 	    } else {
723 	      unsigned const c = 1 << (4+(sizeId<<1));
724 	      unsigned coefNum = c < 64 ? c : 64;
725 	      if (sizeId > 1) {
726 		(void)bv.get_expGolomb(); // scaling_list_dc_coef_minus8[sizeId][matrixId]
727 	      }
728 	      for (i = 0; i < coefNum; ++i) {
729 		(void)bv.get_expGolomb(); // scaling_list_delta_coef
730 	      }
731 	    }
732 	  }
733 	}
734       }
735     }
736     bv.skipBits(2); // amp_enabled_flag, sample_adaptive_offset_enabled_flag
737     Boolean pcm_enabled_flag = bv.get1BitBoolean();
738     DEBUG_PRINT(pcm_enabled_flag);
739     if (pcm_enabled_flag) {
740       bv.skipBits(8); // pcm_sample_bit_depth_luma_minus1, pcm_sample_bit_depth_chroma_minus1
741       (void)bv.get_expGolomb(); // log2_min_pcm_luma_coding_block_size_minus3
742       (void)bv.get_expGolomb(); // log2_diff_max_min_pcm_luma_coding_block_size
743       bv.skipBits(1); // pcm_loop_filter_disabled_flag
744     }
745     unsigned num_short_term_ref_pic_sets = bv.get_expGolomb();
746     DEBUG_PRINT(num_short_term_ref_pic_sets);
747     unsigned num_negative_pics = 0, prev_num_negative_pics = 0;
748     unsigned num_positive_pics = 0, prev_num_positive_pics = 0;
749     for (i = 0; i < num_short_term_ref_pic_sets; ++i) {
750       // short_term_ref_pic_set(i):
751       DEBUG_TAB;
752       DEBUG_PRINT(i);
753       Boolean inter_ref_pic_set_prediction_flag = False;
754       if (i != 0) {
755 	inter_ref_pic_set_prediction_flag = bv.get1BitBoolean();
756       }
757       DEBUG_PRINT(inter_ref_pic_set_prediction_flag);
758       if (inter_ref_pic_set_prediction_flag) {
759 	DEBUG_TAB;
760 	if (i == num_short_term_ref_pic_sets) {
761 	  // This can't happen here, but it's in the spec, so we include it for completeness
762 	  (void)bv.get_expGolomb(); // delta_idx_minus1
763 	}
764 	bv.skipBits(1); // delta_rps_sign
765 	(void)bv.get_expGolomb(); // abs_delta_rps_minus1
766 	unsigned NumDeltaPocs = prev_num_negative_pics + prev_num_positive_pics; // correct???
767 	for (unsigned j = 0; j < NumDeltaPocs; ++j) {
768 	  DEBUG_PRINT(j);
769 	  Boolean used_by_curr_pic_flag = bv.get1BitBoolean();
770 	  DEBUG_PRINT(used_by_curr_pic_flag);
771 	  if (!used_by_curr_pic_flag) bv.skipBits(1); // use_delta_flag[j]
772 	}
773       } else {
774 	prev_num_negative_pics = num_negative_pics;
775 	num_negative_pics = bv.get_expGolomb();
776 	DEBUG_PRINT(num_negative_pics);
777 	prev_num_positive_pics = num_positive_pics;
778 	num_positive_pics = bv.get_expGolomb();
779 	DEBUG_PRINT(num_positive_pics);
780 	unsigned k;
781 	for (k = 0; k < num_negative_pics; ++k) {
782 	  (void)bv.get_expGolomb(); // delta_poc_s0_minus1[k]
783 	  bv.skipBits(1); // used_by_curr_pic_s0_flag[k]
784 	}
785 	for (k = 0; k < num_positive_pics; ++k) {
786 	  (void)bv.get_expGolomb(); // delta_poc_s1_minus1[k]
787 	  bv.skipBits(1); // used_by_curr_pic_s1_flag[k]
788 	}
789       }
790     }
791     Boolean long_term_ref_pics_present_flag = bv.get1BitBoolean();
792     DEBUG_PRINT(long_term_ref_pics_present_flag);
793     if (long_term_ref_pics_present_flag) {
794       DEBUG_TAB;
795       unsigned num_long_term_ref_pics_sps = bv.get_expGolomb();
796       DEBUG_PRINT(num_long_term_ref_pics_sps);
797       for (i = 0; i < num_long_term_ref_pics_sps; ++i) {
798 	bv.skipBits(log2_max_pic_order_cnt_lsb_minus4); // lt_ref_pic_poc_lsb_sps[i]
799 	bv.skipBits(1); // used_by_curr_pic_lt_sps_flag[1]
800       }
801     }
802     bv.skipBits(2); // sps_temporal_mvp_enabled_flag, strong_intra_smoothing_enabled_flag
803     Boolean vui_parameters_present_flag = bv.get1BitBoolean();
804     DEBUG_PRINT(vui_parameters_present_flag);
805     if (vui_parameters_present_flag) {
806       DEBUG_TAB;
807       analyze_vui_parameters(bv, num_units_in_tick, time_scale);
808     }
809     Boolean sps_extension_flag = bv.get1BitBoolean();
810     DEBUG_PRINT(sps_extension_flag);
811   }
812 }
813 
// Size (in bytes) of the local buffer that "analyze_sei_data()" uses to hold its copy of a SEI NAL unit:
#define SEI_MAX_SIZE 5000 // larger than the largest possible SEI NAL unit

#ifdef DEBUG
// Human-readable names for H.264 SEI payload types, used only for debugging output.
// The table is indexed by "payloadType"; "analyze_sei_data()" maps every payload type that is
// greater than "MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264" onto the final ("reserved_sei_message") entry.
#define MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264 46
char const* sei_payloadType_description_h264[MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264+1] = {
  "buffering_period", //0
  "pic_timing", //1
  "pan_scan_rect", //2
  "filler_payload", //3
  "user_data_registered_itu_t_t35", //4
  "user_data_unregistered", //5
  "recovery_point", //6
  "dec_ref_pic_marking_repetition", //7
  "spare_pic", //8
  "scene_info", //9
  "sub_seq_info", //10
  "sub_seq_layer_characteristics", //11
  "sub_seq_characteristics", //12
  "full_frame_freeze", //13
  "full_frame_freeze_release", //14
  "full_frame_snapshot", //15
  "progressive_refinement_segment_start", //16
  "progressive_refinement_segment_end", //17
  "motion_constrained_slice_group_set", //18
  "film_grain_characteristics", //19
  "deblocking_filter_display_preference", //20
  "stereo_video_info", //21
  "post_filter_hint", //22
  "tone_mapping_info", //23
  "scalability_info", //24
  "sub_pic_scalable_layer", //25
  "non_required_layer_rep", //26
  "priority_layer_info", //27
  "layers_not_present", //28
  "layer_dependency_change", //29
  "scalable_nesting", //30
  "base_layer_temporal_hrd", //31
  "quality_layer_integrity_check", //32
  "redundant_pic_property", //33
  "tl0_dep_rep_index", //34
  "tl_switching_point", //35
  "parallel_decoding_info", //36
  "mvc_scalable_nesting", //37
  "view_scalability_info", //38
  "multiview_scene_info", //39
  "multiview_acquisition_info", //40
  "non_required_view_component", //41
  "view_dependency_change", //42
  "operation_points_not_present", //43
  "base_view_temporal_hrd", //44
  "frame_packing_arrangement", //45
  "reserved_sei_message" // 46 or higher
};
#endif
868 
analyze_sei_data(u_int8_t nal_unit_type)869 void H264or5VideoStreamParser::analyze_sei_data(u_int8_t nal_unit_type) {
870   // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
871   u_int8_t sei[SEI_MAX_SIZE];
872   unsigned seiSize;
873   removeEmulationBytes(sei, sizeof sei, seiSize);
874 
875   unsigned j = 1; // skip the initial byte (forbidden_zero_bit; nal_ref_idc; nal_unit_type); we've already seen it
876   while (j < seiSize) {
877     unsigned payloadType = 0;
878     do {
879       payloadType += sei[j];
880     } while (sei[j++] == 255 && j < seiSize);
881     if (j >= seiSize) break;
882 
883     unsigned payloadSize = 0;
884     do {
885       payloadSize += sei[j];
886     } while (sei[j++] == 255 && j < seiSize);
887     if (j >= seiSize) break;
888 
889 #ifdef DEBUG
890     char const* description;
891     if (fHNumber == 264) {
892       unsigned descriptionNum = payloadType <= MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264
893 	? payloadType : MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264;
894       description = sei_payloadType_description_h264[descriptionNum];
895     } else { // 265
896       description =
897 	payloadType == 3 ? "filler_payload" :
898 	payloadType == 4 ? "user_data_registered_itu_t_t35" :
899 	payloadType == 5 ? "user_data_unregistered" :
900 	payloadType == 17 ? "progressive_refinement_segment_end" :
901 	payloadType == 22 ? "post_filter_hint" :
902 	(payloadType == 132 && nal_unit_type == SUFFIX_SEI_NUT) ? "decoded_picture_hash" :
903 	nal_unit_type == SUFFIX_SEI_NUT ? "reserved_sei_message" :
904 	payloadType == 0 ? "buffering_period" :
905 	payloadType == 1 ? "pic_timing" :
906 	payloadType == 2 ? "pan_scan_rect" :
907 	payloadType == 6 ? "recovery_point" :
908 	payloadType == 9 ? "scene_info" :
909 	payloadType == 15 ? "picture_snapshot" :
910 	payloadType == 16 ? "progressive_refinement_segment_start" :
911 	payloadType == 19 ? "film_grain_characteristics" :
912 	payloadType == 23 ? "tone_mapping_info" :
913 	payloadType == 45 ? "frame_packing_arrangement" :
914 	payloadType == 47 ? "display_orientation" :
915 	payloadType == 128 ? "structure_of_pictures_info" :
916 	payloadType == 129 ? "active_parameter_sets" :
917 	payloadType == 130 ? "decoding_unit_info" :
918 	payloadType == 131 ? "temporal_sub_layer_zero_index" :
919 	payloadType == 133 ? "scalable_nesting" :
920 	payloadType == 134 ? "region_refresh_info" : "reserved_sei_message";
921     }
922     fprintf(stderr, "\tpayloadType %d (\"%s\"); payloadSize %d\n", payloadType, description, payloadSize);
923 #endif
924 
925     analyze_sei_payload(payloadType, payloadSize, &sei[j]);
926     j += payloadSize;
927   }
928 }
929 
930 void H264or5VideoStreamParser
analyze_sei_payload(unsigned payloadType,unsigned payloadSize,u_int8_t * payload)931 ::analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload) {
932   if (payloadType == 1/* pic_timing, for both H.264 and H.265 */) {
933     BitVector bv(payload, 0, 8*payloadSize);
934 
935     DEBUG_TAB;
936     if (CpbDpbDelaysPresentFlag) {
937       unsigned cpb_removal_delay = bv.getBits(cpb_removal_delay_length_minus1 + 1);
938       DEBUG_PRINT(cpb_removal_delay);
939       unsigned dpb_output_delay = bv.getBits(dpb_output_delay_length_minus1 + 1);
940       DEBUG_PRINT(dpb_output_delay);
941     }
942     double prevDeltaTfiDivisor = DeltaTfiDivisor;
943     if (pic_struct_present_flag) {
944       unsigned pic_struct = bv.getBits(4);
945       DEBUG_PRINT(pic_struct);
946       // Use this to set "DeltaTfiDivisor" (which is used to compute the frame rate):
947       if (fHNumber == 264) {
948 	DeltaTfiDivisor =
949 	  pic_struct == 0 ? 2.0 :
950 	  pic_struct <= 2 ? 1.0 :
951 	  pic_struct <= 4 ? 2.0 :
952 	  pic_struct <= 6 ? 3.0 :
953 	  pic_struct == 7 ? 4.0 :
954 	  pic_struct == 8 ? 6.0 :
955 	  2.0;
956       } else { // H.265
957 	DeltaTfiDivisor =
958 	  pic_struct == 0 ? 2.0 :
959 	  pic_struct <= 2 ? 1.0 :
960 	  pic_struct <= 4 ? 2.0 :
961 	  pic_struct <= 6 ? 3.0 :
962 	  pic_struct == 7 ? 2.0 :
963 	  pic_struct == 8 ? 3.0 :
964 	  pic_struct <= 12 ? 1.0 :
965 	  2.0;
966       }
967     } else {
968       if (fHNumber == 264) {
969 	// Need to get field_pic_flag from slice_header to set this properly! #####
970       } else { // H.265
971 	DeltaTfiDivisor = 1.0;
972       }
973     }
974     // If "DeltaTfiDivisor" has changed, and we've already computed the frame rate, then
975     // adjust it, based on the new value of "DeltaTfiDivisor":
976     if (DeltaTfiDivisor != prevDeltaTfiDivisor && fParsedFrameRate != 0.0) {
977       usingSource()->fFrameRate = fParsedFrameRate
978 	= fParsedFrameRate*(prevDeltaTfiDivisor/DeltaTfiDivisor);
979 #ifdef DEBUG
980       fprintf(stderr, "Changed frame rate to %f fps\n", usingSource()->fFrameRate);
981 #endif
982     }
983     // Ignore the rest of the payload (timestamps) for now... #####
984   }
985 }
986 
flushInput()987 void H264or5VideoStreamParser::flushInput() {
988   fHaveSeenFirstStartCode = False;
989   fHaveSeenFirstByteOfNALUnit = False;
990 
991   StreamParser::flushInput();
992 }
993 
// Parses (and saves) the next NAL unit from the input byte stream.
// Returns the size of the saved frame ("curFrameSize()") once a complete NAL unit - terminated by a
// 3- or 4-byte start code - has been copied.  Returns 0 after the trailing data at EOF has been
// consumed, and also whenever an "int" exception is caught (the debugging message below describes
// that as normal behavior; presumably it is raised by the input-reading helpers when more data is
// needed - confirm in "StreamParser").
// Along the way, VPS/SPS/PPS NAL units are copied to the framer, a frame rate is derived from the
// first VPS/SPS that carries timing information, SEI NAL units are analyzed, and the framer's
// 'end of access unit' marker and next presentation time are updated.
unsigned H264or5VideoStreamParser::parse() {
  try {
    // The stream must start with a 0x00000001:
    if (!fHaveSeenFirstStartCode) {
      // Skip over any input bytes that precede the first 0x00000001:
      u_int32_t first4Bytes;
      while ((first4Bytes = test4Bytes()) != 0x00000001) {
	get1Byte(); setParseState(); // ensures that we progress over bad data
      }
      skipBytes(4); // skip this initial code

      setParseState();
      fHaveSeenFirstStartCode = True; // from now on
    }

    // Only emit the start code once per output frame (i.e., when nothing has been saved yet):
    if (fOutputStartCodeSize > 0 && curFrameSize() == 0 && !haveSeenEOF()) {
      // Include a start code in the output:
      save4Bytes(0x00000001);
    }

    // Then save everything up until the next 0x00000001 (4 bytes) or 0x000001 (3 bytes), or we hit EOF.
    // Also make note of the first byte, because it contains the "nal_unit_type":
    if (haveSeenEOF()) {
      // We hit EOF the last time that we tried to parse this data, so we know that any remaining unparsed data
      // forms a complete NAL unit, and that there's no 'start code' at the end:
      unsigned remainingDataSize = totNumValidBytes() - curOffset();
#ifdef DEBUG
      unsigned const trailingNALUnitSize = remainingDataSize;
#endif
      while (remainingDataSize > 0) {
	u_int8_t nextByte = get1Byte();
	if (!fHaveSeenFirstByteOfNALUnit) {
	  fFirstByteOfNALUnit = nextByte;
	  fHaveSeenFirstByteOfNALUnit = True;
	}
	saveByte(nextByte);
	--remainingDataSize;
      }

#ifdef DEBUG
      if (fHNumber == 264) {
	u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5;
	u_int8_t nal_unit_type = fFirstByteOfNALUnit&0x1F;
	fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
		trailingNALUnitSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]);
      } else { // 265
	u_int8_t nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1;
	fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n",
		trailingNALUnitSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]);
      }
#endif

      (void)get1Byte(); // forces another read, which will cause EOF to get handled for real this time
      return 0;
    } else {
      u_int32_t next4Bytes = test4Bytes();
      if (!fHaveSeenFirstByteOfNALUnit) {
	// The most significant byte of "next4Bytes" is the next byte in the stream:
	fFirstByteOfNALUnit = next4Bytes>>24;
	fHaveSeenFirstByteOfNALUnit = True;
      }
      // Loop until the next 4 bytes begin with a start code (either the 4-byte or the 3-byte form):
      while (next4Bytes != 0x00000001 && (next4Bytes&0xFFFFFF00) != 0x00000100) {
	// We save at least some of "next4Bytes".
	if ((unsigned)(next4Bytes&0xFF) > 1) {
	  // Common case: 0x00000001 or 0x000001 definitely doesn't begin anywhere in "next4Bytes", so we save all of it:
	  save4Bytes(next4Bytes);
	  skipBytes(4);
	} else {
	  // Save the first byte, and continue testing the rest:
	  saveByte(next4Bytes>>24);
	  skipBytes(1);
	}
	setParseState(); // ensures forward progress
	next4Bytes = test4Bytes();
      }
      // Assert: next4Bytes starts with 0x00000001 or 0x000001, and we've saved all previous bytes (forming a complete NAL unit).
      // Skip over these remaining bytes, up until the start of the next NAL unit:
      if (next4Bytes == 0x00000001) {
	skipBytes(4);
      } else {
	skipBytes(3);
      }
    }

    fHaveSeenFirstByteOfNALUnit = False; // for the next NAL unit that we'll parse
    // Extract "nal_unit_type" from the first byte of the NAL unit:
    // the low-order 5 bits for H.264; bits 1 through 6 for H.265.
    u_int8_t nal_unit_type;
    if (fHNumber == 264) {
      nal_unit_type = fFirstByteOfNALUnit&0x1F;
#ifdef DEBUG
      u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5;
      fprintf(stderr, "Parsed %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
	      curFrameSize()-fOutputStartCodeSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]);
#endif
    } else { // 265
      nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1;
#ifdef DEBUG
      fprintf(stderr, "Parsed %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n",
	      curFrameSize()-fOutputStartCodeSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]);
#endif
    }

    // Now that we have found (& copied) a NAL unit, process it if it's of special interest to us:
    // (In each "saveCopyOf...()" call below, the output start code - if any - is excluded from the copy.)
    if (isVPS(nal_unit_type)) { // Video parameter set
      // First, save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfVPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);

      if (fParsedFrameRate == 0.0) {
	// We haven't yet parsed a frame rate from the stream.
	// So parse this NAL unit to check whether frame rate information is present:
	unsigned num_units_in_tick, time_scale;
	analyze_video_parameter_set_data(num_units_in_tick, time_scale);
	if (time_scale > 0 && num_units_in_tick > 0) {
	  usingSource()->fFrameRate = fParsedFrameRate
	    = time_scale/(DeltaTfiDivisor*num_units_in_tick);
#ifdef DEBUG
	  fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
#endif
	} else {
#ifdef DEBUG
	  fprintf(stderr, "\tThis \"Video Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
#endif
	}
      }
    } else if (isSPS(nal_unit_type)) { // Sequence parameter set
      // First, save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfSPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);

      if (fParsedFrameRate == 0.0) {
	// We haven't yet parsed a frame rate from the stream.
	// So parse this NAL unit to check whether frame rate information is present:
	unsigned num_units_in_tick, time_scale;
	analyze_seq_parameter_set_data(num_units_in_tick, time_scale);
	if (time_scale > 0 && num_units_in_tick > 0) {
	  usingSource()->fFrameRate = fParsedFrameRate
	    = time_scale/(DeltaTfiDivisor*num_units_in_tick);
#ifdef DEBUG
	  fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
#endif
	} else {
#ifdef DEBUG
	  fprintf(stderr, "\tThis \"Sequence Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
#endif
	}
      }
    } else if (isPPS(nal_unit_type)) { // Picture parameter set
      // Save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfPPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);
    } else if (isSEI(nal_unit_type)) { // Supplemental enhancement information (SEI)
      analyze_sei_data(nal_unit_type);
      // Later, perhaps adjust "fPresentationTime" if we saw a "pic_timing" SEI payload??? #####
    }

    usingSource()->setPresentationTime();
#ifdef DEBUG
    unsigned long secs = (unsigned long)usingSource()->fPresentationTime.tv_sec;
    unsigned uSecs = (unsigned)usingSource()->fPresentationTime.tv_usec;
    fprintf(stderr, "\tPresentation time: %lu.%06u\n", secs, uSecs);
#endif

    // Now, check whether this NAL unit ends an 'access unit'.
    // (RTP streamers need to know this in order to figure out whether or not to set the "M" bit.)
    Boolean thisNALUnitEndsAccessUnit;
    if (haveSeenEOF() || isEOF(nal_unit_type)) {
      // There is no next NAL unit, so we assume that this one ends the current 'access unit':
      thisNALUnitEndsAccessUnit = True;
    } else if (usuallyBeginsAccessUnit(nal_unit_type)) {
      // These NAL units usually *begin* an access unit, so assume that they don't end one here:
      thisNALUnitEndsAccessUnit = False;
    } else {
      // We need to check the *next* NAL unit to figure out whether
      // the current NAL unit ends an 'access unit':
      // (We peek at its first 3 bytes; the start code that preceded them was already skipped above.)
      u_int8_t firstBytesOfNextNALUnit[3];
      testBytes(firstBytesOfNextNALUnit, 3);

      u_int8_t const& next_nal_unit_type = fHNumber == 264
	? (firstBytesOfNextNALUnit[0]&0x1F) : ((firstBytesOfNextNALUnit[0]&0x7E)>>1);
      if (isVCL(next_nal_unit_type)) {
	// The high-order bit of the byte after the "nal_unit_header" tells us whether it's
	// the start of a new 'access unit' (and thus the current NAL unit ends an 'access unit'):
	// (That byte is at index 1 for H.264, but at index 2 for H.265.)
	u_int8_t const byteAfter_nal_unit_header
	  = fHNumber == 264 ? firstBytesOfNextNALUnit[1] : firstBytesOfNextNALUnit[2];
	thisNALUnitEndsAccessUnit = (byteAfter_nal_unit_header&0x80) != 0;
      } else if (usuallyBeginsAccessUnit(next_nal_unit_type)) {
	// The next NAL unit's type is one that usually appears at the start of an 'access unit',
	// so we assume that the current NAL unit ends an 'access unit':
	thisNALUnitEndsAccessUnit = True;
      } else {
	// The next NAL unit definitely doesn't start a new 'access unit',
	// which means that the current NAL unit doesn't end one:
	thisNALUnitEndsAccessUnit = False;
      }
    }

    if (thisNALUnitEndsAccessUnit) {
#ifdef DEBUG
      fprintf(stderr, "*****This NAL unit ends the current access unit*****\n");
#endif
      usingSource()->fPictureEndMarker = True;
      ++usingSource()->fPictureCount;

      // Note that the presentation time for the next NAL unit will be different:
      // (It is the current presentation time plus one frame duration (1/fFrameRate seconds),
      //  split into whole seconds and microseconds.)
      struct timeval& nextPT = usingSource()->fNextPresentationTime; // alias
      nextPT = usingSource()->fPresentationTime;
      double nextFraction = nextPT.tv_usec/1000000.0 + 1/usingSource()->fFrameRate;
      unsigned nextSecsIncrement = (long)nextFraction;
      nextPT.tv_sec += (long)nextSecsIncrement;
      nextPT.tv_usec = (long)((nextFraction - nextSecsIncrement)*1000000);
    }
    setParseState();

    return curFrameSize();
  } catch (int /*e*/) {
#ifdef DEBUG
    fprintf(stderr, "H264or5VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n");
#endif
    return 0;  // the parsing got interrupted
  }
}
1211 
// Copies NAL unit data from "from" (of size "fromSize" bytes) to "to", removing each
// 'emulation prevention' byte: every 3-byte sequence 0x00 0x00 0x03 in the input becomes
// the 2-byte sequence 0x00 0x00 in the output.
// At most "toMaxSize" bytes are written; if the converted data doesn't fit, the output is
// truncated (it is then the first "toMaxSize" bytes of the converted data).
// Returns the number of bytes that were written to "to".
unsigned removeH264or5EmulationBytes(u_int8_t* to, unsigned toMaxSize,
                                     u_int8_t const* from, unsigned fromSize) {
  unsigned toSize = 0;
  unsigned i = 0;
  while (i < fromSize && toSize < toMaxSize) {
    // Note: we test "fromSize - i" (rather than "i+2") so that the arithmetic can never wrap around:
    if (fromSize - i >= 3 && from[i] == 0 && from[i+1] == 0 && from[i+2] == 3) {
      to[toSize++] = 0;
      if (toSize == toMaxSize) break; // there was room for only the first of the two zero bytes
      to[toSize++] = 0;
      i += 3; // skips the 'emulation prevention' byte (0x03) as well
    } else {
      to[toSize++] = from[i++];
    }
  }

  return toSize;
}
1230