1 /**********
2 This library is free software; you can redistribute it and/or modify it under
3 the terms of the GNU Lesser General Public License as published by the
4 Free Software Foundation; either version 3 of the License, or (at your
5 option) any later version. (See <http://www.gnu.org/copyleft/lesser.html>.)
6
7 This library is distributed in the hope that it will be useful, but WITHOUT
8 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
10 more details.
11
12 You should have received a copy of the GNU Lesser General Public License
13 along with this library; if not, write to the Free Software Foundation, Inc.,
14 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
15 **********/
16 // "liveMedia"
17 // Copyright (c) 1996-2020 Live Networks, Inc. All rights reserved.
18 // A filter that breaks up a H.264 or H.265 Video Elementary Stream into NAL units.
19 // Implementation
20
21 #include "H264or5VideoStreamFramer.hh"
22 #include "MPEGVideoStreamParser.hh"
23 #include "BitVector.hh"
24 #include <GroupsockHelper.hh> // for "gettimeofday()"
25
26 ////////// H264or5VideoStreamParser definition //////////
27
28 class H264or5VideoStreamParser: public MPEGVideoStreamParser {
29 public:
30 H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource,
31 FramedSource* inputSource, Boolean includeStartCodeInOutput);
32 virtual ~H264or5VideoStreamParser();
33
34 private: // redefined virtual functions:
35 virtual void flushInput();
36 virtual unsigned parse();
37
38 private:
usingSource()39 H264or5VideoStreamFramer* usingSource() {
40 return (H264or5VideoStreamFramer*)fUsingSource;
41 }
42
isVPS(u_int8_t nal_unit_type)43 Boolean isVPS(u_int8_t nal_unit_type) { return usingSource()->isVPS(nal_unit_type); }
isSPS(u_int8_t nal_unit_type)44 Boolean isSPS(u_int8_t nal_unit_type) { return usingSource()->isSPS(nal_unit_type); }
isPPS(u_int8_t nal_unit_type)45 Boolean isPPS(u_int8_t nal_unit_type) { return usingSource()->isPPS(nal_unit_type); }
isVCL(u_int8_t nal_unit_type)46 Boolean isVCL(u_int8_t nal_unit_type) { return usingSource()->isVCL(nal_unit_type); }
47 Boolean isSEI(u_int8_t nal_unit_type);
48 Boolean isEOF(u_int8_t nal_unit_type);
49 Boolean usuallyBeginsAccessUnit(u_int8_t nal_unit_type);
50
51 void removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize);
52
53 void analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale);
54 void analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale);
55 void profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1);
56 void analyze_vui_parameters(BitVector& bv, unsigned& num_units_in_tick, unsigned& time_scale);
57 void analyze_hrd_parameters(BitVector& bv);
58 void analyze_sei_data(u_int8_t nal_unit_type);
59 void analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload);
60
61 private:
62 int fHNumber; // 264 or 265
63 unsigned fOutputStartCodeSize;
64 Boolean fHaveSeenFirstStartCode, fHaveSeenFirstByteOfNALUnit;
65 u_int8_t fFirstByteOfNALUnit;
66 double fParsedFrameRate;
67 // variables set & used in the specification:
68 unsigned cpb_removal_delay_length_minus1, dpb_output_delay_length_minus1;
69 Boolean CpbDpbDelaysPresentFlag, pic_struct_present_flag;
70 double DeltaTfiDivisor;
71 };
72
73
74 ////////// H264or5VideoStreamFramer implementation //////////
75
76 H264or5VideoStreamFramer
H264or5VideoStreamFramer(int hNumber,UsageEnvironment & env,FramedSource * inputSource,Boolean createParser,Boolean includeStartCodeInOutput,Boolean insertAccessUnitDelimiters)77 ::H264or5VideoStreamFramer(int hNumber, UsageEnvironment& env, FramedSource* inputSource,
78 Boolean createParser,
79 Boolean includeStartCodeInOutput, Boolean insertAccessUnitDelimiters)
80 : MPEGVideoStreamFramer(env, inputSource),
81 fHNumber(hNumber), fIncludeStartCodeInOutput(includeStartCodeInOutput),
82 fInsertAccessUnitDelimiters(insertAccessUnitDelimiters),
83 fLastSeenVPS(NULL), fLastSeenVPSSize(0),
84 fLastSeenSPS(NULL), fLastSeenSPSSize(0),
85 fLastSeenPPS(NULL), fLastSeenPPSSize(0) {
86 fParser = createParser
87 ? new H264or5VideoStreamParser(hNumber, this, inputSource, includeStartCodeInOutput)
88 : NULL;
89 fFrameRate = 25.0; // We assume a frame rate of 25 fps, unless we learn otherwise (from parsing a VPS or SPS NAL unit)
90 }
91
~H264or5VideoStreamFramer()92 H264or5VideoStreamFramer::~H264or5VideoStreamFramer() {
93 delete[] fLastSeenPPS;
94 delete[] fLastSeenSPS;
95 delete[] fLastSeenVPS;
96 }
97
98 #define VPS_MAX_SIZE 1000 // larger than the largest possible VPS (Video Parameter Set) NAL unit
99
saveCopyOfVPS(u_int8_t * from,unsigned size)100 void H264or5VideoStreamFramer::saveCopyOfVPS(u_int8_t* from, unsigned size) {
101 if (from == NULL) return;
102 delete[] fLastSeenVPS;
103 fLastSeenVPS = new u_int8_t[size];
104 memmove(fLastSeenVPS, from, size);
105
106 fLastSeenVPSSize = size;
107 }
108
109 #define SPS_MAX_SIZE 1000 // larger than the largest possible SPS (Sequence Parameter Set) NAL unit
110
saveCopyOfSPS(u_int8_t * from,unsigned size)111 void H264or5VideoStreamFramer::saveCopyOfSPS(u_int8_t* from, unsigned size) {
112 if (from == NULL) return;
113 delete[] fLastSeenSPS;
114 fLastSeenSPS = new u_int8_t[size];
115 memmove(fLastSeenSPS, from, size);
116
117 fLastSeenSPSSize = size;
118 }
119
saveCopyOfPPS(u_int8_t * from,unsigned size)120 void H264or5VideoStreamFramer::saveCopyOfPPS(u_int8_t* from, unsigned size) {
121 if (from == NULL) return;
122 delete[] fLastSeenPPS;
123 fLastSeenPPS = new u_int8_t[size];
124 memmove(fLastSeenPPS, from, size);
125
126 fLastSeenPPSSize = size;
127 }
128
setPresentationTime()129 void H264or5VideoStreamFramer::setPresentationTime() {
130 if (fPresentationTimeBase.tv_sec == 0 && fPresentationTimeBase.tv_usec == 0) {
131 // Set to the current time:
132 gettimeofday(&fPresentationTimeBase, NULL);
133 fNextPresentationTime = fPresentationTimeBase;
134 }
135 fPresentationTime = fNextPresentationTime;
136 }
137
isVPS(u_int8_t nal_unit_type)138 Boolean H264or5VideoStreamFramer::isVPS(u_int8_t nal_unit_type) {
139 // VPS NAL units occur in H.265 only:
140 return fHNumber == 265 && nal_unit_type == 32;
141 }
142
isSPS(u_int8_t nal_unit_type)143 Boolean H264or5VideoStreamFramer::isSPS(u_int8_t nal_unit_type) {
144 return fHNumber == 264 ? nal_unit_type == 7 : nal_unit_type == 33;
145 }
146
isPPS(u_int8_t nal_unit_type)147 Boolean H264or5VideoStreamFramer::isPPS(u_int8_t nal_unit_type) {
148 return fHNumber == 264 ? nal_unit_type == 8 : nal_unit_type == 34;
149 }
150
isVCL(u_int8_t nal_unit_type)151 Boolean H264or5VideoStreamFramer::isVCL(u_int8_t nal_unit_type) {
152 return fHNumber == 264
153 ? (nal_unit_type <= 5 && nal_unit_type > 0)
154 : (nal_unit_type <= 31);
155 }
156
doGetNextFrame()157 void H264or5VideoStreamFramer::doGetNextFrame() {
158 if (fInsertAccessUnitDelimiters && pictureEndMarker()) {
159 // Deliver an "access_unit_delimiter" NAL unit instead:
160 unsigned const startCodeSize = fIncludeStartCodeInOutput ? 4: 0;
161 unsigned const audNALSize = fHNumber == 264 ? 2 : 3;
162
163 fFrameSize = startCodeSize + audNALSize;
164 if (fFrameSize > fMaxSize) { // there's no space
165 fNumTruncatedBytes = fFrameSize - fMaxSize;
166 fFrameSize = fMaxSize;
167 handleClosure();
168 return;
169 }
170
171 if (fIncludeStartCodeInOutput) {
172 *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x00; *fTo++ = 0x01;
173 }
174 if (fHNumber == 264) {
175 *fTo++ = 9; // "Access unit delimiter" nal_unit_type
176 *fTo++ = 0xF0; // "primary_pic_type" (7); "rbsp_trailing_bits()"
177 } else { // H.265
178 *fTo++ = 35<<1; // "Access unit delimiter" nal_unit_type
179 *fTo++ = 0; // "nuh_layer_id" (0); "nuh_temporal_id_plus1" (0) (Is this correct??)
180 *fTo++ = 0x50; // "pic_type" (2); "rbsp_trailing_bits()" (Is this correct??)
181 }
182
183 pictureEndMarker() = False; // for next time
184 afterGetting(this);
185 } else {
186 // Do the normal delivery of a NAL unit from the parser:
187 MPEGVideoStreamFramer::doGetNextFrame();
188 }
189 }
190
191
192 ////////// H264or5VideoStreamParser implementation //////////
193
194 H264or5VideoStreamParser
H264or5VideoStreamParser(int hNumber,H264or5VideoStreamFramer * usingSource,FramedSource * inputSource,Boolean includeStartCodeInOutput)195 ::H264or5VideoStreamParser(int hNumber, H264or5VideoStreamFramer* usingSource,
196 FramedSource* inputSource, Boolean includeStartCodeInOutput)
197 : MPEGVideoStreamParser(usingSource, inputSource),
198 fHNumber(hNumber), fOutputStartCodeSize(includeStartCodeInOutput ? 4 : 0), fHaveSeenFirstStartCode(False), fHaveSeenFirstByteOfNALUnit(False), fParsedFrameRate(0.0),
199 cpb_removal_delay_length_minus1(23), dpb_output_delay_length_minus1(23),
200 CpbDpbDelaysPresentFlag(0), pic_struct_present_flag(0),
201 DeltaTfiDivisor(2.0) {
202 }
203
~H264or5VideoStreamParser()204 H264or5VideoStreamParser::~H264or5VideoStreamParser() {
205 }
206
207 #define PREFIX_SEI_NUT 39 // for H.265
208 #define SUFFIX_SEI_NUT 40 // for H.265
isSEI(u_int8_t nal_unit_type)209 Boolean H264or5VideoStreamParser::isSEI(u_int8_t nal_unit_type) {
210 return fHNumber == 264
211 ? nal_unit_type == 6
212 : (nal_unit_type == PREFIX_SEI_NUT || nal_unit_type == SUFFIX_SEI_NUT);
213 }
214
isEOF(u_int8_t nal_unit_type)215 Boolean H264or5VideoStreamParser::isEOF(u_int8_t nal_unit_type) {
216 // "end of sequence" or "end of (bit)stream"
217 return fHNumber == 264
218 ? (nal_unit_type == 10 || nal_unit_type == 11)
219 : (nal_unit_type == 36 || nal_unit_type == 37);
220 }
221
usuallyBeginsAccessUnit(u_int8_t nal_unit_type)222 Boolean H264or5VideoStreamParser::usuallyBeginsAccessUnit(u_int8_t nal_unit_type) {
223 return fHNumber == 264
224 ? (nal_unit_type >= 6 && nal_unit_type <= 9) || (nal_unit_type >= 14 && nal_unit_type <= 18)
225 : (nal_unit_type >= 32 && nal_unit_type <= 35) || (nal_unit_type == 39)
226 || (nal_unit_type >= 41 && nal_unit_type <= 44)
227 || (nal_unit_type >= 48 && nal_unit_type <= 55);
228 }
229
230 void H264or5VideoStreamParser
removeEmulationBytes(u_int8_t * nalUnitCopy,unsigned maxSize,unsigned & nalUnitCopySize)231 ::removeEmulationBytes(u_int8_t* nalUnitCopy, unsigned maxSize, unsigned& nalUnitCopySize) {
232 u_int8_t const* nalUnitOrig = fStartOfFrame + fOutputStartCodeSize;
233 unsigned const numBytesInNALunit = fTo - nalUnitOrig;
234 nalUnitCopySize
235 = removeH264or5EmulationBytes(nalUnitCopy, maxSize, nalUnitOrig, numBytesInNALunit);
236 }
237
#ifdef DEBUG
// Human-readable descriptions of each H.264 "nal_unit_type" value (indexed by that value):
char const* nal_unit_type_description_h264[32] = {
  /*  0 */ "Unspecified",
  /*  1 */ "Coded slice of a non-IDR picture",
  /*  2 */ "Coded slice data partition A",
  /*  3 */ "Coded slice data partition B",
  /*  4 */ "Coded slice data partition C",
  /*  5 */ "Coded slice of an IDR picture",
  /*  6 */ "Supplemental enhancement information (SEI)",
  /*  7 */ "Sequence parameter set",
  /*  8 */ "Picture parameter set",
  /*  9 */ "Access unit delimiter",
  /* 10 */ "End of sequence",
  /* 11 */ "End of stream",
  /* 12 */ "Filler data",
  /* 13 */ "Sequence parameter set extension",
  /* 14 */ "Prefix NAL unit",
  /* 15 */ "Subset sequence parameter set",
  /* 16 */ "Reserved",
  /* 17 */ "Reserved",
  /* 18 */ "Reserved",
  /* 19 */ "Coded slice of an auxiliary coded picture without partitioning",
  /* 20 */ "Coded slice extension",
  /* 21 */ "Reserved",
  /* 22 */ "Reserved",
  /* 23 */ "Reserved",
  /* 24 */ "Unspecified",
  /* 25 */ "Unspecified",
  /* 26 */ "Unspecified",
  /* 27 */ "Unspecified",
  /* 28 */ "Unspecified",
  /* 29 */ "Unspecified",
  /* 30 */ "Unspecified",
  /* 31 */ "Unspecified"
};

// Human-readable descriptions of each H.265 "nal_unit_type" value (indexed by that value):
char const* nal_unit_type_description_h265[64] = {
  /*  0 */ "Coded slice segment of a non-TSA, non-STSA trailing picture",
  /*  1 */ "Coded slice segment of a non-TSA, non-STSA trailing picture",
  /*  2 */ "Coded slice segment of a TSA picture",
  /*  3 */ "Coded slice segment of a TSA picture",
  /*  4 */ "Coded slice segment of a STSA picture",
  /*  5 */ "Coded slice segment of a STSA picture",
  /*  6 */ "Coded slice segment of a RADL picture",
  /*  7 */ "Coded slice segment of a RADL picture",
  /*  8 */ "Coded slice segment of a RASL picture",
  /*  9 */ "Coded slice segment of a RASL picture",
  /* 10 */ "Reserved",
  /* 11 */ "Reserved",
  /* 12 */ "Reserved",
  /* 13 */ "Reserved",
  /* 14 */ "Reserved",
  /* 15 */ "Reserved",
  /* 16 */ "Coded slice segment of a BLA picture",
  /* 17 */ "Coded slice segment of a BLA picture",
  /* 18 */ "Coded slice segment of a BLA picture",
  /* 19 */ "Coded slice segment of an IDR picture",
  /* 20 */ "Coded slice segment of an IDR picture",
  /* 21 */ "Coded slice segment of a CRA picture",
  /* 22 */ "Reserved",
  /* 23 */ "Reserved",
  /* 24 */ "Reserved",
  /* 25 */ "Reserved",
  /* 26 */ "Reserved",
  /* 27 */ "Reserved",
  /* 28 */ "Reserved",
  /* 29 */ "Reserved",
  /* 30 */ "Reserved",
  /* 31 */ "Reserved",
  /* 32 */ "Video parameter set",
  /* 33 */ "Sequence parameter set",
  /* 34 */ "Picture parameter set",
  /* 35 */ "Access unit delimiter",
  /* 36 */ "End of sequence",
  /* 37 */ "End of bitstream",
  /* 38 */ "Filler data",
  /* 39 */ "Supplemental enhancement information (SEI)",
  /* 40 */ "Supplemental enhancement information (SEI)",
  /* 41 */ "Reserved",
  /* 42 */ "Reserved",
  /* 43 */ "Reserved",
  /* 44 */ "Reserved",
  /* 45 */ "Reserved",
  /* 46 */ "Reserved",
  /* 47 */ "Reserved",
  /* 48 */ "Unspecified",
  /* 49 */ "Unspecified",
  /* 50 */ "Unspecified",
  /* 51 */ "Unspecified",
  /* 52 */ "Unspecified",
  /* 53 */ "Unspecified",
  /* 54 */ "Unspecified",
  /* 55 */ "Unspecified",
  /* 56 */ "Unspecified",
  /* 57 */ "Unspecified",
  /* 58 */ "Unspecified",
  /* 59 */ "Unspecified",
  /* 60 */ "Unspecified",
  /* 61 */ "Unspecified",
  /* 62 */ "Unspecified",
  /* 63 */ "Unspecified",
};
#endif
340
// Debugging aids used by the NAL unit analysis routines:
//   DEBUG_PRINT(x): prints the name and (integer) value of "x", indented by the current tab level
//   DEBUG_STR(x):   prints the string "x", indented by the current tab level
//   DEBUG_TAB:      increases the tab level for the remainder of the enclosing scope
// If "DEBUG" is not defined, then none of these produce any output.
#ifdef DEBUG
static unsigned numDebugTabs = 1;
#define DEBUG_PRINT_TABS for (unsigned _i = 0; _i < numDebugTabs; ++_i) fprintf(stderr, "\t")
#define DEBUG_PRINT(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s: %d\n", #x, x); } while (0)
#define DEBUG_STR(x) do { DEBUG_PRINT_TABS; fprintf(stderr, "%s\n", x); } while (0)
class DebugTab {
public:
  DebugTab() {++numDebugTabs;}
  ~DebugTab() {--numDebugTabs;}
};
#define DEBUG_TAB DebugTab dummy
#else
#define DEBUG_PRINT(x) do { (void)(x); } while (0)
    // Note: the "(void)(x)" expression is intended to eliminate "unused variable" compiler
    // warning messages.  (Unlike a "x = x" self-assignment, it does not itself provoke
    // "self-assign" warnings, and it also works if "x" is "const".)
#define DEBUG_STR(x) do {} while (0)
#define DEBUG_TAB do {} while (0)
#endif
358
profile_tier_level(BitVector & bv,unsigned max_sub_layers_minus1)359 void H264or5VideoStreamParser::profile_tier_level(BitVector& bv, unsigned max_sub_layers_minus1) {
360 bv.skipBits(96);
361
362 unsigned i;
363 Boolean sub_layer_profile_present_flag[7], sub_layer_level_present_flag[7];
364 for (i = 0; i < max_sub_layers_minus1; ++i) {
365 sub_layer_profile_present_flag[i] = bv.get1BitBoolean();
366 sub_layer_level_present_flag[i] = bv.get1BitBoolean();
367 }
368 if (max_sub_layers_minus1 > 0) {
369 bv.skipBits(2*(8-max_sub_layers_minus1)); // reserved_zero_2bits
370 }
371 for (i = 0; i < max_sub_layers_minus1; ++i) {
372 if (sub_layer_profile_present_flag[i]) {
373 bv.skipBits(88);
374 }
375 if (sub_layer_level_present_flag[i]) {
376 bv.skipBits(8); // sub_layer_level_idc[i]
377 }
378 }
379 }
380
381 void H264or5VideoStreamParser
analyze_vui_parameters(BitVector & bv,unsigned & num_units_in_tick,unsigned & time_scale)382 ::analyze_vui_parameters(BitVector& bv,
383 unsigned& num_units_in_tick, unsigned& time_scale) {
384 Boolean aspect_ratio_info_present_flag = bv.get1BitBoolean();
385 DEBUG_PRINT(aspect_ratio_info_present_flag);
386 if (aspect_ratio_info_present_flag) {
387 DEBUG_TAB;
388 unsigned aspect_ratio_idc = bv.getBits(8);
389 DEBUG_PRINT(aspect_ratio_idc);
390 if (aspect_ratio_idc == 255/*Extended_SAR*/) {
391 bv.skipBits(32); // sar_width; sar_height
392 }
393 }
394 Boolean overscan_info_present_flag = bv.get1BitBoolean();
395 DEBUG_PRINT(overscan_info_present_flag);
396 if (overscan_info_present_flag) {
397 bv.skipBits(1); // overscan_appropriate_flag
398 }
399 Boolean video_signal_type_present_flag = bv.get1BitBoolean();
400 DEBUG_PRINT(video_signal_type_present_flag);
401 if (video_signal_type_present_flag) {
402 DEBUG_TAB;
403 bv.skipBits(4); // video_format; video_full_range_flag
404 Boolean colour_description_present_flag = bv.get1BitBoolean();
405 DEBUG_PRINT(colour_description_present_flag);
406 if (colour_description_present_flag) {
407 bv.skipBits(24); // colour_primaries; transfer_characteristics; matrix_coefficients
408 }
409 }
410 Boolean chroma_loc_info_present_flag = bv.get1BitBoolean();
411 DEBUG_PRINT(chroma_loc_info_present_flag);
412 if (chroma_loc_info_present_flag) {
413 (void)bv.get_expGolomb(); // chroma_sample_loc_type_top_field
414 (void)bv.get_expGolomb(); // chroma_sample_loc_type_bottom_field
415 }
416 if (fHNumber == 265) {
417 bv.skipBits(2); // neutral_chroma_indication_flag, field_seq_flag
418 Boolean frame_field_info_present_flag = bv.get1BitBoolean();
419 DEBUG_PRINT(frame_field_info_present_flag);
420 pic_struct_present_flag = frame_field_info_present_flag; // hack to make H.265 like H.264
421 Boolean default_display_window_flag = bv.get1BitBoolean();
422 DEBUG_PRINT(default_display_window_flag);
423 if (default_display_window_flag) {
424 (void)bv.get_expGolomb(); // def_disp_win_left_offset
425 (void)bv.get_expGolomb(); // def_disp_win_right_offset
426 (void)bv.get_expGolomb(); // def_disp_win_top_offset
427 (void)bv.get_expGolomb(); // def_disp_win_bottom_offset
428 }
429 }
430 Boolean timing_info_present_flag = bv.get1BitBoolean();
431 DEBUG_PRINT(timing_info_present_flag);
432 if (timing_info_present_flag) {
433 DEBUG_TAB;
434 num_units_in_tick = bv.getBits(32);
435 DEBUG_PRINT(num_units_in_tick);
436 time_scale = bv.getBits(32);
437 DEBUG_PRINT(time_scale);
438 if (fHNumber == 264) {
439 Boolean fixed_frame_rate_flag = bv.get1BitBoolean();
440 DEBUG_PRINT(fixed_frame_rate_flag);
441 } else { // 265
442 Boolean vui_poc_proportional_to_timing_flag = bv.get1BitBoolean();
443 DEBUG_PRINT(vui_poc_proportional_to_timing_flag);
444 if (vui_poc_proportional_to_timing_flag) {
445 unsigned vui_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb();
446 DEBUG_PRINT(vui_num_ticks_poc_diff_one_minus1);
447 }
448 return; // For H.265, don't bother parsing any more of this #####
449 }
450 }
451 // The following is H.264 only: #####
452 Boolean nal_hrd_parameters_present_flag = bv.get1BitBoolean();
453 DEBUG_PRINT(nal_hrd_parameters_present_flag);
454 if (nal_hrd_parameters_present_flag) analyze_hrd_parameters(bv);
455 Boolean vcl_hrd_parameters_present_flag = bv.get1BitBoolean();
456 DEBUG_PRINT(vcl_hrd_parameters_present_flag);
457 if (vcl_hrd_parameters_present_flag) analyze_hrd_parameters(bv);
458 CpbDpbDelaysPresentFlag = nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag;
459 if (CpbDpbDelaysPresentFlag) {
460 bv.skipBits(1); // low_delay_hrd_flag
461 }
462 pic_struct_present_flag = bv.get1BitBoolean();
463 DEBUG_PRINT(pic_struct_present_flag);
464 }
465
analyze_hrd_parameters(BitVector & bv)466 void H264or5VideoStreamParser::analyze_hrd_parameters(BitVector& bv) {
467 DEBUG_TAB;
468 unsigned cpb_cnt_minus1 = bv.get_expGolomb();
469 DEBUG_PRINT(cpb_cnt_minus1);
470 unsigned bit_rate_scale = bv.getBits(4);
471 DEBUG_PRINT(bit_rate_scale);
472 unsigned cpb_size_scale = bv.getBits(4);
473 DEBUG_PRINT(cpb_size_scale);
474 for (unsigned SchedSelIdx = 0; SchedSelIdx <= cpb_cnt_minus1; ++SchedSelIdx) {
475 DEBUG_TAB;
476 DEBUG_PRINT(SchedSelIdx);
477 unsigned bit_rate_value_minus1 = bv.get_expGolomb();
478 DEBUG_PRINT(bit_rate_value_minus1);
479 unsigned cpb_size_value_minus1 = bv.get_expGolomb();
480 DEBUG_PRINT(cpb_size_value_minus1);
481 Boolean cbr_flag = bv.get1BitBoolean();
482 DEBUG_PRINT(cbr_flag);
483 }
484 unsigned initial_cpb_removal_delay_length_minus1 = bv.getBits(5);
485 DEBUG_PRINT(initial_cpb_removal_delay_length_minus1);
486 cpb_removal_delay_length_minus1 = bv.getBits(5);
487 DEBUG_PRINT(cpb_removal_delay_length_minus1);
488 dpb_output_delay_length_minus1 = bv.getBits(5);
489 DEBUG_PRINT(dpb_output_delay_length_minus1);
490 unsigned time_offset_length = bv.getBits(5);
491 DEBUG_PRINT(time_offset_length);
492 }
493
494 void H264or5VideoStreamParser
analyze_video_parameter_set_data(unsigned & num_units_in_tick,unsigned & time_scale)495 ::analyze_video_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) {
496 num_units_in_tick = time_scale = 0; // default values
497
498 // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
499 u_int8_t vps[VPS_MAX_SIZE];
500 unsigned vpsSize;
501 removeEmulationBytes(vps, sizeof vps, vpsSize);
502
503 BitVector bv(vps, 0, 8*vpsSize);
504
505 // Assert: fHNumber == 265 (because this function is called only when parsing H.265)
506 unsigned i;
507
508 bv.skipBits(28); // nal_unit_header, vps_video_parameter_set_id, vps_reserved_three_2bits, vps_max_layers_minus1
509 unsigned vps_max_sub_layers_minus1 = bv.getBits(3);
510 DEBUG_PRINT(vps_max_sub_layers_minus1);
511 bv.skipBits(17); // vps_temporal_id_nesting_flag, vps_reserved_0xffff_16bits
512 profile_tier_level(bv, vps_max_sub_layers_minus1);
513 Boolean vps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean();
514 DEBUG_PRINT(vps_sub_layer_ordering_info_present_flag);
515 for (i = vps_sub_layer_ordering_info_present_flag ? 0 : vps_max_sub_layers_minus1;
516 i <= vps_max_sub_layers_minus1; ++i) {
517 (void)bv.get_expGolomb(); // vps_max_dec_pic_buffering_minus1[i]
518 (void)bv.get_expGolomb(); // vps_max_num_reorder_pics[i]
519 (void)bv.get_expGolomb(); // vps_max_latency_increase_plus1[i]
520 }
521 unsigned vps_max_layer_id = bv.getBits(6);
522 DEBUG_PRINT(vps_max_layer_id);
523 unsigned vps_num_layer_sets_minus1 = bv.get_expGolomb();
524 DEBUG_PRINT(vps_num_layer_sets_minus1);
525 for (i = 1; i <= vps_num_layer_sets_minus1; ++i) {
526 bv.skipBits(vps_max_layer_id+1); // layer_id_included_flag[i][0..vps_max_layer_id]
527 }
528 Boolean vps_timing_info_present_flag = bv.get1BitBoolean();
529 DEBUG_PRINT(vps_timing_info_present_flag);
530 if (vps_timing_info_present_flag) {
531 DEBUG_TAB;
532 num_units_in_tick = bv.getBits(32);
533 DEBUG_PRINT(num_units_in_tick);
534 time_scale = bv.getBits(32);
535 DEBUG_PRINT(time_scale);
536 Boolean vps_poc_proportional_to_timing_flag = bv.get1BitBoolean();
537 DEBUG_PRINT(vps_poc_proportional_to_timing_flag);
538 if (vps_poc_proportional_to_timing_flag) {
539 unsigned vps_num_ticks_poc_diff_one_minus1 = bv.get_expGolomb();
540 DEBUG_PRINT(vps_num_ticks_poc_diff_one_minus1);
541 }
542 }
543 Boolean vps_extension_flag = bv.get1BitBoolean();
544 DEBUG_PRINT(vps_extension_flag);
545 }
546
547 void H264or5VideoStreamParser
analyze_seq_parameter_set_data(unsigned & num_units_in_tick,unsigned & time_scale)548 ::analyze_seq_parameter_set_data(unsigned& num_units_in_tick, unsigned& time_scale) {
549 num_units_in_tick = time_scale = 0; // default values
550
551 // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
552 u_int8_t sps[SPS_MAX_SIZE];
553 unsigned spsSize;
554 removeEmulationBytes(sps, sizeof sps, spsSize);
555
556 BitVector bv(sps, 0, 8*spsSize);
557
558 if (fHNumber == 264) {
559 bv.skipBits(8); // forbidden_zero_bit; nal_ref_idc; nal_unit_type
560 unsigned profile_idc = bv.getBits(8);
561 DEBUG_PRINT(profile_idc);
562 unsigned constraint_setN_flag = bv.getBits(8); // also "reserved_zero_2bits" at end
563 DEBUG_PRINT(constraint_setN_flag);
564 unsigned level_idc = bv.getBits(8);
565 DEBUG_PRINT(level_idc);
566 unsigned seq_parameter_set_id = bv.get_expGolomb();
567 DEBUG_PRINT(seq_parameter_set_id);
568 if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ) {
569 DEBUG_TAB;
570 unsigned chroma_format_idc = bv.get_expGolomb();
571 DEBUG_PRINT(chroma_format_idc);
572 if (chroma_format_idc == 3) {
573 DEBUG_TAB;
574 Boolean separate_colour_plane_flag = bv.get1BitBoolean();
575 DEBUG_PRINT(separate_colour_plane_flag);
576 }
577 (void)bv.get_expGolomb(); // bit_depth_luma_minus8
578 (void)bv.get_expGolomb(); // bit_depth_chroma_minus8
579 bv.skipBits(1); // qpprime_y_zero_transform_bypass_flag
580 Boolean seq_scaling_matrix_present_flag = bv.get1BitBoolean();
581 DEBUG_PRINT(seq_scaling_matrix_present_flag);
582 if (seq_scaling_matrix_present_flag) {
583 for (int i = 0; i < ((chroma_format_idc != 3) ? 8 : 12); ++i) {
584 DEBUG_TAB;
585 DEBUG_PRINT(i);
586 Boolean seq_scaling_list_present_flag = bv.get1BitBoolean();
587 DEBUG_PRINT(seq_scaling_list_present_flag);
588 if (seq_scaling_list_present_flag) {
589 DEBUG_TAB;
590 unsigned sizeOfScalingList = i < 6 ? 16 : 64;
591 unsigned lastScale = 8;
592 unsigned nextScale = 8;
593 for (unsigned j = 0; j < sizeOfScalingList; ++j) {
594 DEBUG_TAB;
595 DEBUG_PRINT(j);
596 DEBUG_PRINT(nextScale);
597 if (nextScale != 0) {
598 DEBUG_TAB;
599 int delta_scale = bv.get_expGolombSigned();
600 DEBUG_PRINT(delta_scale);
601 nextScale = (lastScale + delta_scale + 256) % 256;
602 }
603 lastScale = (nextScale == 0) ? lastScale : nextScale;
604 DEBUG_PRINT(lastScale);
605 }
606 }
607 }
608 }
609 }
610 unsigned log2_max_frame_num_minus4 = bv.get_expGolomb();
611 DEBUG_PRINT(log2_max_frame_num_minus4);
612 unsigned pic_order_cnt_type = bv.get_expGolomb();
613 DEBUG_PRINT(pic_order_cnt_type);
614 if (pic_order_cnt_type == 0) {
615 DEBUG_TAB;
616 unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb();
617 DEBUG_PRINT(log2_max_pic_order_cnt_lsb_minus4);
618 } else if (pic_order_cnt_type == 1) {
619 DEBUG_TAB;
620 bv.skipBits(1); // delta_pic_order_always_zero_flag
621 (void)bv.get_expGolombSigned(); // offset_for_non_ref_pic
622 (void)bv.get_expGolombSigned(); // offset_for_top_to_bottom_field
623 unsigned num_ref_frames_in_pic_order_cnt_cycle = bv.get_expGolomb();
624 DEBUG_PRINT(num_ref_frames_in_pic_order_cnt_cycle);
625 for (unsigned i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
626 (void)bv.get_expGolombSigned(); // offset_for_ref_frame[i]
627 }
628 }
629 unsigned max_num_ref_frames = bv.get_expGolomb();
630 DEBUG_PRINT(max_num_ref_frames);
631 Boolean gaps_in_frame_num_value_allowed_flag = bv.get1BitBoolean();
632 DEBUG_PRINT(gaps_in_frame_num_value_allowed_flag);
633 unsigned pic_width_in_mbs_minus1 = bv.get_expGolomb();
634 DEBUG_PRINT(pic_width_in_mbs_minus1);
635 unsigned pic_height_in_map_units_minus1 = bv.get_expGolomb();
636 DEBUG_PRINT(pic_height_in_map_units_minus1);
637 Boolean frame_mbs_only_flag = bv.get1BitBoolean();
638 DEBUG_PRINT(frame_mbs_only_flag);
639 if (!frame_mbs_only_flag) {
640 bv.skipBits(1); // mb_adaptive_frame_field_flag
641 }
642 bv.skipBits(1); // direct_8x8_inference_flag
643 Boolean frame_cropping_flag = bv.get1BitBoolean();
644 DEBUG_PRINT(frame_cropping_flag);
645 if (frame_cropping_flag) {
646 (void)bv.get_expGolomb(); // frame_crop_left_offset
647 (void)bv.get_expGolomb(); // frame_crop_right_offset
648 (void)bv.get_expGolomb(); // frame_crop_top_offset
649 (void)bv.get_expGolomb(); // frame_crop_bottom_offset
650 }
651 Boolean vui_parameters_present_flag = bv.get1BitBoolean();
652 DEBUG_PRINT(vui_parameters_present_flag);
653 if (vui_parameters_present_flag) {
654 DEBUG_TAB;
655 analyze_vui_parameters(bv, num_units_in_tick, time_scale);
656 }
657 } else { // 265
658 unsigned i;
659
660 bv.skipBits(16); // nal_unit_header
661 bv.skipBits(4); // sps_video_parameter_set_id
662 unsigned sps_max_sub_layers_minus1 = bv.getBits(3);
663 DEBUG_PRINT(sps_max_sub_layers_minus1);
664 bv.skipBits(1); // sps_temporal_id_nesting_flag
665 profile_tier_level(bv, sps_max_sub_layers_minus1);
666 (void)bv.get_expGolomb(); // sps_seq_parameter_set_id
667 unsigned chroma_format_idc = bv.get_expGolomb();
668 DEBUG_PRINT(chroma_format_idc);
669 if (chroma_format_idc == 3) bv.skipBits(1); // separate_colour_plane_flag
670 unsigned pic_width_in_luma_samples = bv.get_expGolomb();
671 DEBUG_PRINT(pic_width_in_luma_samples);
672 unsigned pic_height_in_luma_samples = bv.get_expGolomb();
673 DEBUG_PRINT(pic_height_in_luma_samples);
674 Boolean conformance_window_flag = bv.get1BitBoolean();
675 DEBUG_PRINT(conformance_window_flag);
676 if (conformance_window_flag) {
677 DEBUG_TAB;
678 unsigned conf_win_left_offset = bv.get_expGolomb();
679 DEBUG_PRINT(conf_win_left_offset);
680 unsigned conf_win_right_offset = bv.get_expGolomb();
681 DEBUG_PRINT(conf_win_right_offset);
682 unsigned conf_win_top_offset = bv.get_expGolomb();
683 DEBUG_PRINT(conf_win_top_offset);
684 unsigned conf_win_bottom_offset = bv.get_expGolomb();
685 DEBUG_PRINT(conf_win_bottom_offset);
686 }
687 (void)bv.get_expGolomb(); // bit_depth_luma_minus8
688 (void)bv.get_expGolomb(); // bit_depth_chroma_minus8
689 unsigned log2_max_pic_order_cnt_lsb_minus4 = bv.get_expGolomb();
690 Boolean sps_sub_layer_ordering_info_present_flag = bv.get1BitBoolean();
691 DEBUG_PRINT(sps_sub_layer_ordering_info_present_flag);
692 for (i = (sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers_minus1);
693 i <= sps_max_sub_layers_minus1; ++i) {
694 (void)bv.get_expGolomb(); // sps_max_dec_pic_buffering_minus1[i]
695 (void)bv.get_expGolomb(); // sps_max_num_reorder_pics[i]
696 (void)bv.get_expGolomb(); // sps_max_latency_increase[i]
697 }
698 (void)bv.get_expGolomb(); // log2_min_luma_coding_block_size_minus3
699 (void)bv.get_expGolomb(); // log2_diff_max_min_luma_coding_block_size
700 (void)bv.get_expGolomb(); // log2_min_transform_block_size_minus2
701 (void)bv.get_expGolomb(); // log2_diff_max_min_transform_block_size
702 (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_inter
703 (void)bv.get_expGolomb(); // max_transform_hierarchy_depth_intra
704 Boolean scaling_list_enabled_flag = bv.get1BitBoolean();
705 DEBUG_PRINT(scaling_list_enabled_flag);
706 if (scaling_list_enabled_flag) {
707 DEBUG_TAB;
708 Boolean sps_scaling_list_data_present_flag = bv.get1BitBoolean();
709 DEBUG_PRINT(sps_scaling_list_data_present_flag);
710 if (sps_scaling_list_data_present_flag) {
711 // scaling_list_data()
712 DEBUG_TAB;
713 for (unsigned sizeId = 0; sizeId < 4; ++sizeId) {
714 DEBUG_PRINT(sizeId);
715 for (unsigned matrixId = 0; matrixId < (sizeId == 3 ? 2 : 6); ++matrixId) {
716 DEBUG_TAB;
717 DEBUG_PRINT(matrixId);
718 Boolean scaling_list_pred_mode_flag = bv.get1BitBoolean();
719 DEBUG_PRINT(scaling_list_pred_mode_flag);
720 if (!scaling_list_pred_mode_flag) {
721 (void)bv.get_expGolomb(); // scaling_list_pred_matrix_id_delta[sizeId][matrixId]
722 } else {
723 unsigned const c = 1 << (4+(sizeId<<1));
724 unsigned coefNum = c < 64 ? c : 64;
725 if (sizeId > 1) {
726 (void)bv.get_expGolomb(); // scaling_list_dc_coef_minus8[sizeId][matrixId]
727 }
728 for (i = 0; i < coefNum; ++i) {
729 (void)bv.get_expGolomb(); // scaling_list_delta_coef
730 }
731 }
732 }
733 }
734 }
735 }
736 bv.skipBits(2); // amp_enabled_flag, sample_adaptive_offset_enabled_flag
737 Boolean pcm_enabled_flag = bv.get1BitBoolean();
738 DEBUG_PRINT(pcm_enabled_flag);
739 if (pcm_enabled_flag) {
740 bv.skipBits(8); // pcm_sample_bit_depth_luma_minus1, pcm_sample_bit_depth_chroma_minus1
741 (void)bv.get_expGolomb(); // log2_min_pcm_luma_coding_block_size_minus3
742 (void)bv.get_expGolomb(); // log2_diff_max_min_pcm_luma_coding_block_size
743 bv.skipBits(1); // pcm_loop_filter_disabled_flag
744 }
745 unsigned num_short_term_ref_pic_sets = bv.get_expGolomb();
746 DEBUG_PRINT(num_short_term_ref_pic_sets);
747 unsigned num_negative_pics = 0, prev_num_negative_pics = 0;
748 unsigned num_positive_pics = 0, prev_num_positive_pics = 0;
749 for (i = 0; i < num_short_term_ref_pic_sets; ++i) {
750 // short_term_ref_pic_set(i):
751 DEBUG_TAB;
752 DEBUG_PRINT(i);
753 Boolean inter_ref_pic_set_prediction_flag = False;
754 if (i != 0) {
755 inter_ref_pic_set_prediction_flag = bv.get1BitBoolean();
756 }
757 DEBUG_PRINT(inter_ref_pic_set_prediction_flag);
758 if (inter_ref_pic_set_prediction_flag) {
759 DEBUG_TAB;
760 if (i == num_short_term_ref_pic_sets) {
761 // This can't happen here, but it's in the spec, so we include it for completeness
762 (void)bv.get_expGolomb(); // delta_idx_minus1
763 }
764 bv.skipBits(1); // delta_rps_sign
765 (void)bv.get_expGolomb(); // abs_delta_rps_minus1
766 unsigned NumDeltaPocs = prev_num_negative_pics + prev_num_positive_pics; // correct???
767 for (unsigned j = 0; j < NumDeltaPocs; ++j) {
768 DEBUG_PRINT(j);
769 Boolean used_by_curr_pic_flag = bv.get1BitBoolean();
770 DEBUG_PRINT(used_by_curr_pic_flag);
771 if (!used_by_curr_pic_flag) bv.skipBits(1); // use_delta_flag[j]
772 }
773 } else {
774 prev_num_negative_pics = num_negative_pics;
775 num_negative_pics = bv.get_expGolomb();
776 DEBUG_PRINT(num_negative_pics);
777 prev_num_positive_pics = num_positive_pics;
778 num_positive_pics = bv.get_expGolomb();
779 DEBUG_PRINT(num_positive_pics);
780 unsigned k;
781 for (k = 0; k < num_negative_pics; ++k) {
782 (void)bv.get_expGolomb(); // delta_poc_s0_minus1[k]
783 bv.skipBits(1); // used_by_curr_pic_s0_flag[k]
784 }
785 for (k = 0; k < num_positive_pics; ++k) {
786 (void)bv.get_expGolomb(); // delta_poc_s1_minus1[k]
787 bv.skipBits(1); // used_by_curr_pic_s1_flag[k]
788 }
789 }
790 }
791 Boolean long_term_ref_pics_present_flag = bv.get1BitBoolean();
792 DEBUG_PRINT(long_term_ref_pics_present_flag);
793 if (long_term_ref_pics_present_flag) {
794 DEBUG_TAB;
795 unsigned num_long_term_ref_pics_sps = bv.get_expGolomb();
796 DEBUG_PRINT(num_long_term_ref_pics_sps);
797 for (i = 0; i < num_long_term_ref_pics_sps; ++i) {
798 bv.skipBits(log2_max_pic_order_cnt_lsb_minus4); // lt_ref_pic_poc_lsb_sps[i]
799 bv.skipBits(1); // used_by_curr_pic_lt_sps_flag[1]
800 }
801 }
802 bv.skipBits(2); // sps_temporal_mvp_enabled_flag, strong_intra_smoothing_enabled_flag
803 Boolean vui_parameters_present_flag = bv.get1BitBoolean();
804 DEBUG_PRINT(vui_parameters_present_flag);
805 if (vui_parameters_present_flag) {
806 DEBUG_TAB;
807 analyze_vui_parameters(bv, num_units_in_tick, time_scale);
808 }
809 Boolean sps_extension_flag = bv.get1BitBoolean();
810 DEBUG_PRINT(sps_extension_flag);
811 }
812 }
813
// Size (in bytes) of the local buffer into which "analyze_sei_data()" copies a SEI NAL unit
// (after removing 'emulation prevention' bytes):
#define SEI_MAX_SIZE 5000 // larger than the largest possible SEI NAL unit

#ifdef DEBUG
// Debugging aid only: printable names for H.264 SEI payload types, indexed by "payloadType".
// The final entry (index MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264) is a catch-all;
// "analyze_sei_data()" maps every payloadType at or above that index onto it.
#define MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264 46
char const* sei_payloadType_description_h264[MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264+1] = {
  "buffering_period", //0
  "pic_timing", //1
  "pan_scan_rect", //2
  "filler_payload", //3
  "user_data_registered_itu_t_t35", //4
  "user_data_unregistered", //5
  "recovery_point", //6
  "dec_ref_pic_marking_repetition", //7
  "spare_pic", //8
  "scene_info", //9
  "sub_seq_info", //10
  "sub_seq_layer_characteristics", //11
  "sub_seq_characteristics", //12
  "full_frame_freeze", //13
  "full_frame_freeze_release", //14
  "full_frame_snapshot", //15
  "progressive_refinement_segment_start", //16
  "progressive_refinement_segment_end", //17
  "motion_constrained_slice_group_set", //18
  "film_grain_characteristics", //19
  "deblocking_filter_display_preference", //20
  "stereo_video_info", //21
  "post_filter_hint", //22
  "tone_mapping_info", //23
  "scalability_info", //24
  "sub_pic_scalable_layer", //25
  "non_required_layer_rep", //26
  "priority_layer_info", //27
  "layers_not_present", //28
  "layer_dependency_change", //29
  "scalable_nesting", //30
  "base_layer_temporal_hrd", //31
  "quality_layer_integrity_check", //32
  "redundant_pic_property", //33
  "tl0_dep_rep_index", //34
  "tl_switching_point", //35
  "parallel_decoding_info", //36
  "mvc_scalable_nesting", //37
  "view_scalability_info", //38
  "multiview_scene_info", //39
  "multiview_acquisition_info", //40
  "non_required_view_component", //41
  "view_dependency_change", //42
  "operation_points_not_present", //43
  "base_view_temporal_hrd", //44
  "frame_packing_arrangement", //45
  "reserved_sei_message" // 46 or higher
};
#endif
868
analyze_sei_data(u_int8_t nal_unit_type)869 void H264or5VideoStreamParser::analyze_sei_data(u_int8_t nal_unit_type) {
870 // Begin by making a copy of the NAL unit data, removing any 'emulation prevention' bytes:
871 u_int8_t sei[SEI_MAX_SIZE];
872 unsigned seiSize;
873 removeEmulationBytes(sei, sizeof sei, seiSize);
874
875 unsigned j = 1; // skip the initial byte (forbidden_zero_bit; nal_ref_idc; nal_unit_type); we've already seen it
876 while (j < seiSize) {
877 unsigned payloadType = 0;
878 do {
879 payloadType += sei[j];
880 } while (sei[j++] == 255 && j < seiSize);
881 if (j >= seiSize) break;
882
883 unsigned payloadSize = 0;
884 do {
885 payloadSize += sei[j];
886 } while (sei[j++] == 255 && j < seiSize);
887 if (j >= seiSize) break;
888
889 #ifdef DEBUG
890 char const* description;
891 if (fHNumber == 264) {
892 unsigned descriptionNum = payloadType <= MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264
893 ? payloadType : MAX_SEI_PAYLOAD_TYPE_DESCRIPTION_H264;
894 description = sei_payloadType_description_h264[descriptionNum];
895 } else { // 265
896 description =
897 payloadType == 3 ? "filler_payload" :
898 payloadType == 4 ? "user_data_registered_itu_t_t35" :
899 payloadType == 5 ? "user_data_unregistered" :
900 payloadType == 17 ? "progressive_refinement_segment_end" :
901 payloadType == 22 ? "post_filter_hint" :
902 (payloadType == 132 && nal_unit_type == SUFFIX_SEI_NUT) ? "decoded_picture_hash" :
903 nal_unit_type == SUFFIX_SEI_NUT ? "reserved_sei_message" :
904 payloadType == 0 ? "buffering_period" :
905 payloadType == 1 ? "pic_timing" :
906 payloadType == 2 ? "pan_scan_rect" :
907 payloadType == 6 ? "recovery_point" :
908 payloadType == 9 ? "scene_info" :
909 payloadType == 15 ? "picture_snapshot" :
910 payloadType == 16 ? "progressive_refinement_segment_start" :
911 payloadType == 19 ? "film_grain_characteristics" :
912 payloadType == 23 ? "tone_mapping_info" :
913 payloadType == 45 ? "frame_packing_arrangement" :
914 payloadType == 47 ? "display_orientation" :
915 payloadType == 128 ? "structure_of_pictures_info" :
916 payloadType == 129 ? "active_parameter_sets" :
917 payloadType == 130 ? "decoding_unit_info" :
918 payloadType == 131 ? "temporal_sub_layer_zero_index" :
919 payloadType == 133 ? "scalable_nesting" :
920 payloadType == 134 ? "region_refresh_info" : "reserved_sei_message";
921 }
922 fprintf(stderr, "\tpayloadType %d (\"%s\"); payloadSize %d\n", payloadType, description, payloadSize);
923 #endif
924
925 analyze_sei_payload(payloadType, payloadSize, &sei[j]);
926 j += payloadSize;
927 }
928 }
929
930 void H264or5VideoStreamParser
analyze_sei_payload(unsigned payloadType,unsigned payloadSize,u_int8_t * payload)931 ::analyze_sei_payload(unsigned payloadType, unsigned payloadSize, u_int8_t* payload) {
932 if (payloadType == 1/* pic_timing, for both H.264 and H.265 */) {
933 BitVector bv(payload, 0, 8*payloadSize);
934
935 DEBUG_TAB;
936 if (CpbDpbDelaysPresentFlag) {
937 unsigned cpb_removal_delay = bv.getBits(cpb_removal_delay_length_minus1 + 1);
938 DEBUG_PRINT(cpb_removal_delay);
939 unsigned dpb_output_delay = bv.getBits(dpb_output_delay_length_minus1 + 1);
940 DEBUG_PRINT(dpb_output_delay);
941 }
942 double prevDeltaTfiDivisor = DeltaTfiDivisor;
943 if (pic_struct_present_flag) {
944 unsigned pic_struct = bv.getBits(4);
945 DEBUG_PRINT(pic_struct);
946 // Use this to set "DeltaTfiDivisor" (which is used to compute the frame rate):
947 if (fHNumber == 264) {
948 DeltaTfiDivisor =
949 pic_struct == 0 ? 2.0 :
950 pic_struct <= 2 ? 1.0 :
951 pic_struct <= 4 ? 2.0 :
952 pic_struct <= 6 ? 3.0 :
953 pic_struct == 7 ? 4.0 :
954 pic_struct == 8 ? 6.0 :
955 2.0;
956 } else { // H.265
957 DeltaTfiDivisor =
958 pic_struct == 0 ? 2.0 :
959 pic_struct <= 2 ? 1.0 :
960 pic_struct <= 4 ? 2.0 :
961 pic_struct <= 6 ? 3.0 :
962 pic_struct == 7 ? 2.0 :
963 pic_struct == 8 ? 3.0 :
964 pic_struct <= 12 ? 1.0 :
965 2.0;
966 }
967 } else {
968 if (fHNumber == 264) {
969 // Need to get field_pic_flag from slice_header to set this properly! #####
970 } else { // H.265
971 DeltaTfiDivisor = 1.0;
972 }
973 }
974 // If "DeltaTfiDivisor" has changed, and we've already computed the frame rate, then
975 // adjust it, based on the new value of "DeltaTfiDivisor":
976 if (DeltaTfiDivisor != prevDeltaTfiDivisor && fParsedFrameRate != 0.0) {
977 usingSource()->fFrameRate = fParsedFrameRate
978 = fParsedFrameRate*(prevDeltaTfiDivisor/DeltaTfiDivisor);
979 #ifdef DEBUG
980 fprintf(stderr, "Changed frame rate to %f fps\n", usingSource()->fFrameRate);
981 #endif
982 }
983 // Ignore the rest of the payload (timestamps) for now... #####
984 }
985 }
986
flushInput()987 void H264or5VideoStreamParser::flushInput() {
988 fHaveSeenFirstStartCode = False;
989 fHaveSeenFirstByteOfNALUnit = False;
990
991 StreamParser::flushInput();
992 }
993
// Redefined virtual function: extracts the next NAL unit from the input byte stream into the
// output frame (preceded by a 4-byte start code if "fOutputStartCodeSize" > 0).
//
// Along the way it:
//   - saves copies of VPS, SPS and PPS NAL units in the framer ("usingSource()");
//   - (until a frame rate has been parsed) analyzes VPS/SPS NAL units for frame rate information;
//   - analyzes SEI NAL units;
//   - sets the framer's presentation time, and decides whether this NAL unit ends an 'access unit'
//     (in which case the picture-end marker, picture count and next presentation time are updated).
//
// Returns the size of the frame ("curFrameSize()") when a complete NAL unit was delivered.
// Returns 0 if parsing was interrupted by an "int" exception (see the "catch" at the end),
// or after the trailing NAL unit at EOF has been consumed.
unsigned H264or5VideoStreamParser::parse() {
  try {
    // The stream must start with a 0x00000001:
    if (!fHaveSeenFirstStartCode) {
      // Skip over any input bytes that precede the first 0x00000001:
      u_int32_t first4Bytes;
      while ((first4Bytes = test4Bytes()) != 0x00000001) {
	get1Byte(); setParseState(); // ensures that we progress over bad data
      }
      skipBytes(4); // skip this initial code

      setParseState();
      fHaveSeenFirstStartCode = True; // from now on
    }

    // Only emit the start code at the very beginning of an output frame (i.e., not when
    // this call continues a frame that an earlier, interrupted call had already begun):
    if (fOutputStartCodeSize > 0 && curFrameSize() == 0 && !haveSeenEOF()) {
      // Include a start code in the output:
      save4Bytes(0x00000001);
    }

    // Then save everything up until the next 0x00000001 (4 bytes) or 0x000001 (3 bytes), or we hit EOF.
    // Also make note of the first byte, because it contains the "nal_unit_type":
    if (haveSeenEOF()) {
      // We hit EOF the last time that we tried to parse this data, so we know that any remaining unparsed data
      // forms a complete NAL unit, and that there's no 'start code' at the end:
      unsigned remainingDataSize = totNumValidBytes() - curOffset();
#ifdef DEBUG
      unsigned const trailingNALUnitSize = remainingDataSize;
#endif
      // Copy every remaining byte to the output, remembering the first one (the NAL unit header):
      while (remainingDataSize > 0) {
	u_int8_t nextByte = get1Byte();
	if (!fHaveSeenFirstByteOfNALUnit) {
	  fFirstByteOfNALUnit = nextByte;
	  fHaveSeenFirstByteOfNALUnit = True;
	}
	saveByte(nextByte);
	--remainingDataSize;
      }

#ifdef DEBUG
      if (fHNumber == 264) {
	u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5;
	u_int8_t nal_unit_type = fFirstByteOfNALUnit&0x1F;
	fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
		trailingNALUnitSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]);
      } else { // 265
	u_int8_t nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1;
	fprintf(stderr, "Parsed trailing %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n",
		trailingNALUnitSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]);
      }
#endif

      (void)get1Byte(); // forces another read, which will cause EOF to get handled for real this time
      return 0;
    } else {
      u_int32_t next4Bytes = test4Bytes();
      // The first byte of the NAL unit is recorded only once, even if this call is a
      // continuation of an earlier call that was interrupted part-way through the NAL unit:
      if (!fHaveSeenFirstByteOfNALUnit) {
	fFirstByteOfNALUnit = next4Bytes>>24;
	fHaveSeenFirstByteOfNALUnit = True;
      }
      while (next4Bytes != 0x00000001 && (next4Bytes&0xFFFFFF00) != 0x00000100) {
	// We save at least some of "next4Bytes".
	if ((unsigned)(next4Bytes&0xFF) > 1) {
	  // Common case: 0x00000001 or 0x000001 definitely doesn't begin anywhere in "next4Bytes", so we save all of it:
	  save4Bytes(next4Bytes);
	  skipBytes(4);
	} else {
	  // Save the first byte, and continue testing the rest:
	  saveByte(next4Bytes>>24);
	  skipBytes(1);
	}
	setParseState(); // ensures forward progress
	next4Bytes = test4Bytes();
      }
      // Assert: next4Bytes starts with 0x00000001 or 0x000001, and we've saved all previous bytes (forming a complete NAL unit).
      // Skip over these remaining bytes, up until the start of the next NAL unit:
      if (next4Bytes == 0x00000001) {
	skipBytes(4);
      } else {
	skipBytes(3);
      }
    }

    fHaveSeenFirstByteOfNALUnit = False; // for the next NAL unit that we'll parse
    // Extract "nal_unit_type" from the header byte: the low 5 bits for H.264; bits 1..6 for H.265:
    u_int8_t nal_unit_type;
    if (fHNumber == 264) {
      nal_unit_type = fFirstByteOfNALUnit&0x1F;
#ifdef DEBUG
      u_int8_t nal_ref_idc = (fFirstByteOfNALUnit&0x60)>>5;
      fprintf(stderr, "Parsed %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
	      curFrameSize()-fOutputStartCodeSize, nal_ref_idc, nal_unit_type, nal_unit_type_description_h264[nal_unit_type]);
#endif
    } else { // 265
      nal_unit_type = (fFirstByteOfNALUnit&0x7E)>>1;
#ifdef DEBUG
      fprintf(stderr, "Parsed %d-byte NAL-unit (nal_unit_type: %d (\"%s\"))\n",
	      curFrameSize()-fOutputStartCodeSize, nal_unit_type, nal_unit_type_description_h265[nal_unit_type]);
#endif
    }

    // Now that we have found (& copied) a NAL unit, process it if it's of special interest to us:
    // (In each case below, the saved/analyzed data excludes any start code that we output.)
    if (isVPS(nal_unit_type)) { // Video parameter set
      // First, save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfVPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);

      if (fParsedFrameRate == 0.0) {
	// We haven't yet parsed a frame rate from the stream.
	// So parse this NAL unit to check whether frame rate information is present:
	unsigned num_units_in_tick, time_scale;
	analyze_video_parameter_set_data(num_units_in_tick, time_scale);
	if (time_scale > 0 && num_units_in_tick > 0) {
	  usingSource()->fFrameRate = fParsedFrameRate
	    = time_scale/(DeltaTfiDivisor*num_units_in_tick);
#ifdef DEBUG
	  fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
#endif
	} else {
#ifdef DEBUG
	  fprintf(stderr, "\tThis \"Video Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
#endif
	}
      }
    } else if (isSPS(nal_unit_type)) { // Sequence parameter set
      // First, save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfSPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);

      if (fParsedFrameRate == 0.0) {
	// We haven't yet parsed a frame rate from the stream.
	// So parse this NAL unit to check whether frame rate information is present:
	unsigned num_units_in_tick, time_scale;
	analyze_seq_parameter_set_data(num_units_in_tick, time_scale);
	if (time_scale > 0 && num_units_in_tick > 0) {
	  usingSource()->fFrameRate = fParsedFrameRate
	    = time_scale/(DeltaTfiDivisor*num_units_in_tick);
#ifdef DEBUG
	  fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
#endif
	} else {
#ifdef DEBUG
	  fprintf(stderr, "\tThis \"Sequence Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
#endif
	}
      }
    } else if (isPPS(nal_unit_type)) { // Picture parameter set
      // Save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfPPS(fStartOfFrame + fOutputStartCodeSize, curFrameSize() - fOutputStartCodeSize);
    } else if (isSEI(nal_unit_type)) { // Supplemental enhancement information (SEI)
      analyze_sei_data(nal_unit_type);
      // Later, perhaps adjust "fPresentationTime" if we saw a "pic_timing" SEI payload??? #####
    }

    usingSource()->setPresentationTime();
#ifdef DEBUG
    unsigned long secs = (unsigned long)usingSource()->fPresentationTime.tv_sec;
    unsigned uSecs = (unsigned)usingSource()->fPresentationTime.tv_usec;
    fprintf(stderr, "\tPresentation time: %lu.%06u\n", secs, uSecs);
#endif

    // Now, check whether this NAL unit ends an 'access unit'.
    // (RTP streamers need to know this in order to figure out whether or not to set the "M" bit.)
    Boolean thisNALUnitEndsAccessUnit;
    if (haveSeenEOF() || isEOF(nal_unit_type)) {
      // There is no next NAL unit, so we assume that this one ends the current 'access unit':
      thisNALUnitEndsAccessUnit = True;
    } else if (usuallyBeginsAccessUnit(nal_unit_type)) {
      // These NAL units usually *begin* an access unit, so assume that they don't end one here:
      thisNALUnitEndsAccessUnit = False;
    } else {
      // We need to check the *next* NAL unit to figure out whether
      // the current NAL unit ends an 'access unit':
      // (We only peek at these bytes here; they get consumed by the next call to "parse()".)
      u_int8_t firstBytesOfNextNALUnit[3];
      testBytes(firstBytesOfNextNALUnit, 3);

      u_int8_t const& next_nal_unit_type = fHNumber == 264
	? (firstBytesOfNextNALUnit[0]&0x1F) : ((firstBytesOfNextNALUnit[0]&0x7E)>>1);
      if (isVCL(next_nal_unit_type)) {
	// The high-order bit of the byte after the "nal_unit_header" tells us whether it's
	// the start of a new 'access unit' (and thus the current NAL unit ends an 'access unit'):
	// (The "nal_unit_header" is 1 byte long for H.264, and 2 bytes long for H.265.)
	u_int8_t const byteAfter_nal_unit_header
	  = fHNumber == 264 ? firstBytesOfNextNALUnit[1] : firstBytesOfNextNALUnit[2];
	thisNALUnitEndsAccessUnit = (byteAfter_nal_unit_header&0x80) != 0;
      } else if (usuallyBeginsAccessUnit(next_nal_unit_type)) {
	// The next NAL unit's type is one that usually appears at the start of an 'access unit',
	// so we assume that the current NAL unit ends an 'access unit':
	thisNALUnitEndsAccessUnit = True;
      } else {
	// The next NAL unit definitely doesn't start a new 'access unit',
	// which means that the current NAL unit doesn't end one:
	thisNALUnitEndsAccessUnit = False;
      }
    }

    if (thisNALUnitEndsAccessUnit) {
#ifdef DEBUG
      fprintf(stderr, "*****This NAL unit ends the current access unit*****\n");
#endif
      usingSource()->fPictureEndMarker = True;
      ++usingSource()->fPictureCount;

      // Note that the presentation time for the next NAL unit will be different:
      // (It's the current presentation time plus one frame duration (1/fFrameRate seconds),
      //  with any overflow of the microseconds field carried into the seconds field.)
      struct timeval& nextPT = usingSource()->fNextPresentationTime; // alias
      nextPT = usingSource()->fPresentationTime;
      double nextFraction = nextPT.tv_usec/1000000.0 + 1/usingSource()->fFrameRate;
      unsigned nextSecsIncrement = (long)nextFraction;
      nextPT.tv_sec += (long)nextSecsIncrement;
      nextPT.tv_usec = (long)((nextFraction - nextSecsIncrement)*1000000);
    }
    // Record our position, now that this NAL unit has been completely handled:
    setParseState();

    return curFrameSize();
  } catch (int /*e*/) {
    // An "int" was thrown from within the "try" block - presumably by one of the input-reading
    // calls above when it needed data that hasn't arrived yet (NOTE(review): confirm against
    // "StreamParser").  We report 'nothing delivered'; "parse()" gets called again later.
#ifdef DEBUG
    fprintf(stderr, "H264or5VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n");
#endif
    return 0;  // the parsing got interrupted
  }
}
1211
// Copies NAL unit data from "from" (of size "fromSize") to "to", removing any H.264/H.265
// 'emulation prevention' bytes: each 3-byte sequence 0x00 0x00 0x03 in the input becomes
// the 2-byte sequence 0x00 0x00 in the output.
// At most "toMaxSize" bytes are written; if the output would be larger than this,
// it is truncated.  Returns the number of bytes written to "to".
unsigned removeH264or5EmulationBytes(u_int8_t* to, unsigned toMaxSize,
                                     u_int8_t const* from, unsigned fromSize) {
  unsigned toSize = 0;
  unsigned i = 0;
  // Note: We continue for as long as there's room for at least *one* more output byte, so that
  // the final byte of "to" can be used.  (Requiring room for two bytes on every iteration
  // would truncate - by one byte - data that would otherwise have fit exactly.)
  while (i < fromSize && toSize < toMaxSize) {
    if (i+2 < fromSize && from[i] == 0 && from[i+1] == 0 && from[i+2] == 3) {
      // 0x000003 => 0x0000
      to[toSize++] = 0;
      if (toSize == toMaxSize) break; // no room for the second zero byte; the output is truncated
      to[toSize++] = 0;
      i += 3;
    } else {
      to[toSize++] = from[i++];
    }
  }

  return toSize;
}
1230