1 /*
2  * Utilities for reading H.264 elementary streams.
3  *
4  * ***** BEGIN LICENSE BLOCK *****
5  * Version: MPL 1.1
6  *
7  * The contents of this file are subject to the Mozilla Public License Version
8  * 1.1 (the "License"); you may not use this file except in compliance with
9  * the License. You may obtain a copy of the License at
10  * http://www.mozilla.org/MPL/
11  *
12  * Software distributed under the License is distributed on an "AS IS" basis,
13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14  * for the specific language governing rights and limitations under the
15  * License.
16  *
17  * The Original Code is the MPEG TS, PS and ES tools.
18  *
19  * The Initial Developer of the Original Code is Amino Communications Ltd.
20  * Portions created by the Initial Developer are Copyright (C) 2008
21  * the Initial Developer. All Rights Reserved.
22  *
23  * Contributor(s):
24  *   Amino Communications Ltd, Swavesey, Cambridge UK
25  *
26  * ***** END LICENSE BLOCK *****
27  */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <errno.h>
32 #include <string.h>
33 #ifdef _WIN32
34 #include <io.h>
35 #else // _WIN32
36 #include <unistd.h>
37 #endif // _WIN32
38 
39 #include "compat.h"
40 #include "misc_fns.h"
41 #include "pes_fns.h"
42 #include "tswrite_fns.h"
43 #include "es_fns.h"
44 
45 #define DEBUG 0
46 
47 // A lone forwards reference
48 static inline int get_more_data(ES_p  es);
49 
50 // ------------------------------------------------------------
51 // Basic functions
52 // ------------------------------------------------------------
53 /*
54  * Open an ES file and build an elementary stream datastructure to read
55  * it with.
56  *
57  * - `filename` is the ES files name. As a special case, if this is NULL
58  *   then standard input (STDIN_FILENO) will be read from.
59  *
60  * Opens the file for read, builds the datastructure, and reads the first 3
61  * bytes of the input file (this is done to prime the triple-byte search
62  * mechanism).
63  *
64  * Returns 0 if all goes well, 1 otherwise.
65  */
open_elementary_stream(char * filename,ES_p * es)66 extern int open_elementary_stream(char  *filename,
67                                   ES_p  *es)
68 {
69   int err;
70   int input;
71 
72   if (filename == NULL)
73     input = STDIN_FILENO;
74   else
75   {
76     input = open_binary_file(filename,FALSE);
77     if (input == -1) return 1;
78   }
79 
80   err = build_elementary_stream_file(input,es);
81   if (err)
82   {
83     fprintf(stderr,"### Error building elementary stream for file %s\n",
84             filename);
85     return 1;
86   }
87   return 0;
88 }
89 
/*
 * Reset the read-ahead state of an elementary stream and fetch its first
 * chunk of data (via `get_more_data()`).
 *
 * Complains if fewer than 3 bytes are available, and otherwise leaves the
 * triple-byte context as FF FF FF and the "next byte" position at (0,0).
 *
 * Returns 0 if all goes well, otherwise whatever `get_more_data()` returned
 * (which may be EOF), or 1 if fewer than 3 bytes are available.
 */
static int setup_readahead(ES_p  es)
{
  int status;

  // Nothing has been read, and there is no data buffer to look at, yet
  es->read_ahead_posn = 0;
  es->read_ahead_len = 0;

  es->data_ptr = NULL;
  es->data_end = NULL;
  es->data = NULL;

  es->last_packet_es_data_len = 0;
  es->last_packet_posn = 0;

  // Fetch the first chunk of data
  status = get_more_data(es);
  if (status != 0)
    return status;

  // Three bytes are the minimum needed to hold a start code prefix
  if (!es->reading_ES)
  {
    if (es->reader->packet->es_data_len < 3)
    {
      fprintf(stderr,"### File PES packet only contains %d byte%s\n",
              es->reader->packet->es_data_len,
              (es->reader->packet->es_data_len==1?"":"s"));
      return 1;
    }
  }
  else if (es->read_ahead_len < 3)
  {
    fprintf(stderr,"### File only contains %d byte%s\n",
            es->read_ahead_len,(es->read_ahead_len==1?"":"s"));
    return 1;
  }

  if (DEBUG)
    printf("File starts %02x %02x %02x\n",es->data[0],es->data[1],es->data[2]);

  // The data is buffered, but none of it has been consumed yet, so the
  // triple-byte context is "unset" and we are at the very start
  es->cur_byte = 0xFF;
  es->prev1_byte = 0xFF;
  es->prev2_byte = 0xFF;
  es->posn_of_next_byte.inpacket = 0;
  es->posn_of_next_byte.infile = 0;

  return 0;
}
139 
140 /*
141  * Build an elementary stream datastructure attached to an input file.
142  * This is intended for reading ES data files.
143  *
144  * - `input` is the file stream to read from.
145  *
146  * Builds the datastructure, and reads the first 3 bytes of the input
147  * file (this is done to prime the triple-byte search mechanism).
148  *
149  * Returns 0 if all goes well, 1 otherwise.
150  */
build_elementary_stream_file(int input,ES_p * es)151 extern int build_elementary_stream_file(int    input,
152                                         ES_p  *es)
153 {
154   ES_p new = malloc(SIZEOF_ES);
155   if (new == NULL)
156   {
157     fprintf(stderr,"### Unable to allocate elementary stream datastructure\n");
158     return 1;
159   }
160 
161   new->reading_ES = TRUE;
162   new->input = input;
163   new->reader = NULL;
164 
165   setup_readahead(new);
166 
167   *es = new;
168   return 0;
169 }
170 
171 /*
172  * Build an elementary stream datastructure for use with a PES reader.
173  * Reads the first (or next) three bytes of the ES.
174  *
175  * This reads data from the PES video data, ignoring any audio data.
176  *
177  * - `reader` is the PES reader we want to use to read our TS or PS data.
178  *
179  * The caller must explicitly close the PES reader as well as closing the
180  * elementary stream (closing the ES does not affect the PES reader).
181  *
182  * Returns 0 if all goes well, 1 otherwise.
183  */
build_elementary_stream_PES(PES_reader_p reader,ES_p * es)184 extern int build_elementary_stream_PES(PES_reader_p  reader,
185                                        ES_p         *es)
186 {
187   ES_p new = malloc(SIZEOF_ES);
188   if (new == NULL)
189   {
190     fprintf(stderr,"### Unable to allocate elementary stream datastructure\n");
191     return 1;
192   }
193 
194   new->reading_ES = FALSE;
195   new->input = -1;
196   new->reader = reader;
197 
198   setup_readahead(new);
199 
200   *es = new;
201   return 0;
202 }
203 
204 /*
205  * Tidy up the elementary stream datastructure after we've finished with it.
206  *
207  * Specifically:
208  *
209  * - free the datastructure
210  * - set `es` to NULL
211  *
212  * No return status is given, since there's not much one can do if anything
213  * *did* go wrong, and if something went wrong and the program is continuing,
214  * it's bound to show up pretty soon.
215  */
free_elementary_stream(ES_p * es)216 extern void free_elementary_stream(ES_p  *es)
217 {
218   (*es)->input = -1;  // "forget" our input
219   free(*es);
220   *es = NULL;
221 }
222 
223 /*
224  * Tidy up the elementary stream datastructure after we've finished with it.
225  *
226  * Specifically:
227  *
228  * - close the input file (if its stream is set, and if it's not STDIN)
229  * - call `free_elementary_stream()`
230  *
231  * No return status is given, since there's not much one can do if anything
232  * *did* go wrong, and if something went wrong and the program is continuing,
233  * it's bound to show up pretty soon.
234  */
close_elementary_stream(ES_p * es)235 extern void close_elementary_stream(ES_p  *es)
236 {
237   int input;
238   if (*es == NULL)
239     return;
240   input = (*es)->input;
241   if (input != -1 && input != STDIN_FILENO)
242     (void) close_file(input);
243   free_elementary_stream(es);
244 }
245 
246 /*
247  * Ask an ES context if changed input is available.
248  *
249  * This is a convenience wrapper to save querying the ES context to see
250  * if it is (a) reading from PES, (b) automatically writing the PES packets
251  * out via a TS writer, and (c) if said TS writer has a changed command.
252  *
253  * Calls `tswrite_command_changed()` on the TS writer associated with this ES.
254  *
255  * Returns TRUE if there is a changed command.
256  */
es_command_changed(ES_p es)257 extern int es_command_changed(ES_p  es)
258 {
259   if (es->reading_ES)
260     return FALSE;
261 
262   if (es->reader->tswriter == NULL)
263     return FALSE;
264 
265   return tswrite_command_changed(es->reader->tswriter);
266 }
267 
268 // ------------------------------------------------------------
269 // Handling elementary stream data units
270 // ------------------------------------------------------------
271 /*
272  * Prepare the contents of a (new) ES unit datastructure.
273  *
274  * Allocates a new data array, and unsets the counts.
275  *
276  * Returns 0 if it succeeds, 1 if some error occurs.
277  */
setup_ES_unit(ES_unit_p unit)278 extern int setup_ES_unit(ES_unit_p  unit)
279 {
280   unit->data = malloc(ES_UNIT_DATA_START_SIZE);
281   if (unit->data == NULL)
282   {
283     fprintf(stderr,"### Unable to allocate ES unit data buffer\n");
284     return 1;
285   }
286   unit->data_len = 0;
287   unit->data_size = ES_UNIT_DATA_START_SIZE;
288   unit->start_posn.infile = 0;
289   unit->start_posn.inpacket = 0;
290 
291   unit->PES_had_PTS = FALSE;    // See the header file
292   return 0;
293 }
294 
295 /*
296  * Tidy up an ES unit datastructure after we've finished with it.
297  *
298  * (Frees the internal data array, and unsets the counts)
299  */
clear_ES_unit(ES_unit_p unit)300 extern void clear_ES_unit(ES_unit_p  unit)
301 {
302   if (unit->data != NULL)
303   {
304     free(unit->data);
305     unit->data = NULL;
306     unit->data_size = 0;
307     unit->data_len = 0;
308   }
309 }
310 
311 /*
312  * Build a new ES unit datastructure.
313  *
314  * Returns 0 if it succeeds, 1 if some error occurs.
315  */
build_ES_unit(ES_unit_p * unit)316 extern int build_ES_unit(ES_unit_p  *unit)
317 {
318   int err;
319   ES_unit_p  new = malloc(SIZEOF_ES_UNIT);
320   if (new == NULL)
321   {
322     fprintf(stderr,"### Unable to allocate ES unit datastructure\n");
323     return 1;
324   }
325   err = setup_ES_unit(new);
326   if (err)
327   {
328     free(new);
329     return 1;
330   }
331   *unit = new;
332   return 0;
333 }
334 
335 /*
336  * Build a new ES unit datastructure, from a given data array.
337  *
338  * Takes a copy of 'data'. Sets 'start_code' appropriately,
339  * sets 'start_posn' to (0,0), and 'PES_had_PTS' to FALSE.
340  *
341  * Returns 0 if it succeeds, 1 if some error occurs.
342  */
build_ES_unit_from_data(ES_unit_p * unit,byte * data,uint32_t data_len)343 extern int build_ES_unit_from_data(ES_unit_p  *unit,
344                                    byte       *data,
345                                    uint32_t    data_len)
346 {
347   ES_unit_p  new = malloc(SIZEOF_ES_UNIT);
348   if (new == NULL)
349   {
350     fprintf(stderr,"### Unable to allocate ES unit datastructure\n");
351     return 1;
352   }
353   new->data = malloc(data_len);
354   if (new->data == NULL)
355   {
356     fprintf(stderr,"### Unable to allocate ES unit data buffer\n");
357     return 1;
358   }
359   (void) memcpy(new->data, data, data_len);
360   new->data_len  = data_len;
361   new->data_size = data_len;
362   new->start_code = data[3];
363   new->start_posn.infile = 0;
364   new->start_posn.inpacket = 0;
365   new->PES_had_PTS = FALSE;    // See the header file
366   *unit = new;
367   return 0;
368 }
369 
370 /*
371  * Tidy up and free an ES unit datastructure after we've finished with it.
372  *
373  * Empties the ES unit datastructure, frees it, and sets `unit` to NULL.
374  *
375  * If `unit` is already NULL, does nothing.
376  */
free_ES_unit(ES_unit_p * unit)377 extern void free_ES_unit(ES_unit_p  *unit)
378 {
379   if (*unit == NULL)
380     return;
381   clear_ES_unit(*unit);
382   free(*unit);
383   *unit = NULL;
384 }
385 
386 /*
387  * Print out some information this ES unit, on the given stream
388  */
report_ES_unit(FILE * stream,ES_unit_p unit)389 extern void report_ES_unit(FILE      *stream,
390                            ES_unit_p  unit)
391 {
392   byte s = unit->start_code;
393   fprintf(stream,OFFSET_T_FORMAT_08 "/%4d: ES unit (%02x '%d%d%d%d %d%d%d%d')",
394           unit->start_posn.infile,unit->start_posn.inpacket,s,
395           (s&0x80)>>7,(s&0x40)>>6,(s&0x20)>>5,(s&0x10)>>4,
396           (s&0x08)>>3,(s&0x04)>>2,(s&0x02)>>1,(s&0x01));
397 
398   // Show the data bytes - but we don't need to show the first 4,
399   // since we know they're 00 00 01 <start-code>
400   if (unit->data_len > 0)
401   {
402     int ii;
403     int data_len = unit->data_len - 4;
404     int show_len = (data_len>10?10:data_len);
405     fprintf(stream," %6d:",data_len);
406     for (ii = 0; ii < show_len; ii++)
407       fprintf(stream," %02x",unit->data[4+ii]);
408     if (show_len < data_len)
409       fprintf(stream,"...");
410   }
411   fprintf(stream,"\n");
412 }
413 
414 // ------------------------------------------------------------
415 // ES unit *data* stuff
416 // ------------------------------------------------------------
417 /*
418  * A wrapper for `read_next_PES_ES_packet()`, to save us forgetting things
419  * we need to do when we call it.
420  *
421  * Returns 0 if it succeeds, EOF if the end-of-file is read, otherwise
422  * 1 if some error occurs.
423  */
get_next_pes_packet(ES_p es)424 static inline int get_next_pes_packet(ES_p  es)
425 {
426   int  err;
427   PES_reader_p  reader = es->reader;
428 
429   // Before reading the *next* packet, remember where the last one was
430   if (reader->packet == NULL)
431   {
432     // What can we do if there was no last packet?
433     es->last_packet_posn = 0;
434     es->last_packet_es_data_len = 0;
435   }
436   else
437   {
438     es->last_packet_posn = reader->packet->posn;
439     es->last_packet_es_data_len = reader->packet->es_data_len;
440   }
441 
442   err = read_next_PES_ES_packet(es->reader);
443   if (err) return err;
444 
445   // Point to our (new) data buffer
446   es->data = reader->packet->es_data;
447   es->data_end = es->data + reader->packet->es_data_len;
448   es->data_ptr = es->data;
449 
450   es->posn_of_next_byte.infile = reader->packet->posn;
451   es->posn_of_next_byte.inpacket = 0;
452   return 0;
453 }
454 
455 /*
456  * Read some more data into our read-ahead buffer. For a "bare" file,
457  * reads the next buffer-full in, and for PES based data, reads the
458  * next PES packet that contains ES data.
459  *
460  * Returns 0 if it succeeds, EOF if the end-of-file is read, otherwise
461  * 1 if some error occurs.
462  */
get_more_data(ES_p es)463 static inline int get_more_data(ES_p  es)
464 {
465   if (es->reading_ES)
466   {
467     // Call `read` directly - we don't particularly mind if we get a "short"
468     // read, since we'll just catch up later on
469 #ifdef _WIN32
470     int len = _read(es->input,&es->read_ahead,ES_READ_AHEAD_SIZE);
471 #else
472     ssize_t  len = read(es->input,&es->read_ahead,ES_READ_AHEAD_SIZE);
473 #endif
474     if (len == 0)
475       return EOF;
476     else if (len == -1)
477     {
478       fprintf(stderr,"### Error reading next bytes: %s\n",strerror(errno));
479       return 1;
480     }
481     es->read_ahead_posn += es->read_ahead_len;  // length of the *last* buffer
482     es->read_ahead_len = len;
483     es->data = es->read_ahead;     // should be done in the setup function
484     es->data_end = es->data + len; // one beyond the last byte
485     es->data_ptr = es->data;
486     return 0;
487   }
488   else
489   {
490     return get_next_pes_packet(es);
491   }
492 }
493 
494 /*
495  * Find the start of the next ES unit - i.e., a 00 00 01 start code prefix.
496  *
497  * Doesn't move the read position if we're already *at* the start of
498  * an ES unit.
499  *
500  * ((new scheme: Leaves the data_ptr set to read the *next* byte, since
501  * we know that we've "used up" the 00 00 01 at the start of this unit.))
502  *
503  * Returns 0 if it succeeds, EOF if the end-of-file is read, otherwise
504  * 1 if some error occurs.
505  */
find_ES_unit_start(ES_p es,ES_unit_p unit)506 static int find_ES_unit_start(ES_p       es,
507                               ES_unit_p  unit)
508 {
509   int   err;
510   byte  prev1 = es->prev1_byte;
511   byte  prev2 = es->prev2_byte;
512 
513   // In almost all cases (hopefully, except for the very start of the file),
514   // a previous call to find_ES_unit_end will already have positioned us
515   // "over" the start of the next unit
516   for (;;)
517   {
518     byte  *ptr;
519     for (ptr = es->data_ptr; ptr < es->data_end; ptr++)
520     {
521       if (prev2 == 0x00 && prev1 == 0x00 && *ptr == 0x01)
522       {
523         es->prev1_byte = es->prev2_byte = 0x00;
524         es->cur_byte = 0x01;
525         if (es->reading_ES)
526         {
527           unit->start_posn.infile = es->read_ahead_posn + (ptr - es->data) - 2;
528         }
529         else
530         {
531           unit->start_posn.infile = es->reader->packet->posn;
532           unit->start_posn.inpacket = (ptr - es->data) - 2;
533           if (unit->start_posn.inpacket < 0)
534           {
535             unit->start_posn.infile = es->last_packet_posn;
536             unit->start_posn.inpacket += es->last_packet_es_data_len;
537           }
538           // Does the PES packet that we are starting in have a PTS?
539           unit->PES_had_PTS = es->reader->packet->has_PTS;
540         }
541         es->data_ptr = ptr + 1; // the *next* byte to read
542         unit->data[0] = 0x00;   // i.e., the values we just read
543         unit->data[1] = 0x00;
544         unit->data[2] = 0x01;
545         unit->data_len = 3;
546         return 0;
547       }
548       prev2 = prev1;
549       prev1 = *ptr;
550     }
551 
552     // We've run out of data - get some more
553     err = get_more_data(es);
554     if (err) return err;
555   }
556 }
557 
558 /*
559  * Find (read to) the end of the current ES unit.
560  *
561  * Reads to just before the next 00 00 01 start code prefix.
562  *
563  *    H.264 rules would also allow us to read to just before a 00 00 00
564  *    sequence, but we shall ignore this ability, because when reading
565  *    ES we want to ensure that there are no bytes "left out" of the ES
566  *    units, so that the sum of the lengths of all the ES units we read will
567  *    be the same as the length of data we have read. This is desirable
568  *    because it makes handling ES via PES much easier (we can, for instance,
569  *    position to the first ES unit of a picture, and then just read in N
570  *    bytes, where N is the sum of the lengths of the ES units making up the
571  *    picture, which is much more efficient than having to read in individual
572  *    ES units, and takes less room to remember than having to remember the
573  *    end position (offset of PES packet in file + offset of end of ES unit in
574  *    PES packet)).
575  *
576  * ((new scheme: Leaves the data_ptr set to read the current byte again,
577  * since we know that, in general, we want to detect it in find_ES_unit_start
578  * as the 01 following on from a 00 and a 00.))
579  *
580  * Returns 0 if it succeeds, otherwise 1 if some error occurs.
581  *
582  * Note that finding end-of-file is not counted as an error - it is
583  * assumed that it is just the natural end of the ES unit.
584  */
find_ES_unit_end(ES_p es,ES_unit_p unit)585 static int find_ES_unit_end(ES_p       es,
586                             ES_unit_p  unit)
587 {
588   int   err;
589   byte  prev1 = es->cur_byte;
590   byte  prev2 = es->prev1_byte;
591   for (;;)
592   {
593     byte  *ptr;
594     for (ptr = es->data_ptr; ptr < es->data_end; ptr++)
595     {
596       // Have we reached the end of our unit?
597       // We know we are if we've found the next 00 00 01 start code prefix.
598       // (as stated in the header comment above, we're ignoring the H.264
599       // ability to end if we've found a 00 00 00 sequence)
600       if (prev2 == 0x00 && prev1 == 0x00 && *ptr == 0x01)
601       {
602         es->data_ptr = ptr;     // remember where we've got to
603         es->prev2_byte = 0x00;  // we know prev1_byte is already 0
604         es->cur_byte = 0x01;
605         // We've read two 00 bytes we don't need into our data buffer...
606         unit->data_len -= 2;
607 
608         if (es->reading_ES)
609         {
610           es->posn_of_next_byte.infile = es->read_ahead_posn +
611             (ptr - es->data) - 2;
612         }
613         else
614         {
615           es->posn_of_next_byte.infile = es->reader->packet->posn;
616           es->posn_of_next_byte.inpacket = (ptr - es->data) - 2;
617         }
618         return 0;
619       }
620 
621       // Otherwise, it's a data byte
622       if (unit->data_len == unit->data_size)
623       {
624         int newsize = unit->data_size + ES_UNIT_DATA_INCREMENT;
625         unit->data = realloc(unit->data,newsize);
626         if (unit->data == NULL)
627         {
628           fprintf(stderr,"### Unable to extend ES unit data array\n");
629           return 1;
630         }
631         unit->data_size = newsize;
632       }
633       unit->data[unit->data_len++] = *ptr;
634 
635       prev2 = prev1;
636       prev1 = *ptr;
637     }
638 
639     // We've run out of data (ptr == es->data_end) - get some more
640     err = get_more_data(es);
641     if (err == EOF)
642     {
643       // Reaching the end of file is a legitimate way of stopping!
644       es->data_ptr = ptr;     // remember where we've got to
645       es->prev2_byte = prev2;
646       es->prev1_byte = prev1;
647       es->cur_byte = 0xFF;    // the notional byte off the end of the file
648       //es->cur_byte   = *ptr;
649 
650       // Pretend there's a "next byte"
651       if (es->reading_ES)
652       {
653         es->posn_of_next_byte.infile = es->read_ahead_posn + (ptr - es->data);
654       }
655       else
656       {
657         es->posn_of_next_byte.inpacket = (ptr - es->data);
658       }
659       return 0;
660     }
661     else if (err)
662       return err;
663 
664     if (!es->reading_ES)
665     {
666       // If we update this now, it will be correct when we return,
667       // even if we return because of a later EOF
668       es->posn_of_next_byte.infile = es->reader->packet->posn;
669 
670       // Does the PES packet that we have just read in have a PTS?
671       // If it does, then there's a very good chance (subject to a 00 00 01
672       // being split between PES packets) that our ES unit has a PTS "around"
673       // it
674       if (es->reader->packet->has_PTS)
675         unit->PES_had_PTS = TRUE;
676     }
677   }
678 }
679 
680 /*
681  * Find and read in the next ES unit.
682  *
683  * In general, unless there are compelling reasons, use
684  * `find_and_build_next_ES_unit()` instead.
685  *
686  * - `es` is the elementary stream we're reading from.
687  * - `unit` is the datastructure into which to read the ES unit
688  *   - any previous content will be lost.
689  *
690  * Returns 0 if it succeeds, EOF if the end-of-file is read (i.e., there
691  * is no next ES unit), otherwise 1 if some error occurs.
692  */
find_next_ES_unit(ES_p es,ES_unit_p unit)693 extern int find_next_ES_unit(ES_p       es,
694                              ES_unit_p  unit)
695 {
696   int err;
697 
698   err = find_ES_unit_start(es,unit);
699   if (err) return err;  // 1 or EOF
700 
701   err = find_ES_unit_end(es,unit);
702   if (err) return err;
703 
704   // The first byte after the 00 00 01 prefix tells us what sort of thing
705   // we've found - we'll be friendly and extract it for the user
706   unit->start_code = unit->data[3];
707 
708   return 0;
709 }
710 
711 /*
712  * Find and read the next ES unit into a new datastructure.
713  *
714  * - `es` is the elementary stream we're reading from.
715  * - `unit` is the datastructure containing the ES unit found, or NULL
716  *   if there was none.
717  *
718  * Returns 0 if it succeeds, EOF if the end-of-file is read (i.e., there
719  * is no next ES unit), otherwise 1 if some error occurs.
720  */
find_and_build_next_ES_unit(ES_p es,ES_unit_p * unit)721 extern int find_and_build_next_ES_unit(ES_p        es,
722                                        ES_unit_p  *unit)
723 {
724   int err;
725 
726   err = build_ES_unit(unit);
727   if (err) return 1;
728 
729   err = find_next_ES_unit(es,*unit);
730   if (err)
731   {
732     free_ES_unit(unit);
733     return err;
734   }
735   return 0;
736 }
737 
738 /*
739  * Write (copy) the current ES unit to the output stream.
740  *
741  * Note that it writes out all of the data for this ES unit,
742  * including its 00 00 01 start code prefix.
743  *
744  * - `output` is the output stream (file descriptor) to write to
745  * - `unit` is the ES unit to write
746  *
747  * Returns 0 if all went well, 1 if something went wrong.
748  */
write_ES_unit(FILE * output,ES_unit_p unit)749 extern int write_ES_unit(FILE      *output,
750                          ES_unit_p  unit)
751 {
752   size_t written = fwrite(unit->data,1,unit->data_len,output);
753   if (written != unit->data_len)
754   {
755     fprintf(stderr,"### Error writing out ES unit data: %s\n"
756             "    Wrote %ld bytes instead of %d\n",
757             strerror(errno),(long int)written,unit->data_len);
758     return 1;
759   }
760   else
761     return 0;
762 }
763 
764 // ------------------------------------------------------------
765 // Arbitrary reading from ES data
766 // ------------------------------------------------------------
767 /*
768  * Seek within PES underlying ES.
769  *
770  * This should only be used to seek to data that starts with 00 00 01.
771  *
772  * "Unsets" the triple byte context.
773  *
774  * Returns 0 if all goes well, 1 if something goes wrong.
775  */
seek_in_PES(ES_p es,ES_offset where)776 static int seek_in_PES(ES_p       es,
777                        ES_offset  where)
778 {
779   int  err;
780 
781   if (es->reader == NULL)
782   {
783     fprintf(stderr,"### Attempt to seek in PES for an ES reader that"
784             " is not attached to a PES reader\n");
785     return 1;
786   }
787 
788   // Force the reader to forget its current packet
789   if (es->reader->packet != NULL)
790     free_PES_packet_data(&es->reader->packet);
791 
792   // Seek to the right packet in the PES data
793   err = set_PES_reader_position(es->reader,where.infile);
794   if (err)
795   {
796     fprintf(stderr,"### Error seeking for PES packet at " OFFSET_T_FORMAT
797             "\n",where.infile);
798     return 1;
799   }
800   // Read the PES packet containing ES (ignoring packets we don't care about)
801   err = get_next_pes_packet(es);
802   if (err)
803   {
804     fprintf(stderr,"### Error reading PES packet at " OFFSET_T_FORMAT "/%d\n",
805             where.infile,where.inpacket);
806     return 1;
807   }
808 
809   // Now sort out the byte offset
810   if (where.inpacket > es->reader->packet->es_data_len)
811   {
812     fprintf(stderr,"### Error seeking PES packet at " OFFSET_T_FORMAT "/%d: "
813             " packet ES data is only %d bytes long\n",where.infile,
814             where.inpacket,es->reader->packet->es_data_len);
815     return 1;
816   }
817   es->posn_of_next_byte = where;
818   return 0;
819 }
820 
821 /*
822  * Update our current position information after a seek or direct read.
823  */
deduce_correct_position(ES_p es)824 static inline void deduce_correct_position(ES_p   es)
825 {
826   // We don't know what the previous three bytes were, but we (strongly)
827   // assume that they were not 00 00 01
828   es->cur_byte = 0xff;
829   es->prev1_byte = 0xff;
830   es->prev2_byte = 0xff;
831 
832   if (es->reading_ES)
833   {
834     // For ES data, we want to force new data to be read in from the file
835     es->data_ptr = es->data_end = NULL;
836     es->read_ahead_len = 0;  // to stop the read ahead posn being incremented
837     es->read_ahead_posn = es->posn_of_next_byte.infile;
838   }
839   else
840   {
841     // For PES data, we have whatever is left in the current packet
842     PES_packet_data_p packet = es->reader->packet;
843     es->data     = packet->es_data;
844     es->data_ptr = packet->es_data + es->posn_of_next_byte.inpacket;
845     es->data_end = packet->es_data + packet->es_data_len;
846     // And, of course, we have no idea about the *previous* packet in the file
847     es->last_packet_posn = es->last_packet_es_data_len = 0;
848   }
849 }
850 
851 /*
852  * "Seek" to the given position in the ES data, which is assumed to
853  * be an offset ready to read a 00 00 01 sequence.
854  *
855  * If the ES reader is using PES to read its data, then both fields
856  * of `where` are significant, but if the underlying file *is* just a file,
857  * only `where.infile` is used.
858  *
859  * Returns 0 if all went well, 1 is something went wrong
860  */
seek_ES(ES_p es,ES_offset where)861 extern int seek_ES(ES_p       es,
862                    ES_offset  where)
863 {
864   int err;
865   if (es->reading_ES)
866   {
867     err = seek_file(es->input,where.infile);
868     if (err)
869     {
870       fprintf(stderr,"### Error seeking within ES file\n");
871       return 1;
872     }
873   }
874   else
875   {
876     err = seek_in_PES(es,where);
877     if (err)
878     {
879       fprintf(stderr,
880               "### Error seeking within ES over PES (offset " OFFSET_T_FORMAT
881               "/%d)\n",where.infile,where.inpacket);
882       return 1;
883     }
884   }
885 
886   // And make it look as if we reached this position sensibly
887   es->posn_of_next_byte = where;
888   deduce_correct_position(es);
889   return 0;
890 }
891 
892 /*
893  * Retrieve ES bytes from PES as requested
894  *
895  * Leaves the PES reader set to read on after this data.
896  *
897  * Returns 0 if all goes well, 1 if something goes wrong.
898  */
read_bytes_from_PES(ES_p es,byte * data,uint32_t num_bytes)899 static int read_bytes_from_PES(ES_p     es,
900                                byte    *data,
901                                uint32_t num_bytes)
902 {
903   int     err;
904   int     offset = 0;
905   int     num_bytes_wanted = num_bytes;
906   int32_t from = es->posn_of_next_byte.inpacket;
907   int32_t num_bytes_left = es->reader->packet->es_data_len - from;
908 
909   for (;;)
910   {
911     if (num_bytes_left < num_bytes_wanted)
912     {
913       memcpy(&(data[offset]),&(es->reader->packet->es_data[from]),
914              num_bytes_left);
915       offset += num_bytes_left;
916       num_bytes_wanted -= num_bytes_left;
917       err = get_next_pes_packet(es);
918       if (err) return err;
919       from = 0;
920       num_bytes_left = es->reader->packet->es_data_len;
921     }
922     else
923     {
924       memcpy(&(data[offset]),&(es->reader->packet->es_data[from]),
925              num_bytes_wanted);
926       from += num_bytes_wanted;
927       break;
928     }
929   }
930   es->posn_of_next_byte.inpacket = from;
931   //es->posn_of_next_byte.infile   = es->reader->packet->posn;
932   return 0;
933 }
934 
935 /*
936  * Read in some ES data from disk.
937  *
938  * Suitable for use when reading in a set of ES units whose bounds
939  * (start offset and total number of bytes) have been remembered.
940  *
941  * "Seeks" to the given position in the ES data, which is assumed to
942  * be an offset ready to read a 00 00 01 sequence, and reads data thereafter.
943  *
944  * After this function, the triple byte context is set to FF FF FF, and the
945  * position of said bytes are undefined, but the next position to read a byte
946  * from *is* defined.
947  *
948  * The intent is to allow the caller to have a data array (`data`) that
949  * always contains the last data read, and is of the required size, and
950  * need only be freed when no more data is needed.
951  *
952  * - `es` is where to read our data from
953  * - `start_posn` is the file offset to start reading at
954  * - `num_bytes` is how many bytes we want to read
955  * - `data_len` may be NULL or a pointer to a value.
956  *   If it is NULL, then the data array will be reallocated to size
957  *   `num_bytes` regardless. If it is non-NULL, it should be passed *in*
958  *    as the size that `data` *was*, and will be returned as the size
959  *    that `data` is when the function returns.
960  * - `data` is the data array to read into. If this is NULL, or if `num_bytes`
961  *   is NULL, or if `num_bytes` is greater than `data_len`, then it will be
962  *   reallocated to size `num_bytes`.
963  *
964  * Returns 0 if all went well, 1 if something went wrong.
965  */
read_ES_data(ES_p es,ES_offset start_posn,uint32_t num_bytes,uint32_t * data_len,byte ** data)966 extern int read_ES_data(ES_p       es,
967                         ES_offset  start_posn,
968                         uint32_t   num_bytes,
969                         uint32_t  *data_len,
970                         byte     **data)
971 {
972   int  err;
973   if (*data == NULL || data_len == NULL || num_bytes > *data_len)
974   {
975     *data = realloc(*data,num_bytes);
976     if (*data == NULL)
977     {
978       fprintf(stderr,"### Unable to reallocate data space\n");
979       return 1;
980     }
981     if (data_len != NULL)
982       *data_len = num_bytes;
983   }
984   err = seek_ES(es,start_posn);
985   if (err) return err;
986   if (es->reading_ES)
987   {
988     err = read_bytes(es->input,num_bytes,*data);
989     if (err)
990     {
991       if (err == EOF)
992       {
993         fprintf(stderr,"### Error (EOF) reading %d bytes\n",num_bytes);
994         return 1;
995       }
996       else
997         return err;
998     }
999     es->posn_of_next_byte.infile = start_posn.infile + num_bytes;
1000   }
1001   else
1002   {
1003     err = read_bytes_from_PES(es,*data,num_bytes);
1004     if (err)
1005     {
1006       fprintf(stderr,"### Error reading %d bytes from PES\n",num_bytes);
1007       return 1;
1008     }
1009   }
1010   // Make it look as if we "read" to this position by the normal means,
1011   // but ensure that we have no data left "in hand"
1012   //
1013   // We could leave it up to our caller to do this, on the assumption that
1014   // they're likely to call us several times when, for example, reversing,
1015   // without wanting to read onwards on all but the last of those occasions.
1016   // That would, indeed, save some time each time we are called, but it would
1017   // also allow our caller to forget to do this, with rather bad results.
1018   //
1019   // So, since it shouldn't really take very long...
1020   deduce_correct_position(es);
1021   return 0;
1022 }
1023 
1024 /*
1025  * Retrieve ES data from the end of a PES packet. It is assumed (i.e, things
1026  * will go wrong if it is not true) that at least one ES unit has been read
1027  * from the PES data stream via the ES reader.
1028  *
1029  * - `es` is our ES reader. It must be reading ES from PES packets.
1030  * - `data` is the ES data remaining (to be read) in the current PES packet.
1031  *   It is up to the caller to free this data.
1032  * - `data_len` is the length of said data. If this is 0, then `data`
1033  *   will be NULL.
1034  *
1035  * Returns 0 if all goes well, 1 if an error occurs.
1036  */
get_end_of_underlying_PES_packet(ES_p es,byte ** data,int * data_len)1037 extern int get_end_of_underlying_PES_packet(ES_p        es,
1038                                             byte      **data,
1039                                             int        *data_len)
1040 {
1041   int32_t offset;
1042 
1043   if (es->reading_ES)
1044   {
1045     fprintf(stderr,"### Cannot retrieve end of PES packet - the ES data"
1046             " is direct ES, not ES read from PES\n");
1047     return 1;
1048   }
1049   if (es->reader->packet == NULL)
1050   {
1051     // This is naughty, but we'll pretend to cope
1052     *data = NULL;
1053     *data_len = 0;
1054     return 0;
1055   }
1056 
1057   // The offset (in this packet) of the next ES byte to read.
1058   // We assume that this must also be the offset of the first byte
1059   // of the next ES unit (or, at least, of one of the 00 bytes that
1060   // come before it).
1061   offset = es->posn_of_next_byte.inpacket;
1062 
1063   // The way that we read (using our "triple byte" mechanism) means that
1064   // we will generally already have read the start of the next ES unit.
1065   // Life gets interesting if the 00 00 01 triplet (or, possibly, 00 00 00
1066   // triplet - but we're not supporting that option for the moment - see
1067   // find_ES_unit_end for details) is split over a PES packet boundary.
1068 
1069   // So we know we 00 00 01 to "start" a new ES unit and end the previous
1070   // one. (In fact, even if it was 00 00 00, the relevant values are held in
1071   // our triple byte memory, so we don't particularly care which it is.)
1072   //
1073   // If offset is 0, then then next byte to read is the first byte of
1074   // this packet's ES data, so we need to "pretend" to have all three
1075   // of the triple bytes "in front of" the actual ES data for this PES
1076   // packet.
1077   //
1078   // If offset is 1, then presumably the cur_byte was at offset 0, and
1079   // we have two "dangling" bytes in the previous packet.
1080   //
1081   // If offset is 2, then there would only be one "dangling" byte.
1082   //
1083   // Finally, if offset is 3 or more, we know there was room for the
1084   // 00 00 01 or 00 00 00 before the next byte we'll read, so we don't
1085   // need to bluff at all.
1086 
1087   // So, to calculation - we must remember to leave room for those
1088   // three bytes at the start of the data we return
1089   *data_len = es->reader->packet->es_data_len - offset + 3;
1090   *data = malloc(*data_len);
1091   if (*data == NULL)
1092   {
1093     fprintf(stderr,"### Cannot allocate space for rest of PES packet\n");
1094     return 1;
1095   }
1096   (*data)[0] = es->prev2_byte; // Hmm - should be 0x00
1097   (*data)[1] = es->prev1_byte; // Hmm - should be 0x00
1098   (*data)[2] = es->cur_byte;   // Hmm - should be 0x01 (see above)
1099   memcpy( &((*data)[3]),
1100           &(es->reader->packet->es_data[offset]),
1101           (*data_len) - 3);
1102 
1103   return 0;
1104 }
1105 
1106 // ------------------------------------------------------------
1107 // Lists of ES units
1108 // ------------------------------------------------------------
1109 /*
1110  * Build a new list-of-ES-units datastructure.
1111  *
1112  * Returns 0 if it succeeds, 1 if some error occurs.
1113  */
build_ES_unit_list(ES_unit_list_p * list)1114 extern int build_ES_unit_list(ES_unit_list_p  *list)
1115 {
1116   ES_unit_list_p  new = malloc(SIZEOF_ES_UNIT_LIST);
1117   if (new == NULL)
1118   {
1119     fprintf(stderr,"### Unable to allocate ES unit list datastructure\n");
1120     return 1;
1121   }
1122 
1123   new->length = 0;
1124   new->size = ES_UNIT_LIST_START_SIZE;
1125   new->array = malloc(SIZEOF_ES_UNIT*ES_UNIT_LIST_START_SIZE);
1126   if (new->array == NULL)
1127   {
1128     free(new);
1129     fprintf(stderr,
1130             "### Unable to allocate array in ES unit list datastructure\n");
1131     return 1;
1132   }
1133   *list = new;
1134   return 0;
1135 }
1136 
1137 /*
1138  * Add a copy of an ES unit to the end of the ES unit list
1139  *
1140  * Note that since this takes a copy of the ES unit's data, it is safe
1141  * to free the original ES unit.
1142  *
1143  * Returns 0 if it succeeds, 1 if some error occurs.
1144  */
append_to_ES_unit_list(ES_unit_list_p list,ES_unit_p unit)1145 extern int append_to_ES_unit_list(ES_unit_list_p  list,
1146                                   ES_unit_p       unit)
1147 {
1148   ES_unit_p  ptr;
1149   if (list->length == list->size)
1150   {
1151     int newsize = list->size + ES_UNIT_LIST_INCREMENT;
1152     list->array = realloc(list->array,newsize*SIZEOF_ES_UNIT);
1153     if (list->array == NULL)
1154     {
1155       fprintf(stderr,"### Unable to extend ES unit list array\n");
1156       return 1;
1157     }
1158     list->size = newsize;
1159   }
1160   ptr = &list->array[list->length++];
1161   // Some things can be copied directly
1162   *ptr = *unit;
1163   // But some need adjusting
1164   ptr->data = malloc(unit->data_len);
1165   if (ptr->data == NULL)
1166   {
1167     fprintf(stderr,"### Unable to copy ES unit data array\n");
1168     return 1;
1169   }
1170   memcpy(ptr->data,unit->data,unit->data_len);
1171   ptr->data_size = unit->data_len;
1172   return 0;
1173 }
1174 
1175 /*
1176  * Tidy up an ES unit list datastructure after we've finished with it.
1177  */
clear_ES_unit_list(ES_unit_list_p list)1178 static inline void clear_ES_unit_list(ES_unit_list_p  list)
1179 {
1180   if (list->array != NULL)
1181   {
1182     int ii;
1183     for (ii=0; ii<list->length; ii++)
1184     {
1185       clear_ES_unit(&list->array[ii]);
1186     }
1187     free(list->array);
1188     list->array = NULL;
1189   }
1190   list->length = 0;
1191   list->size = 0;
1192 }
1193 
1194 /*
1195  * Reset (empty) an ES unit list.
1196  */
reset_ES_unit_list(ES_unit_list_p list)1197 extern void reset_ES_unit_list(ES_unit_list_p  list)
1198 {
1199   if (list->array != NULL)
1200   {
1201     int ii;
1202     for (ii=0; ii<list->length; ii++)
1203     {
1204       clear_ES_unit(&list->array[ii]);
1205     }
1206     // We *could* also shrink it - as it is, it will never get smaller
1207     // than its maximum size. Is that likely to be a problem?
1208   }
1209   list->length = 0;
1210 }
1211 
1212 /*
1213  * Tidy up and free an ES unit list datastructure after we've finished with it.
1214  *
1215  * Clears the datastructure, frees it and returns `list` as NULL.
1216  *
1217  * Does nothing if `list` is already NULL.
1218  */
free_ES_unit_list(ES_unit_list_p * list)1219 extern void free_ES_unit_list(ES_unit_list_p  *list)
1220 {
1221   if (*list == NULL)
1222     return;
1223   clear_ES_unit_list(*list);
1224   free(*list);
1225   *list = NULL;
1226 }
1227 
1228 /*
1229  * Report on an ES unit list's contents.
1230  *
1231  * - `stream` is where to write the information
1232  * - `name` is the name of the list (used in the header)
1233  * - `list` is the list to report on
1234  */
report_ES_unit_list(FILE * stream,char * name,ES_unit_list_p list)1235 extern void report_ES_unit_list(FILE              *stream,
1236                                 char              *name,
1237                                 ES_unit_list_p     list)
1238 {
1239   fprintf(stream,"ES unit list '%s': ",name);
1240   if (list->array == NULL)
1241     fprintf(stream,"<empty>\n");
1242   else
1243   {
1244     int ii;
1245     fprintf(stream,"%d item%s (size %d)\n",list->length,
1246             (list->length==1?"":"s"),list->size);
1247     for (ii=0; ii<list->length; ii++)
1248     {
1249       fprintf(stream,"    ");
1250       report_ES_unit(stream,&(list->array[ii]));
1251     }
1252   }
1253 }
1254 
1255 /*
1256  * Retrieve the bounds of this ES unit list in the file it was read from.
1257  *
1258  * - `list` is the ES unit list we're interested in
1259  * - `start` is its start position (i.e., the location at which to start
1260  *   reading to retrieve all of the data for the list)
1261  * - `length` is the total length of the ES units within this list
1262  *
1263  * Returns 0 if all goes well, 1 if the ES unit list has no content.
1264  */
get_ES_unit_list_bounds(ES_unit_list_p list,ES_offset * start,uint32_t * length)1265 extern int get_ES_unit_list_bounds(ES_unit_list_p   list,
1266                                    ES_offset       *start,
1267                                    uint32_t        *length)
1268 {
1269   int ii;
1270   if (list->array == NULL || list->length == 0)
1271   {
1272     fprintf(stderr,
1273             "### Cannot determine bounds of an ES unit list with no content\n");
1274     return 1;
1275   }
1276 
1277   *start = list->array[0].start_posn;
1278   *length = 0;
1279 
1280   // Maybe we should precalculate, or even cache, the total length...
1281   for (ii=0; ii<list->length; ii++)
1282     (*length) += list->array[ii].data_len;
1283 
1284   return 0;
1285 }
1286 
1287 /*
1288  * Compare two ES unit lists. The comparison does not include the start
1289  * position of the unit data, but just the actual data - i.e., two unit lists
1290  * read from different locations in the input stream may be considered the
1291  * same if their data content is identical.
1292  *
1293  * - `list1` and `list2` are the two ES unit lists to compare.
1294  *
1295  * Returns TRUE if the lists contain identical content, FALSE otherwise.
1296  */
same_ES_unit_list(ES_unit_list_p list1,ES_unit_list_p list2)1297 extern int same_ES_unit_list(ES_unit_list_p  list1,
1298                              ES_unit_list_p  list2)
1299 {
1300   int ii;
1301   if (list1 == list2)
1302     return TRUE;
1303 
1304   if (list1->array == NULL)
1305     return (list2->array == NULL);
1306 
1307   if (list1->length != list2->length)
1308     return FALSE;
1309 
1310   for (ii = 0; ii < list1->length; ii++)
1311   {
1312     ES_unit_p  unit1 = &list1->array[ii];
1313     ES_unit_p  unit2 = &list2->array[ii];
1314 
1315     if (unit1->data_len != unit2->data_len)
1316       return FALSE;
1317 
1318     if (memcmp(unit1->data,unit2->data,unit1->data_len))
1319       return FALSE;
1320   }
1321 
1322   return TRUE;
1323 }
1324 
1325 /*
1326  * Compare two ES offsets
1327  *
1328  * Returns -1 if offset1 < offset2, 0 if they are the same, and 1 if
1329  * offset1 > offset2.
1330  */
compare_ES_offsets(ES_offset offset1,ES_offset offset2)1331 extern int compare_ES_offsets(ES_offset  offset1,
1332                               ES_offset  offset2)
1333 {
1334   if (offset1.infile < offset2.infile)
1335     return -1;
1336   else if (offset1.infile > offset2.infile)
1337     return 1;
1338   else if (offset1.inpacket < offset2.inpacket)
1339     return -1;
1340   else if (offset1.inpacket > offset2.inpacket)
1341     return 1;
1342   else
1343     return 0;
1344 }
1345 
1346 // ============================================================
1347 // Simple file type guessing
1348 // ============================================================
1349 /*
1350  * Is an ES unit H.262 or H.264 (or not sure?)
1351  *
1352  * Return 0 if all goes well, 1 if things go wrong.
1353  */
try_to_guess_video_type(ES_unit_p unit,int show_reasoning,int * maybe_h264,int * maybe_h262,int * maybe_avs)1354 static int try_to_guess_video_type(ES_unit_p   unit,
1355                                    int         show_reasoning,
1356                                    int        *maybe_h264,
1357                                    int        *maybe_h262,
1358                                    int        *maybe_avs)
1359 {
1360 
1361   byte nal_ref_idc = 0;
1362   byte nal_unit_type = 0;
1363 
1364   if (show_reasoning)
1365     printf("Looking at ES unit with start code %02X\n",unit->start_code);
1366 
1367   // The following are *not allowed*
1368   //
1369   // - In AVS:   B4, B8     and B9..FF are system start codes
1370   // - In H.262: B0, B1, B6 and B9..FF are system start codes
1371   // - In H.264: Anything with top bit set
1372 
1373   if (unit->start_code == 0xBA)   // PS pack header
1374   {
1375     fprintf(stderr,
1376             "### ES unit start code is 0xBA, which looks like a PS pack"
1377             " header\n    i.e., data may be PS\n");
1378     return 1;
1379   }
1380 
1381   if (unit->start_code >= 0xB9)   // system start code - probably PES
1382   {
1383     fprintf(stderr,
1384             "### ES unit start code %02X is more than 0xB9, which is probably"
1385             " a PES system start code\n    i.e., data may be PES, "
1386             "and is thus probably PS or TS\n",
1387       unit->start_code);
1388     return 1;
1389   }
1390 
1391   if (unit->start_code & 0x80)    // top bit set means not H.264
1392   {
1393     if (*maybe_h264)
1394     {
1395       if (show_reasoning) printf("  %02X has top bit set, so not H.264,\n",unit->start_code);
1396       *maybe_h264 = FALSE;
1397     }
1398 
1399     if (unit->start_code == 0xB0 ||
1400         unit->start_code == 0xB1 ||
1401         unit->start_code == 0xB6)
1402     {
1403       *maybe_h262 = FALSE;
1404       if (show_reasoning)
1405         printf("  Start code %02X is reserved in H.262, so not H.262\n",
1406                unit->start_code);
1407     }
1408     else if (unit->start_code == 0xB4 ||
1409              unit->start_code == 0xB8)
1410     {
1411       *maybe_avs = FALSE;
1412       if (show_reasoning)
1413         printf("  Start code %02X is reserved in AVS, so not AVS\n",
1414                unit->start_code);
1415     }
1416   }
1417   else if (*maybe_h264)
1418   {
1419     if (show_reasoning)
1420       printf("  Top bit not set, so might be H.264\n");
1421 
1422     // If we don't have that top bit set, then we need to work a bit harder
1423     nal_ref_idc = (unit->start_code & 0x60) >> 5;
1424     nal_unit_type = (unit->start_code & 0x1F);
1425 
1426     if (show_reasoning)
1427       printf("  Interpreting it as nal_ref_idc %d, nal_unit_type %d\n",
1428              nal_ref_idc,nal_unit_type);
1429 
1430     if (nal_unit_type > 12 && nal_unit_type < 24)
1431     {
1432       if (show_reasoning)
1433         printf("  H.264 reserves nal_unit_type %02X,"
1434                " so not H.264\n",nal_unit_type);
1435       *maybe_h264 = FALSE;
1436     }
1437     else if (nal_unit_type > 23)
1438     {
1439       if (show_reasoning)
1440         printf("  H.264 does not specify nal_unit_type %02X,"
1441                " so not H.264\n",nal_unit_type);
1442       *maybe_h264 = FALSE;
1443     }
1444     else if (nal_ref_idc == 0)
1445     {
1446       if (nal_unit_type == 5 || // IDR picture
1447           nal_unit_type == 7 || // sequence parameter set
1448           nal_unit_type == 8)   // picture parameter set
1449       {
1450         if (show_reasoning)
1451           printf("  H.264 does not allow nal_ref_idc 0 and nal_unit_type %d,"
1452                  " so not H.264\n",nal_unit_type);
1453         *maybe_h264 = FALSE;
1454       }
1455     }
1456     else // nal_ref_idc is NOT 0
1457     {
1458       // Which means it should *not* be:
1459       if (nal_unit_type ==  6 || // SEI
1460           nal_unit_type ==  9 || // access unit delimiter
1461           nal_unit_type == 10 || // end of sequence
1462           nal_unit_type == 11 || // end of stream
1463           nal_unit_type == 12)   // fille
1464       {
1465         if (show_reasoning)
1466           printf("  H.264 insists nal_ref_idc shall be 0 for nal_unit_type %d,"
1467                  " so not H.264\n",nal_unit_type);
1468         *maybe_h264 = FALSE;
1469       }
1470     }
1471   }
1472   return 0;
1473 }
1474 
1475 /*
1476  * Look at the start of an elementary stream to try to determine its
1477  * video type.
1478  *
1479  * "Eats" the ES units that it looks at, and doesn't rewind the stream
1480  * afterwards.
1481  *
1482  * - `es` is the ES file
1483  * - if `print_dots` is true, print a dot for each ES unit that is inspected
1484  * - if `show_reasoning` is true, then output messages explaining how the
1485  *   decision is being made
1486  * - `video_type` is the final decision -- one of VIDEO_H264, VIDEO_H262,
1487  *   VIDEO_AVS, or VIDEO_UNKNOWN.
1488  *
1489  * Returns 0 if all goes well, 1 if something goes wrong
1490  */
decide_ES_video_type(ES_p es,int print_dots,int show_reasoning,int * video_type)1491 extern int decide_ES_video_type(ES_p  es,
1492                                 int   print_dots,
1493                                 int   show_reasoning,
1494                                 int  *video_type)
1495 {
1496   int  err;
1497   int  ii;
1498   int  maybe_h262 = TRUE;
1499   int  maybe_h264 = TRUE;
1500   int  maybe_avs  = TRUE;
1501   int  decided = FALSE;
1502 
1503   struct ES_unit  unit;
1504 
1505   *video_type = VIDEO_UNKNOWN;
1506 
1507   err = setup_ES_unit(&unit);
1508   if (err)
1509   {
1510     fprintf(stderr,"### Error trying to setup ES unit before"
1511             " working out video type\n");
1512     return 1;
1513   }
1514 
1515   // Otherwise, look at the first 500 packets to see if we can tell
1516   //
1517   // Basically, if we find anything with the top byte as B, then it is
1518   // not H.264. Since H.262 allows up to AF (175) slices (which start with
1519   // 01..AF), and AVS the same (or perhaps one more) and each "surrounds" those
1520   // with entities with top byte B, it's rather hard to see how we could go
1521   // very far without finding something with the top bit of the high byte set
1522   // (certainly not as far as 500 units). So if we *do* go that far, we can be
1523   // *very* sure it is not H.262 or AVS. And if the only other choice is H.264,
1524   // then...
1525   if (show_reasoning)
1526     printf("Looking through first 500 ES units to try to decide video type\n");
1527   for (ii=0; ii<500; ii++)
1528   {
1529     if (print_dots)
1530     {
1531       printf(".");
1532       fflush(stdout);
1533     }
1534     else if (show_reasoning)
1535       printf("%d: ",ii+1);
1536 
1537     err = find_next_ES_unit(es,&unit);
1538     if (err == EOF)
1539     {
1540       if (print_dots) printf("\n");
1541       if (show_reasoning) printf("End of file, trying to read ES unit %d\n",ii+2);
1542       break;
1543     }
1544     else if (err)
1545     {
1546       if (print_dots) printf("\n");
1547       fprintf(stderr,
1548               "### Error trying to find 'unit' %d in ES whilst"
1549               " working out video type\n",ii+2);
1550       clear_ES_unit(&unit);
1551       return 1;
1552     }
1553     err = try_to_guess_video_type(&unit,show_reasoning,
1554                                   &maybe_h264,&maybe_h262,&maybe_avs);
1555     if (err)
1556     {
1557       if (print_dots) printf("\n");
1558       fprintf(stderr,
1559               "### Whilst trying to work out video_type\n");
1560       clear_ES_unit(&unit);
1561       return 1;
1562     }
1563 
1564     if (maybe_h264 && !maybe_h262 && !maybe_avs)
1565     {
1566       if (show_reasoning) printf("  Which leaves only H.264\n");
1567       *video_type = VIDEO_H264;
1568       decided = TRUE;
1569     }
1570     else if (!maybe_h264 && maybe_h262 && !maybe_avs)
1571     {
1572       if (show_reasoning) printf("  Which leaves only H.262\n");
1573       *video_type = VIDEO_H262;
1574       decided = TRUE;
1575     }
1576     else if (!maybe_h264 && !maybe_h262 && maybe_avs)
1577     {
1578       if (show_reasoning) printf("  Which leaves only AVS\n");
1579       *video_type = VIDEO_AVS;
1580       decided = TRUE;
1581     }
1582     else
1583     {
1584       if (show_reasoning)
1585         printf("  It is not possible to decide from that start code\n");
1586     }
1587     if (decided)
1588       break;
1589   }
1590   if (print_dots) printf("\n");
1591   clear_ES_unit(&unit);
1592   return 0;
1593 }
1594 
1595 /*
1596  * Look at the start of an elementary stream to try to determine it's
1597  * video type.
1598  *
1599  * Note that it is easier to prove something is H.262 (or AVS) than to prove
1600  * that it is H.264, and that the result of this routine is a best-guess, not a
1601  * guarantee.
1602  *
1603  * Rewinds back to the original position in the file after it has finished.
1604  *
1605  * - `input` is the file to look at
1606  * - if `print_dots` is true, print a dot for each ES unit that is inspected
1607  * - if `show_reasoning` is true, then output messages explaining how the
1608  *   decision is being made
1609  * - `video_type` is the final decision -- one of VIDEO_H264, VIDEO_H262,
1610  *   VIDEO_AVS, or VIDEO_UNKNOWN.
1611  *
1612  * Returns 0 if all goes well, 1 if something goes wrong
1613  */
decide_ES_file_video_type(int input,int print_dots,int show_reasoning,int * video_type)1614 extern int decide_ES_file_video_type(int   input,
1615                                      int   print_dots,
1616                                      int   show_reasoning,
1617                                      int  *video_type)
1618 {
1619   offset_t  start_posn;
1620   int       err;
1621   ES_p      es = NULL;
1622 
1623   start_posn = tell_file(input);
1624   if (start_posn == -1)
1625   {
1626     fprintf(stderr,"### Error remembering start position in file before"
1627             " working out video type\n");
1628     return 1;
1629   }
1630 
1631   err = seek_file(input,0);
1632   if (err)
1633   {
1634     fprintf(stderr,"### Error rewinding file before"
1635             " working out video type\n");
1636     return 1;
1637   }
1638 
1639   err = build_elementary_stream_file(input,&es);
1640   if (err)
1641   {
1642     fprintf(stderr,"### Error starting elementary stream before"
1643             " working out video type\n");
1644     return 1;
1645   }
1646 
1647   err = decide_ES_video_type(es,print_dots,show_reasoning,video_type);
1648   if (err)
1649   {
1650     fprintf(stderr,"### Error deciding video type of file\n");
1651     free_elementary_stream(&es);
1652     return 1;
1653   }
1654 
1655   free_elementary_stream(&es);
1656 
1657   err = seek_file(input,start_posn);
1658   if (err)
1659   {
1660     fprintf(stderr,"### Error returning to start position in file after"
1661             " working out video type\n");
1662     return 1;
1663   }
1664   return 0;
1665 }
1666 
1667 // Local Variables:
1668 // tab-width: 8
1669 // indent-tabs-mode: nil
1670 // c-basic-offset: 2
1671 // End:
1672 // vim: set tabstop=8 shiftwidth=2 expandtab:
1673