1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 2 of the License, or
5  * (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15  */
16 
17 #include "opus.h"
18 
19 int
get_opus_metadata(PerlIO * infile,char * file,HV * info,HV * tags)20 get_opus_metadata(PerlIO *infile, char *file, HV *info, HV *tags)
21 {
22   return _opus_parse(infile, file, info, tags, 0);
23 }
24 
25 #define OGG_HEADER_SIZE 28
26 int
_opus_parse(PerlIO * infile,char * file,HV * info,HV * tags,uint8_t seeking)27 _opus_parse(PerlIO *infile, char *file, HV *info, HV *tags, uint8_t seeking)
28 {
29   Buffer ogg_buf, vorbis_buf;
30   unsigned char *bptr;
31   unsigned char *last_bptr;
32   unsigned int buf_size;
33 
34   unsigned int id3_size = 0; // size of leading ID3 data
35 
36   off_t file_size;           // total file size
37   off_t audio_size;          // total size of audio without tags
38   off_t audio_offset = 0;    // offset to audio
39   off_t seek_position;
40 
41   unsigned char ogghdr[OGG_HEADER_SIZE];
42   char header_type;
43   int serialno;
44   int final_serialno;
45   int pagenum;
46   uint8_t num_segments;
47   int pagelen;
48   int page = 0;
49   int packets = 0;
50   int streams = 0;
51 
52   unsigned char opushdr[11];
53   unsigned char channels;
54   unsigned int samplerate = 0;
55   unsigned int preskip = 0;
56   unsigned int input_samplerate = 0;
57   uint64_t granule_pos = 0;
58 
59   unsigned char TOC_byte = 0;
60 
61   int i;
62   int err = 0;
63 
64   buffer_init(&ogg_buf, OGG_BLOCK_SIZE);
65   buffer_init(&vorbis_buf, 0);
66 
67   file_size = _file_size(infile);
68   my_hv_store( info, "file_size", newSVuv(file_size) );
69 
70   if ( !_check_buf(infile, &ogg_buf, 10, OGG_BLOCK_SIZE) ) {
71     err = -1;
72     goto out;
73   }
74 
75   // Skip ID3 tags if any
76   bptr = (unsigned char *)buffer_ptr(&ogg_buf);
77   if (
78     (bptr[0] == 'I' && bptr[1] == 'D' && bptr[2] == '3') &&
79     bptr[3] < 0xff && bptr[4] < 0xff &&
80     bptr[6] < 0x80 && bptr[7] < 0x80 && bptr[8] < 0x80 && bptr[9] < 0x80
81   ) {
82     /* found an ID3 header... */
83     id3_size = 10 + (bptr[6]<<21) + (bptr[7]<<14) + (bptr[8]<<7) + bptr[9];
84 
85     if (bptr[5] & 0x10) {
86       // footer present
87       id3_size += 10;
88     }
89 
90     buffer_clear(&ogg_buf);
91 
92     audio_offset += id3_size;
93 
94     DEBUG_TRACE("Skipping ID3v2 tag of size %d\n", id3_size);
95 
96     PerlIO_seek(infile, id3_size, SEEK_SET);
97   }
98 
99   while (1) {
100     // Grab 28-byte Ogg header
101     if ( !_check_buf(infile, &ogg_buf, OGG_HEADER_SIZE, OGG_BLOCK_SIZE) ) {
102       err = -1;
103       goto out;
104     }
105 
106     buffer_get(&ogg_buf, ogghdr, OGG_HEADER_SIZE);
107 
108     audio_offset += OGG_HEADER_SIZE;
109 
110     // check that the first four bytes are 'OggS'
111     if ( ogghdr[0] != 'O' || ogghdr[1] != 'g' || ogghdr[2] != 'g' || ogghdr[3] != 'S' ) {
112       PerlIO_printf(PerlIO_stderr(), "Not an Ogg file (bad OggS header): %s\n", file);
113       goto out;
114     }
115 
116     // Header type flag
117     header_type = ogghdr[5];
118 
119     // Absolute granule position, used to find the first audio page
120     bptr = ogghdr + 6;
121     granule_pos = (uint64_t)CONVERT_INT32LE(bptr);
122     bptr += 4;
123     granule_pos |= (uint64_t)CONVERT_INT32LE(bptr) << 32;
124 
125     // Stream serial number
126     serialno = CONVERT_INT32LE((ogghdr+14));
127 
128     // Count start-of-stream pages
129     if ( header_type & 0x02 ) {
130       streams++;
131     }
132 
133     // Keep track of packet count
134     if ( !(header_type & 0x01) ) {
135       packets++;
136     }
137 
138     // stop processing if we reach the 3rd packet and have no data
139     if (packets > 2 * streams && !buffer_len(&vorbis_buf) ) {
140       break;
141     }
142 
143     // Page seq number
144     pagenum = CONVERT_INT32LE((ogghdr+18));
145 
146     if (page >= 0 && page == pagenum) {
147       page++;
148     }
149     else {
150       page = -1;
151       DEBUG_TRACE("Missing page(s) in Ogg file: %s\n", file);
152     }
153 
154     DEBUG_TRACE("OggS page %d / packet %d at %d\n", pagenum, packets, (int)(audio_offset - 28));
155     DEBUG_TRACE("  granule_pos: %llu\n", granule_pos);
156 
157     // Number of page segments
158     num_segments = ogghdr[26];
159 
160     // Calculate total page size
161     pagelen = ogghdr[27];
162     if (num_segments > 1) {
163       int i;
164 
165       if ( !_check_buf(infile, &ogg_buf, num_segments, OGG_BLOCK_SIZE) ) {
166         err = -1;
167         goto out;
168       }
169 
170       for( i = 0; i < num_segments - 1; i++ ) {
171         u_char x;
172         x = buffer_get_char(&ogg_buf);
173         pagelen += x;
174       }
175 
176       audio_offset += num_segments - 1;
177     }
178 
179     if ( !_check_buf(infile, &ogg_buf, pagelen, OGG_BLOCK_SIZE) ) {
180       err = -1;
181       goto out;
182     }
183 
184     // Still don't have enough data, must have reached the end of the file
185     if ( buffer_len(&ogg_buf) < pagelen ) {
186       PerlIO_printf(PerlIO_stderr(), "Premature end of file: %s\n", file);
187 
188       err = -1;
189       goto out;
190     }
191 
192     audio_offset += pagelen;
193 
194     // Copy page into vorbis buffer
195     buffer_append( &vorbis_buf, buffer_ptr(&ogg_buf), pagelen );
196     DEBUG_TRACE("  Read %d into vorbis buffer\n", pagelen);
197 
198     // Process vorbis packet
199     TOC_byte = buffer_get_char(&vorbis_buf);
200     if ( TOC_byte == 'O' ) {
201       if ( strncmp( buffer_ptr(&vorbis_buf), "pusTags", 7 ) == 0) {
202         buffer_consume(&vorbis_buf, 7);
203         DEBUG_TRACE("  Found Opus tags TOC packet type\n");
204       	if ( !seeking ) {
205                 _parse_vorbis_comments(infile, &vorbis_buf, tags, 0);
206       	}
207         DEBUG_TRACE("  parsed vorbis comments\n");
208 
209         buffer_clear(&vorbis_buf);
210       }
211       else {
212       	// Verify 'OpusHead' string
213       	if ( strncmp( buffer_ptr(&vorbis_buf), "pusHead", 7 ) ) {
214       	  PerlIO_printf(PerlIO_stderr(), "Not an Opus file (bad opus header): %s\n", file);
215       	  goto out;
216       	}
217       	buffer_consume( &vorbis_buf, 7 );
218 
219       	DEBUG_TRACE("  Found Opus header TOC packet type\n");
220       	// Parse info
221       	// Grab 23-byte Vorbis header
222       	if ( buffer_len(&vorbis_buf) < 11 ) {
223       	  PerlIO_printf(PerlIO_stderr(), "Not an Opus file (opus header too short): %s\n", file);
224       	  goto out;
225       	}
226 
227       	buffer_get(&vorbis_buf, opushdr, 11);
228 
229       	my_hv_store( info, "version", newSViv( opushdr[0] ) );
230 
231       	channels = opushdr[1];
232       	my_hv_store( info, "channels", newSViv(channels) );
233       	my_hv_store( info, "stereo", newSViv( channels == 2 ? 1 : 0 ) );
234 
235       	preskip = CONVERT_INT16LE((opushdr+2));
236       	my_hv_store( info, "preskip", newSViv(preskip) );
237 
238       	my_hv_store( info, "samplerate", newSViv(48000) );
239       	samplerate = 48000; // Opus only supports 48k
240 
241       	input_samplerate = CONVERT_INT32LE((opushdr+4));
242       	my_hv_store( info, "input_samplerate", newSViv(input_samplerate) );
243 
244       	DEBUG_TRACE("  parsed opus info header\n");
245       }
246       buffer_clear(&vorbis_buf);
247     }
248 
249     // Skip rest of this page
250     buffer_consume( &ogg_buf, pagelen );
251   }
252 
253   buffer_clear(&ogg_buf);
254   DEBUG_TRACE("Buffer clear");
255 
256   // audio_offset is 28 less because we read the Ogg header
257   audio_offset -= 28;
258 
259   // from the first packet past the comments
260   my_hv_store( info, "audio_offset", newSViv(audio_offset) );
261 
262   audio_size = file_size - audio_offset;
263   my_hv_store( info, "audio_size", newSVuv(audio_size) );
264 
265   my_hv_store( info, "serial_number", newSVuv(serialno) );
266   DEBUG_TRACE("serial number\n");
267 #define BUF_SIZE 8500 // from vlc
268   seek_position = file_size - BUF_SIZE;
269   while (1) {
270     if ( seek_position < audio_offset ) {
271       seek_position = audio_offset;
272     }
273 
274     // calculate average bitrate and duration
275     DEBUG_TRACE("Seeking to %d to calculate bitrate/duration\n", (int)seek_position);
276     PerlIO_seek(infile, seek_position, SEEK_SET);
277 
278     buf_size = PerlIO_read(infile, buffer_append_space(&ogg_buf, BUF_SIZE), BUF_SIZE);
279     if ( buf_size == 0 ) {
280       if ( PerlIO_error(infile) ) {
281         PerlIO_printf(PerlIO_stderr(), "Error reading: %s\n", strerror(errno));
282       }
283       else {
284         PerlIO_printf(PerlIO_stderr(), "File too small. Probably corrupted.\n");
285       }
286 
287       err = -1;
288       goto out;
289     }
290 
291     // Find sync
292     bptr = (unsigned char *)buffer_ptr(&ogg_buf);
293     last_bptr = bptr;
294     // make sure we have room for at least the one ogg page header
295     while (buf_size >= OGG_HEADER_SIZE) {
296       if (bptr[0] == 'O' && bptr[1] == 'g' && bptr[2] == 'g' && bptr[3] == 'S') {
297         bptr += 6;
298 
299         // Get absolute granule value
300         granule_pos = (uint64_t)CONVERT_INT32LE(bptr);
301         bptr += 4;
302         granule_pos |= (uint64_t)CONVERT_INT32LE(bptr) << 32;
303         bptr += 4;
304         DEBUG_TRACE("found granule_pos %llu / samplerate %d to calculate bitrate/duration\n", granule_pos, samplerate);
305         //XXX: jump the header size
306         last_bptr = bptr;
307       }
308       else {
309         bptr++;
310         buf_size--;
311       }
312     }
313     bptr = last_bptr;
314 
315     // Get serial number of this page, if the serial doesn't match the beginning of the file
316     // we have changed logical bitstreams and can't use the granule_pos for bitrate
317     final_serialno = CONVERT_INT32LE((bptr));
318 
319     if ( granule_pos && samplerate && serialno == final_serialno ) {
320       // XXX: needs to adjust for initial granule value if file does not start at 0 samples
321       int length = (int)(((granule_pos-preskip) * 1.0 / samplerate) * 1000);
322       my_hv_store( info, "song_length_ms", newSVuv(length) );
323       my_hv_store( info, "bitrate_average", newSVuv( _bitrate(audio_size, length) ) );
324 
325       DEBUG_TRACE("Using granule_pos %llu / samplerate %d to calculate bitrate/duration\n", granule_pos, samplerate);
326       break;
327     }
328     if ( seek_position == audio_offset ) {
329       DEBUG_TRACE("Packet not found we won't be able to determine the length\n");
330       break;
331     }
332     // seek backwards by BUF_SIZE - OGG_HEADER_SIZE so that if our previous sync happened to include the end
333     // of page header we will include it in the next read
334     seek_position -= (BUF_SIZE - OGG_HEADER_SIZE);
335   }
336 out:
337   buffer_free(&ogg_buf);
338   buffer_free(&vorbis_buf);
339 
340   DEBUG_TRACE("Err %d\n", err);
341   if (err) return err;
342 
343   return 0;
344 }
345 
346 static int
opus_find_frame(PerlIO * infile,char * file,int offset)347 opus_find_frame(PerlIO *infile, char *file, int offset)
348 {
349   int frame_offset = -1;
350   uint32_t samplerate;
351   uint32_t song_length_ms;
352   uint64_t target_sample;
353 
354   // We need to read all metadata first to get some data we need to calculate
355   HV *info = newHV();
356   HV *tags = newHV();
357   if ( _opus_parse(infile, file, info, tags, 1) != 0 ) {
358     goto out;
359   }
360 
361   song_length_ms = SvIV( *(my_hv_fetch( info, "song_length_ms" )) );
362   if (offset >= song_length_ms) {
363     goto out;
364   }
365 
366   samplerate = SvIV( *(my_hv_fetch( info, "samplerate" )) );
367 
368   // Determine target sample we're looking for
369   target_sample = ((offset - 1) / 10) * (samplerate / 100);
370   DEBUG_TRACE("Looking for target sample %llu\n", target_sample);
371 
372   frame_offset = _ogg_binary_search_sample(infile, file, info, target_sample);
373 
374 out:
375   // Don't leak
376   SvREFCNT_dec(info);
377   SvREFCNT_dec(tags);
378 
379   return frame_offset;
380 }
381