1# -*- coding: utf-8 -*-
2# Copyright (C) 2006  Joe Wreschnig
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8
9"""Read and write Ogg bitstreams and pages.
10
11This module reads and writes a subset of the Ogg bitstream format
12version 0. It does *not* read or write Ogg Vorbis files! For that,
13you should use mutagen.oggvorbis.
14
15This implementation is based on the RFC 3533 standard found at
16http://www.xiph.org/ogg/doc/rfc3533.txt.
17"""
18
19import struct
20import sys
21import zlib
22
23from mutagen import FileType
24from mutagen._util import cdata, resize_bytes, MutagenError, loadfile, seek_end
25from ._compat import cBytesIO, reraise, chr_, izip, xrange
26
27
class error(MutagenError):
    """Raised when an Ogg stream cannot be parsed."""
32
33
class OggPage(object):
    """A single Ogg page (not necessarily a single encoded packet).

    A page is a fixed header of 27 bytes (the last of which is the
    number of lacing values), followed by the lacing values, followed
    by the packet data.

    The constructor is given a file-like object pointing to the start
    of an Ogg page. After the constructor is finished it is pointing
    to the start of the next page.

    Attributes:
        version (`int`): stream structure version (currently always 0)
        position (`int`): absolute stream position (default 0); set to
            -1 by `from_packets` and `replace` on pages that only hold
            a single, unfinished packet
        serial (`int`): logical stream serial number (default 0)
        sequence (`int`): page sequence number within logical stream
            (default 0)
        offset (`int` or `None`): offset this page was read from (default None)
        complete (`bool`): if the last packet on this page is complete
            (default True)
        packets (list[bytes]): list of raw packet data (default [])

    Note that if 'complete' is false, the next page's 'continued'
    property must be true (so set both when constructing pages).

    If a file-like object is supplied to the constructor, the above
    attributes will be filled in based on it.
    """

    version = 0
    __type_flags = 0
    position = 0
    serial = 0
    sequence = 0
    offset = None
    complete = True

    def __init__(self, fileobj=None):
        """Create an empty page, or parse one page from `fileobj`.

        Args:
            fileobj: optional binary file-like object positioned at the
                start of a page. If None, an empty page with the class
                defaults is created.

        Raises:
            error: if the header, lacing values or packet data are
                truncated, the capture pattern is not ``OggS`` or the
                stream structure version is not 0.
            EOFError: if not a single byte could be read.
            IOError: whatever the underlying file object raises.
        """

        self.packets = []

        if fileobj is None:
            return

        self.offset = fileobj.tell()

        header = fileobj.read(27)
        if len(header) == 0:
            raise EOFError

        try:
            (oggs, self.version, self.__type_flags,
             self.position, self.serial, self.sequence,
             crc, segments) = struct.unpack("<4sBBqIIiB", header)
        except struct.error:
            raise error("unable to read full header; got %r" % header)

        if oggs != b"OggS":
            raise error("read %r, expected %r, at 0x%x" % (
                oggs, b"OggS", fileobj.tell() - 27))

        if self.version != 0:
            raise error("version %r unsupported" % self.version)

        # Turn the lacing values into packet lengths: a value below 255
        # terminates a packet, a value of 255 means "more to come".
        total = 0
        lacings = []
        lacing_bytes = fileobj.read(segments)
        if len(lacing_bytes) != segments:
            raise error("unable to read %r lacing bytes" % segments)
        for c in bytearray(lacing_bytes):
            total += c
            if c < 255:
                lacings.append(total)
                total = 0
        if total:
            # The last lacing value was 255, so the final packet
            # continues on the next page.
            lacings.append(total)
            self.complete = False

        self.packets = [fileobj.read(length) for length in lacings]
        if [len(p) for p in self.packets] != lacings:
            raise error("unable to read full data")

    def __eq__(self, other):
        """Two Ogg pages are the same if they write the same data."""
        try:
            return (self.write() == other.write())
        except AttributeError:
            return False

    # Defining __eq__ disables the default hash on Python 3; keep pages
    # hashable by identity.
    __hash__ = object.__hash__

    def __repr__(self):
        attrs = ['version', 'position', 'serial', 'sequence', 'offset',
                 'complete', 'continued', 'first', 'last']
        values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
        return "<%s %s, %d bytes in %d packets>" % (
            type(self).__name__, " ".join(values), sum(map(len, self.packets)),
            len(self.packets))

    def write(self):
        """Return the page header and data encoded as bytes.

        A ValueError is raised if the data is too big to fit in a
        single page.
        """

        # The CRC field is written as 0 first and patched in below.
        data = [
            struct.pack("<4sBBqIIi", b"OggS", self.version, self.__type_flags,
                        self.position, self.serial, self.sequence, 0)
        ]

        lacing_data = []
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            lacing_data.append(b"\xff" * quot + chr_(rem))
        lacing_data = b"".join(lacing_data)
        # An unfinished packet must not end with a terminating lacing
        # value, so drop the trailing \x00 if there is one.
        if not self.complete and lacing_data.endswith(b"\x00"):
            lacing_data = lacing_data[:-1]
        data.append(chr_(len(lacing_data)))
        data.append(lacing_data)
        data.extend(self.packets)
        data = b"".join(data)

        # Python's CRC is swapped relative to Ogg's needs.
        # crc32 returns uint prior to py2.6 on some platforms, so force uint
        crc = (~zlib.crc32(data.translate(cdata.bitswap), -1)) & 0xffffffff
        # Although we're using to_uint_be, this actually makes the CRC
        # a proper le integer, since Python's CRC is byteswapped.
        crc = cdata.to_uint_be(crc).translate(cdata.bitswap)
        data = data[:22] + crc + data[26:]
        return data

    @property
    def size(self):
        """Total page size in bytes, i.e. the length of what `write`
        produces (header, lacing values and packet data).
        """

        size = 27  # Initial header size
        for datum in self.packets:
            # One lacing value per full 255 bytes plus a terminating one.
            size += len(datum) // 255 + 1
        if (self.packets and not self.complete and
                len(self.packets[-1]) % 255 == 0):
            # Packet contains a multiple of 255 bytes and is not
            # terminated, so we don't have a \x00 at the end.
            # Without packets there is no lacing value to drop.
            size -= 1
        size += sum(map(len, self.packets))
        return size

    def __set_flag(self, bit, val):
        """Set or clear bit number `bit` of the header type flags."""
        mask = 1 << bit
        if val:
            self.__type_flags |= mask
        else:
            self.__type_flags &= ~mask

    continued = property(
        lambda self: cdata.test_bit(self.__type_flags, 0),
        lambda self, v: self.__set_flag(0, v),
        doc="The first packet is continued from the previous page.")

    first = property(
        lambda self: cdata.test_bit(self.__type_flags, 1),
        lambda self, v: self.__set_flag(1, v),
        doc="This is the first page of a logical bitstream.")

    last = property(
        lambda self: cdata.test_bit(self.__type_flags, 2),
        lambda self, v: self.__set_flag(2, v),
        doc="This is the last page of a logical bitstream.")

    @staticmethod
    def renumber(fileobj, serial, start):
        """Renumber pages belonging to a specified logical stream.

        fileobj must be opened with mode r+b or w+b.

        Starting at page number 'start', renumber all pages belonging
        to logical stream 'serial'. Other pages will be ignored.

        fileobj must point to the start of a valid Ogg page; any
        occurring after it and part of the specified logical stream
        will be numbered. No adjustment will be made to the data in
        the pages nor the granule position; only the page number, and
        so also the CRC.

        If an error occurs (e.g. non-Ogg data is found), fileobj will
        be left pointing to the place in the stream the error occurred,
        but the invalid data will be left intact (since this function
        does not change the total file size).
        """

        number = start
        while True:
            try:
                page = OggPage(fileobj)
            except EOFError:
                break
            else:
                if page.serial != serial:
                    # Wrong stream, skip this page.
                    continue
                # Changing the number can't change the page size,
                # so seeking back based on the current size is safe.
                fileobj.seek(-page.size, 1)
            page.sequence = number
            fileobj.write(page.write())
            # Continue with the page following the one just rewritten.
            fileobj.seek(page.offset + page.size, 0)
            number += 1

    @staticmethod
    def to_packets(pages, strict=False):
        """Construct a list of packet data from a list of Ogg pages.

        If strict is true, the first page must start a new packet,
        and the last page must end the last packet.

        An empty list of pages yields an empty list of packets, and
        pages which carry no packets at all are skipped (their serial
        and sequence numbers are still checked).

        Raises ValueError if the pages don't share one serial number,
        aren't consecutively numbered, or violate the strict rules.
        """

        if not pages:
            return []

        serial = pages[0].serial
        sequence = pages[0].sequence
        packets = []

        if strict:
            if pages[0].continued:
                raise ValueError("first packet is continued")
            if not pages[-1].complete:
                raise ValueError("last packet does not complete")
        elif pages[0].continued:
            # Give the continued data of the first page something
            # to be appended to.
            packets.append([b""])

        for page in pages:
            if serial != page.serial:
                raise ValueError("invalid serial number in %r" % page)
            elif sequence != page.sequence:
                raise ValueError("bad sequence number in %r" % page)
            else:
                sequence += 1

            if page.packets:
                if page.continued:
                    packets[-1].append(page.packets[0])
                else:
                    packets.append([page.packets[0]])
                packets.extend([p] for p in page.packets[1:])

        return [b"".join(p) for p in packets]

    @classmethod
    def _from_packets_try_preserve(cls, packets, old_pages):
        """Like from_packets but in case the size and number of the packets
        is the same as in the given pages the layout of the pages will
        be copied (the page size and number will match).

        If the packets don't match this behaves like::

            OggPage.from_packets(packets, sequence=old_pages[0].sequence)
        """

        old_packets = cls.to_packets(old_pages)

        if [len(p) for p in packets] != [len(p) for p in old_packets]:
            # doesn't match, fall back
            return cls.from_packets(packets, old_pages[0].sequence)

        # Same packet sizes: cut the new data into exactly the same
        # pieces the old pages were made of.
        new_data = b"".join(packets)
        new_pages = []
        for old in old_pages:
            new = OggPage()
            new.sequence = old.sequence
            new.complete = old.complete
            new.continued = old.continued
            new.position = old.position
            for p in old.packets:
                data, new_data = new_data[:len(p)], new_data[len(p):]
                new.packets.append(data)
            new_pages.append(new)
        assert not new_data

        return new_pages

    @staticmethod
    def from_packets(packets, sequence=0, default_size=4096,
                     wiggle_room=2048):
        """Construct a list of Ogg pages from a list of packet data.

        The algorithm will generate pages of approximately
        default_size in size (rounded down to the nearest multiple of
        255). However, it will also allow pages to increase to
        approximately default_size + wiggle_room if allowing the
        wiggle room would finish a packet (only one packet will be
        finished in this way per page; if the next packet would fit
        into the wiggle room, it still starts on a new page).

        This method reduces packet fragmentation when packet sizes are
        slightly larger than the default page size, while still
        ensuring most pages are of the average size.

        Pages are numbered starting at 'sequence'; other information is
        uninitialized.
        """

        chunk_size = (default_size // 255) * 255

        pages = []

        page = OggPage()
        page.sequence = sequence

        for packet in packets:
            page.packets.append(b"")
            while packet:
                data, packet = packet[:chunk_size], packet[chunk_size:]
                if page.size < default_size and len(page.packets) < 255:
                    page.packets[-1] += data
                else:
                    # If we've put any packet data into this page yet,
                    # we need to mark it incomplete. However, we can
                    # also have just started this packet on an already
                    # full page, in which case, just start the new
                    # page with this packet.
                    if page.packets[-1]:
                        page.complete = False
                        if len(page.packets) == 1:
                            # No packet finishes on this page.
                            page.position = -1
                    else:
                        page.packets.pop(-1)
                    pages.append(page)
                    page = OggPage()
                    page.continued = not pages[-1].complete
                    page.sequence = pages[-1].sequence + 1
                    page.packets.append(data)

                if len(packet) < wiggle_room:
                    # The rest is small enough to finish the packet
                    # on this page.
                    page.packets[-1] += packet
                    packet = b""

        if page.packets:
            pages.append(page)

        return pages

    @classmethod
    def replace(cls, fileobj, old_pages, new_pages):
        """Replace old_pages with new_pages within fileobj.

        old_pages must have come from reading fileobj originally.
        new_pages are assumed to have the 'same' data as old_pages,
        and so the serial and sequence numbers will be copied, as will
        the flags for the first and last pages.

        fileobj will be resized and pages renumbered as necessary. As
        such, it must be opened r+b or w+b.

        Raises ValueError if either list of pages is empty.
        """

        if not len(old_pages) or not len(new_pages):
            raise ValueError("empty pages list not allowed")

        # Number the new pages starting from the first old page.
        first = old_pages[0].sequence
        for page, seq in izip(new_pages,
                              xrange(first, first + len(new_pages))):
            page.sequence = seq
            page.serial = old_pages[0].serial

        new_pages[0].first = old_pages[0].first
        new_pages[0].last = old_pages[0].last
        new_pages[0].continued = old_pages[0].continued

        new_pages[-1].first = old_pages[-1].first
        new_pages[-1].last = old_pages[-1].last
        new_pages[-1].complete = old_pages[-1].complete
        if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
            new_pages[-1].position = -1

        new_data = [cls.write(p) for p in new_pages]

        # Add dummy data or merge the remaining data together so multiple
        # new pages replace an old one
        pages_diff = len(old_pages) - len(new_data)
        if pages_diff > 0:
            new_data.extend([b""] * pages_diff)
        elif pages_diff < 0:
            new_data[pages_diff - 1:] = [b"".join(new_data[pages_diff - 1:])]

        # Replace pages one by one. If the sizes match no resize happens.
        offset_adjust = 0
        new_data_end = None
        assert len(old_pages) == len(new_data)
        for old_page, data in izip(old_pages, new_data):
            # Earlier resizes have shifted everything that follows.
            offset = old_page.offset + offset_adjust
            data_size = len(data)
            resize_bytes(fileobj, old_page.size, data_size, offset)
            fileobj.seek(offset, 0)
            fileobj.write(data)
            new_data_end = offset + data_size
            offset_adjust += (data_size - old_page.size)

        # Finally, if there's any discrepancy in length, we need to
        # renumber the pages for the logical stream.
        if len(old_pages) != len(new_pages):
            fileobj.seek(new_data_end, 0)
            serial = new_pages[-1].serial
            sequence = new_pages[-1].sequence + 1
            cls.renumber(fileobj, serial, sequence)

    @staticmethod
    def find_last(fileobj, serial, finishing=False):
        """Find the last page of the stream 'serial'.

        If the file is not multiplexed this function is fast. If it is,
        it must read the whole stream.

        This finds the last page in the actual file object, or the last
        page in the stream (with eos set), whichever comes first.

        If finishing is True it returns the last page which contains a packet
        finishing on it. If there exist pages but none with finishing packets
        returns None.

        Returns None in case no page with the serial exists.
        Raises error in case this isn't a valid ogg stream.
        Raises IOError.
        """

        # For non-muxed streams, look at the last page.
        seek_end(fileobj, 256 * 256)

        data = fileobj.read()
        try:
            index = data.rindex(b"OggS")
        except ValueError:
            raise error("unable to find final Ogg header")
        bytesobj = cBytesIO(data[index:])

        def is_valid(page):
            # Pages on which no packet finishes have a position of -1.
            return not finishing or page.position != -1

        best_page = None
        try:
            page = OggPage(bytesobj)
        except error:
            pass
        else:
            if page.serial == serial and is_valid(page):
                if page.last:
                    return page
                best_page = page

        # The stream is muxed, so use the slow way.
        fileobj.seek(0)
        try:
            page = OggPage(fileobj)
            while True:
                if page.serial == serial:
                    if is_valid(page):
                        best_page = page
                    if page.last:
                        break
                page = OggPage(fileobj)
            return best_page
        except (error, EOFError):
            # Ran off the end of the file or into invalid data; use the
            # best candidate seen so far.
            return best_page
498
499
class OggFileType(FileType):
    """OggFileType(filething)

    A generic Ogg file.

    Subclasses are expected to provide the ``_Info``, ``_Tags`` and
    ``_Error`` classes used by the methods below.

    Arguments:
        filething (filething)
    """

    _Info = None
    _Tags = None
    _Error = None
    _mimes = ["application/ogg", "application/x-ogg"]

    @loadfile()
    def load(self, filething):
        """load(filething)

        Read the stream info and the tags from a file.

        Args:
            filething (filething)
        Raises:
            mutagen.MutagenError
        """

        stream = filething.fileobj

        try:
            self.info = self._Info(stream)
            self.tags = self._Tags(stream, self.info)
            self.info._post_tags(stream)
        except (error, IOError) as exc:
            # Re-raise as the format specific error, keeping the traceback.
            traceback = sys.exc_info()[2]
            reraise(self._Error, exc, traceback)
        except EOFError:
            raise self._Error("no appropriate stream found")

    @loadfile(writable=True)
    def delete(self, filething=None):
        """delete(filething=None)

        Clear the tags and write the now empty tags to the file.

        If no filename is given, the one most recently loaded is used.

        Args:
            filething (filething)
        Raises:
            mutagen.MutagenError
        """

        stream = filething.fileobj

        self.tags.clear()
        # TODO: we should delegate the deletion to the subclass and not through
        # _inject.
        try:
            try:
                # The padding callback always asks for zero padding.
                self.tags._inject(stream, lambda info: 0)
            except error as exc:
                traceback = sys.exc_info()[2]
                reraise(self._Error, exc, traceback)
            except EOFError:
                raise self._Error("no appropriate stream found")
        except IOError as exc:
            traceback = sys.exc_info()[2]
            reraise(self._Error, exc, traceback)

    def add_tags(self):
        """Not supported here; always raises the format specific error."""
        raise self._Error

    @loadfile(writable=True)
    def save(self, filething=None, padding=None):
        """save(filething=None, padding=None)

        Write the current tags to a file.

        If no filename is given, the one most recently loaded is used.

        Args:
            filething (filething)
            padding (:obj:`mutagen.PaddingFunction`)
        Raises:
            mutagen.MutagenError
        """

        stream = filething.fileobj

        try:
            self.tags._inject(stream, padding)
        except (IOError, error) as exc:
            traceback = sys.exc_info()[2]
            reraise(self._Error, exc, traceback)
        except EOFError:
            raise self._Error("no appropriate stream found")
590