# -*- coding: utf-8 -*-
# Copyright (C) 2006 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

"""Read and write Ogg bitstreams and pages.

This module reads and writes a subset of the Ogg bitstream format
version 0. It does *not* read or write Ogg Vorbis files! For that,
you should use mutagen.oggvorbis.

This implementation is based on the RFC 3533 standard found at
http://www.xiph.org/ogg/doc/rfc3533.txt.
"""

import struct
import sys
import zlib

from mutagen import FileType
from mutagen._util import cdata, resize_bytes, MutagenError, loadfile, seek_end
from ._compat import cBytesIO, reraise, chr_, izip, xrange


class error(MutagenError):
    """Ogg stream parsing errors."""

    pass


class OggPage(object):
    """A single Ogg page (not necessarily a single encoded packet).

    A page is a header of 26 bytes, followed by the segment count and
    the lacing values that encode the length of the data, followed by
    the data.

    The constructor is given a file-like object pointing to the start
    of an Ogg page. After the constructor is finished it is pointing
    to the start of the next page.

    Attributes:
        version (`int`): stream structure version (currently always 0)
        position (`int`): absolute stream position (default 0); pages
            built by `from_packets` use -1 when no packet finishes on
            the page
        serial (`int`): logical stream serial number (default 0)
        sequence (`int`): page sequence number within logical stream
            (default 0)
        offset (`int` or `None`): offset this page was read from
            (default None)
        complete (`bool`): if the last packet on this page is complete
            (default True)
        packets (list[bytes]): list of raw packet data (default [])

    Note that if 'complete' is false, the next page's 'continued'
    property must be true (so set both when constructing pages).

    If a file-like object is supplied to the constructor, the above
    attributes will be filled in based on it.
    """

    version = 0
    __type_flags = 0
    position = 0
    serial = 0
    sequence = 0
    offset = None
    complete = True

    def __init__(self, fileobj=None):
        """Create an empty page, or parse one page from `fileobj`.

        Args:
            fileobj: file-like object positioned at the start of a page,
                or None to create an empty page with default attributes.

        Raises:
            EOFError: nothing at all could be read for the header.
            error: the header is truncated, does not start with
                ``OggS``, has an unsupported version, or the lacing
                values / packet data are truncated.
            IOError: propagated from the underlying file object.
        """

        self.packets = []

        if fileobj is None:
            return

        self.offset = fileobj.tell()

        header = fileobj.read(27)
        if len(header) == 0:
            raise EOFError

        try:
            # The stored CRC is unpacked but not verified here.
            (oggs, self.version, self.__type_flags,
             self.position, self.serial, self.sequence,
             crc, segments) = struct.unpack("<4sBBqIIiB", header)
        except struct.error:
            raise error("unable to read full header; got %r" % header)

        if oggs != b"OggS":
            raise error("read %r, expected %r, at 0x%x" % (
                oggs, b"OggS", fileobj.tell() - 27))

        if self.version != 0:
            raise error("version %r unsupported" % self.version)

        # Each lacing value adds to the current packet's length; a value
        # below 255 terminates the packet.
        total = 0
        lacings = []
        lacing_bytes = fileobj.read(segments)
        if len(lacing_bytes) != segments:
            raise error("unable to read %r lacing bytes" % segments)
        for c in bytearray(lacing_bytes):
            total += c
            if c < 255:
                lacings.append(total)
                total = 0
        if total:
            # The table ended on a 255: the last packet continues on
            # the next page.
            lacings.append(total)
            self.complete = False

        self.packets = [fileobj.read(length) for length in lacings]
        if [len(p) for p in self.packets] != lacings:
            raise error("unable to read full data")

    def __eq__(self, other):
        """Two Ogg pages are the same if they write the same data."""
        try:
            return (self.write() == other.write())
        except AttributeError:
            return False

    __hash__ = object.__hash__

    def __repr__(self):
        """Return a debugging summary of the page attributes and sizes."""
        attrs = ['version', 'position', 'serial', 'sequence', 'offset',
                 'complete', 'continued', 'first', 'last']
        values = ["%s=%r" % (attr, getattr(self, attr)) for attr in attrs]
        return "<%s %s, %d bytes in %d packets>" % (
            type(self).__name__, " ".join(values), sum(map(len, self.packets)),
            len(self.packets))

    def write(self):
        """Return a string encoding of the page header and data.

        A ValueError is raised if the data is too big to fit in a
        single page.
        """

        # The CRC field is packed as zero and patched in at the end.
        data = [
            struct.pack("<4sBBqIIi", b"OggS", self.version, self.__type_flags,
                        self.position, self.serial, self.sequence, 0)
        ]

        lacing_data = []
        for datum in self.packets:
            quot, rem = divmod(len(datum), 255)
            lacing_data.append(b"\xff" * quot + chr_(rem))
        lacing_data = b"".join(lacing_data)
        if not self.complete and lacing_data.endswith(b"\x00"):
            # An unterminated packet must not end with a terminating
            # zero-length segment.
            lacing_data = lacing_data[:-1]
        data.append(chr_(len(lacing_data)))
        data.append(lacing_data)
        data.extend(self.packets)
        data = b"".join(data)

        # Python's CRC is swapped relative to Ogg's needs.
        # crc32 returns uint prior to py2.6 on some platforms, so force uint
        crc = (~zlib.crc32(data.translate(cdata.bitswap), -1)) & 0xffffffff
        # Although we're using to_uint_be, this actually makes the CRC
        # a proper le integer, since Python's CRC is byteswapped.
        crc = cdata.to_uint_be(crc).translate(cdata.bitswap)
        data = data[:22] + crc + data[26:]
        return data

    @property
    def size(self):
        """Total page size in bytes, matching the length of `write()`."""

        # 27 header bytes plus one lacing byte per started 255-byte
        # segment of every packet.
        size = 27 + sum(len(p) // 255 + 1 for p in self.packets)
        if (not self.complete and self.packets
                and len(self.packets[-1]) % 255 == 0):
            # Packet contains a multiple of 255 bytes and is not
            # terminated, so we don't have a \x00 at the end.
            # (The emptiness check keeps a packet-less incomplete page
            # from failing; `write()` accepts such a page.)
            size -= 1
        return size + sum(map(len, self.packets))

    def __set_flag(self, bit, val):
        """Set (truthy `val`) or clear bit number `bit` of the type flags."""
        mask = 1 << bit
        if val:
            self.__type_flags |= mask
        else:
            self.__type_flags &= ~mask

    continued = property(
        lambda self: cdata.test_bit(self.__type_flags, 0),
        lambda self, v: self.__set_flag(0, v),
        doc="The first packet is continued from the previous page.")

    first = property(
        lambda self: cdata.test_bit(self.__type_flags, 1),
        lambda self, v: self.__set_flag(1, v),
        doc="This is the first page of a logical bitstream.")

    last = property(
        lambda self: cdata.test_bit(self.__type_flags, 2),
        lambda self, v: self.__set_flag(2, v),
        doc="This is the last page of a logical bitstream.")

    @staticmethod
    def renumber(fileobj, serial, start):
        """Renumber pages belonging to a specified logical stream.

        fileobj must be opened with mode r+b or w+b.

        Starting at page number 'start', renumber all pages belonging
        to logical stream 'serial'. Other pages will be ignored.

        fileobj must point to the start of a valid Ogg page; any
        occurring after it and part of the specified logical stream
        will be numbered. No adjustment will be made to the data in
        the pages nor the granule position; only the page number, and
        so also the CRC.

        If an error occurs (e.g. non-Ogg data is found), fileobj will
        be left pointing to the place in the stream the error occurred,
        but the invalid data will be left intact (since this function
        does not change the total file size).
        """

        number = start
        while True:
            try:
                page = OggPage(fileobj)
            except EOFError:
                break
            if page.serial != serial:
                # Wrong stream, skip this page.
                continue
            # Changing the number can't change the page size,
            # so seeking back based on the current size is safe.
            fileobj.seek(-page.size, 1)
            page.sequence = number
            fileobj.write(page.write())
            fileobj.seek(page.offset + page.size, 0)
            number += 1

    @staticmethod
    def to_packets(pages, strict=False):
        """Construct a list of packet data from a list of Ogg pages.

        If strict is true, the first page must start a new packet,
        and the last page must end the last packet.

        `pages` must not be empty. Pages that carry no packets at all
        are checked for serial/sequence consistency and otherwise
        skipped.

        Raises:
            ValueError: a page has a different serial number than the
                first one, the sequence numbers are not consecutive,
                or a strict-mode requirement is violated.
        """

        serial = pages[0].serial
        sequence = pages[0].sequence
        packets = []

        if strict:
            if pages[0].continued:
                raise ValueError("first packet is continued")
            if not pages[-1].complete:
                raise ValueError("last packet does not complete")
        elif pages and pages[0].continued:
            # Placeholder for the unseen start of the continued packet.
            packets.append([b""])

        for page in pages:
            if serial != page.serial:
                raise ValueError("invalid serial number in %r" % page)
            elif sequence != page.sequence:
                raise ValueError("bad sequence number in %r" % page)
            else:
                sequence += 1

            if not page.packets:
                # A page may have zero segments; there is nothing to
                # collect and indexing packets[0] would fail.
                continue

            if page.continued:
                packets[-1].append(page.packets[0])
            else:
                packets.append([page.packets[0]])
            packets.extend([p] for p in page.packets[1:])

        return [b"".join(p) for p in packets]

    @classmethod
    def _from_packets_try_preserve(cls, packets, old_pages):
        """Like from_packets but in case the size and number of the packets
        is the same as in the given pages the layout of the pages will
        be copied (the page size and number will match).

        If the packets don't match this behaves like::

            OggPage.from_packets(packets, sequence=old_pages[0].sequence)
        """

        old_packets = cls.to_packets(old_pages)

        if [len(p) for p in packets] != [len(p) for p in old_packets]:
            # doesn't match, fall back
            return cls.from_packets(packets, old_pages[0].sequence)

        # Same packet lengths: slice the new data using the old layout.
        new_data = b"".join(packets)
        new_pages = []
        for old in old_pages:
            new = OggPage()
            new.sequence = old.sequence
            new.complete = old.complete
            new.continued = old.continued
            new.position = old.position
            for p in old.packets:
                data, new_data = new_data[:len(p)], new_data[len(p):]
                new.packets.append(data)
            new_pages.append(new)
        assert not new_data

        return new_pages

    @staticmethod
    def from_packets(packets, sequence=0, default_size=4096,
                     wiggle_room=2048):
        """Construct a list of Ogg pages from a list of packet data.

        The algorithm will generate pages of approximately
        default_size in size (rounded down to the nearest multiple of
        255). However, it will also allow pages to increase to
        approximately default_size + wiggle_room if allowing the
        wiggle room would finish a packet (only one packet will be
        finished in this way per page; if the next packet would fit
        into the wiggle room, it still starts on a new page).

        This method reduces packet fragmentation when packet sizes are
        slightly larger than the default page size, while still
        ensuring most pages are of the average size.

        Pages are numbered started at 'sequence'; other information is
        uninitialized.
        """

        chunk_size = (default_size // 255) * 255

        pages = []

        page = OggPage()
        page.sequence = sequence

        for packet in packets:
            page.packets.append(b"")
            while packet:
                data, packet = packet[:chunk_size], packet[chunk_size:]
                if page.size < default_size and len(page.packets) < 255:
                    page.packets[-1] += data
                else:
                    # If we've put any packet data into this page yet,
                    # we need to mark it incomplete. However, we can
                    # also have just started this packet on an already
                    # full page, in which case, just start the new
                    # page with this packet.
                    if page.packets[-1]:
                        page.complete = False
                        if len(page.packets) == 1:
                            page.position = -1
                    else:
                        page.packets.pop(-1)
                    pages.append(page)
                    page = OggPage()
                    page.continued = not pages[-1].complete
                    page.sequence = pages[-1].sequence + 1
                    page.packets.append(data)

                if len(packet) < wiggle_room:
                    page.packets[-1] += packet
                    packet = b""

        if page.packets:
            pages.append(page)

        return pages

    @classmethod
    def replace(cls, fileobj, old_pages, new_pages):
        """Replace old_pages with new_pages within fileobj.

        old_pages must have come from reading fileobj originally.
        new_pages are assumed to have the 'same' data as old_pages,
        and so the serial and sequence numbers will be copied, as will
        the flags for the first and last pages.

        fileobj will be resized and pages renumbered as necessary. As
        such, it must be opened r+b or w+b.

        Raises:
            ValueError: either page list is empty.
        """

        if not len(old_pages) or not len(new_pages):
            raise ValueError("empty pages list not allowed")

        # Number the new pages starting from the first old page.
        first = old_pages[0].sequence
        for page, seq in izip(new_pages,
                              xrange(first, first + len(new_pages))):
            page.sequence = seq
            page.serial = old_pages[0].serial

        new_pages[0].first = old_pages[0].first
        new_pages[0].last = old_pages[0].last
        new_pages[0].continued = old_pages[0].continued

        new_pages[-1].first = old_pages[-1].first
        new_pages[-1].last = old_pages[-1].last
        new_pages[-1].complete = old_pages[-1].complete
        if not new_pages[-1].complete and len(new_pages[-1].packets) == 1:
            new_pages[-1].position = -1

        new_data = [cls.write(p) for p in new_pages]

        # Add dummy data or merge the remaining data together so multiple
        # new pages replace an old one
        pages_diff = len(old_pages) - len(new_data)
        if pages_diff > 0:
            new_data.extend([b""] * pages_diff)
        elif pages_diff < 0:
            new_data[pages_diff - 1:] = [b"".join(new_data[pages_diff - 1:])]

        # Replace pages one by one. If the sizes match no resize happens.
        offset_adjust = 0
        new_data_end = None
        assert len(old_pages) == len(new_data)
        for old_page, data in izip(old_pages, new_data):
            offset = old_page.offset + offset_adjust
            data_size = len(data)
            resize_bytes(fileobj, old_page.size, data_size, offset)
            fileobj.seek(offset, 0)
            fileobj.write(data)
            new_data_end = offset + data_size
            offset_adjust += (data_size - old_page.size)

        # Finally, if there's any discrepancy in length, we need to
        # renumber the pages for the logical stream.
        if len(old_pages) != len(new_pages):
            fileobj.seek(new_data_end, 0)
            serial = new_pages[-1].serial
            sequence = new_pages[-1].sequence + 1
            cls.renumber(fileobj, serial, sequence)

    @staticmethod
    def find_last(fileobj, serial, finishing=False):
        """Find the last page of the stream 'serial'.

        If the file is not multiplexed this function is fast. If it is,
        it must read the whole stream.

        This finds the last page in the actual file object, or the last
        page in the stream (with eos set), whichever comes first.

        If finishing is True it returns the last page which contains a packet
        finishing on it. If there exist pages but none with finishing packets
        returns None.

        Returns None in case no page with the serial exists.
        Raises error in case this isn't a valid ogg stream.
        Raises IOError.
        """

        # For non-muxed streams, look at the last page.
        seek_end(fileobj, 256 * 256)

        data = fileobj.read()
        try:
            index = data.rindex(b"OggS")
        except ValueError:
            raise error("unable to find final Ogg header")
        bytesobj = cBytesIO(data[index:])

        def is_valid(page):
            # A position of -1 marks a page on which no packet finishes.
            return not finishing or page.position != -1

        best_page = None
        try:
            page = OggPage(bytesobj)
        except error:
            pass
        else:
            if page.serial == serial and is_valid(page):
                if page.last:
                    return page
                best_page = page

        # The stream is muxed, so use the slow way.
        fileobj.seek(0)
        try:
            page = OggPage(fileobj)
            while True:
                if page.serial == serial:
                    if is_valid(page):
                        best_page = page
                    if page.last:
                        break
                page = OggPage(fileobj)
            return best_page
        except (error, EOFError):
            # Ran out of parsable pages; report the best one seen so far.
            return best_page


class OggFileType(FileType):
    """OggFileType(filething)

    A generic Ogg file.

    Arguments:
        filething (filething)
    """

    # Set by subclasses: stream info class, tags class and the
    # exception type raised on failure.
    _Info = None
    _Tags = None
    _Error = None
    _mimes = ["application/ogg", "application/x-ogg"]

    @loadfile()
    def load(self, filething):
        """load(filething)

        Load file information from a filename.

        Args:
            filething (filething)
        Raises:
            mutagen.MutagenError
        """

        fileobj = filething.fileobj

        try:
            self.info = self._Info(fileobj)
            self.tags = self._Tags(fileobj, self.info)
            self.info._post_tags(fileobj)
        except (error, IOError) as e:
            reraise(self._Error, e, sys.exc_info()[2])
        except EOFError:
            raise self._Error("no appropriate stream found")

    @loadfile(writable=True)
    def delete(self, filething=None):
        """delete(filething=None)

        Remove tags from a file.

        If no filename is given, the one most recently loaded is used.

        Args:
            filething (filething)
        Raises:
            mutagen.MutagenError
        """

        fileobj = filething.fileobj

        self.tags.clear()
        # TODO: we should delegate the deletion to the subclass and not through
        # _inject.
        try:
            try:
                self.tags._inject(fileobj, lambda x: 0)
            except error as e:
                reraise(self._Error, e, sys.exc_info()[2])
            except EOFError:
                raise self._Error("no appropriate stream found")
        except IOError as e:
            reraise(self._Error, e, sys.exc_info()[2])

    def add_tags(self):
        """Always raises `_Error`; this base class does not add tags."""
        raise self._Error

    @loadfile(writable=True)
    def save(self, filething=None, padding=None):
        """save(filething=None, padding=None)

        Save a tag to a file.

        If no filename is given, the one most recently loaded is used.

        Args:
            filething (filething)
            padding (:obj:`mutagen.PaddingFunction`)
        Raises:
            mutagen.MutagenError
        """

        try:
            self.tags._inject(filething.fileobj, padding)
        except (IOError, error) as e:
            reraise(self._Error, e, sys.exc_info()[2])
        except EOFError:
            raise self._Error("no appropriate stream found")