# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import DictionaryObject, DecodedStreamObject, NameObject, NumberObject, createStringObject, ArrayObject
from PyPDF2.utils import b_
from datetime import datetime, timezone

import io
import hashlib


# The UTC offset is hard-coded in this format, so only UTC datetimes must be
# rendered with it.
DEFAULT_PDF_DATETIME_FORMAT = "D:%Y%m%d%H%M%S+00'00'"


# make sure values are unwrapped by calling the specialized __getitem__
def _unwrapping_get(self, key, default=None):
    ''' Replacement for ``DictionaryObject.get`` going through ``self[key]``.

    :param key: the key to look up
    :param default: the value returned when ``self[key]`` raises ``KeyError``
    :return: ``self[key]`` or ``default``
    '''
    try:
        return self[key]
    except KeyError:
        return default


DictionaryObject.get = _unwrapping_get


class BrandedFileWriter(PdfFileWriter):
    ''' PdfFileWriter setting "Odoo" as creator and producer metadata. '''

    def __init__(self):
        super().__init__()
        self.addMetadata({
            '/Creator': "Odoo",
            '/Producer': "Odoo",
        })


# From this point on, every use of ``PdfFileWriter`` in this module (including
# the base class of OdooPdfFileWriter) is the branded writer.
PdfFileWriter = BrandedFileWriter


def merge_pdf(pdf_data):
    ''' Merge a collection of PDF documents in one.
    Note that the attachments are not merged.
    :param list pdf_data: a list of PDF datastrings
    :return: a unique merged PDF datastring
    '''
    writer = PdfFileWriter()
    for document in pdf_data:
        reader = PdfFileReader(io.BytesIO(document), strict=False)
        for page_index in range(reader.getNumPages()):
            writer.addPage(reader.getPage(page_index))
    with io.BytesIO() as _buffer:
        writer.write(_buffer)
        return _buffer.getvalue()


def rotate_pdf(pdf):
    ''' Rotate clockwise PDF (90°) into a new PDF.
    Note that the attachments are not copied.
    :param pdf: a PDF to rotate
    :return: a PDF rotated
    '''
    writer = PdfFileWriter()
    reader = PdfFileReader(io.BytesIO(pdf), strict=False)
    for page_index in range(reader.getNumPages()):
        page = reader.getPage(page_index)
        page.rotateClockwise(90)
        writer.addPage(page)
    with io.BytesIO() as _buffer:
        writer.write(_buffer)
        return _buffer.getvalue()


# by default PdfFileReader will overwrite warnings.showwarning which is what
# logging.captureWarnings does, meaning it essentially reverts captureWarnings
# every time it's called which is undesirable
old_init = PdfFileReader.__init__


def _init_without_warnings_override(self, stream, strict=True, warndest=None, overwriteWarnings=True):
    ''' Replacement for ``PdfFileReader.__init__`` keeping the same signature.

    ``stream`` and ``strict`` are forwarded to the original initializer; the
    caller-supplied ``warndest`` and ``overwriteWarnings`` are not forwarded:
    the original is always called with ``warndest=None`` and
    ``overwriteWarnings=False``.
    '''
    old_init(self, stream=stream, strict=strict, warndest=None, overwriteWarnings=False)


PdfFileReader.__init__ = _init_without_warnings_override


class OdooPdfFileReader(PdfFileReader):
    # OVERRIDE of PdfFileReader to add the management of multiple embedded files.

    def getAttachments(self):
        ''' Returns the files inside the PDF.

        This is a generator: nothing (not even the decryption attempt) is
        executed before the first item is requested.

        :return: an iterator of ``(name, content)`` tuples, one per embedded
            file; empty if there is no embedded file or the name tree can't
            be read.
        :raises NotImplementedError: if document is encrypted and uses an unsupported encryption method.
        '''
        if self.isEncrypted:
            # If the PDF is owner-encrypted, try to unwrap it by giving it an empty user password.
            self.decrypt('')

        try:
            file_path = self.trailer["/Root"].get("/Names", {}).get("/EmbeddedFiles", {}).get("/Names")
        except Exception:
            # malformed pdf (i.e. invalid xref page)
            return []

        if not file_path:
            return []
        # The array alternates name / file specification entries. Stop before
        # a trailing unpaired name (malformed array) instead of raising an
        # IndexError in the middle of the iteration.
        for i in range(0, len(file_path) - 1, 2):
            attachment = file_path[i + 1].getObject()
            yield (attachment["/F"], attachment["/EF"]["/F"].getObject().getData())


class OdooPdfFileWriter(PdfFileWriter):
    # OVERRIDE of PdfFileWriter to add the management of multiple embedded files.

    def _create_attachment_object(self, attachment):
        ''' Create a PyPdf2.generic object representing an embedded file.

        :param attachment: A dictionary containing:
            * filename: The name of the file to embed (required).
            * content: The content of the file as bytes (required); it is
              embedded as-is, no decoding is performed here.
            * subtype: The value of the /Subtype entry of the embedded file
              stream (optional).
            * description: The value of the /Desc entry of the file
              specification (optional).
        :return: the indirect reference to the file specification added to
            the document.
        '''
        content = attachment['content']
        file_entry = DecodedStreamObject()
        file_entry.setData(content)
        file_entry.update({
            NameObject("/Type"): NameObject("/EmbeddedFile"),
            NameObject("/Params"):
                DictionaryObject({
                    NameObject('/CheckSum'): createStringObject(hashlib.md5(content).hexdigest()),
                    # The format hard-codes a +00'00' offset: the date must be UTC, not local time.
                    NameObject('/ModDate'): createStringObject(
                        datetime.now(timezone.utc).strftime(DEFAULT_PDF_DATETIME_FORMAT)),
                    # /Size is an integer entry, not a name.
                    NameObject('/Size'): NumberObject(len(content)),
                }),
        })
        if attachment.get('subtype'):
            file_entry.update({
                NameObject("/Subtype"): NameObject(attachment['subtype']),
            })
        file_entry_object = self._addObject(file_entry)
        filename_object = createStringObject(attachment['filename'])
        filespec_object = DictionaryObject({
            NameObject("/AFRelationship"): NameObject("/Data"),
            NameObject("/Type"): NameObject("/Filespec"),
            NameObject("/F"): filename_object,
            NameObject("/EF"):
                DictionaryObject({
                    NameObject("/F"): file_entry_object,
                    NameObject('/UF'): file_entry_object,
                }),
            NameObject("/UF"): filename_object,
        })
        if attachment.get('description'):
            filespec_object.update({NameObject("/Desc"): createStringObject(attachment['description'])})
        return self._addObject(filespec_object)

    def addAttachment(self, fname, fdata):
        ''' Embed a file in the document.

        :param fname: the name of the file to embed
        :param fdata: the content of the file to embed
        '''
        # OVERRIDE of the addAttachment method to allow appending attachments when some already exist
        if self._root_object.get('/Names') and self._root_object['/Names'].get('/EmbeddedFiles'):
            attachments = self._root_object["/Names"]["/EmbeddedFiles"]["/Names"]
            new_attachment = self._create_attachment_object({'filename': fname, 'content': fdata})
            # Entries are appended by pair: the name, then the file specification reference.
            attachments.extend([new_attachment.getObject()['/F'], new_attachment])
        else:
            super().addAttachment(fname, fdata)