1 /* 2 * $Id$ 3 * 4 * Copyright 2007 Michael Neuweiler and Bruno Lowagie 5 * 6 * The contents of this file are subject to the Mozilla Public License Version 1.1 7 * (the "License"); you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at http://www.mozilla.org/MPL/ 9 * 10 * Software distributed under the License is distributed on an "AS IS" basis, 11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12 * for the specific language governing rights and limitations under the License. 13 * 14 * The Original Code is 'iText, a free JAVA-PDF library'. 15 * 16 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by 17 * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie. 18 * All Rights Reserved. 19 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer 20 * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved. 21 * 22 * This class was written by Michael Neuweiler based on hints given by Bruno Lowagie 23 * 24 * Contributor(s): all the names of the contributors are added in the source code 25 * where applicable. 26 * 27 * Alternatively, the contents of this file may be used under the terms of the 28 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the 29 * provisions of LGPL are applicable instead of those above. If you wish to 30 * allow use of your version of this file only under the terms of the LGPL 31 * License and not to allow others to use your version of this file under 32 * the MPL, indicate your decision by deleting the provisions above and 33 * replace them with the notice and other provisions required by the LGPL. 34 * If you do not delete the provisions above, a recipient may use your version 35 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE. 36 * 37 * This library is free software; you can redistribute it and/or modify it 38 * under the terms of the MPL as stated above or under the terms of the GNU 39 * Library General Public License as published by the Free Software Foundation; 40 * either version 2 of the License, or any later version. 41 * 42 * This library is distributed in the hope that it will be useful, but WITHOUT 43 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 44 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more 45 * details. 46 * 47 * If you didn't download this code from the following link, you should check if 48 * you aren't using an obsolete version: 49 * http://www.lowagie.com/iText/ 50 */ 51 package com.lowagie.text.pdf; 52 53 import java.io.IOException; 54 import java.io.OutputStream; 55 import java.security.MessageDigest; 56 import java.util.Arrays; 57 import java.util.HashMap; 58 59 import com.lowagie.text.Document; 60 import com.lowagie.text.DocumentException; 61 import com.lowagie.text.ExceptionConverter; 62 63 /** 64 * PdfSmartCopy has the same functionality as PdfCopy, 65 * but when resources (such as fonts, images,...) are 66 * encountered, a reference to these resources is saved 67 * in a cache, so that they can be reused. 68 * This requires more memory, but reduces the file size 69 * of the resulting PDF document. 70 */ 71 72 public class PdfSmartCopy extends PdfCopy { 73 74 /** the cache with the streams and references. */ 75 private HashMap streamMap = null; 76 77 /** Creates a PdfSmartCopy instance. */ PdfSmartCopy(Document document, OutputStream os)78 public PdfSmartCopy(Document document, OutputStream os) throws DocumentException { 79 super(document, os); 80 this.streamMap = new HashMap(); 81 } 82 /** 83 * Translate a PRIndirectReference to a PdfIndirectReference 84 * In addition, translates the object numbers, and copies the 85 * referenced object to the output file if it wasn't available 86 * in the cache yet. If it's in the cache, the reference to 87 * the already used stream is returned. 88 * 89 * NB: PRIndirectReferences (and PRIndirectObjects) really need to know what 90 * file they came from, because each file has its own namespace. The translation 91 * we do from their namespace to ours is *at best* heuristic, and guaranteed to 92 * fail under some circumstances. 93 */ copyIndirect(PRIndirectReference in)94 protected PdfIndirectReference copyIndirect(PRIndirectReference in) throws IOException, BadPdfFormatException { 95 PdfObject srcObj = PdfReader.getPdfObjectRelease(in); 96 ByteStore streamKey = null; 97 boolean validStream = false; 98 if (srcObj.isStream()) { 99 streamKey = new ByteStore((PRStream)srcObj); 100 validStream = true; 101 PdfIndirectReference streamRef = (PdfIndirectReference) streamMap.get(streamKey); 102 if (streamRef != null) { 103 return streamRef; 104 } 105 } 106 107 PdfIndirectReference theRef; 108 RefKey key = new RefKey(in); 109 IndirectReferences iRef = (IndirectReferences) indirects.get(key); 110 if (iRef != null) { 111 theRef = iRef.getRef(); 112 if (iRef.getCopied()) { 113 return theRef; 114 } 115 } else { 116 theRef = body.getPdfIndirectReference(); 117 iRef = new IndirectReferences(theRef); 118 indirects.put(key, iRef); 119 } 120 if (srcObj.isDictionary()) { 121 PdfObject type = PdfReader.getPdfObjectRelease(((PdfDictionary)srcObj).get(PdfName.TYPE)); 122 if (type != null && PdfName.PAGE.equals(type)) { 123 return theRef; 124 } 125 } 126 iRef.setCopied(); 127 128 if (validStream) { 129 streamMap.put(streamKey, theRef); 130 } 131 132 PdfObject obj = copyObject(srcObj); 133 addToBody(obj, theRef); 134 return theRef; 135 } 136 137 static class ByteStore { 138 private byte[] b; 139 private int hash; 140 private MessageDigest md5; 141 serObject(PdfObject obj, int level, ByteBuffer bb)142 private void serObject(PdfObject obj, int level, ByteBuffer bb) throws IOException { 143 if (level <= 0) 144 return; 145 if (obj == null) { 146 bb.append("$Lnull"); 147 return; 148 } 149 obj = PdfReader.getPdfObject(obj); 150 if (obj.isStream()) { 151 bb.append("$B"); 152 serDic((PdfDictionary)obj, level - 1, bb); 153 if (level > 0) { 154 md5.reset(); 155 bb.append(md5.digest(PdfReader.getStreamBytesRaw((PRStream)obj))); 156 } 157 } 158 else if (obj.isDictionary()) { 159 serDic((PdfDictionary)obj, level - 1, bb); 160 } 161 else if (obj.isArray()) { 162 serArray((PdfArray)obj, level - 1, bb); 163 } 164 else if (obj.isString()) { 165 bb.append("$S").append(obj.toString()); 166 } 167 else if (obj.isName()) { 168 bb.append("$N").append(obj.toString()); 169 } 170 else 171 bb.append("$L").append(obj.toString()); 172 } 173 serDic(PdfDictionary dic, int level, ByteBuffer bb)174 private void serDic(PdfDictionary dic, int level, ByteBuffer bb) throws IOException { 175 bb.append("$D"); 176 if (level <= 0) 177 return; 178 Object[] keys = dic.getKeys().toArray(); 179 Arrays.sort(keys); 180 for (int k = 0; k < keys.length; ++k) { 181 serObject((PdfObject)keys[k], level, bb); 182 serObject(dic.get((PdfName)keys[k]), level, bb); 183 } 184 } 185 serArray(PdfArray array, int level, ByteBuffer bb)186 private void serArray(PdfArray array, int level, ByteBuffer bb) throws IOException { 187 bb.append("$A"); 188 if (level <= 0) 189 return; 190 for (int k = 0; k < array.size(); ++k) { 191 serObject(array.getPdfObject(k), level, bb); 192 } 193 } 194 ByteStore(PRStream str)195 ByteStore(PRStream str) throws IOException { 196 try { 197 md5 = MessageDigest.getInstance("MD5"); 198 } 199 catch (Exception e) { 200 throw new ExceptionConverter(e); 201 } 202 ByteBuffer bb = new ByteBuffer(); 203 int level = 100; 204 serObject(str, level, bb); 205 this.b = bb.toByteArray(); 206 md5 = null; 207 } 208 equals(Object obj)209 public boolean equals(Object obj) { 210 if (!(obj instanceof ByteStore)) 211 return false; 212 if (hashCode() != obj.hashCode()) 213 return false; 214 return Arrays.equals(b, ((ByteStore)obj).b); 215 } 216 hashCode()217 public int hashCode() { 218 if (hash == 0) { 219 int len = b.length; 220 for (int k = 0; k < len; ++k) { 221 hash = hash * 31 + (b[k] & 0xff); 222 } 223 } 224 return hash; 225 } 226 } 227 }