1 /*
2  * $Id$
3  *
4  * Copyright 2007 Michael Neuweiler and Bruno Lowagie
5  *
6  * The contents of this file are subject to the Mozilla Public License Version 1.1
7  * (the "License"); you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the License.
13  *
14  * The Original Code is 'iText, a free JAVA-PDF library'.
15  *
16  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17  * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
18  * All Rights Reserved.
19  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20  * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
21  *
22  * This class was written by Michael Neuweiler based on hints given by Bruno Lowagie
23  *
24  * Contributor(s): all the names of the contributors are added in the source code
25  * where applicable.
26  *
27  * Alternatively, the contents of this file may be used under the terms of the
28  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
29  * provisions of LGPL are applicable instead of those above.  If you wish to
30  * allow use of your version of this file only under the terms of the LGPL
31  * License and not to allow others to use your version of this file under
32  * the MPL, indicate your decision by deleting the provisions above and
33  * replace them with the notice and other provisions required by the LGPL.
34  * If you do not delete the provisions above, a recipient may use your version
35  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
36  *
37  * This library is free software; you can redistribute it and/or modify it
38  * under the terms of the MPL as stated above or under the terms of the GNU
39  * Library General Public License as published by the Free Software Foundation;
40  * either version 2 of the License, or any later version.
41  *
42  * This library is distributed in the hope that it will be useful, but WITHOUT
43  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
44  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
45  * details.
46  *
47  * If you didn't download this code from the following link, you should check if
48  * you aren't using an obsolete version:
49  * http://www.lowagie.com/iText/
50  */
51 package com.lowagie.text.pdf;
52 
53 import java.io.IOException;
54 import java.io.OutputStream;
55 import java.security.MessageDigest;
56 import java.util.Arrays;
57 import java.util.HashMap;
58 
59 import com.lowagie.text.Document;
60 import com.lowagie.text.DocumentException;
61 import com.lowagie.text.ExceptionConverter;
62 
63 /**
64  * PdfSmartCopy has the same functionality as PdfCopy,
65  * but when resources (such as fonts, images,...) are
66  * encountered, a reference to these resources is saved
67  * in a cache, so that they can be reused.
68  * This requires more memory, but reduces the file size
69  * of the resulting PDF document.
70  */
71 
72 public class PdfSmartCopy extends PdfCopy {
73 
74 	/** the cache with the streams and references. */
75     private HashMap streamMap = null;
76 
77     /** Creates a PdfSmartCopy instance. */
PdfSmartCopy(Document document, OutputStream os)78     public PdfSmartCopy(Document document, OutputStream os) throws DocumentException {
79         super(document, os);
80         this.streamMap = new HashMap();
81     }
82     /**
83      * Translate a PRIndirectReference to a PdfIndirectReference
84      * In addition, translates the object numbers, and copies the
85      * referenced object to the output file if it wasn't available
86      * in the cache yet. If it's in the cache, the reference to
87      * the already used stream is returned.
88      *
89      * NB: PRIndirectReferences (and PRIndirectObjects) really need to know what
90      * file they came from, because each file has its own namespace. The translation
91      * we do from their namespace to ours is *at best* heuristic, and guaranteed to
92      * fail under some circumstances.
93      */
copyIndirect(PRIndirectReference in)94     protected PdfIndirectReference copyIndirect(PRIndirectReference in) throws IOException, BadPdfFormatException {
95         PdfObject srcObj = PdfReader.getPdfObjectRelease(in);
96         ByteStore streamKey = null;
97         boolean validStream = false;
98         if (srcObj.isStream()) {
99             streamKey = new ByteStore((PRStream)srcObj);
100             validStream = true;
101             PdfIndirectReference streamRef = (PdfIndirectReference) streamMap.get(streamKey);
102             if (streamRef != null) {
103                 return streamRef;
104             }
105         }
106 
107         PdfIndirectReference theRef;
108         RefKey key = new RefKey(in);
109         IndirectReferences iRef = (IndirectReferences) indirects.get(key);
110         if (iRef != null) {
111             theRef = iRef.getRef();
112             if (iRef.getCopied()) {
113                 return theRef;
114             }
115         } else {
116             theRef = body.getPdfIndirectReference();
117             iRef = new IndirectReferences(theRef);
118             indirects.put(key, iRef);
119         }
120         if (srcObj.isDictionary()) {
121             PdfObject type = PdfReader.getPdfObjectRelease(((PdfDictionary)srcObj).get(PdfName.TYPE));
122             if (type != null && PdfName.PAGE.equals(type)) {
123                 return theRef;
124             }
125         }
126         iRef.setCopied();
127 
128         if (validStream) {
129             streamMap.put(streamKey, theRef);
130         }
131 
132         PdfObject obj = copyObject(srcObj);
133         addToBody(obj, theRef);
134         return theRef;
135     }
136 
137     static class ByteStore {
138         private byte[] b;
139         private int hash;
140         private MessageDigest md5;
141 
serObject(PdfObject obj, int level, ByteBuffer bb)142         private void serObject(PdfObject obj, int level, ByteBuffer bb) throws IOException {
143             if (level <= 0)
144                 return;
145             if (obj == null) {
146                 bb.append("$Lnull");
147                 return;
148             }
149             obj = PdfReader.getPdfObject(obj);
150             if (obj.isStream()) {
151                 bb.append("$B");
152                 serDic((PdfDictionary)obj, level - 1, bb);
153                 if (level > 0) {
154                     md5.reset();
155                     bb.append(md5.digest(PdfReader.getStreamBytesRaw((PRStream)obj)));
156                 }
157             }
158             else if (obj.isDictionary()) {
159                 serDic((PdfDictionary)obj, level - 1, bb);
160             }
161             else if (obj.isArray()) {
162                 serArray((PdfArray)obj, level - 1, bb);
163             }
164             else if (obj.isString()) {
165                 bb.append("$S").append(obj.toString());
166             }
167             else if (obj.isName()) {
168                 bb.append("$N").append(obj.toString());
169             }
170             else
171                 bb.append("$L").append(obj.toString());
172         }
173 
serDic(PdfDictionary dic, int level, ByteBuffer bb)174         private void serDic(PdfDictionary dic, int level, ByteBuffer bb) throws IOException {
175             bb.append("$D");
176             if (level <= 0)
177                 return;
178             Object[] keys = dic.getKeys().toArray();
179             Arrays.sort(keys);
180             for (int k = 0; k < keys.length; ++k) {
181                 serObject((PdfObject)keys[k], level, bb);
182                 serObject(dic.get((PdfName)keys[k]), level, bb);
183             }
184         }
185 
serArray(PdfArray array, int level, ByteBuffer bb)186         private void serArray(PdfArray array, int level, ByteBuffer bb) throws IOException {
187             bb.append("$A");
188             if (level <= 0)
189                 return;
190             for (int k = 0; k < array.size(); ++k) {
191                 serObject(array.getPdfObject(k), level, bb);
192             }
193         }
194 
ByteStore(PRStream str)195         ByteStore(PRStream str) throws IOException {
196             try {
197                 md5 = MessageDigest.getInstance("MD5");
198             }
199             catch (Exception e) {
200                 throw new ExceptionConverter(e);
201             }
202             ByteBuffer bb = new ByteBuffer();
203             int level = 100;
204             serObject(str, level, bb);
205             this.b = bb.toByteArray();
206             md5 = null;
207         }
208 
equals(Object obj)209         public boolean equals(Object obj) {
210             if (!(obj instanceof ByteStore))
211                 return false;
212             if (hashCode() != obj.hashCode())
213                 return false;
214             return Arrays.equals(b, ((ByteStore)obj).b);
215         }
216 
hashCode()217         public int hashCode() {
218             if (hash == 0) {
219                 int len = b.length;
220                 for (int k = 0; k < len; ++k) {
221                     hash = hash * 31 + (b[k] & 0xff);
222                 }
223             }
224             return hash;
225         }
226     }
227 }