1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 /* $Id$ */
19 package org.apache.fop.pdf;
20 
21 import java.io.ByteArrayOutputStream;
22 import java.io.IOException;
23 import java.io.OutputStream;
24 import java.util.ArrayList;
25 import java.util.Collections;
26 import java.util.Comparator;
27 import java.util.HashMap;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Set;
32 
33 import org.apache.commons.io.output.CountingOutputStream;
34 
35 public class PDFLinearization {
36     private PDFDocument doc;
37     private Map<PDFPage, Set<PDFObject>> pageObjsMap = new HashMap<PDFPage, Set<PDFObject>>();
38     private PDFDictionary linearDict;
39     private HintTable hintTable;
40 
PDFLinearization(PDFDocument doc)41     public PDFLinearization(PDFDocument doc) {
42         this.doc = doc;
43     }
44 
45     static class HintTable extends PDFStream {
46         private List<PDFPage> pages;
47         int pageStartPos;
48         List<Integer> sharedLengths = new ArrayList<Integer>();
49         List<Integer> pageLengths = new ArrayList<Integer>();
50         List<Integer> contentStreamLengths = new ArrayList<Integer>();
51         List<Integer> objCount = new ArrayList<Integer>();
52         Map<String, int[]> hintGroups = new HashMap<String, int[]>();
53 
HintTable(PDFDocument doc)54         public HintTable(PDFDocument doc) {
55             super(false);
56             doc.assignObjectNumber(this);
57             doc.addObject(this);
58             pages = doc.pageObjs;
59             for (int i = 0; i < pages.size(); i++) {
60                 pageLengths.add(0);
61                 contentStreamLengths.add(0);
62                 objCount.add(0);
63             }
64             hintGroups.put("/C", new int[4]);
65             hintGroups.put("/L", new int[4]);
66             hintGroups.put("/I", new int[4]);
67             hintGroups.put("/E", new int[4]);
68             hintGroups.put("/O", new int[4]);
69             hintGroups.put("/V", new int[4]);
70         }
71 
72         @Override
getFilterList()73         public PDFFilterList getFilterList() {
74             return new PDFFilterList(getDocument().isEncryptionActive());
75         }
76 
77         @Override
outputRawStreamData(OutputStream os)78         protected void outputRawStreamData(OutputStream os) throws IOException {
79             CountingOutputStream bos = new CountingOutputStream(os);
80 
81             //start header
82             writeULong(1, bos); //1
83             writeULong(pageStartPos, bos); //2
84             writeCard16(32, bos); //3
85             writeULong(0, bos); //4
86             writeCard16(32, bos); //5
87             writeULong(0, bos); //6
88             writeCard16(0, bos); //7
89             writeULong(0, bos); //8
90             writeCard16(32, bos); //9
91             writeCard16(0, bos); //10
92             writeCard16(0, bos); //11
93             writeCard16(0, bos); //12
94             writeCard16(4, bos); //13
95             //end header
96 
97             for (PDFPage page : pages) {
98                 writeULong(objCount.get(page.pageIndex) - 1, bos);
99             }
100             for (PDFPage page : pages) {
101                 writeULong(pageLengths.get(page.pageIndex), bos);
102             }
103             for (PDFPage page : pages) {
104                 writeULong(contentStreamLengths.get(page.pageIndex), bos);
105             }
106 
107             writeSharedTable(bos);
108 
109             for (Map.Entry<String, int[]> group : hintGroups.entrySet()) {
110                 put(group.getKey(), bos.getCount());
111                 for (int i : group.getValue()) {
112                     writeULong(i, bos);
113                 }
114                 if (group.getKey().equals("/C")) {
115                     writeULong(0, bos);
116                     writeCard16(0, bos);
117                 }
118             }
119         }
120 
writeSharedTable(CountingOutputStream bos)121         private void writeSharedTable(CountingOutputStream bos) throws IOException {
122             put("/S", bos.getCount());
123 
124             //Shared object hint table, header section
125             writeULong(0, bos); //1
126             writeULong(0, bos); //2
127             writeULong(sharedLengths.size(), bos); //3
128             writeULong(sharedLengths.size(), bos); //4
129             writeCard16(0, bos); //5
130             writeULong(0, bos); //6
131             writeCard16(32, bos); //7
132 
133             for (int i : sharedLengths) {
134                 writeULong(i, bos);
135             }
136             writeULong(0, bos);
137         }
138 
writeCard16(int s, OutputStream bos)139         private void writeCard16(int s, OutputStream bos) throws IOException {
140             byte b1 = (byte)((s >> 8) & 0xff);
141             byte b2 = (byte)(s & 0xff);
142             bos.write(b1);
143             bos.write(b2);
144         }
145 
writeULong(int s, OutputStream bos)146         private void writeULong(int s, OutputStream bos) throws IOException {
147             byte b1 = (byte)((s >> 24) & 0xff);
148             byte b2 = (byte)((s >> 16) & 0xff);
149             byte b3 = (byte)((s >> 8) & 0xff);
150             byte b4 = (byte)(s & 0xff);
151             bos.write(b1);
152             bos.write(b2);
153             bos.write(b3);
154             bos.write(b4);
155         }
156     }
157 
158     static class LinearPDFDictionary extends PDFDictionary {
159         private int lastsize = -1;
160 
LinearPDFDictionary(PDFDocument doc)161         public LinearPDFDictionary(PDFDocument doc) {
162             put("Linearized", 1);
163             put("/L", 0);
164             PDFArray larray = new PDFArray();
165             larray.add(0);
166             larray.add(0);
167             put("/H", larray);
168             doc.assignObjectNumber(this);
169             getObjectNumber().getNumber();
170             put("/O", getObjectNumber().getNumber() + 3);
171             put("/E", 0);
172             put("/N", doc.pageObjs.size());
173             put("/T", 0);
174         }
175 
output(OutputStream stream)176         public int output(OutputStream stream) throws IOException {
177             int size = super.output(stream);
178             int padding = lastsize - size + 32;
179             if (lastsize == -1) {
180                 padding = 32;
181                 lastsize = size;
182             }
183             writePadding(padding, stream);
184             return size + padding;
185         }
186     }
187 
188 
assignNumbers()189     private Set<PDFObject> assignNumbers() throws IOException {
190         Set<PDFObject> page1Children = getPage1Children();
191         if (!doc.pageObjs.isEmpty()) {
192             for (int i = 1; i < doc.pageObjs.size(); i++) {
193                 PDFPage page = doc.pageObjs.get(i);
194                 Set<PDFObject> children = pageObjsMap.get(page);
195                 for (PDFObject c : children) {
196                     if (!page1Children.contains(c) && c.hasObjectNumber()) {
197                         c.getObjectNumber().getNumber();
198                     }
199                 }
200             }
201             for (PDFObject o : doc.objects) {
202                 if (o instanceof PDFDests || o instanceof PDFOutline) {
203                     for (PDFObject c : getChildren(o)) {
204                         c.getObjectNumber().getNumber();
205                     }
206                 }
207                 if (o instanceof PDFInfo || o instanceof PDFPageLabels) {
208                     o.getObjectNumber().getNumber();
209                 }
210             }
211             for (PDFObject o : doc.objects) {
212                 if (!page1Children.contains(o)) {
213                     o.getObjectNumber().getNumber();
214                 }
215             }
216         }
217         linearDict = new LinearPDFDictionary(doc);
218         for (PDFObject o : page1Children) {
219             o.getObjectNumber().getNumber();
220         }
221         sort(doc.objects);
222         return page1Children;
223     }
224 
sort(List<PDFObject> objects)225     private void sort(List<PDFObject> objects) {
226         Collections.sort(objects, new Comparator<PDFObject>() {
227             public int compare(PDFObject o1, PDFObject o2) {
228                 return Integer.compare(o1.getObjectNumber().getNumber(), o2.getObjectNumber().getNumber());
229             }
230         });
231     }
232 
getChildren(PDFObject o)233     private Set<PDFObject> getChildren(PDFObject o) {
234         Set<PDFObject> children = new LinkedHashSet<PDFObject>();
235         children.add(o);
236         o.getChildren(children);
237         return children;
238     }
239 
outputPages(OutputStream stream)240     public void outputPages(OutputStream stream) throws IOException {
241         Collections.sort(doc.pageObjs, new Comparator<PDFPage>() {
242             public int compare(PDFPage o1, PDFPage o2) {
243                 return Integer.compare(o1.pageIndex, o2.pageIndex);
244             }
245         });
246         doc.objects.addAll(doc.trailerObjects);
247         doc.trailerObjects = null;
248         if (doc.getStructureTreeElements() != null) {
249             doc.objects.addAll(doc.getStructureTreeElements());
250             doc.structureTreeElements = null;
251         }
252         for (int i = 0; i < doc.objects.size() * 2; i++) {
253             doc.indirectObjectOffsets.add(0L);
254         }
255         Set<PDFObject> page1Children = assignNumbers();
256         doc.streamIndirectObject(linearDict, new ByteArrayOutputStream());
257         for (PDFObject o : page1Children) {
258             doc.objects.remove(o);
259         }
260         int sizeOfRest = doc.objects.size();
261 
262         ByteArrayOutputStream fakeHeaderTrailerStream = new ByteArrayOutputStream();
263         long topTrailer = doc.position;
264         doc.writeTrailer(fakeHeaderTrailerStream, sizeOfRest, page1Children.size() + 1,
265                 page1Children.size() + sizeOfRest + 1, Long.MAX_VALUE, 0);
266         doc.position += fakeHeaderTrailerStream.size();
267 
268         ByteArrayOutputStream pageStream = new ByteArrayOutputStream();
269         writeObjects(page1Children, pageStream, sizeOfRest + 1);
270         long trailerOffset = doc.position;
271         ByteArrayOutputStream footerTrailerStream = new ByteArrayOutputStream();
272         doc.writeTrailer(footerTrailerStream, 0, sizeOfRest, sizeOfRest, 0, topTrailer);
273         doc.position += footerTrailerStream.size();
274 
275         linearDict.put("/L", doc.position);
276 
277         PDFDocument.outputIndirectObject(linearDict, stream);
278         CountingOutputStream realTrailer = new CountingOutputStream(stream);
279         doc.writeTrailer(realTrailer, sizeOfRest, page1Children.size() + 1,
280                 page1Children.size() + sizeOfRest + 1, trailerOffset, 0);
281         writePadding(fakeHeaderTrailerStream.size() - realTrailer.getCount(), stream);
282         for (PDFObject o : page1Children) {
283             PDFDocument.outputIndirectObject(o, stream);
284             if (o instanceof HintTable) {
285                 break;
286             }
287         }
288         stream.write(pageStream.toByteArray());
289         stream.write(footerTrailerStream.toByteArray());
290     }
291 
getPage1Children()292     private Set<PDFObject> getPage1Children() throws IOException {
293         Set<PDFObject> page1Children = new LinkedHashSet<PDFObject>();
294         if (!doc.pageObjs.isEmpty()) {
295             PDFPage page1 = doc.pageObjs.get(0);
296             page1Children.add(doc.getRoot());
297             hintTable = new HintTable(doc);
298             page1Children.add(hintTable);
299             page1Children.add(page1);
300             page1.getChildren(page1Children);
301             doc.objects.remove(doc.getPages());
302             doc.objects.add(0, doc.getPages());
303             pageObjsMap.put(page1, page1Children);
304 
305             for (int i = 1; i < doc.pageObjs.size(); i++) {
306                 PDFPage page = doc.pageObjs.get(i);
307                 pageObjsMap.put(page, getChildren(page));
308             }
309         }
310         return page1Children;
311     }
312 
writePadding(int padding, OutputStream stream)313     private static void writePadding(int padding, OutputStream stream) throws IOException {
314         for (int i = 0; i < padding; i++) {
315             stream.write(" ".getBytes("UTF-8"));
316         }
317     }
318 
writeObjects(Set<PDFObject> children1, OutputStream pageStream, int sizeOfRest)319     private void writeObjects(Set<PDFObject> children1, OutputStream pageStream, int sizeOfRest) throws IOException {
320         writePage1(children1, pageStream);
321         linearDict.put("/E", doc.position);
322         for (PDFPage page : doc.pageObjs) {
323             if (page.pageIndex != 0) {
324                 writePage(page, pageStream);
325             }
326         }
327         while (!doc.objects.isEmpty()) {
328             PDFObject o = doc.objects.remove(0);
329             if (o instanceof PDFOutline) {
330                 writeObjectGroup("/O", getChildren(o), pageStream);
331             } else if (o instanceof PDFDests) {
332                 writeObjectGroup("/E", getChildren(o), pageStream);
333             } else if (o instanceof PDFInfo) {
334                 writeObjectGroup("/I", getChildren(o), pageStream);
335             } else if (o instanceof PDFPageLabels) {
336                 writeObjectGroup("/L", getChildren(o), pageStream);
337             } else if (o instanceof PDFStructTreeRoot) {
338                 writeObjectGroup("/C", getChildren(o), pageStream);
339             } else {
340                 doc.streamIndirectObject(o, pageStream);
341             }
342         }
343         linearDict.put("/T", doc.position + 8 + String.valueOf(sizeOfRest).length());
344     }
345 
writeObjectGroup(String name, Set<PDFObject> objects, OutputStream pageStream)346     private void writeObjectGroup(String name, Set<PDFObject> objects, OutputStream pageStream)
347             throws IOException {
348         List<PDFObject> children = new ArrayList<PDFObject>(objects);
349         sort(children);
350 
351         int[] values = hintTable.hintGroups.get(name);
352         values[0] = children.iterator().next().getObjectNumber().getNumber();
353         values[1] = (int) doc.position;
354         values[2] = children.size();
355         for (PDFObject o : children) {
356             values[3] += doc.streamIndirectObject(o, pageStream);
357             doc.objects.remove(o);
358         }
359     }
360 
writePage1(Set<PDFObject> children1, OutputStream pageStream)361     private void writePage1(Set<PDFObject> children1, OutputStream pageStream) throws IOException {
362         hintTable.pageStartPos = (int) doc.position;
363         OutputStream stream = new ByteArrayOutputStream();
364 
365         Set<PDFObject> sharedChildren = getSharedObjects();
366 
367         int page1Len = 0;
368         int objCount = 0;
369         int sharedCount = 0;
370         for (PDFObject o : children1) {
371             if (o instanceof HintTable) {
372                 PDFArray a = (PDFArray) linearDict.get("/H");
373                 a.set(0, doc.position);
374                 doc.streamIndirectObject(o, stream);
375                 a.set(1, doc.position - (Double)a.get(0));
376                 stream = pageStream;
377             } else {
378                 int len = doc.streamIndirectObject(o, stream);
379                 if (o instanceof PDFStream && hintTable.contentStreamLengths.get(0) == 0) {
380                     hintTable.contentStreamLengths.set(0, len);
381                 }
382                 if (!(o instanceof PDFRoot)) {
383                     page1Len += len;
384                     objCount++;
385                 }
386                 if (sharedChildren.contains(o)) {
387                     hintTable.sharedLengths.set(sharedCount, len);
388                     sharedCount++;
389                 }
390             }
391         }
392         hintTable.pageLengths.set(0, page1Len);
393         hintTable.objCount.set(0, objCount);
394     }
395 
getSharedObjects()396     private Set<PDFObject> getSharedObjects() {
397         Set<PDFObject> pageSharedChildren = getChildren(doc.pageObjs.get(0));
398         for (int i = 0; i < pageSharedChildren.size(); i++) {
399             hintTable.sharedLengths.add(0);
400         }
401         return pageSharedChildren;
402     }
403 
writePage(PDFPage page, OutputStream pageStream)404     private void writePage(PDFPage page, OutputStream pageStream) throws IOException {
405         Set<PDFObject> children = pageObjsMap.get(page);
406         int pageLen = 0;
407         int objCount = 0;
408         for (PDFObject c : children) {
409             if (doc.objects.contains(c)) {
410                 int len = doc.streamIndirectObject(c, pageStream);
411                 if (c instanceof PDFStream) {
412                     hintTable.contentStreamLengths.set(page.pageIndex, len);
413                 }
414                 pageLen += len;
415                 doc.objects.remove(c);
416                 objCount++;
417             }
418         }
419         hintTable.pageLengths.set(page.pageIndex, pageLen);
420         hintTable.objCount.set(page.pageIndex, objCount);
421     }
422 }
423