/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */
package org.apache.fop.pdf;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.output.CountingOutputStream;

/**
 * Writes the objects of a {@link PDFDocument} in linearized order: a
 * linearization dictionary first, then the objects reachable from the first
 * page (including a hint table stream), then the remaining pages and objects.
 *
 * <p>The entry point is {@link #outputPages(OutputStream)}. The instance keeps
 * mutable state (the linearization dictionary and hint table) and mutates the
 * document's object lists while writing.</p>
 */
public class PDFLinearization {
    private final PDFDocument doc;
    /** For every page, the set of objects collected via {@code getChildren}. */
    private final Map<PDFPage, Set<PDFObject>> pageObjsMap = new HashMap<PDFPage, Set<PDFObject>>();
    /** Created in {@link #assignNumbers()}. */
    private PDFDictionary linearDict;
    /** Created in {@link #getPage1Children()} when the document has at least one page. */
    private HintTable hintTable;

    /**
     * Creates a linearizer for the given document.
     *
     * @param doc the document whose objects will be written
     */
    public PDFLinearization(PDFDocument doc) {
        this.doc = doc;
    }

    /**
     * Stream object holding the hint data. The lists are indexed by
     * {@code PDFPage.pageIndex} (page lengths, content stream lengths, object
     * counts) or by shared-object position, and are filled in by the enclosing
     * class while the page objects are being written.
     */
    static class HintTable extends PDFStream {
        private final List<PDFPage> pages;
        /** Value of {@code doc.position} when writing of the first page started. */
        int pageStartPos;
        List<Integer> sharedLengths = new ArrayList<Integer>();
        List<Integer> pageLengths = new ArrayList<Integer>();
        List<Integer> contentStreamLengths = new ArrayList<Integer>();
        List<Integer> objCount = new ArrayList<Integer>();
        /**
         * Per group key: {first object number, position, object count, total length},
         * as filled in by {@code writeObjectGroup}.
         */
        Map<String, int[]> hintGroups = new HashMap<String, int[]>();

        /**
         * Registers this stream with the document and initialises one zero
         * entry per page in each per-page list.
         *
         * @param doc the owning document
         */
        public HintTable(PDFDocument doc) {
            super(false);
            doc.assignObjectNumber(this);
            doc.addObject(this);
            pages = doc.pageObjs;
            for (int i = 0; i < pages.size(); i++) {
                pageLengths.add(0);
                contentStreamLengths.add(0);
                objCount.add(0);
            }
            // NOTE(review): these keys look like the hint stream dictionary
            // entries of the PDF linearization spec -- confirm against the spec.
            hintGroups.put("/C", new int[4]);
            hintGroups.put("/L", new int[4]);
            hintGroups.put("/I", new int[4]);
            hintGroups.put("/E", new int[4]);
            hintGroups.put("/O", new int[4]);
            hintGroups.put("/V", new int[4]);
        }

        /**
         * Returns a fresh filter list whose constructor argument is the
         * document's encryption-active flag.
         */
        @Override
        public PDFFilterList getFilterList() {
            return new PDFFilterList(getDocument().isEncryptionActive());
        }

        /**
         * Writes the binary hint data: a fixed header, three per-page arrays,
         * the shared object table and finally one record per hint group. The
         * byte offset of the shared table and of each group is stored in this
         * stream's dictionary under the corresponding key.
         *
         * @param os destination stream
         * @throws IOException if writing fails
         */
        @Override
        protected void outputRawStreamData(OutputStream os) throws IOException {
            // Counting wrapper so the offsets of the sub-tables can be recorded.
            CountingOutputStream bos = new CountingOutputStream(os);

            //start header (trailing numbers are the header item numbers)
            writeULong(1, bos); //1
            writeULong(pageStartPos, bos); //2
            writeCard16(32, bos); //3
            writeULong(0, bos); //4
            writeCard16(32, bos); //5
            writeULong(0, bos); //6
            writeCard16(0, bos); //7
            writeULong(0, bos); //8
            writeCard16(32, bos); //9
            writeCard16(0, bos); //10
            writeCard16(0, bos); //11
            writeCard16(0, bos); //12
            writeCard16(4, bos); //13
            //end header

            for (PDFPage page : pages) {
                writeULong(objCount.get(page.pageIndex) - 1, bos);
            }
            for (PDFPage page : pages) {
                writeULong(pageLengths.get(page.pageIndex), bos);
            }
            for (PDFPage page : pages) {
                writeULong(contentStreamLengths.get(page.pageIndex), bos);
            }

            writeSharedTable(bos);

            for (Map.Entry<String, int[]> group : hintGroups.entrySet()) {
                // Record where this group's record starts inside the stream data.
                put(group.getKey(), bos.getCount());
                for (int i : group.getValue()) {
                    writeULong(i, bos);
                }
                // The "/C" group carries six extra zero bytes after its four values.
                if (group.getKey().equals("/C")) {
                    writeULong(0, bos);
                    writeCard16(0, bos);
                }
            }
        }

        /**
         * Writes the shared object table (header, one length per shared object,
         * trailing zero) and stores its start offset under {@code "/S"}.
         */
        private void writeSharedTable(CountingOutputStream bos) throws IOException {
            put("/S", bos.getCount());

            //Shared object hint table, header section
            writeULong(0, bos); //1
            writeULong(0, bos); //2
            writeULong(sharedLengths.size(), bos); //3
            writeULong(sharedLengths.size(), bos); //4
            writeCard16(0, bos); //5
            writeULong(0, bos); //6
            writeCard16(32, bos); //7

            for (int i : sharedLengths) {
                writeULong(i, bos);
            }
            writeULong(0, bos);
        }

        /** Writes the low 16 bits of {@code s}, most significant byte first. */
        private void writeCard16(int s, OutputStream bos) throws IOException {
            byte b1 = (byte)((s >> 8) & 0xff);
            byte b2 = (byte)(s & 0xff);
            bos.write(b1);
            bos.write(b2);
        }

        /** Writes the 32 bits of {@code s}, most significant byte first. */
        private void writeULong(int s, OutputStream bos) throws IOException {
            byte b1 = (byte)((s >> 24) & 0xff);
            byte b2 = (byte)((s >> 16) & 0xff);
            byte b3 = (byte)((s >> 8) & 0xff);
            byte b4 = (byte)(s & 0xff);
            bos.write(b1);
            bos.write(b2);
            bos.write(b3);
            bos.write(b4);
        }
    }

    /**
     * The linearization dictionary. Its serialized form is followed by space
     * padding so that later outputs, whose values may need more digits, take
     * the same number of bytes as the first output as long as the growth
     * stays within 32 bytes.
     */
    static class LinearPDFDictionary extends PDFDictionary {
        /** Unpadded size of the first output, or -1 before the first output. */
        private int lastsize = -1;

        /**
         * Fills the dictionary with placeholder values and assigns it an
         * object number.
         *
         * @param doc the owning document
         */
        public LinearPDFDictionary(PDFDocument doc) {
            put("Linearized", 1);
            put("/L", 0);
            PDFArray larray = new PDFArray();
            larray.add(0);
            larray.add(0);
            put("/H", larray);
            doc.assignObjectNumber(this);
            // Return value intentionally unused.
            // NOTE(review): presumably forces the number to be allocated now -- confirm.
            getObjectNumber().getNumber();
            put("/O", getObjectNumber().getNumber() + 3);
            put("/E", 0);
            put("/N", doc.pageObjs.size());
            put("/T", 0);
        }

        /**
         * Outputs the dictionary followed by padding spaces.
         *
         * <p>The first call appends 32 spaces and remembers the unpadded size.
         * Later calls append {@code firstSize - size + 32} spaces, so the total
         * equals that of the first call whenever the padding is non-negative.</p>
         *
         * @param stream destination stream
         * @return number of bytes accounted for (dictionary plus padding)
         * @throws IOException if writing fails
         */
        @Override
        public int output(OutputStream stream) throws IOException {
            int size = super.output(stream);
            int padding = lastsize - size + 32;
            if (lastsize == -1) {
                padding = 32;
                lastsize = size;
            }
            writePadding(padding, stream);
            return size + padding;
        }
    }

    /**
     * Touches the object numbers in the order the objects are to be numbered,
     * creates the linearization dictionary, and sorts {@code doc.objects} by
     * object number.
     *
     * <p>The results of {@code getObjectNumber().getNumber()} are deliberately
     * discarded. NOTE(review): this relies on {@code getNumber()} allocating
     * the number on first access -- confirm against {@code PDFObjectNumber}.</p>
     *
     * @return the first-page object set, in insertion order
     * @throws IOException declared by {@link #getPage1Children()}
     */
    private Set<PDFObject> assignNumbers() throws IOException {
        Set<PDFObject> page1Children = getPage1Children();
        if (!doc.pageObjs.isEmpty()) {
            // Objects of pages 2..n that are not part of the first page.
            for (int i = 1; i < doc.pageObjs.size(); i++) {
                PDFPage page = doc.pageObjs.get(i);
                Set<PDFObject> children = pageObjsMap.get(page);
                for (PDFObject c : children) {
                    if (!page1Children.contains(c) && c.hasObjectNumber()) {
                        c.getObjectNumber().getNumber();
                    }
                }
            }
            // Destination/outline trees, info dictionary and page labels.
            for (PDFObject o : doc.objects) {
                if (o instanceof PDFDests || o instanceof PDFOutline) {
                    for (PDFObject c : getChildren(o)) {
                        c.getObjectNumber().getNumber();
                    }
                }
                if (o instanceof PDFInfo || o instanceof PDFPageLabels) {
                    o.getObjectNumber().getNumber();
                }
            }
            // Everything else that does not belong to the first page.
            for (PDFObject o : doc.objects) {
                if (!page1Children.contains(o)) {
                    o.getObjectNumber().getNumber();
                }
            }
        }
        // The dictionary and the first-page objects are touched last.
        linearDict = new LinearPDFDictionary(doc);
        for (PDFObject o : page1Children) {
            o.getObjectNumber().getNumber();
        }
        sort(doc.objects);
        return page1Children;
    }

    /** Sorts the list in place by ascending object number. */
    private void sort(List<PDFObject> objects) {
        Collections.sort(objects, new Comparator<PDFObject>() {
            public int compare(PDFObject o1, PDFObject o2) {
                return Integer.compare(o1.getObjectNumber().getNumber(), o2.getObjectNumber().getNumber());
            }
        });
    }

    /**
     * Returns an insertion-ordered set starting with {@code o} itself followed
     * by whatever {@code o.getChildren(Set)} adds to it.
     */
    private Set<PDFObject> getChildren(PDFObject o) {
        Set<PDFObject> children = new LinkedHashSet<PDFObject>();
        children.add(o);
        o.getChildren(children);
        return children;
    }

    /**
     * Writes the document's objects to {@code stream} in linearized order.
     *
     * <p>Because the linearization dictionary, the leading trailer and the
     * hint table contain values that are only known after the other objects
     * have been serialized, most of the output is first rendered into
     * in-memory buffers while {@code doc.position} is advanced; the real
     * stream is written at the end.</p>
     *
     * <p>Side effects: sorts {@code doc.pageObjs}, moves the trailer objects
     * and structure tree elements into {@code doc.objects} (nulling the
     * originals), and empties {@code doc.objects}.</p>
     *
     * @param stream destination stream
     * @throws IOException if writing fails
     */
    public void outputPages(OutputStream stream) throws IOException {
        Collections.sort(doc.pageObjs, new Comparator<PDFPage>() {
            public int compare(PDFPage o1, PDFPage o2) {
                return Integer.compare(o1.pageIndex, o2.pageIndex);
            }
        });
        doc.objects.addAll(doc.trailerObjects);
        doc.trailerObjects = null;
        if (doc.getStructureTreeElements() != null) {
            doc.objects.addAll(doc.getStructureTreeElements());
            doc.structureTreeElements = null;
        }
        // Pre-fill the offset table with zeros, two slots per object.
        for (int i = 0; i < doc.objects.size() * 2; i++) {
            doc.indirectObjectOffsets.add(0L);
        }
        Set<PDFObject> page1Children = assignNumbers();
        // Dry run into a scratch buffer; the real output happens further down.
        // NOTE(review): presumably advances doc.position by the padded size -- confirm.
        doc.streamIndirectObject(linearDict, new ByteArrayOutputStream());
        for (PDFObject o : page1Children) {
            doc.objects.remove(o);
        }
        int sizeOfRest = doc.objects.size();

        // Render the leading trailer with a placeholder offset to learn its size.
        ByteArrayOutputStream fakeHeaderTrailerStream = new ByteArrayOutputStream();
        long topTrailer = doc.position;
        doc.writeTrailer(fakeHeaderTrailerStream, sizeOfRest, page1Children.size() + 1,
                page1Children.size() + sizeOfRest + 1, Long.MAX_VALUE, 0);
        doc.position += fakeHeaderTrailerStream.size();

        ByteArrayOutputStream pageStream = new ByteArrayOutputStream();
        writeObjects(page1Children, pageStream, sizeOfRest + 1);
        long trailerOffset = doc.position;
        ByteArrayOutputStream footerTrailerStream = new ByteArrayOutputStream();
        doc.writeTrailer(footerTrailerStream, 0, sizeOfRest, sizeOfRest, 0, topTrailer);
        doc.position += footerTrailerStream.size();

        linearDict.put("/L", doc.position);

        // Real output starts here.
        PDFDocument.outputIndirectObject(linearDict, stream);
        CountingOutputStream realTrailer = new CountingOutputStream(stream);
        doc.writeTrailer(realTrailer, sizeOfRest, page1Children.size() + 1,
                page1Children.size() + sizeOfRest + 1, trailerOffset, 0);
        // Pad the real trailer up to the size reserved by the placeholder one.
        writePadding(fakeHeaderTrailerStream.size() - realTrailer.getCount(), stream);
        // Objects up to and including the hint table are not in pageStream
        // (see writePage1), so they are emitted directly.
        for (PDFObject o : page1Children) {
            PDFDocument.outputIndirectObject(o, stream);
            if (o instanceof HintTable) {
                break;
            }
        }
        stream.write(pageStream.toByteArray());
        stream.write(footerTrailerStream.toByteArray());
    }

    /**
     * Collects the first-page object set (document root, a newly created hint
     * table, the first page and its children, in that order), moves the pages
     * object to the front of {@code doc.objects}, and records the child sets
     * of all pages in {@code pageObjsMap}.
     *
     * @return the first-page object set; empty if the document has no pages
     * @throws IOException declared for callers; not thrown by the visible code
     */
    private Set<PDFObject> getPage1Children() throws IOException {
        Set<PDFObject> page1Children = new LinkedHashSet<PDFObject>();
        if (!doc.pageObjs.isEmpty()) {
            PDFPage page1 = doc.pageObjs.get(0);
            page1Children.add(doc.getRoot());
            hintTable = new HintTable(doc);
            page1Children.add(hintTable);
            page1Children.add(page1);
            page1.getChildren(page1Children);
            doc.objects.remove(doc.getPages());
            doc.objects.add(0, doc.getPages());
            pageObjsMap.put(page1, page1Children);

            for (int i = 1; i < doc.pageObjs.size(); i++) {
                PDFPage page = doc.pageObjs.get(i);
                pageObjsMap.put(page, getChildren(page));
            }
        }
        return page1Children;
    }

    /**
     * Writes {@code padding} space characters (byte 0x20) to the stream.
     * Nothing is written when {@code padding} is zero or negative.
     *
     * @param padding number of spaces to write
     * @param stream destination stream
     * @throws IOException if writing fails
     */
    private static void writePadding(int padding, OutputStream stream) throws IOException {
        for (int i = 0; i < padding; i++) {
            // Same single byte as " ".getBytes("UTF-8"), without the per-iteration
            // array allocation and charset lookup.
            stream.write(' ');
        }
    }

    /**
     * Serializes the first page, the remaining pages and then all objects
     * still left in {@code doc.objects}, recording the {@code /E} and
     * {@code /T} values of the linearization dictionary on the way.
     * {@code doc.objects} is empty afterwards.
     *
     * @param children1 the first-page object set
     * @param pageStream buffer receiving the serialized objects
     * @param sizeOfRest value whose decimal digit count enters the {@code /T}
     *        computation (the caller passes the remaining object count plus one)
     * @throws IOException if writing fails
     */
    private void writeObjects(Set<PDFObject> children1, OutputStream pageStream, int sizeOfRest)
            throws IOException {
        writePage1(children1, pageStream);
        linearDict.put("/E", doc.position);
        for (PDFPage page : doc.pageObjs) {
            if (page.pageIndex != 0) {
                writePage(page, pageStream);
            }
        }
        // Drain the remaining objects; grouped types are written together with
        // their children and recorded in the hint table.
        while (!doc.objects.isEmpty()) {
            PDFObject o = doc.objects.remove(0);
            if (o instanceof PDFOutline) {
                writeObjectGroup("/O", getChildren(o), pageStream);
            } else if (o instanceof PDFDests) {
                writeObjectGroup("/E", getChildren(o), pageStream);
            } else if (o instanceof PDFInfo) {
                writeObjectGroup("/I", getChildren(o), pageStream);
            } else if (o instanceof PDFPageLabels) {
                writeObjectGroup("/L", getChildren(o), pageStream);
            } else if (o instanceof PDFStructTreeRoot) {
                writeObjectGroup("/C", getChildren(o), pageStream);
            } else {
                doc.streamIndirectObject(o, pageStream);
            }
        }
        // NOTE(review): the constant 8 plus the digit count presumably skips a
        // fixed prefix of the trailing xref section -- confirm against writeTrailer.
        linearDict.put("/T", doc.position + 8 + String.valueOf(sizeOfRest).length());
    }

    /**
     * Writes a group of objects in object-number order and records in the hint
     * table, under {@code name}: the first object number, the start position,
     * the object count, and the accumulated byte length. Written objects are
     * removed from {@code doc.objects}.
     *
     * @param name hint group key (must already exist in {@code hintGroups})
     * @param objects the objects of the group; must not be empty
     * @param pageStream buffer receiving the serialized objects
     * @throws IOException if writing fails
     */
    private void writeObjectGroup(String name, Set<PDFObject> objects, OutputStream pageStream)
            throws IOException {
        List<PDFObject> children = new ArrayList<PDFObject>(objects);
        sort(children);

        int[] values = hintTable.hintGroups.get(name);
        values[0] = children.iterator().next().getObjectNumber().getNumber();
        values[1] = (int) doc.position;
        values[2] = children.size();
        for (PDFObject o : children) {
            values[3] += doc.streamIndirectObject(o, pageStream);
            doc.objects.remove(o);
        }
    }

    /**
     * Serializes the first-page object set and fills the hint table entries
     * for page index 0.
     *
     * <p>Objects up to and including the hint table are streamed into a
     * scratch buffer that is discarded; only the objects after the hint table
     * go to {@code pageStream}. While streaming the hint table, the
     * {@code /H} array of the linearization dictionary is set to
     * {start position, length}.</p>
     *
     * @param children1 the first-page object set
     * @param pageStream buffer receiving the objects that follow the hint table
     * @throws IOException if writing fails
     */
    private void writePage1(Set<PDFObject> children1, OutputStream pageStream) throws IOException {
        hintTable.pageStartPos = (int) doc.position;
        OutputStream stream = new ByteArrayOutputStream();

        Set<PDFObject> sharedChildren = getSharedObjects();

        int page1Len = 0;
        int objCount = 0;
        int sharedCount = 0;
        for (PDFObject o : children1) {
            if (o instanceof HintTable) {
                PDFArray a = (PDFArray) linearDict.get("/H");
                a.set(0, doc.position);
                doc.streamIndirectObject(o, stream);
                a.set(1, doc.position - (Double)a.get(0));
                // From here on the objects are part of the buffered page data.
                stream = pageStream;
            } else {
                int len = doc.streamIndirectObject(o, stream);
                // Only the first stream encountered sets the content stream length.
                if (o instanceof PDFStream && hintTable.contentStreamLengths.get(0) == 0) {
                    hintTable.contentStreamLengths.set(0, len);
                }
                // The document root is excluded from the page length and count.
                if (!(o instanceof PDFRoot)) {
                    page1Len += len;
                    objCount++;
                }
                if (sharedChildren.contains(o)) {
                    hintTable.sharedLengths.set(sharedCount, len);
                    sharedCount++;
                }
            }
        }
        hintTable.pageLengths.set(0, page1Len);
        hintTable.objCount.set(0, objCount);
    }

    /**
     * Returns the first page together with its children and appends one zero
     * entry per returned object to the hint table's shared lengths list.
     */
    private Set<PDFObject> getSharedObjects() {
        Set<PDFObject> pageSharedChildren = getChildren(doc.pageObjs.get(0));
        for (int i = 0; i < pageSharedChildren.size(); i++) {
            hintTable.sharedLengths.add(0);
        }
        return pageSharedChildren;
    }

    /**
     * Serializes those objects of {@code page} that are still present in
     * {@code doc.objects}, removes them from that list, and records the page's
     * length, object count and (for the last stream object written) content
     * stream length in the hint table.
     *
     * @param page the page to write
     * @param pageStream buffer receiving the serialized objects
     * @throws IOException if writing fails
     */
    private void writePage(PDFPage page, OutputStream pageStream) throws IOException {
        Set<PDFObject> children = pageObjsMap.get(page);
        int pageLen = 0;
        int objCount = 0;
        for (PDFObject c : children) {
            // Skip objects already written as part of an earlier page.
            if (doc.objects.contains(c)) {
                int len = doc.streamIndirectObject(c, pageStream);
                if (c instanceof PDFStream) {
                    hintTable.contentStreamLengths.set(page.pageIndex, len);
                }
                pageLen += len;
                doc.objects.remove(c);
                objCount++;
            }
        }
        hintTable.pageLengths.set(page.pageIndex, pageLen);
        hintTable.objCount.set(page.pageIndex, objCount);
    }
}