1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
#include "public/fpdf_ppo.h"

#include <map>
#include <memory>
#include <set>
#include <vector>

#include "core/fpdfapi/fpdf_parser/include/cpdf_array.h"
#include "core/fpdfapi/fpdf_parser/include/cpdf_document.h"
#include "core/fpdfapi/fpdf_parser/include/cpdf_name.h"
#include "core/fpdfapi/fpdf_parser/include/cpdf_number.h"
#include "core/fpdfapi/fpdf_parser/include/cpdf_reference.h"
#include "core/fpdfapi/fpdf_parser/include/cpdf_stream.h"
#include "core/fpdfapi/fpdf_parser/include/cpdf_string.h"
#include "fpdfsdk/include/fsdk_define.h"
#include "third_party/base/stl_util.h"
22 
23 class CPDF_PageOrganizer {
24  public:
25   using ObjectNumberMap = std::map<uint32_t, uint32_t>;
26   CPDF_PageOrganizer();
27   ~CPDF_PageOrganizer();
28 
29   FX_BOOL PDFDocInit(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
30   FX_BOOL ExportPage(CPDF_Document* pSrcPDFDoc,
31                      std::vector<uint16_t>* pPageNums,
32                      CPDF_Document* pDestPDFDoc,
33                      int nIndex);
34   CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
35                                          const CFX_ByteString& bsSrctag);
36   FX_BOOL UpdateReference(CPDF_Object* pObj,
37                           CPDF_Document* pDoc,
38                           ObjectNumberMap* pObjNumberMap);
39   uint32_t GetNewObjId(CPDF_Document* pDoc,
40                        ObjectNumberMap* pObjNumberMap,
41                        CPDF_Reference* pRef);
42 };
43 
CPDF_PageOrganizer()44 CPDF_PageOrganizer::CPDF_PageOrganizer() {}
45 
~CPDF_PageOrganizer()46 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
47 
PDFDocInit(CPDF_Document * pDestPDFDoc,CPDF_Document * pSrcPDFDoc)48 FX_BOOL CPDF_PageOrganizer::PDFDocInit(CPDF_Document* pDestPDFDoc,
49                                        CPDF_Document* pSrcPDFDoc) {
50   if (!pDestPDFDoc || !pSrcPDFDoc)
51     return FALSE;
52 
53   CPDF_Dictionary* pNewRoot = pDestPDFDoc->GetRoot();
54   if (!pNewRoot)
55     return FALSE;
56 
57   CPDF_Dictionary* DInfoDict = pDestPDFDoc->GetInfo();
58   if (!DInfoDict)
59     return FALSE;
60 
61   CFX_ByteString producerstr;
62   producerstr.Format("PDFium");
63   DInfoDict->SetAt("Producer", new CPDF_String(producerstr, FALSE));
64 
65   CFX_ByteString cbRootType = pNewRoot->GetStringBy("Type", "");
66   if (cbRootType.IsEmpty())
67     pNewRoot->SetAt("Type", new CPDF_Name("Catalog"));
68 
69   CPDF_Object* pElement = pNewRoot->GetObjectBy("Pages");
70   CPDF_Dictionary* pNewPages =
71       pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
72   if (!pNewPages) {
73     pNewPages = new CPDF_Dictionary;
74     uint32_t NewPagesON = pDestPDFDoc->AddIndirectObject(pNewPages);
75     pNewRoot->SetAt("Pages", new CPDF_Reference(pDestPDFDoc, NewPagesON));
76   }
77 
78   CFX_ByteString cbPageType = pNewPages->GetStringBy("Type", "");
79   if (cbPageType == "") {
80     pNewPages->SetAt("Type", new CPDF_Name("Pages"));
81   }
82 
83   CPDF_Array* pKeysArray = pNewPages->GetArrayBy("Kids");
84   if (!pKeysArray) {
85     CPDF_Array* pNewKids = new CPDF_Array;
86     uint32_t Kidsobjnum = pDestPDFDoc->AddIndirectObject(pNewKids);
87 
88     pNewPages->SetAt("Kids", new CPDF_Reference(pDestPDFDoc, Kidsobjnum));
89     pNewPages->SetAt("Count", new CPDF_Number(0));
90   }
91 
92   return TRUE;
93 }
94 
ExportPage(CPDF_Document * pSrcPDFDoc,std::vector<uint16_t> * pPageNums,CPDF_Document * pDestPDFDoc,int nIndex)95 FX_BOOL CPDF_PageOrganizer::ExportPage(CPDF_Document* pSrcPDFDoc,
96                                        std::vector<uint16_t>* pPageNums,
97                                        CPDF_Document* pDestPDFDoc,
98                                        int nIndex) {
99   int curpage = nIndex;
100   std::unique_ptr<ObjectNumberMap> pObjNumberMap(new ObjectNumberMap);
101   int nSize = pdfium::CollectionSize<int>(*pPageNums);
102   for (int i = 0; i < nSize; ++i) {
103     CPDF_Dictionary* pCurPageDict = pDestPDFDoc->CreateNewPage(curpage);
104     CPDF_Dictionary* pSrcPageDict = pSrcPDFDoc->GetPage(pPageNums->at(i) - 1);
105     if (!pSrcPageDict || !pCurPageDict)
106       return FALSE;
107 
108     // Clone the page dictionary
109     for (const auto& it : *pSrcPageDict) {
110       const CFX_ByteString& cbSrcKeyStr = it.first;
111       CPDF_Object* pObj = it.second;
112       if (cbSrcKeyStr.Compare(("Type")) && cbSrcKeyStr.Compare(("Parent"))) {
113         if (pCurPageDict->KeyExist(cbSrcKeyStr))
114           pCurPageDict->RemoveAt(cbSrcKeyStr);
115         pCurPageDict->SetAt(cbSrcKeyStr, pObj->Clone());
116       }
117     }
118 
119     // inheritable item
120     CPDF_Object* pInheritable = nullptr;
121     // 1 MediaBox  //required
122     if (!pCurPageDict->KeyExist("MediaBox")) {
123       pInheritable = PageDictGetInheritableTag(pSrcPageDict, "MediaBox");
124       if (!pInheritable) {
125         // Search the "CropBox" from source page dictionary,
126         // if not exists,we take the letter size.
127         pInheritable = PageDictGetInheritableTag(pSrcPageDict, "CropBox");
128         if (pInheritable) {
129           pCurPageDict->SetAt("MediaBox", pInheritable->Clone());
130         } else {
131           // Make the default size to be letter size (8.5'x11')
132           CPDF_Array* pArray = new CPDF_Array;
133           pArray->AddNumber(0);
134           pArray->AddNumber(0);
135           pArray->AddNumber(612);
136           pArray->AddNumber(792);
137           pCurPageDict->SetAt("MediaBox", pArray);
138         }
139       } else {
140         pCurPageDict->SetAt("MediaBox", pInheritable->Clone());
141       }
142     }
143     // 2 Resources //required
144     if (!pCurPageDict->KeyExist("Resources")) {
145       pInheritable = PageDictGetInheritableTag(pSrcPageDict, "Resources");
146       if (!pInheritable)
147         return FALSE;
148       pCurPageDict->SetAt("Resources", pInheritable->Clone());
149     }
150     // 3 CropBox  //Optional
151     if (!pCurPageDict->KeyExist("CropBox")) {
152       pInheritable = PageDictGetInheritableTag(pSrcPageDict, "CropBox");
153       if (pInheritable)
154         pCurPageDict->SetAt("CropBox", pInheritable->Clone());
155     }
156     // 4 Rotate  //Optional
157     if (!pCurPageDict->KeyExist("Rotate")) {
158       pInheritable = PageDictGetInheritableTag(pSrcPageDict, "Rotate");
159       if (pInheritable)
160         pCurPageDict->SetAt("Rotate", pInheritable->Clone());
161     }
162 
163     // Update the reference
164     uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
165     uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
166 
167     (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
168 
169     UpdateReference(pCurPageDict, pDestPDFDoc, pObjNumberMap.get());
170     ++curpage;
171   }
172 
173   return TRUE;
174 }
175 
PageDictGetInheritableTag(CPDF_Dictionary * pDict,const CFX_ByteString & bsSrcTag)176 CPDF_Object* CPDF_PageOrganizer::PageDictGetInheritableTag(
177     CPDF_Dictionary* pDict,
178     const CFX_ByteString& bsSrcTag) {
179   if (!pDict || bsSrcTag.IsEmpty())
180     return nullptr;
181   if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
182     return nullptr;
183 
184   CPDF_Object* pType = pDict->GetObjectBy("Type")->GetDirect();
185   if (!ToName(pType))
186     return nullptr;
187   if (pType->GetString().Compare("Page"))
188     return nullptr;
189 
190   CPDF_Dictionary* pp = ToDictionary(pDict->GetObjectBy("Parent")->GetDirect());
191   if (!pp)
192     return nullptr;
193 
194   if (pDict->KeyExist(bsSrcTag))
195     return pDict->GetObjectBy(bsSrcTag);
196 
197   while (pp) {
198     if (pp->KeyExist(bsSrcTag))
199       return pp->GetObjectBy(bsSrcTag);
200     if (!pp->KeyExist("Parent"))
201       break;
202     pp = ToDictionary(pp->GetObjectBy("Parent")->GetDirect());
203   }
204   return nullptr;
205 }
206 
UpdateReference(CPDF_Object * pObj,CPDF_Document * pDoc,ObjectNumberMap * pObjNumberMap)207 FX_BOOL CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
208                                             CPDF_Document* pDoc,
209                                             ObjectNumberMap* pObjNumberMap) {
210   switch (pObj->GetType()) {
211     case CPDF_Object::REFERENCE: {
212       CPDF_Reference* pReference = pObj->AsReference();
213       uint32_t newobjnum = GetNewObjId(pDoc, pObjNumberMap, pReference);
214       if (newobjnum == 0)
215         return FALSE;
216       pReference->SetRef(pDoc, newobjnum);
217       break;
218     }
219     case CPDF_Object::DICTIONARY: {
220       CPDF_Dictionary* pDict = pObj->AsDictionary();
221       auto it = pDict->begin();
222       while (it != pDict->end()) {
223         const CFX_ByteString& key = it->first;
224         CPDF_Object* pNextObj = it->second;
225         ++it;
226         if (!FXSYS_strcmp(key.c_str(), "Parent") ||
227             !FXSYS_strcmp(key.c_str(), "Prev") ||
228             !FXSYS_strcmp(key.c_str(), "First")) {
229           continue;
230         }
231         if (pNextObj) {
232           if (!UpdateReference(pNextObj, pDoc, pObjNumberMap))
233             pDict->RemoveAt(key);
234         } else {
235           return FALSE;
236         }
237       }
238       break;
239     }
240     case CPDF_Object::ARRAY: {
241       CPDF_Array* pArray = pObj->AsArray();
242       for (size_t i = 0; i < pArray->GetCount(); ++i) {
243         CPDF_Object* pNextObj = pArray->GetObjectAt(i);
244         if (!pNextObj)
245           return FALSE;
246         if (!UpdateReference(pNextObj, pDoc, pObjNumberMap))
247           return FALSE;
248       }
249       break;
250     }
251     case CPDF_Object::STREAM: {
252       CPDF_Stream* pStream = pObj->AsStream();
253       CPDF_Dictionary* pDict = pStream->GetDict();
254       if (pDict) {
255         if (!UpdateReference(pDict, pDoc, pObjNumberMap))
256           return FALSE;
257       } else {
258         return FALSE;
259       }
260       break;
261     }
262     default:
263       break;
264   }
265 
266   return TRUE;
267 }
268 
GetNewObjId(CPDF_Document * pDoc,ObjectNumberMap * pObjNumberMap,CPDF_Reference * pRef)269 uint32_t CPDF_PageOrganizer::GetNewObjId(CPDF_Document* pDoc,
270                                          ObjectNumberMap* pObjNumberMap,
271                                          CPDF_Reference* pRef) {
272   if (!pRef)
273     return 0;
274 
275   uint32_t dwObjnum = pRef->GetRefObjNum();
276   uint32_t dwNewObjNum = 0;
277   const auto it = pObjNumberMap->find(dwObjnum);
278   if (it != pObjNumberMap->end())
279     dwNewObjNum = it->second;
280   if (dwNewObjNum)
281     return dwNewObjNum;
282 
283   CPDF_Object* pDirect = pRef->GetDirect();
284   if (!pDirect)
285     return 0;
286 
287   CPDF_Object* pClone = pDirect->Clone();
288   if (!pClone)
289     return 0;
290 
291   if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
292     if (pDictClone->KeyExist("Type")) {
293       CFX_ByteString strType = pDictClone->GetStringBy("Type");
294       if (!FXSYS_stricmp(strType.c_str(), "Pages")) {
295         pDictClone->Release();
296         return 4;
297       }
298       if (!FXSYS_stricmp(strType.c_str(), "Page")) {
299         pDictClone->Release();
300         return 0;
301       }
302     }
303   }
304   dwNewObjNum = pDoc->AddIndirectObject(pClone);
305   (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
306   if (!UpdateReference(pClone, pDoc, pObjNumberMap)) {
307     pClone->Release();
308     return 0;
309   }
310   return dwNewObjNum;
311 }
312 
ParserPageRangeString(CFX_ByteString rangstring,std::vector<uint16_t> * pageArray,int nCount)313 FPDF_BOOL ParserPageRangeString(CFX_ByteString rangstring,
314                                 std::vector<uint16_t>* pageArray,
315                                 int nCount) {
316   if (rangstring.GetLength() != 0) {
317     rangstring.Remove(' ');
318     int nLength = rangstring.GetLength();
319     CFX_ByteString cbCompareString("0123456789-,");
320     for (int i = 0; i < nLength; ++i) {
321       if (cbCompareString.Find(rangstring[i]) == -1)
322         return FALSE;
323     }
324     CFX_ByteString cbMidRange;
325     int nStringFrom = 0;
326     int nStringTo = 0;
327     while (nStringTo < nLength) {
328       nStringTo = rangstring.Find(',', nStringFrom);
329       if (nStringTo == -1)
330         nStringTo = nLength;
331       cbMidRange = rangstring.Mid(nStringFrom, nStringTo - nStringFrom);
332       int nMid = cbMidRange.Find('-');
333       if (nMid == -1) {
334         long lPageNum = atol(cbMidRange.c_str());
335         if (lPageNum <= 0 || lPageNum > nCount)
336           return FALSE;
337         pageArray->push_back((uint16_t)lPageNum);
338       } else {
339         int nStartPageNum = atol(cbMidRange.Mid(0, nMid).c_str());
340         if (nStartPageNum == 0)
341           return FALSE;
342 
343         ++nMid;
344         int nEnd = cbMidRange.GetLength() - nMid;
345         if (nEnd == 0)
346           return FALSE;
347 
348         int nEndPageNum = atol(cbMidRange.Mid(nMid, nEnd).c_str());
349         if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
350             nEndPageNum > nCount) {
351           return FALSE;
352         }
353         for (int i = nStartPageNum; i <= nEndPageNum; ++i) {
354           pageArray->push_back(i);
355         }
356       }
357       nStringFrom = nStringTo + 1;
358     }
359   }
360   return TRUE;
361 }
362 
FPDF_ImportPages(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc,FPDF_BYTESTRING pagerange,int index)363 DLLEXPORT FPDF_BOOL STDCALL FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
364                                              FPDF_DOCUMENT src_doc,
365                                              FPDF_BYTESTRING pagerange,
366                                              int index) {
367   CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
368   if (!dest_doc)
369     return FALSE;
370 
371   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
372   if (!pSrcDoc)
373     return FALSE;
374 
375   std::vector<uint16_t> pageArray;
376   int nCount = pSrcDoc->GetPageCount();
377   if (pagerange) {
378     if (!ParserPageRangeString(pagerange, &pageArray, nCount))
379       return FALSE;
380   } else {
381     for (int i = 1; i <= nCount; ++i) {
382       pageArray.push_back(i);
383     }
384   }
385 
386   CPDF_PageOrganizer pageOrg;
387   pageOrg.PDFDocInit(pDestDoc, pSrcDoc);
388   return pageOrg.ExportPage(pSrcDoc, &pageArray, pDestDoc, index);
389 }
390 
FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,FPDF_DOCUMENT src_doc)391 DLLEXPORT FPDF_BOOL STDCALL FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,
392                                                        FPDF_DOCUMENT src_doc) {
393   CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
394   if (!pDstDoc)
395     return FALSE;
396 
397   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
398   if (!pSrcDoc)
399     return FALSE;
400 
401   CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
402   pSrcDict = pSrcDict->GetDictBy("ViewerPreferences");
403   if (!pSrcDict)
404     return FALSE;
405 
406   CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
407   if (!pDstDict)
408     return FALSE;
409 
410   pDstDict->SetAt("ViewerPreferences", pSrcDict->Clone(TRUE));
411   return TRUE;
412 }
413