1 ///////////////////////////////////////////////////////////////////////////////
2 // Name:        pdfparser.cpp
3 // Purpose:
4 // Author:      Ulrich Telle
5 // Created:     2006-10-15
6 // Copyright:   (c) Ulrich Telle
7 // Licence:     wxWindows licence
8 ///////////////////////////////////////////////////////////////////////////////
9 
10 /// \file pdfparser.cpp Implementation of PDF parser
11 
12 // For compilers that support precompilation, includes <wx.h>.
13 #include <wx/wxprec.h>
14 
15 #ifdef __BORLANDC__
16 #pragma hdrstop
17 #endif
18 
19 #ifndef WX_PRECOMP
20 #include <wx/wx.h>
21 #endif
22 
23 // includes
24 #include <wx/uri.h>
25 #include <wx/url.h>
26 
27 #include "wx/pdfencrypt.h"
28 #include "wx/pdfinfo.h"
29 #include "wx/pdfobjects.h"
30 #include "wx/pdfparser.h"
31 #include "wx/pdftemplate.h"
32 
33 #include <wx/arrimpl.cpp>
34 WX_DEFINE_OBJARRAY(wxPdfXRef);
35 
wxPdfXRefEntry()36 wxPdfXRefEntry::wxPdfXRefEntry()
37 {
38   m_type = -1;
39   m_ofs_idx = 0;
40   m_gen_ref = 0;
41 }
42 
~wxPdfXRefEntry()43 wxPdfXRefEntry::~wxPdfXRefEntry()
44 {
45 }
46 
47 void
ReserveXRef(size_t count)48 wxPdfParser::ReserveXRef(size_t count)
49 {
50   size_t currentCount = m_xref.GetCount();
51   if (count > currentCount)
52   {
53     m_xref.Add(wxPdfXRefEntry(), count-currentCount);
54   }
55 }
56 
57 wxFileSystem* wxPdfParser::ms_fileSystem = NULL;
58 
59 wxFileSystem*
GetFileSystem()60 wxPdfParser::GetFileSystem()
61 {
62   if (ms_fileSystem == NULL)
63   {
64     static wxFileSystem fileSystem;
65     ms_fileSystem = &fileSystem;
66   }
67   return ms_fileSystem;
68 }
69 
wxPdfParser(const wxString & filename,const wxString & password)70 wxPdfParser::wxPdfParser(const wxString& filename, const wxString& password)
71 {
72   m_objectQueue     = new wxPdfObjectQueue();
73   m_objectQueueLast = m_objectQueue;
74   m_objectMap       = new wxPdfObjectMap();
75   m_objStmCache     = new wxPdfObjStmMap();
76   m_tokens  = NULL;
77   m_trailer = NULL;
78   m_root    = NULL;
79   m_useRawStream = false;
80   m_cacheObjects = true;
81 
82   m_encrypted = false;
83   m_decryptor = NULL;
84 
85   m_filename = filename;
86   m_password = password;
87 
88   wxString fileURL = filename;
89   wxURI uri(filename);
90   if (!uri.HasScheme())
91   {
92     fileURL = wxFileSystem::FileNameToURL(filename);
93   }
94   m_pdfFile = wxPdfParser::GetFileSystem()->OpenFile(fileURL);
95   if (m_pdfFile != NULL)
96   {
97     m_tokens = new wxPdfTokenizer(m_pdfFile->GetStream());
98     m_initialized = ParseDocument();
99   }
100 }
101 
~wxPdfParser()102 wxPdfParser::~wxPdfParser()
103 {
104   wxPdfObjectQueue* entry = m_objectQueue;
105   wxPdfObjectQueue* next;
106   while (entry != NULL)
107   {
108     wxPdfObject* object = entry->GetObject();
109     if (object != NULL && object->IsCreatedIndirect())
110     {
111       delete object;
112     }
113     next = entry->GetNext();
114     delete entry;
115     entry = next;
116   }
117   delete m_objectMap;
118 
119   wxPdfObjStmMap::iterator objStm = m_objStmCache->begin();
120   for (objStm = m_objStmCache->begin(); objStm != m_objStmCache->end(); objStm++)
121   {
122     if (objStm->second != NULL)
123     {
124       delete objStm->second;
125     }
126   }
127   delete m_objStmCache;
128 
129   size_t j;
130   for (j = 0; j < m_pages.GetCount(); j++)
131   {
132     wxPdfObject* obj = (wxPdfObject*) m_pages.Item(j);
133     delete obj;
134   }
135   m_pages.Clear();
136 
137   if (m_trailer != NULL)
138   {
139     delete m_trailer;
140   }
141   if (m_root != NULL)
142   {
143     delete m_root;
144   }
145 
146   delete m_tokens;
147   if (m_pdfFile != NULL)
148   {
149     delete m_pdfFile;
150   }
151 
152   if (m_decryptor != NULL)
153   {
154     delete m_decryptor;
155   }
156 }
157 
158 bool
IsOk()159 wxPdfParser::IsOk()
160 {
161   return (m_pdfFile != NULL && m_initialized);
162 }
163 
164 void
AppendObject(int originalObjectId,int actualObjectId,wxPdfObject * obj)165 wxPdfParser::AppendObject(int originalObjectId, int actualObjectId, wxPdfObject* obj)
166 {
167   wxPdfObjectQueue* newEntry = new wxPdfObjectQueue(originalObjectId, actualObjectId, obj);
168   m_objectQueueLast->SetNext(newEntry);
169   m_objectQueueLast = newEntry;
170   (*m_objectMap)[originalObjectId] = newEntry;
171 }
172 
173 unsigned int
GetPageCount()174 wxPdfParser::GetPageCount()
175 {
176   return (unsigned int) m_pages.GetCount();
177 }
178 
179 static const wxChar* gs_entryList[] = {
180   wxS("Title"),   wxS("Author"),   wxS("Subject"),      wxS("Keywords"),
181   wxS("Creator"), wxS("Producer"), wxS("CreationDate"), wxS("ModDate"),
182   NULL
183 }; //, "Trapped")
184 
185 bool
GetSourceInfo(wxPdfInfo & info)186 wxPdfParser::GetSourceInfo(wxPdfInfo& info)
187 {
188   bool ok = false;
189   wxPdfDictionary* infoDict = (wxPdfDictionary*) ResolveObject(m_trailer->Get(wxS("Info")));
190   if (infoDict != NULL && infoDict->GetType() == OBJTYPE_DICTIONARY)
191   {
192     typedef void (wxPdfInfo::*InfoSetter) (const wxString& value);
193     InfoSetter entryFunc[] = { &wxPdfInfo::SetTitle,        &wxPdfInfo::SetAuthor,  &wxPdfInfo::SetSubject,
194                                &wxPdfInfo::SetKeywords,     &wxPdfInfo::SetCreator, &wxPdfInfo::SetProducer,
195                                &wxPdfInfo::SetCreationDate, &wxPdfInfo::SetModDate,
196                                NULL };
197     wxString value;
198     size_t j;
199     for (j = 0; gs_entryList[j] != NULL; j++)
200     {
201       wxPdfString* entry = (wxPdfString*) infoDict->Get(gs_entryList[j]);
202       if (entry != NULL)
203       {
204         value = entry->GetValue();
205 #if wxUSE_UNICODE
206         if ((value.Length() >= 2) && (value.GetChar(0) == 254) && (value.GetChar(1) == 255))
207         {
208           wxMBConvUTF16BE conv;
209           size_t k;
210           size_t len = value.Length()-2;
211           char* mbstr = new char[len+2];
212           for (k = 0; k < len; k++)
213           {
214             mbstr[k] = (int) value.GetChar(k+2);
215           }
216           mbstr[len] = 0;
217           mbstr[len+1] = 0;
218           value = conv.cMB2WC(mbstr);
219           delete [] mbstr;
220         }
221 #endif
222         (info.*entryFunc[j])(value);
223       }
224     }
225     if (infoDict->IsCreatedIndirect())
226     {
227       delete infoDict;
228     }
229     ok = true;
230   }
231   return ok;
232 }
233 
234 bool
ParseDocument()235 wxPdfParser::ParseDocument()
236 {
237   bool ok = false;
238   m_fileSize = m_tokens->GetLength();
239   m_pdfVersion = m_tokens->CheckPdfHeader();
240   if (m_pdfVersion != wxEmptyString)
241   {
242     if (ParseXRef())
243     {
244       if (SetupDecryptor())
245       {
246         m_root = (wxPdfDictionary*) m_trailer->Get(wxS("Root"));
247         m_root = (wxPdfDictionary*) ResolveObject(m_root);
248         if (m_root != NULL)
249         {
250           wxPdfName* versionEntry = (wxPdfName*) ResolveObject(m_root->Get(wxS("Version")));
251           if (versionEntry != NULL)
252           {
253             wxString version = versionEntry->GetName();
254             version = version.Mid(1, 3);
255             if (m_pdfVersion < version)
256             {
257               m_pdfVersion = version;
258             }
259             if (versionEntry->IsCreatedIndirect())
260             {
261               delete versionEntry;
262             }
263           }
264           wxPdfDictionary* pages = (wxPdfDictionary*) ResolveObject(m_root->Get(wxS("Pages")));
265           ok = ParsePageTree(pages);
266           delete pages;
267         }
268       }
269     }
270   }
271   return ok;
272 }
273 
274 bool
SetupDecryptor()275 wxPdfParser::SetupDecryptor()
276 {
277   bool ok = true;
278   wxPdfObject* encDic = m_trailer->Get(wxS("Encrypt"));
279   if (encDic == NULL || encDic->GetType() == OBJTYPE_NULL)
280   {
281     return true;
282   }
283   wxPdfDictionary* enc = (wxPdfDictionary*) ResolveObject(encDic);
284   wxPdfObject* obj;
285   wxPdfArray* documentIDs = (wxPdfArray*) ResolveObject(m_trailer->Get(wxS("ID")));
286   wxString documentID;
287   if (documentIDs != NULL)
288   {
289     obj = (wxPdfObject*) documentIDs->Get(0);
290     if (obj->GetType() == OBJTYPE_STRING)
291     {
292       documentID = ((wxPdfString*) obj)->GetValue();
293     }
294     if (documentIDs->IsCreatedIndirect())
295     {
296       delete documentIDs;
297     }
298   }
299 
300   wxString uValue = wxEmptyString;
301   obj = enc->Get(wxS("U"));
302   if (obj->GetType() == OBJTYPE_STRING)
303   {
304     uValue = ((wxPdfString*) obj)->GetValue();
305     if (uValue.Length() != 32)
306     {
307       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
308                  wxString(_("Invalid length of U value.")));
309       ok = false;
310     }
311   }
312 
313   wxString oValue = wxEmptyString;
314   obj = enc->Get(wxS("O"));
315   if (obj->GetType() == OBJTYPE_STRING)
316   {
317     oValue = ((wxPdfString*) obj)->GetValue();
318     if (oValue.Length() != 32)
319     {
320       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
321                  wxString(_("Invalid length of O value.")));
322       ok = false;
323     }
324   }
325 
326   int rValue = 0;
327   obj = enc->Get(wxS("R"));
328   if (obj->GetType() == OBJTYPE_NUMBER)
329   {
330     rValue = ((wxPdfNumber*) obj)->GetInt();
331     if (rValue != 2 && rValue != 3)
332     {
333       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
334                  wxString::Format(_("Unknown encryption type (%d)."), rValue));
335       ok = false;
336     }
337   }
338   else
339   {
340     wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
341                wxString(_("Illegal R value.")));
342     ok = false;
343   }
344 
345   int vValue = 0;
346   obj = enc->Get(wxS("V"));
347   if (obj != NULL && obj->GetType() == OBJTYPE_NUMBER)
348   {
349     vValue = ((wxPdfNumber*) obj)->GetInt();
350     if (!((rValue == 2 && vValue == 1) || (rValue == 3 && vValue == 2)))
351     {
352       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
353                  wxString(_("Unsupported V value.")));
354       ok = false;
355     }
356   }
357   else
358   {
359     wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
360                wxString(_("Illegal V value.")));
361     ok = false;
362   }
363 
364   int pValue = 0;
365   obj = enc->Get(wxS("P"));
366   if (obj->GetType() == OBJTYPE_NUMBER)
367   {
368     pValue = ((wxPdfNumber*) obj)->GetInt();
369     // Check required permissions (Applications MUST respect the permission settings)
370     if ((pValue & REQUIRED_PERMISSIONS) != REQUIRED_PERMISSIONS)
371     {
372       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
373                  wxString(_("Import of document not allowed due to missing permissions.")));
374       ok = false;
375     }
376   }
377   else
378   {
379     wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
380                wxString(_("Illegal P value.")));
381     ok = false;
382   }
383 
384   int lengthValue = 40; // Default for revisison 2
385   if (rValue == 3)
386   {
387     // Get the key length if revision is 3
388     obj = enc->Get(wxS("Length"));
389     if (obj->GetType() == OBJTYPE_NUMBER)
390     {
391       lengthValue = ((wxPdfNumber*) obj)->GetInt();
392       if (lengthValue > 128 || lengthValue < 40 || lengthValue % 8 != 0)
393       {
394         wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
395                    wxString(_("Illegal Length value.")));
396         ok = false;
397       }
398     }
399     else
400     {
401       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
402                  wxString(_("Illegal Length value.")));
403       ok = false;
404     }
405   }
406 
407   if (enc->IsCreatedIndirect())
408   {
409     delete enc;
410   }
411 
412   if (ok)
413   {
414     m_encrypted = true;
415     m_decryptor = new wxPdfEncrypt();
416     if (!m_decryptor->Authenticate(documentID, m_password, uValue, oValue, pValue, lengthValue, rValue))
417     {
418       wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
419                  wxString(_("Bad password.")));
420       ok = false;
421     }
422   }
423 
424   return ok;
425 }
426 
427 bool
ParsePageTree(wxPdfDictionary * pages)428 wxPdfParser::ParsePageTree(wxPdfDictionary* pages)
429 {
430   bool ok = false;
431   // Get the kids dictionary
432   wxPdfArray* kids = (wxPdfArray*) ResolveObject(pages->Get(wxS("Kids")));
433   if (kids != NULL)
434   {
435     size_t nKids = kids->GetSize();
436     size_t j;
437     ok = true;
438     for (j = 0; j < nKids; j++)
439     {
440       wxPdfDictionary* page = (wxPdfDictionary*) ResolveObject(kids->Get(j));
441       wxPdfName* type = (wxPdfName*) page->Get(wxS("Type"));
442       if (type->GetName() == wxS("Pages"))
443       {
444         // If one of the kids is an embedded
445         // /Pages array, resolve it as well.
446         ok = ok && ParsePageTree(page);
447         delete page;
448       }
449       else
450       {
451         m_pages.Add(page);
452       }
453     }
454     if (kids->IsCreatedIndirect())
455     {
456       delete kids;
457     }
458   }
459   else
460   {
461     wxLogError(wxString(wxS("wxPdfParser::ParsePageTree: ")) +
462                wxString(_("Cannot find /Kids in current /Page-Dictionary")));
463   }
464   return ok;
465 }
466 
467 wxPdfObject*
GetPageResources(unsigned int pageno)468 wxPdfParser::GetPageResources(unsigned int pageno)
469 {
470   wxPdfObject* resources = NULL;
471   if (pageno < GetPageCount())
472   {
473     resources = GetPageResources((wxPdfObject*) m_pages[pageno]);
474   }
475   return resources;
476 }
477 
478 wxPdfObject*
GetPageResources(wxPdfObject * page)479 wxPdfParser::GetPageResources(wxPdfObject* page)
480 {
481   wxPdfObject* resources = NULL;
482   wxPdfDictionary* dic = (wxPdfDictionary*) ResolveObject(page);
483 
484   // If the current object has a resources dictionary associated with it,
485   // we use it. Otherwise, we move back to its parent object.
486   wxPdfObject* resourceRef = ResolveObject(dic->Get(wxS("Resources")));
487   if (resourceRef != NULL)
488   {
489     resources = ResolveObject(resourceRef);
490   }
491   else
492   {
493     wxPdfObject* parent = ResolveObject(dic->Get(wxS("Parent")));
494     if (parent != NULL)
495     {
496       resources = GetPageResources(parent);
497       delete parent;
498     }
499   }
500   return resources;
501 }
502 
503 void
GetContent(unsigned int pageno,wxArrayPtrVoid & contents)504 wxPdfParser::GetContent(unsigned int pageno, wxArrayPtrVoid& contents)
505 {
506   if (pageno < GetPageCount())
507   {
508     wxPdfObject* content = ((wxPdfDictionary*) m_pages[pageno])->Get(wxS("Contents"));
509     if (content != NULL)
510     {
511       GetPageContent(content, contents);
512     }
513   }
514 }
515 
516 void
GetPageContent(wxPdfObject * contentRef,wxArrayPtrVoid & contents)517 wxPdfParser::GetPageContent(wxPdfObject* contentRef, wxArrayPtrVoid& contents)
518 {
519   int type = contentRef->GetType();
520   if (type == OBJTYPE_INDIRECT)
521   {
522     wxPdfObject* content = ResolveObject(contentRef);
523     if (content->GetType() == OBJTYPE_ARRAY)
524     {
525       GetPageContent(content, contents);
526       delete content;
527     }
528     else
529     {
530       contents.Add(content);
531     }
532   }
533   else if (type == OBJTYPE_ARRAY)
534   {
535     wxPdfArray* contentArray = (wxPdfArray*) contentRef;
536     size_t n = contentArray->GetSize();
537     size_t j;
538     for (j = 0; j < n; j++)
539     {
540       GetPageContent(contentArray->Get(j), contents);
541     }
542   }
543 }
544 
545 wxPdfArrayDouble*
GetPageMediaBox(unsigned int pageno)546 wxPdfParser::GetPageMediaBox(unsigned int pageno)
547 {
548   wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("MediaBox"));
549   return box;
550 }
551 
552 wxPdfArrayDouble*
GetPageCropBox(unsigned int pageno)553 wxPdfParser::GetPageCropBox(unsigned int pageno)
554 {
555   wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("CropBox"));
556   if (box == NULL)
557   {
558     box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("MediaBox"));
559   }
560   return box;
561 }
562 
563 wxPdfArrayDouble*
GetPageBleedBox(unsigned int pageno)564 wxPdfParser::GetPageBleedBox(unsigned int pageno)
565 {
566   wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("BleedBox"));
567   if (box == NULL)
568   {
569     box = GetPageCropBox(pageno);
570   }
571   return box;
572 }
573 
574 wxPdfArrayDouble*
GetPageTrimBox(unsigned int pageno)575 wxPdfParser::GetPageTrimBox(unsigned int pageno)
576 {
577   wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("TrimBox"));
578   if (box == NULL)
579   {
580     box = GetPageCropBox(pageno);
581   }
582   return box;
583 }
584 
585 wxPdfArrayDouble*
GetPageArtBox(unsigned int pageno)586 wxPdfParser::GetPageArtBox(unsigned int pageno)
587 {
588   wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("ArtBox"));
589   if (box == NULL)
590   {
591     box = GetPageCropBox(pageno);
592   }
593   return box;
594 }
595 
596 wxPdfArrayDouble*
GetPageBox(wxPdfDictionary * page,const wxString & boxIndex)597 wxPdfParser::GetPageBox(wxPdfDictionary* page, const wxString& boxIndex)
598 {
599   wxPdfArrayDouble* pageBox = NULL;
600   wxPdfArray* box = (wxPdfArray*) ResolveObject(page->Get(boxIndex));
601   if (box == NULL)
602   {
603     wxPdfDictionary* parent = (wxPdfDictionary*) ResolveObject(page->Get(wxS("Parent")));
604     if (parent != NULL)
605     {
606       pageBox = GetPageBox(parent, boxIndex);
607       delete parent;
608     }
609   }
610   else
611   {
612     pageBox = new wxPdfArrayDouble();
613     size_t j;
614     for (j = 0; j < box->GetSize(); j++)
615     {
616       wxPdfNumber* item = (wxPdfNumber*) box->Get(j);
617       pageBox->Add(item->GetValue());
618     }
619   }
620   return pageBox;
621 }
622 
623 int
GetPageRotation(unsigned int pageno)624 wxPdfParser::GetPageRotation(unsigned int pageno)
625 {
626   return GetPageRotation((wxPdfDictionary*) m_pages[pageno]);
627 }
628 
629 int
GetPageRotation(wxPdfDictionary * page)630 wxPdfParser::GetPageRotation (wxPdfDictionary* page)
631 {
632   int pageRotation = 0;
633   wxPdfNumber* rotation = (wxPdfNumber*) ResolveObject(page->Get(wxS("Rotate")));
634   if (rotation == NULL)
635   {
636     wxPdfDictionary* parent = (wxPdfDictionary*) ResolveObject(page->Get(wxS("Parent")));
637     if (parent != NULL)
638     {
639       pageRotation = GetPageRotation(parent);
640       delete parent;
641     }
642   }
643   else
644   {
645     pageRotation = rotation->GetInt();
646   }
647   return pageRotation;
648 }
649 
650 bool
ParseXRef()651 wxPdfParser::ParseXRef()
652 {
653   m_tokens->Seek(m_tokens->GetStartXRef());
654   m_tokens->NextToken();
655   if (m_tokens->GetStringValue() != wxS("startxref"))
656   {
657     wxLogError(wxString(wxS("wxPdfParser::ParseXRef: ")) +
658                wxString(_("'startxref' not found.")));
659     return false;
660   }
661   m_tokens->NextToken();
662   if (m_tokens->GetTokenType() != /*PRTokeniser.*/ TOKEN_NUMBER)
663   {
664     wxLogError(wxString(wxS("wxPdfParser::ParseXRef: ")) +
665                wxString(_("'startxref' is not followed by a number.")));
666     return false;
667   }
668   int startxref = m_tokens->GetIntValue();
669 
670   if (!ParseXRefStream(startxref, true))
671   {
672     m_xref.Clear();
673     m_tokens->Seek(startxref);
674     m_trailer = ParseXRefSection();
675     wxPdfDictionary* trailer1 = m_trailer;
676     wxPdfDictionary* trailer2 = NULL;
677     while (trailer1 != NULL)
678     {
679       wxPdfNumber* prev = (wxPdfNumber*) trailer1->Get(wxS("Prev"));
680       trailer2 = trailer1;
681       if (prev != NULL)
682       {
683         m_tokens->Seek(prev->GetInt());
684         trailer1 = ParseXRefSection();
685       }
686       else
687       {
688         trailer1 = NULL;
689       }
690       if (trailer2 != m_trailer)
691       {
692         delete trailer2;
693       }
694     }
695   }
696   return (m_trailer != NULL);
697 }
698 
699 wxPdfDictionary*
ParseXRefSection()700 wxPdfParser::ParseXRefSection()
701 {
702   m_tokens->NextValidToken();
703   if (m_tokens->GetStringValue() != wxS("xref"))
704   {
705     wxLogError(wxString(wxS("wxPdfParser::ParseXRefSection: ")) +
706                wxString(_("xref subsection not found.")));
707     return NULL;
708   }
709   int start = 0;
710   int end = 0;
711   int pos = 0;
712   int gen = 0;
713   while (true)
714   {
715     m_tokens->NextValidToken();
716     if (m_tokens->GetStringValue() == wxS("trailer"))
717       break;
718     if (m_tokens->GetTokenType() != TOKEN_NUMBER)
719     {
720       wxLogError(wxString(wxS("wxPdfParser::ParseXRefSection: ")) +
721                  wxString(_("Object number of the first object in this xref subsection not found.")));
722       return NULL;
723     }
724     start = m_tokens->GetIntValue();
725     m_tokens->NextValidToken();
726     if (m_tokens->GetTokenType() != TOKEN_NUMBER)
727     {
728       wxLogError(wxString(wxS("wxPdfParser::ParseXRefSection: ")) +
729                  wxString(_("Number of entries in this xref subsection not found.")));
730       return NULL;
731     }
732     end = m_tokens->GetIntValue() + start;
733     if (start == 1)
734     { // fix incorrect start number
735       int back = m_tokens->Tell();
736       m_tokens->NextValidToken();
737       pos = m_tokens->GetIntValue();
738       m_tokens->NextValidToken();
739       gen = m_tokens->GetIntValue();
740       if (pos == 0 && gen == 65535)
741       {
742         --start;
743         --end;
744       }
745       m_tokens->Seek(back);
746     }
747     ReserveXRef(end);
748 
749     int k;
750     for (k = start; k < end; ++k)
751     {
752       wxPdfXRefEntry& xrefEntry = m_xref[k];
753       m_tokens->NextValidToken();
754       pos = m_tokens->GetIntValue();
755       m_tokens->NextValidToken();
756       gen = m_tokens->GetIntValue();
757       m_tokens->NextValidToken();
758       if (m_tokens->GetStringValue() == wxS("n"))
759       {
760         if (xrefEntry.m_ofs_idx == 0 && xrefEntry.m_gen_ref == 0)
761         {
762           // TODO: if (pos == 0)
763           //   wxLogError(wxS("File position 0 cross-reference entry in this xref subsection"));
764           xrefEntry.m_ofs_idx = pos;
765           xrefEntry.m_gen_ref = gen;
766           xrefEntry.m_type = 1;
767         }
768       }
769       else if (m_tokens->GetStringValue() == wxS("f"))
770       {
771         if (xrefEntry.m_ofs_idx == 0 && xrefEntry.m_gen_ref == 0)
772         {
773           xrefEntry.m_ofs_idx = -1;
774           xrefEntry.m_gen_ref = 0;
775           xrefEntry.m_type = 0;
776         }
777       }
778       else
779       {
780         wxLogError(wxString(wxS("wxPdfParser:ReadXRefSection: ")) +
781                    wxString(_("Invalid cross-reference entry in this xref subsection.")));
782         return NULL;
783       }
784     }
785   }
786   wxPdfDictionary* trailer = (wxPdfDictionary*) ParseObject();
787   wxPdfNumber* xrefSize = (wxPdfNumber*) trailer->Get(wxS("Size"));
788   ReserveXRef(xrefSize->GetInt());
789 
790   wxPdfObject* xrs = trailer->Get(wxS("XRefStm"));
791   if (xrs != NULL && xrs->GetType() == OBJTYPE_NUMBER)
792   {
793     int loc = ((wxPdfNumber*) xrs)->GetInt();
794     ParseXRefStream(loc, false);
795   }
796   return trailer;
797 }
798 
799 bool
ParseXRefStream(int ptr,bool setTrailer)800 wxPdfParser::ParseXRefStream(int ptr, bool setTrailer)
801 {
802   int idx, k;
803 
804   m_tokens->Seek(ptr);
805   int streamRef = 0;
806   if (!m_tokens->NextToken())
807   {
808     return false;
809   }
810   if (m_tokens->GetTokenType() != TOKEN_NUMBER)
811   {
812     return false;
813   }
814   streamRef = m_tokens->GetIntValue();
815   if (!m_tokens->NextToken() || m_tokens->GetTokenType() != TOKEN_NUMBER)
816   {
817     return false;
818   }
819   if (!m_tokens->NextToken() || m_tokens->GetStringValue() != wxS("obj"))
820   {
821     return false;
822   }
823   wxPdfObject* object = ParseObject();
824   wxPdfStream* stm = NULL;
825   if (object->GetType() == OBJTYPE_STREAM)
826   {
827     stm = (wxPdfStream*) object;
828     if (((wxPdfName*) stm->Get(wxS("Type")))->GetName() != wxS("XRef"))
829     {
830       delete object;
831       return false;
832     }
833   }
834   int size = ((wxPdfNumber*) stm->Get(wxS("Size")))->GetInt();
835   bool indexAllocated = false;
836   wxPdfArray* index;
837   wxPdfObject* obj = stm->Get(wxS("Index"));
838   if (obj == NULL)
839   {
840     indexAllocated = true;
841     index = new wxPdfArray();
842     index->Add(0);
843     index->Add(size);
844   }
845   else
846   {
847     index = (wxPdfArray*) obj;
848   }
849   wxPdfArray* w = (wxPdfArray*) stm->Get(wxS("W"));
850   int prev = -1;
851   obj = stm->Get(wxS("Prev"));
852   if (obj != NULL)
853   {
854     prev = ((wxPdfNumber* )obj)->GetInt();
855   }
856   // Each xref pair is a position
857   // type 0 -> -1, 0
858   // type 1 -> offset, 0
859   // type 2 -> index, obj num
860   ReserveXRef(size);
861 
862   GetStreamBytes(stm);
863   wxMemoryOutputStream* streamBuffer = stm->GetBuffer();
864   wxMemoryInputStream streamBytes(*streamBuffer);
865   size_t inLength = streamBytes.GetSize();
866   char* buffer = new char[inLength];
867   streamBytes.Read(buffer, inLength);
868 
869   int bptr = 0;
870   int wc[3];
871   for (k = 0; k < 3; ++k)
872   {
873     wc[k] = ((wxPdfNumber*) (w->Get(k)))->GetInt();
874   }
875   for (idx = 0; (size_t) idx < index->GetSize(); idx += 2)
876   {
877     int start = ((wxPdfNumber*) (index->Get(idx)))->GetInt();
878     int length = ((wxPdfNumber*) (index->Get(idx + 1)))->GetInt();
879     ReserveXRef(start+length);
880     while (length-- > 0)
881     {
882       wxPdfXRefEntry& xrefEntry = m_xref[start];
883       int type = 1;
884       if (wc[0] > 0)
885       {
886         type = 0;
887         for (k = 0; k < wc[0]; ++k)
888         {
889           type = (type << 8) + (buffer[bptr++] & 0xff);
890         }
891       }
892       int field2 = 0;
893       for (k = 0; k < wc[1]; ++k)
894       {
895         field2 = (field2 << 8) + (buffer[bptr++] & 0xff);
896       }
897       int field3 = 0;
898       for (k = 0; k < wc[2]; ++k)
899       {
900         field3 = (field3 << 8) + (buffer[bptr++] & 0xff);
901       }
902       if (xrefEntry.m_ofs_idx == 0 && xrefEntry.m_gen_ref == 0)
903       {
904         switch (type)
905         {
906           case 0:
907             xrefEntry.m_type = 0;
908             xrefEntry.m_ofs_idx = -1;
909             xrefEntry.m_gen_ref = 0;
910             break;
911           case 1:
912             xrefEntry.m_type = 1;
913             xrefEntry.m_ofs_idx = field2;
914             xrefEntry.m_gen_ref = field3;
915             break;
916           case 2:
917             xrefEntry.m_type = 2;
918             xrefEntry.m_ofs_idx = field3;
919             xrefEntry.m_gen_ref = field2;
920             break;
921         }
922       }
923       start++;
924     }
925   }
926   delete [] buffer;
927   if ((size_t) streamRef < m_xref.GetCount())
928   {
929     m_xref[streamRef].m_ofs_idx = -1;
930   }
931   if (indexAllocated)
932   {
933     delete index;
934   }
935 
936   // Set the first xref stream dictionary as the trailer dictionary
937   if (setTrailer && m_trailer == NULL)
938   {
939 
940     m_trailer = stm->GetDictionary();
941     stm->SetDictionary(NULL);
942   }
943   delete stm;
944 
945   if (prev == -1)
946   {
947     return true;
948   }
949   return ParseXRefStream(prev, false);
950 }
951 
952 wxPdfDictionary*
ParseDictionary()953 wxPdfParser::ParseDictionary()
954 {
955   wxPdfDictionary* dic = new wxPdfDictionary();
956   while (true)
957   {
958     m_tokens->NextValidToken();
959     if (m_tokens->GetTokenType() == TOKEN_END_DICTIONARY)
960       break;
961     if (m_tokens->GetTokenType() != TOKEN_NAME)
962     {
963       wxLogError(wxString(wxS("wxPdfParser::ParseDictionary: ")) +
964                  wxString(_("Dictionary key is not a name.")));
965       break;
966     }
967     wxPdfName* name = new wxPdfName(m_tokens->GetStringValue());
968     wxPdfObject* obj = ParseObject();
969     int type = obj->GetType();
970     if (-type == TOKEN_END_DICTIONARY)
971     {
972       wxLogError(wxString(wxS("wxPdfParser::ParseDictionary: ")) +
973                  wxString(_("Unexpected '>>'.")));
974       delete obj;
975       delete name;
976       break;
977     }
978     if (-type == TOKEN_END_ARRAY)
979     {
980       wxLogError(wxString(wxS("wxPdfParser::ParseDictionary: ")) +
981                  wxString(_("Unexpected ']'.")));
982       delete obj;
983       delete name;
984       break;
985     }
986     dic->Put(name, obj);
987     delete name;
988   }
989   return dic;
990 }
991 
992 wxPdfArray*
ParseArray()993 wxPdfParser::ParseArray()
994 {
995   wxPdfArray* array = new wxPdfArray();
996   while (true)
997   {
998     wxPdfObject* obj = ParseObject();
999     int type = obj->GetType();
1000     if (-type == TOKEN_END_ARRAY)
1001     {
1002       delete obj;
1003       break;
1004     }
1005     if (-type == TOKEN_END_DICTIONARY)
1006     {
1007       wxLogError(wxString(wxS("wxPdfParser::ParseArray: ")) +
1008                  wxString(_("Unexpected '>>'.")));
1009       delete obj;
1010       break;
1011     }
1012     array->Add(obj);
1013   }
1014   return array;
1015 }
1016 
1017 wxPdfObject*
ParseObject()1018 wxPdfParser::ParseObject()
1019 {
1020   wxPdfObject* obj;
1021   m_tokens->NextValidToken();
1022   int type = m_tokens->GetTokenType();
1023   switch (type)
1024   {
1025     case TOKEN_START_DICTIONARY:
1026       {
1027         wxPdfDictionary* dic = ParseDictionary();
1028         int pos = m_tokens->Tell();
1029         // be careful in the trailer. May not be a "next" token.
1030         if (m_tokens->NextToken() && m_tokens->GetStringValue() == wxS("stream"))
1031         {
1032           int ch = m_tokens->ReadChar();
1033           if (ch != '\n')
1034             ch = m_tokens->ReadChar();
1035           if (ch != '\n')
1036             m_tokens->BackOnePosition(ch);
1037           wxPdfStream* stream = new wxPdfStream(m_tokens->Tell());
1038           stream->SetDictionary(dic);
1039           obj = stream;
1040         }
1041         else
1042         {
1043           m_tokens->Seek(pos);
1044           obj = dic;
1045         }
1046       }
1047       break;
1048 
1049     case TOKEN_START_ARRAY:
1050       {
1051         obj = ParseArray();
1052       }
1053       break;
1054 
1055     case TOKEN_NUMBER:
1056       {
1057         obj = new wxPdfNumber(m_tokens->GetStringValue());
1058       }
1059       break;
1060 
1061     case TOKEN_STRING:
1062       {
1063         wxString token = m_tokens->GetStringValue();
1064         // Decrypt if necessary
1065         if (m_encrypted)
1066         {
1067           m_decryptor->Encrypt(m_objNum, m_objGen, token);
1068         }
1069 
1070         wxPdfString* strObj = new wxPdfString(token);
1071         strObj->SetIsHexString(m_tokens->IsHexString());
1072         obj = strObj;
1073       }
1074       break;
1075 
1076     case TOKEN_NAME:
1077       {
1078         obj = new wxPdfName(m_tokens->GetStringValue());
1079       }
1080       break;
1081 
1082     case TOKEN_REFERENCE:
1083       {
1084         int num = m_tokens->GetReference();
1085         obj = new wxPdfIndirectReference(num, m_tokens->GetGeneration());
1086       }
1087       break;
1088 
1089     case TOKEN_BOOLEAN:
1090       {
1091         obj = new wxPdfBoolean((m_tokens->GetStringValue() == wxS("true")));
1092       }
1093       break;
1094 
1095     case TOKEN_NULL:
1096       {
1097         obj = new wxPdfNull();
1098       }
1099       break;
1100 
1101     default:
1102       {
1103         wxString token = m_tokens->GetStringValue();
1104         obj = new wxPdfLiteral(-type, m_tokens->GetStringValue());
1105       }
1106       break;
1107   }
1108   return obj;
1109 }
1110 
1111 wxPdfObject*
ResolveObject(wxPdfObject * obj)1112 wxPdfParser::ResolveObject(wxPdfObject* obj)
1113 {
1114   if (obj != NULL && obj->GetType() == OBJTYPE_INDIRECT)
1115   {
1116     wxPdfIndirectReference* ref = (wxPdfIndirectReference*)obj;
1117     int idx = ref->GetNumber();
1118     obj = ParseSpecificObject(idx);
1119     obj->SetCreatedIndirect(true);
1120   }
1121   return obj;
1122 }
1123 
1124 wxPdfObject*
ParseSpecificObject(int idx)1125 wxPdfParser::ParseSpecificObject(int idx)
1126 {
1127   wxPdfObject* obj = NULL;
1128   if ((size_t)(idx) >= m_xref.GetCount())
1129   {
1130     return NULL;
1131   }
1132   obj = ParseDirectObject(idx);
1133   return obj;
1134 }
1135 
1136 wxPdfObject*
ParseDirectObject(int k)1137 wxPdfParser::ParseDirectObject(int k)
1138 {
1139   int objIndex = 0;
1140   int objStreamIndex = 0;
1141   bool isCached = false;
1142   wxPdfObject* obj = NULL;
1143 
1144   // Check for free object
1145   if (m_xref[k].m_type == 0)
1146   {
1147     return NULL;
1148   }
1149   int pos = m_xref[k].m_ofs_idx;
1150   if (m_xref[k].m_type == 2)
1151   {
1152     objIndex = m_xref[k].m_gen_ref;
1153     wxPdfObjStmMap::iterator objStm = m_objStmCache->find(objIndex);
1154     if (objStm != m_objStmCache->end())
1155     {
1156       obj = objStm->second;
1157       isCached = true;
1158     }
1159     else
1160     {
1161       objStreamIndex = m_xref[k].m_gen_ref;
1162       pos = m_xref[objStreamIndex].m_ofs_idx;
1163     }
1164   }
1165   if (!isCached)
1166   {
1167     m_tokens->Seek(pos);
1168     m_tokens->NextValidToken();
1169     if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1170     {
1171       wxLogError(wxString(wxS("wxPdfParser::ParseSingleObject: ")) +
1172                  wxString(_("Invalid object number.")));
1173       return NULL;
1174     }
1175     m_objNum = m_tokens->GetIntValue();
1176     m_tokens->NextValidToken();
1177     if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1178     {
1179       wxLogError(wxString(wxS("wxPdfParser::ParseSingleObject: ")) +
1180                  wxString(_("Invalid generation number.")));
1181       return NULL;
1182     }
1183     m_objGen = m_tokens->GetIntValue();
1184     m_tokens->NextValidToken();
1185     if (m_tokens->GetStringValue() != wxS("obj"))
1186     {
1187       wxLogError(wxString(wxS("wxPdfParser::ParseSingleObject: ")) +
1188                  wxString(_("Token 'obj' expected.")));
1189       return NULL;
1190     }
1191     obj = ParseObject();
1192   }
1193 
1194   // TODO: Check for valid 'endstream'
1195 
1196   if (m_xref[k].m_type == 2)
1197   {
1198     m_objNum = k;
1199     m_objGen = 0;
1200     wxPdfStream* objStream = (wxPdfStream*) obj;
1201     obj = ParseObjectStream((wxPdfStream*) obj, m_xref[k].m_ofs_idx);
1202     if (m_cacheObjects)
1203     {
1204       if (!isCached)
1205       {
1206         (*m_objStmCache)[objIndex] = objStream;
1207       }
1208     }
1209     else
1210     {
1211       delete objStream;
1212     }
1213   }
1214 
1215   if (obj != NULL)
1216   {
1217     obj->SetObjNum(m_objNum, m_objGen);
1218   }
1219   if (obj->GetType() == OBJTYPE_STREAM)
1220   {
1221     GetStreamBytes((wxPdfStream*) obj);
1222   }
1223   return obj;
1224 }
1225 
1226 wxPdfObject*
ParseObjectStream(wxPdfStream * objStm,int idx)1227 wxPdfParser::ParseObjectStream(wxPdfStream* objStm, int idx)
1228 {
1229   wxPdfObject* obj = NULL;
1230 
1231   wxPdfNumber* firstNumber = (wxPdfNumber*) ResolveObject(objStm->Get(wxS("First")));
1232   int first = firstNumber->GetInt();
1233   if (objStm->GetBuffer() == NULL)
1234   {
1235     bool saveUseRawStream = m_useRawStream;
1236     m_useRawStream = false;
1237     GetStreamBytes(objStm);
1238     m_useRawStream = saveUseRawStream;
1239   }
1240 
1241   bool saveEncrypted = m_encrypted;
1242   m_encrypted = false;
1243   wxPdfTokenizer* saveTokens = m_tokens;
1244   wxMemoryInputStream objStream(*(objStm->GetBuffer()));
1245   m_tokens = new wxPdfTokenizer(&objStream);
1246 
1247   int address = 0;
1248   bool ok = true;
1249   if (!objStm->HasObjOffsets())
1250   {
1251     // Read object offsets
1252     wxArrayInt* objOffsets = objStm->GetObjOffsets();
1253     int objCount = idx + 1;
1254     if (m_cacheObjects)
1255     {
1256       wxPdfNumber* objCountNumber = (wxPdfNumber*) ResolveObject(objStm->Get(wxS("N")));
1257       objCount = objCountNumber->GetInt();
1258     }
1259     int offset;
1260     int k;
1261     for (k = 0; k < objCount; ++k)
1262     {
1263       ok = m_tokens->NextToken();
1264       if (!ok)
1265         break;
1266       if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1267       {
1268         ok = false;
1269         break;
1270       }
1271       ok = m_tokens->NextToken();
1272       if (!ok)
1273         break;
1274       if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1275       {
1276         ok = false;
1277         break;
1278       }
1279       offset = m_tokens->GetIntValue() + first;
1280       if (m_cacheObjects)
1281       {
1282         objOffsets->Add(offset);
1283       }
1284       if (k == idx)
1285       {
1286         address = offset;
1287       }
1288     }
1289     if (ok)
1290     {
1291       objStm->SetHasObjOffsets(m_cacheObjects);
1292     }
1293   }
1294   else
1295   {
1296     address = objStm->GetObjOffset(idx);
1297     ok = (address > 0);
1298   }
1299   if (ok)
1300   {
1301     m_tokens->Seek(address);
1302     obj = ParseObject();
1303   }
1304   else
1305   {
1306     wxLogError(wxString(wxS("wxPdfParser::ParseOneObjStm: ")) +
1307                wxString(_("Error reading ObjStm.")));
1308   }
1309 
1310   delete m_tokens;
1311   m_tokens = saveTokens;
1312   m_encrypted = saveEncrypted;
1313 
1314   return obj;
1315 }
1316 
1317 void
GetStreamBytes(wxPdfStream * stream)1318 wxPdfParser::GetStreamBytes(wxPdfStream* stream)
1319 {
1320   GetStreamBytesRaw(stream);
1321 
1322   // Do not decode the content of resource object streams
1323   if (m_useRawStream) return;
1324 
1325   // Check whether the stream buffer is empty
1326   wxMemoryOutputStream* osIn = stream->GetBuffer();
1327   if (osIn->GetLength() == 0) return;
1328 
1329   size_t j;
1330   wxArrayPtrVoid filters;
1331   wxPdfObject* filter = ResolveObject(stream->Get(wxS("Filter")));
1332   if (filter != NULL)
1333   {
1334     int type = filter->GetType();
1335     if (type == OBJTYPE_NAME)
1336     {
1337       filters.Add(filter);
1338     }
1339     else if (type == OBJTYPE_ARRAY)
1340     {
1341       wxPdfArray* filterArray = (wxPdfArray*) filter;
1342       size_t size = filterArray->GetSize();
1343       for (j = 0; j < size; j++)
1344       {
1345         filters.Add(filterArray->Get(j));
1346       }
1347     }
1348 
1349     // Read decode parameters if available
1350     wxArrayPtrVoid dp;
1351     wxPdfObject* dpo = ResolveObject(stream->Get(wxS("DecodeParms")));
1352     if (dpo == NULL || (dpo->GetType() != OBJTYPE_DICTIONARY && dpo->GetType() != OBJTYPE_ARRAY))
1353     {
1354       dpo = ResolveObject(stream->Get(wxS("DP")));
1355     }
1356     if (dpo != NULL)
1357     {
1358       if (dpo->GetType() == OBJTYPE_DICTIONARY)
1359       {
1360         dp.Add(dpo);
1361       }
1362       else if (dpo->GetType() == OBJTYPE_ARRAY)
1363       {
1364         wxPdfArray* dpArray = (wxPdfArray*) dpo;
1365         size_t size = dpArray->GetSize();
1366         for (j = 0; j < size; j++)
1367         {
1368           dp.Add(dpArray->Get(j));
1369         }
1370       }
1371     }
1372 
1373     wxPdfObject* dicParam = NULL;
1374     wxMemoryOutputStream* osOut = NULL;
1375     for (j = 0; j < filters.GetCount(); j++)
1376     {
1377       osIn = stream->GetBuffer();
1378       wxPdfName* name = (wxPdfName*) filters[j];
1379       if (name->GetName() == wxS("FlateDecode") || name->GetName() == wxS("Fl"))
1380       {
1381         osOut = FlateDecode(osIn);
1382         if (j < dp.GetCount())
1383         {
1384           wxMemoryOutputStream* osIn2 = osOut;
1385           dicParam = (wxPdfObject*) dp[j];
1386           osOut = DecodePredictor(osIn2, dicParam);
1387           if (osOut != osIn2)
1388           {
1389             delete osIn2;
1390           }
1391         }
1392       }
1393       else if(name->GetName() == wxS("ASCIIHexDecode") || name->GetName() == wxS("AHx"))
1394       {
1395         osOut = ASCIIHexDecode(osIn);
1396       }
1397       else if(name->GetName() == wxS("ASCII85Decode") || name->GetName() == wxS("A85"))
1398       {
1399         osOut = ASCII85Decode(osIn);
1400       }
1401       else if(name->GetName() == wxS("LZWDecode"))
1402       {
1403         osOut = LZWDecode(osIn);
1404         if (j < dp.GetCount())
1405         {
1406           wxMemoryOutputStream* osIn2 = osOut;
1407           dicParam = (wxPdfObject*) dp[j];
1408           osOut = DecodePredictor(osIn2, dicParam);
1409           if (osOut != osIn2)
1410           {
1411             delete osIn2;
1412           }
1413         }
1414       }
1415       else
1416       {
1417         wxLogError(wxString(wxS("wxPdfParser::GetStreamBytes: ")) +
1418                    wxString::Format(_("Filter '%s' not supported."), name->GetName().c_str()));
1419       }
1420       if (osOut != NULL)
1421       {
1422         stream->SetBuffer(osOut);
1423         if (osIn != osOut)
1424         {
1425           delete osIn;
1426         }
1427       }
1428     }
1429   }
1430 }
1431 
1432 void
GetStreamBytesRaw(wxPdfStream * stream)1433 wxPdfParser::GetStreamBytesRaw(wxPdfStream* stream)
1434 {
1435   wxPdfNumber* streamLength = (wxPdfNumber*) ResolveObject(stream->Get(wxS("Length")));
1436   size_t size = streamLength->GetInt();
1437   m_tokens->Seek(stream->GetOffset());
1438   wxMemoryOutputStream* memoryBuffer = NULL;
1439   wxMemoryOutputStream* streamBuffer = m_tokens->ReadBuffer(size);
1440 
1441   if (m_encrypted && size > 0)
1442   {
1443     wxMemoryInputStream inData(*streamBuffer);
1444     delete streamBuffer;
1445     memoryBuffer = new wxMemoryOutputStream();
1446     unsigned char* buffer = new unsigned char[size];
1447     inData.Read(buffer, size);
1448     if (inData.LastRead() == size)
1449     {
1450       m_decryptor->Encrypt(stream->GetNumber(), stream->GetGeneration(), buffer, (unsigned int) size);
1451       memoryBuffer->Write(buffer, size);
1452     }
1453     delete [] buffer;
1454     memoryBuffer->Close();
1455   }
1456   else
1457   {
1458     memoryBuffer = streamBuffer;
1459   }
1460 
1461   stream->SetBuffer(memoryBuffer);
1462   if (streamLength->IsCreatedIndirect())
1463   {
1464     delete streamLength;
1465   }
1466 }
1467 
1468 // --- Tokenizer
1469 
wxPdfTokenizer(wxInputStream * inputStream)1470 wxPdfTokenizer::wxPdfTokenizer(wxInputStream* inputStream)
1471 {
1472   m_inputStream = inputStream;
1473 }
1474 
~wxPdfTokenizer()1475 wxPdfTokenizer::~wxPdfTokenizer()
1476 {
1477 }
1478 
1479 off_t
Seek(off_t pos)1480 wxPdfTokenizer::Seek(off_t pos)
1481 {
1482   return m_inputStream->SeekI(pos);
1483 }
1484 
1485 off_t
Tell()1486 wxPdfTokenizer::Tell()
1487 {
1488   return m_inputStream->TellI();
1489 }
1490 
1491 void
BackOnePosition(int ch)1492 wxPdfTokenizer::BackOnePosition(int ch)
1493 {
1494   if (ch != -1)
1495   {
1496     off_t pos = Tell();
1497     if (pos > 0) pos--;
1498     Seek(pos);
1499   }
1500 }
1501 
1502 off_t
GetLength()1503 wxPdfTokenizer::GetLength()
1504 {
1505   return m_inputStream->GetLength();
1506 }
1507 
1508 int
ReadChar()1509 wxPdfTokenizer::ReadChar()
1510 {
1511   int readChar;
1512   char ch = m_inputStream->GetC();
1513   readChar = (m_inputStream->LastRead() > 0) ? (unsigned char) ch : -1;
1514   return readChar;
1515 }
1516 
1517 wxMemoryOutputStream*
ReadBuffer(size_t size)1518 wxPdfTokenizer::ReadBuffer(size_t size)
1519 {
1520   wxMemoryOutputStream* memoryBuffer = new wxMemoryOutputStream();
1521   if (size > 0)
1522   {
1523     char* buffer = new char[size];
1524     m_inputStream->Read(buffer, size);
1525     if (m_inputStream->LastRead() == size)
1526     {
1527       memoryBuffer->Write(buffer, size);
1528     }
1529     delete [] buffer;
1530   }
1531   memoryBuffer->Close();
1532   return memoryBuffer;
1533 }
1534 
1535 off_t
GetStartXRef()1536 wxPdfTokenizer::GetStartXRef()
1537 {
1538   char buffer[1024];
1539   int idx, found;
1540   off_t size = GetLength();
1541   if (size > 1024) size = 1024;
1542   off_t pos = GetLength() - size;
1543   do
1544   {
1545     m_inputStream->SeekI(pos);
1546     m_inputStream->Read(buffer, size);
1547     idx = size - 9;
1548     do
1549     {
1550       found = memcmp(buffer + idx, "startxref", 9);
1551       --idx;
1552     }
1553     while (found != 0 && idx >= 0);
1554     if (found == 0) break;
1555     pos = (pos > 1) ? (pos > (size - 9)) ? pos - size + 9 : 1 : 0;
1556   }
1557   while (pos > 0);
1558   if (found == 0)
1559   {
1560     pos = pos + idx + 1;
1561   }
1562   else
1563   {
1564     wxLogError(wxString(wxS("wxPdfTokenizer::GetStartXRef: ")) +
1565                wxString(_("PDF startxref not found.")));
1566   }
1567   return pos;
1568 }
1569 
1570 wxString
CheckPdfHeader()1571 wxPdfTokenizer::CheckPdfHeader()
1572 {
1573   wxString version = wxEmptyString;
1574   m_inputStream->SeekI(0);
1575   wxString str = ReadString(1024);
1576   int idx = str.Find(wxS("%PDF-1."));
1577   if (idx >= 0)
1578   {
1579     m_inputStream->SeekI(idx);
1580     version = str.Mid(idx + 5, 3);
1581   }
1582   else
1583   {
1584     m_inputStream->SeekI(0);
1585     wxLogError(wxString(wxS("wxPdfTokenizer::GetStartXref: ")) +
1586                wxString(_("PDF header signature not found.")));
1587   }
1588   return version;
1589 }
1590 
1591 wxString
ReadString(int size)1592 wxPdfTokenizer::ReadString(int size)
1593 {
1594   wxString buf;
1595   int ch;
1596   while (size > 0)
1597   {
1598     size--;
1599     ch = ReadChar();
1600     if (ch == -1)
1601       break;
1602     buf += ch;
1603   }
1604   return buf;
1605 }
1606 
1607 bool
NextToken()1608 wxPdfTokenizer::NextToken()
1609 {
1610   wxString buffer = wxEmptyString;
1611   m_stringValue = wxEmptyString;
1612   int ch = 0;
1613   do
1614   {
1615     ch = ReadChar();
1616   }
1617   while (ch != -1 && IsWhitespace(ch));
1618 
1619   if (ch == -1)
1620     return false;
1621 
1622   switch (ch)
1623   {
1624     case '[':
1625       m_type = TOKEN_START_ARRAY;
1626       break;
1627     case ']':
1628       m_type = TOKEN_END_ARRAY;
1629       break;
1630     case '/':
1631     {
1632       m_type = TOKEN_NAME;
1633       // The slash is not part of the name
1634       // buffer += ch;
1635       while (true)
1636       {
1637         ch = ReadChar();
1638         if (IsDelimiterOrWhitespace(ch))
1639           break;
1640         buffer += ch;
1641       }
1642       BackOnePosition(ch);
1643       break;
1644     }
1645     case '>':
1646       ch = ReadChar();
1647       if (ch != '>')
1648       {
1649         wxLogError(wxString(wxS("wxPdfTokenizer::NextToken: ")) +
1650                    wxString(_("'>' not expected.")));
1651         return false;
1652       }
1653       m_type = TOKEN_END_DICTIONARY;
1654       break;
1655     case '<':
1656     {
1657       int v1 = ReadChar();
1658       if (v1 == '<')
1659       {
1660         m_type = TOKEN_START_DICTIONARY;
1661         break;
1662       }
1663       m_type = TOKEN_STRING;
1664       m_hexString = true;
1665       int v2 = 0;
1666       while (true)
1667       {
1668         while (IsWhitespace(v1))
1669         {
1670           v1 = ReadChar();
1671         }
1672         if (v1 == '>')
1673           break;
1674         v1 = GetHex(v1);
1675         if (v1 < 0)
1676           break;
1677         v2 = ReadChar();
1678         while (IsWhitespace(v2))
1679         {
1680           v2 = ReadChar();
1681         }
1682         if (v2 == '>')
1683         {
1684           ch = v1 << 4;
1685           buffer += ch;
1686           break;
1687         }
1688         v2 = GetHex(v2);
1689         if (v2 < 0)
1690           break;
1691         ch = (v1 << 4) + v2;
1692         buffer += ch;
1693         v1 = ReadChar();
1694       }
1695       if (v1 < 0 || v2 < 0)
1696       {
1697         wxLogError(wxString(wxS("wxPdfTokenizer::NextToken: ")) +
1698                    wxString(_("Error reading string.")));
1699         return false;
1700       }
1701       break;
1702     }
1703     case '%':
1704       m_type = TOKEN_COMMENT;
1705       do
1706       {
1707         ch = ReadChar();
1708       }
1709       while (ch != -1 && ch != '\r' && ch != '\n');
1710       break;
1711     case '(':
1712     {
1713       m_type = TOKEN_STRING;
1714       m_hexString = false;
1715       int nesting = 0;
1716       while (true)
1717       {
1718         ch = ReadChar();
1719         if (ch == -1)
1720           break;
1721         if (ch == '(')
1722         {
1723           ++nesting;
1724         }
1725         else if (ch == ')')
1726         {
1727           --nesting;
1728         }
1729         else if (ch == '\\')
1730         {
1731           bool lineBreak = false;
1732           ch = ReadChar();
1733           switch (ch)
1734           {
1735             case 'n':
1736               ch = '\n';
1737               break;
1738             case 'r':
1739               ch = '\r';
1740               break;
1741             case 't':
1742               ch = '\t';
1743               break;
1744             case 'b':
1745               ch = '\b';
1746               break;
1747             case 'f':
1748               ch = '\f';
1749               break;
1750             case '(':
1751             case ')':
1752             case '\\':
1753               break;
1754             case '\r':
1755               lineBreak = true;
1756               ch = ReadChar();
1757               if (ch != '\n')
1758                 BackOnePosition(ch);
1759               break;
1760             case '\n':
1761               lineBreak = true;
1762               break;
1763             default:
1764             {
1765               if (ch < '0' || ch > '7')
1766               {
1767                 break;
1768               }
1769               int octal = ch - '0';
1770               ch = ReadChar();
1771               if (ch < '0' || ch > '7')
1772               {
1773                 BackOnePosition(ch);
1774                 ch = octal;
1775                 break;
1776               }
1777               octal = (octal << 3) + ch - '0';
1778               ch = ReadChar();
1779               if (ch < '0' || ch > '7')
1780               {
1781                 BackOnePosition(ch);
1782                 ch = octal;
1783                 break;
1784               }
1785               octal = (octal << 3) + ch - '0';
1786               ch = octal & 0xff;
1787               break;
1788             }
1789           }
1790           if (lineBreak)
1791             continue;
1792           if (ch < 0)
1793             break;
1794         }
1795         else if (ch == '\r')
1796         {
1797           ch = ReadChar();
1798           if (ch < 0)
1799             break;
1800           if (ch != '\n')
1801           {
1802             BackOnePosition(ch);
1803             ch = '\n';
1804           }
1805         }
1806         if (nesting == -1)
1807           break;
1808         buffer += ch;
1809       }
1810       if (ch == -1)
1811       {
1812         wxLogError(wxString(wxS("wxPdfTokenizer::NextToken: ")) +
1813                    wxString(_("Error reading string.")));
1814         return false;
1815       }
1816       break;
1817     }
1818     default:
1819     {
1820       if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9'))
1821       {
1822         m_type = TOKEN_NUMBER;
1823         do
1824         {
1825           buffer += ch;
1826           ch = ReadChar();
1827         }
1828         while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'));
1829       }
1830       else
1831       {
1832         m_type = TOKEN_OTHER;
1833         do
1834         {
1835           buffer += ch;
1836           ch = ReadChar();
1837         }
1838         while (!IsDelimiterOrWhitespace(ch));
1839       }
1840       BackOnePosition(ch);
1841       break;
1842     }
1843   }
1844   if (buffer != wxEmptyString)
1845   {
1846     m_stringValue.Append(buffer);
1847     if (m_type == TOKEN_OTHER && (m_stringValue == wxS("true") || m_stringValue == wxS("false")))
1848     {
1849       m_type = TOKEN_BOOLEAN;
1850     }
1851   }
1852   return true;
1853 }
1854 
1855 void
NextValidToken()1856 wxPdfTokenizer::NextValidToken()
1857 {
1858   int level = 0;
1859   wxString n1 = wxEmptyString;
1860   wxString n2 = wxEmptyString;
1861   int ptr = 0;
1862   while (NextToken())
1863   {
1864     if (m_type == TOKEN_COMMENT)
1865       continue;
1866     switch (level)
1867     {
1868       case 0:
1869       {
1870         if (m_type != TOKEN_NUMBER)
1871           return;
1872         ptr = Tell();
1873         n1 = m_stringValue;
1874         ++level;
1875         break;
1876       }
1877       case 1:
1878       {
1879         if (m_type != TOKEN_NUMBER) {
1880           Seek(ptr);
1881           m_type = TOKEN_NUMBER;
1882           m_stringValue = n1;
1883           return;
1884         }
1885         n2 = m_stringValue;
1886         ++level;
1887         break;
1888       }
1889       default:
1890       {
1891         if (m_type != TOKEN_OTHER || m_stringValue != wxS("R"))
1892         {
1893           Seek(ptr);
1894           m_type = TOKEN_NUMBER;
1895           m_stringValue = n1;
1896           return;
1897         }
1898         m_type = TOKEN_REFERENCE;
1899         long value;
1900         n1.ToLong(&value);
1901         m_reference = value;
1902         n2.ToLong(&value);
1903         m_generation = value;
1904         return;
1905       }
1906     }
1907   }
1908   wxLogError(wxString(wxS("wxPdfTokenizer::NextValidToken: ")) +
1909              wxString(_("Unexpected end of file.")));
1910 }
1911 
1912 int
GetTokenType()1913 wxPdfTokenizer::GetTokenType()
1914 {
1915   return m_type;
1916 }
1917 
1918 wxString
GetStringValue()1919 wxPdfTokenizer::GetStringValue()
1920 {
1921   return m_stringValue;
1922 }
1923 
1924 int
GetIntValue()1925 wxPdfTokenizer::GetIntValue()
1926 {
1927   long value;
1928   m_stringValue.ToLong(&value);
1929   return value;
1930 }
1931 
1932 int
GetReference()1933 wxPdfTokenizer::GetReference()
1934 {
1935   return m_reference;
1936 }
1937 
1938 int
GetGeneration()1939 wxPdfTokenizer::GetGeneration()
1940 {
1941   return m_generation;
1942 }
1943 
1944 bool
IsWhitespace(int ch)1945 wxPdfTokenizer::IsWhitespace(int ch)
1946 {
1947   return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32);
1948 }
1949 
1950 bool
IsDelimiter(int ch)1951 wxPdfTokenizer::IsDelimiter(int ch)
1952 {
1953   return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%');
1954 }
1955 
1956 bool
IsDelimiterOrWhitespace(int ch)1957 wxPdfTokenizer::IsDelimiterOrWhitespace(int ch)
1958 {
1959   return IsWhitespace(ch) || IsDelimiter(ch) || (ch == -1);
1960 }
1961 
1962 int
GetHex(int v)1963 wxPdfTokenizer::GetHex(int v)
1964 {
1965   if (v >= '0' && v <= '9')
1966     return v - '0';
1967   if (v >= 'A' && v <= 'F')
1968     return v - 'A' + 10;
1969   if (v >= 'a' && v <= 'f')
1970     return v - 'a' + 10;
1971   return -1;
1972 }
1973