1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: pdfparser.cpp
3 // Purpose:
4 // Author: Ulrich Telle
5 // Created: 2006-10-15
6 // Copyright: (c) Ulrich Telle
7 // Licence: wxWindows licence
8 ///////////////////////////////////////////////////////////////////////////////
9
10 /// \file pdfparser.cpp Implementation of PDF parser
11
12 // For compilers that support precompilation, includes <wx.h>.
13 #include <wx/wxprec.h>
14
15 #ifdef __BORLANDC__
16 #pragma hdrstop
17 #endif
18
19 #ifndef WX_PRECOMP
20 #include <wx/wx.h>
21 #endif
22
23 // includes
24 #include <wx/uri.h>
25 #include <wx/url.h>
26
27 #include "wx/pdfencrypt.h"
28 #include "wx/pdfinfo.h"
29 #include "wx/pdfobjects.h"
30 #include "wx/pdfparser.h"
31 #include "wx/pdftemplate.h"
32
33 #include <wx/arrimpl.cpp>
34 WX_DEFINE_OBJARRAY(wxPdfXRef);
35
wxPdfXRefEntry()36 wxPdfXRefEntry::wxPdfXRefEntry()
37 {
38 m_type = -1;
39 m_ofs_idx = 0;
40 m_gen_ref = 0;
41 }
42
~wxPdfXRefEntry()43 wxPdfXRefEntry::~wxPdfXRefEntry()
44 {
45 }
46
47 void
ReserveXRef(size_t count)48 wxPdfParser::ReserveXRef(size_t count)
49 {
50 size_t currentCount = m_xref.GetCount();
51 if (count > currentCount)
52 {
53 m_xref.Add(wxPdfXRefEntry(), count-currentCount);
54 }
55 }
56
57 wxFileSystem* wxPdfParser::ms_fileSystem = NULL;
58
59 wxFileSystem*
GetFileSystem()60 wxPdfParser::GetFileSystem()
61 {
62 if (ms_fileSystem == NULL)
63 {
64 static wxFileSystem fileSystem;
65 ms_fileSystem = &fileSystem;
66 }
67 return ms_fileSystem;
68 }
69
wxPdfParser(const wxString & filename,const wxString & password)70 wxPdfParser::wxPdfParser(const wxString& filename, const wxString& password)
71 {
72 m_objectQueue = new wxPdfObjectQueue();
73 m_objectQueueLast = m_objectQueue;
74 m_objectMap = new wxPdfObjectMap();
75 m_objStmCache = new wxPdfObjStmMap();
76 m_tokens = NULL;
77 m_trailer = NULL;
78 m_root = NULL;
79 m_useRawStream = false;
80 m_cacheObjects = true;
81
82 m_encrypted = false;
83 m_decryptor = NULL;
84
85 m_filename = filename;
86 m_password = password;
87
88 wxString fileURL = filename;
89 wxURI uri(filename);
90 if (!uri.HasScheme())
91 {
92 fileURL = wxFileSystem::FileNameToURL(filename);
93 }
94 m_pdfFile = wxPdfParser::GetFileSystem()->OpenFile(fileURL);
95 if (m_pdfFile != NULL)
96 {
97 m_tokens = new wxPdfTokenizer(m_pdfFile->GetStream());
98 m_initialized = ParseDocument();
99 }
100 }
101
~wxPdfParser()102 wxPdfParser::~wxPdfParser()
103 {
104 wxPdfObjectQueue* entry = m_objectQueue;
105 wxPdfObjectQueue* next;
106 while (entry != NULL)
107 {
108 wxPdfObject* object = entry->GetObject();
109 if (object != NULL && object->IsCreatedIndirect())
110 {
111 delete object;
112 }
113 next = entry->GetNext();
114 delete entry;
115 entry = next;
116 }
117 delete m_objectMap;
118
119 wxPdfObjStmMap::iterator objStm = m_objStmCache->begin();
120 for (objStm = m_objStmCache->begin(); objStm != m_objStmCache->end(); objStm++)
121 {
122 if (objStm->second != NULL)
123 {
124 delete objStm->second;
125 }
126 }
127 delete m_objStmCache;
128
129 size_t j;
130 for (j = 0; j < m_pages.GetCount(); j++)
131 {
132 wxPdfObject* obj = (wxPdfObject*) m_pages.Item(j);
133 delete obj;
134 }
135 m_pages.Clear();
136
137 if (m_trailer != NULL)
138 {
139 delete m_trailer;
140 }
141 if (m_root != NULL)
142 {
143 delete m_root;
144 }
145
146 delete m_tokens;
147 if (m_pdfFile != NULL)
148 {
149 delete m_pdfFile;
150 }
151
152 if (m_decryptor != NULL)
153 {
154 delete m_decryptor;
155 }
156 }
157
158 bool
IsOk()159 wxPdfParser::IsOk()
160 {
161 return (m_pdfFile != NULL && m_initialized);
162 }
163
164 void
AppendObject(int originalObjectId,int actualObjectId,wxPdfObject * obj)165 wxPdfParser::AppendObject(int originalObjectId, int actualObjectId, wxPdfObject* obj)
166 {
167 wxPdfObjectQueue* newEntry = new wxPdfObjectQueue(originalObjectId, actualObjectId, obj);
168 m_objectQueueLast->SetNext(newEntry);
169 m_objectQueueLast = newEntry;
170 (*m_objectMap)[originalObjectId] = newEntry;
171 }
172
173 unsigned int
GetPageCount()174 wxPdfParser::GetPageCount()
175 {
176 return (unsigned int) m_pages.GetCount();
177 }
178
179 static const wxChar* gs_entryList[] = {
180 wxS("Title"), wxS("Author"), wxS("Subject"), wxS("Keywords"),
181 wxS("Creator"), wxS("Producer"), wxS("CreationDate"), wxS("ModDate"),
182 NULL
183 }; //, "Trapped")
184
185 bool
GetSourceInfo(wxPdfInfo & info)186 wxPdfParser::GetSourceInfo(wxPdfInfo& info)
187 {
188 bool ok = false;
189 wxPdfDictionary* infoDict = (wxPdfDictionary*) ResolveObject(m_trailer->Get(wxS("Info")));
190 if (infoDict != NULL && infoDict->GetType() == OBJTYPE_DICTIONARY)
191 {
192 typedef void (wxPdfInfo::*InfoSetter) (const wxString& value);
193 InfoSetter entryFunc[] = { &wxPdfInfo::SetTitle, &wxPdfInfo::SetAuthor, &wxPdfInfo::SetSubject,
194 &wxPdfInfo::SetKeywords, &wxPdfInfo::SetCreator, &wxPdfInfo::SetProducer,
195 &wxPdfInfo::SetCreationDate, &wxPdfInfo::SetModDate,
196 NULL };
197 wxString value;
198 size_t j;
199 for (j = 0; gs_entryList[j] != NULL; j++)
200 {
201 wxPdfString* entry = (wxPdfString*) infoDict->Get(gs_entryList[j]);
202 if (entry != NULL)
203 {
204 value = entry->GetValue();
205 #if wxUSE_UNICODE
206 if ((value.Length() >= 2) && (value.GetChar(0) == 254) && (value.GetChar(1) == 255))
207 {
208 wxMBConvUTF16BE conv;
209 size_t k;
210 size_t len = value.Length()-2;
211 char* mbstr = new char[len+2];
212 for (k = 0; k < len; k++)
213 {
214 mbstr[k] = (int) value.GetChar(k+2);
215 }
216 mbstr[len] = 0;
217 mbstr[len+1] = 0;
218 value = conv.cMB2WC(mbstr);
219 delete [] mbstr;
220 }
221 #endif
222 (info.*entryFunc[j])(value);
223 }
224 }
225 if (infoDict->IsCreatedIndirect())
226 {
227 delete infoDict;
228 }
229 ok = true;
230 }
231 return ok;
232 }
233
234 bool
ParseDocument()235 wxPdfParser::ParseDocument()
236 {
237 bool ok = false;
238 m_fileSize = m_tokens->GetLength();
239 m_pdfVersion = m_tokens->CheckPdfHeader();
240 if (m_pdfVersion != wxEmptyString)
241 {
242 if (ParseXRef())
243 {
244 if (SetupDecryptor())
245 {
246 m_root = (wxPdfDictionary*) m_trailer->Get(wxS("Root"));
247 m_root = (wxPdfDictionary*) ResolveObject(m_root);
248 if (m_root != NULL)
249 {
250 wxPdfName* versionEntry = (wxPdfName*) ResolveObject(m_root->Get(wxS("Version")));
251 if (versionEntry != NULL)
252 {
253 wxString version = versionEntry->GetName();
254 version = version.Mid(1, 3);
255 if (m_pdfVersion < version)
256 {
257 m_pdfVersion = version;
258 }
259 if (versionEntry->IsCreatedIndirect())
260 {
261 delete versionEntry;
262 }
263 }
264 wxPdfDictionary* pages = (wxPdfDictionary*) ResolveObject(m_root->Get(wxS("Pages")));
265 ok = ParsePageTree(pages);
266 delete pages;
267 }
268 }
269 }
270 }
271 return ok;
272 }
273
274 bool
SetupDecryptor()275 wxPdfParser::SetupDecryptor()
276 {
277 bool ok = true;
278 wxPdfObject* encDic = m_trailer->Get(wxS("Encrypt"));
279 if (encDic == NULL || encDic->GetType() == OBJTYPE_NULL)
280 {
281 return true;
282 }
283 wxPdfDictionary* enc = (wxPdfDictionary*) ResolveObject(encDic);
284 wxPdfObject* obj;
285 wxPdfArray* documentIDs = (wxPdfArray*) ResolveObject(m_trailer->Get(wxS("ID")));
286 wxString documentID;
287 if (documentIDs != NULL)
288 {
289 obj = (wxPdfObject*) documentIDs->Get(0);
290 if (obj->GetType() == OBJTYPE_STRING)
291 {
292 documentID = ((wxPdfString*) obj)->GetValue();
293 }
294 if (documentIDs->IsCreatedIndirect())
295 {
296 delete documentIDs;
297 }
298 }
299
300 wxString uValue = wxEmptyString;
301 obj = enc->Get(wxS("U"));
302 if (obj->GetType() == OBJTYPE_STRING)
303 {
304 uValue = ((wxPdfString*) obj)->GetValue();
305 if (uValue.Length() != 32)
306 {
307 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
308 wxString(_("Invalid length of U value.")));
309 ok = false;
310 }
311 }
312
313 wxString oValue = wxEmptyString;
314 obj = enc->Get(wxS("O"));
315 if (obj->GetType() == OBJTYPE_STRING)
316 {
317 oValue = ((wxPdfString*) obj)->GetValue();
318 if (oValue.Length() != 32)
319 {
320 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
321 wxString(_("Invalid length of O value.")));
322 ok = false;
323 }
324 }
325
326 int rValue = 0;
327 obj = enc->Get(wxS("R"));
328 if (obj->GetType() == OBJTYPE_NUMBER)
329 {
330 rValue = ((wxPdfNumber*) obj)->GetInt();
331 if (rValue != 2 && rValue != 3)
332 {
333 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
334 wxString::Format(_("Unknown encryption type (%d)."), rValue));
335 ok = false;
336 }
337 }
338 else
339 {
340 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
341 wxString(_("Illegal R value.")));
342 ok = false;
343 }
344
345 int vValue = 0;
346 obj = enc->Get(wxS("V"));
347 if (obj != NULL && obj->GetType() == OBJTYPE_NUMBER)
348 {
349 vValue = ((wxPdfNumber*) obj)->GetInt();
350 if (!((rValue == 2 && vValue == 1) || (rValue == 3 && vValue == 2)))
351 {
352 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
353 wxString(_("Unsupported V value.")));
354 ok = false;
355 }
356 }
357 else
358 {
359 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
360 wxString(_("Illegal V value.")));
361 ok = false;
362 }
363
364 int pValue = 0;
365 obj = enc->Get(wxS("P"));
366 if (obj->GetType() == OBJTYPE_NUMBER)
367 {
368 pValue = ((wxPdfNumber*) obj)->GetInt();
369 // Check required permissions (Applications MUST respect the permission settings)
370 if ((pValue & REQUIRED_PERMISSIONS) != REQUIRED_PERMISSIONS)
371 {
372 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
373 wxString(_("Import of document not allowed due to missing permissions.")));
374 ok = false;
375 }
376 }
377 else
378 {
379 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
380 wxString(_("Illegal P value.")));
381 ok = false;
382 }
383
384 int lengthValue = 40; // Default for revisison 2
385 if (rValue == 3)
386 {
387 // Get the key length if revision is 3
388 obj = enc->Get(wxS("Length"));
389 if (obj->GetType() == OBJTYPE_NUMBER)
390 {
391 lengthValue = ((wxPdfNumber*) obj)->GetInt();
392 if (lengthValue > 128 || lengthValue < 40 || lengthValue % 8 != 0)
393 {
394 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
395 wxString(_("Illegal Length value.")));
396 ok = false;
397 }
398 }
399 else
400 {
401 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
402 wxString(_("Illegal Length value.")));
403 ok = false;
404 }
405 }
406
407 if (enc->IsCreatedIndirect())
408 {
409 delete enc;
410 }
411
412 if (ok)
413 {
414 m_encrypted = true;
415 m_decryptor = new wxPdfEncrypt();
416 if (!m_decryptor->Authenticate(documentID, m_password, uValue, oValue, pValue, lengthValue, rValue))
417 {
418 wxLogError(wxString(wxS("wxPdfParser::SetupDecryptor: ")) +
419 wxString(_("Bad password.")));
420 ok = false;
421 }
422 }
423
424 return ok;
425 }
426
427 bool
ParsePageTree(wxPdfDictionary * pages)428 wxPdfParser::ParsePageTree(wxPdfDictionary* pages)
429 {
430 bool ok = false;
431 // Get the kids dictionary
432 wxPdfArray* kids = (wxPdfArray*) ResolveObject(pages->Get(wxS("Kids")));
433 if (kids != NULL)
434 {
435 size_t nKids = kids->GetSize();
436 size_t j;
437 ok = true;
438 for (j = 0; j < nKids; j++)
439 {
440 wxPdfDictionary* page = (wxPdfDictionary*) ResolveObject(kids->Get(j));
441 wxPdfName* type = (wxPdfName*) page->Get(wxS("Type"));
442 if (type->GetName() == wxS("Pages"))
443 {
444 // If one of the kids is an embedded
445 // /Pages array, resolve it as well.
446 ok = ok && ParsePageTree(page);
447 delete page;
448 }
449 else
450 {
451 m_pages.Add(page);
452 }
453 }
454 if (kids->IsCreatedIndirect())
455 {
456 delete kids;
457 }
458 }
459 else
460 {
461 wxLogError(wxString(wxS("wxPdfParser::ParsePageTree: ")) +
462 wxString(_("Cannot find /Kids in current /Page-Dictionary")));
463 }
464 return ok;
465 }
466
467 wxPdfObject*
GetPageResources(unsigned int pageno)468 wxPdfParser::GetPageResources(unsigned int pageno)
469 {
470 wxPdfObject* resources = NULL;
471 if (pageno < GetPageCount())
472 {
473 resources = GetPageResources((wxPdfObject*) m_pages[pageno]);
474 }
475 return resources;
476 }
477
478 wxPdfObject*
GetPageResources(wxPdfObject * page)479 wxPdfParser::GetPageResources(wxPdfObject* page)
480 {
481 wxPdfObject* resources = NULL;
482 wxPdfDictionary* dic = (wxPdfDictionary*) ResolveObject(page);
483
484 // If the current object has a resources dictionary associated with it,
485 // we use it. Otherwise, we move back to its parent object.
486 wxPdfObject* resourceRef = ResolveObject(dic->Get(wxS("Resources")));
487 if (resourceRef != NULL)
488 {
489 resources = ResolveObject(resourceRef);
490 }
491 else
492 {
493 wxPdfObject* parent = ResolveObject(dic->Get(wxS("Parent")));
494 if (parent != NULL)
495 {
496 resources = GetPageResources(parent);
497 delete parent;
498 }
499 }
500 return resources;
501 }
502
503 void
GetContent(unsigned int pageno,wxArrayPtrVoid & contents)504 wxPdfParser::GetContent(unsigned int pageno, wxArrayPtrVoid& contents)
505 {
506 if (pageno < GetPageCount())
507 {
508 wxPdfObject* content = ((wxPdfDictionary*) m_pages[pageno])->Get(wxS("Contents"));
509 if (content != NULL)
510 {
511 GetPageContent(content, contents);
512 }
513 }
514 }
515
516 void
GetPageContent(wxPdfObject * contentRef,wxArrayPtrVoid & contents)517 wxPdfParser::GetPageContent(wxPdfObject* contentRef, wxArrayPtrVoid& contents)
518 {
519 int type = contentRef->GetType();
520 if (type == OBJTYPE_INDIRECT)
521 {
522 wxPdfObject* content = ResolveObject(contentRef);
523 if (content->GetType() == OBJTYPE_ARRAY)
524 {
525 GetPageContent(content, contents);
526 delete content;
527 }
528 else
529 {
530 contents.Add(content);
531 }
532 }
533 else if (type == OBJTYPE_ARRAY)
534 {
535 wxPdfArray* contentArray = (wxPdfArray*) contentRef;
536 size_t n = contentArray->GetSize();
537 size_t j;
538 for (j = 0; j < n; j++)
539 {
540 GetPageContent(contentArray->Get(j), contents);
541 }
542 }
543 }
544
545 wxPdfArrayDouble*
GetPageMediaBox(unsigned int pageno)546 wxPdfParser::GetPageMediaBox(unsigned int pageno)
547 {
548 wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("MediaBox"));
549 return box;
550 }
551
552 wxPdfArrayDouble*
GetPageCropBox(unsigned int pageno)553 wxPdfParser::GetPageCropBox(unsigned int pageno)
554 {
555 wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("CropBox"));
556 if (box == NULL)
557 {
558 box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("MediaBox"));
559 }
560 return box;
561 }
562
563 wxPdfArrayDouble*
GetPageBleedBox(unsigned int pageno)564 wxPdfParser::GetPageBleedBox(unsigned int pageno)
565 {
566 wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("BleedBox"));
567 if (box == NULL)
568 {
569 box = GetPageCropBox(pageno);
570 }
571 return box;
572 }
573
574 wxPdfArrayDouble*
GetPageTrimBox(unsigned int pageno)575 wxPdfParser::GetPageTrimBox(unsigned int pageno)
576 {
577 wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("TrimBox"));
578 if (box == NULL)
579 {
580 box = GetPageCropBox(pageno);
581 }
582 return box;
583 }
584
585 wxPdfArrayDouble*
GetPageArtBox(unsigned int pageno)586 wxPdfParser::GetPageArtBox(unsigned int pageno)
587 {
588 wxPdfArrayDouble* box = GetPageBox((wxPdfDictionary*) m_pages[pageno], wxS("ArtBox"));
589 if (box == NULL)
590 {
591 box = GetPageCropBox(pageno);
592 }
593 return box;
594 }
595
596 wxPdfArrayDouble*
GetPageBox(wxPdfDictionary * page,const wxString & boxIndex)597 wxPdfParser::GetPageBox(wxPdfDictionary* page, const wxString& boxIndex)
598 {
599 wxPdfArrayDouble* pageBox = NULL;
600 wxPdfArray* box = (wxPdfArray*) ResolveObject(page->Get(boxIndex));
601 if (box == NULL)
602 {
603 wxPdfDictionary* parent = (wxPdfDictionary*) ResolveObject(page->Get(wxS("Parent")));
604 if (parent != NULL)
605 {
606 pageBox = GetPageBox(parent, boxIndex);
607 delete parent;
608 }
609 }
610 else
611 {
612 pageBox = new wxPdfArrayDouble();
613 size_t j;
614 for (j = 0; j < box->GetSize(); j++)
615 {
616 wxPdfNumber* item = (wxPdfNumber*) box->Get(j);
617 pageBox->Add(item->GetValue());
618 }
619 }
620 return pageBox;
621 }
622
623 int
GetPageRotation(unsigned int pageno)624 wxPdfParser::GetPageRotation(unsigned int pageno)
625 {
626 return GetPageRotation((wxPdfDictionary*) m_pages[pageno]);
627 }
628
629 int
GetPageRotation(wxPdfDictionary * page)630 wxPdfParser::GetPageRotation (wxPdfDictionary* page)
631 {
632 int pageRotation = 0;
633 wxPdfNumber* rotation = (wxPdfNumber*) ResolveObject(page->Get(wxS("Rotate")));
634 if (rotation == NULL)
635 {
636 wxPdfDictionary* parent = (wxPdfDictionary*) ResolveObject(page->Get(wxS("Parent")));
637 if (parent != NULL)
638 {
639 pageRotation = GetPageRotation(parent);
640 delete parent;
641 }
642 }
643 else
644 {
645 pageRotation = rotation->GetInt();
646 }
647 return pageRotation;
648 }
649
650 bool
ParseXRef()651 wxPdfParser::ParseXRef()
652 {
653 m_tokens->Seek(m_tokens->GetStartXRef());
654 m_tokens->NextToken();
655 if (m_tokens->GetStringValue() != wxS("startxref"))
656 {
657 wxLogError(wxString(wxS("wxPdfParser::ParseXRef: ")) +
658 wxString(_("'startxref' not found.")));
659 return false;
660 }
661 m_tokens->NextToken();
662 if (m_tokens->GetTokenType() != /*PRTokeniser.*/ TOKEN_NUMBER)
663 {
664 wxLogError(wxString(wxS("wxPdfParser::ParseXRef: ")) +
665 wxString(_("'startxref' is not followed by a number.")));
666 return false;
667 }
668 int startxref = m_tokens->GetIntValue();
669
670 if (!ParseXRefStream(startxref, true))
671 {
672 m_xref.Clear();
673 m_tokens->Seek(startxref);
674 m_trailer = ParseXRefSection();
675 wxPdfDictionary* trailer1 = m_trailer;
676 wxPdfDictionary* trailer2 = NULL;
677 while (trailer1 != NULL)
678 {
679 wxPdfNumber* prev = (wxPdfNumber*) trailer1->Get(wxS("Prev"));
680 trailer2 = trailer1;
681 if (prev != NULL)
682 {
683 m_tokens->Seek(prev->GetInt());
684 trailer1 = ParseXRefSection();
685 }
686 else
687 {
688 trailer1 = NULL;
689 }
690 if (trailer2 != m_trailer)
691 {
692 delete trailer2;
693 }
694 }
695 }
696 return (m_trailer != NULL);
697 }
698
699 wxPdfDictionary*
ParseXRefSection()700 wxPdfParser::ParseXRefSection()
701 {
702 m_tokens->NextValidToken();
703 if (m_tokens->GetStringValue() != wxS("xref"))
704 {
705 wxLogError(wxString(wxS("wxPdfParser::ParseXRefSection: ")) +
706 wxString(_("xref subsection not found.")));
707 return NULL;
708 }
709 int start = 0;
710 int end = 0;
711 int pos = 0;
712 int gen = 0;
713 while (true)
714 {
715 m_tokens->NextValidToken();
716 if (m_tokens->GetStringValue() == wxS("trailer"))
717 break;
718 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
719 {
720 wxLogError(wxString(wxS("wxPdfParser::ParseXRefSection: ")) +
721 wxString(_("Object number of the first object in this xref subsection not found.")));
722 return NULL;
723 }
724 start = m_tokens->GetIntValue();
725 m_tokens->NextValidToken();
726 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
727 {
728 wxLogError(wxString(wxS("wxPdfParser::ParseXRefSection: ")) +
729 wxString(_("Number of entries in this xref subsection not found.")));
730 return NULL;
731 }
732 end = m_tokens->GetIntValue() + start;
733 if (start == 1)
734 { // fix incorrect start number
735 int back = m_tokens->Tell();
736 m_tokens->NextValidToken();
737 pos = m_tokens->GetIntValue();
738 m_tokens->NextValidToken();
739 gen = m_tokens->GetIntValue();
740 if (pos == 0 && gen == 65535)
741 {
742 --start;
743 --end;
744 }
745 m_tokens->Seek(back);
746 }
747 ReserveXRef(end);
748
749 int k;
750 for (k = start; k < end; ++k)
751 {
752 wxPdfXRefEntry& xrefEntry = m_xref[k];
753 m_tokens->NextValidToken();
754 pos = m_tokens->GetIntValue();
755 m_tokens->NextValidToken();
756 gen = m_tokens->GetIntValue();
757 m_tokens->NextValidToken();
758 if (m_tokens->GetStringValue() == wxS("n"))
759 {
760 if (xrefEntry.m_ofs_idx == 0 && xrefEntry.m_gen_ref == 0)
761 {
762 // TODO: if (pos == 0)
763 // wxLogError(wxS("File position 0 cross-reference entry in this xref subsection"));
764 xrefEntry.m_ofs_idx = pos;
765 xrefEntry.m_gen_ref = gen;
766 xrefEntry.m_type = 1;
767 }
768 }
769 else if (m_tokens->GetStringValue() == wxS("f"))
770 {
771 if (xrefEntry.m_ofs_idx == 0 && xrefEntry.m_gen_ref == 0)
772 {
773 xrefEntry.m_ofs_idx = -1;
774 xrefEntry.m_gen_ref = 0;
775 xrefEntry.m_type = 0;
776 }
777 }
778 else
779 {
780 wxLogError(wxString(wxS("wxPdfParser:ReadXRefSection: ")) +
781 wxString(_("Invalid cross-reference entry in this xref subsection.")));
782 return NULL;
783 }
784 }
785 }
786 wxPdfDictionary* trailer = (wxPdfDictionary*) ParseObject();
787 wxPdfNumber* xrefSize = (wxPdfNumber*) trailer->Get(wxS("Size"));
788 ReserveXRef(xrefSize->GetInt());
789
790 wxPdfObject* xrs = trailer->Get(wxS("XRefStm"));
791 if (xrs != NULL && xrs->GetType() == OBJTYPE_NUMBER)
792 {
793 int loc = ((wxPdfNumber*) xrs)->GetInt();
794 ParseXRefStream(loc, false);
795 }
796 return trailer;
797 }
798
799 bool
ParseXRefStream(int ptr,bool setTrailer)800 wxPdfParser::ParseXRefStream(int ptr, bool setTrailer)
801 {
802 int idx, k;
803
804 m_tokens->Seek(ptr);
805 int streamRef = 0;
806 if (!m_tokens->NextToken())
807 {
808 return false;
809 }
810 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
811 {
812 return false;
813 }
814 streamRef = m_tokens->GetIntValue();
815 if (!m_tokens->NextToken() || m_tokens->GetTokenType() != TOKEN_NUMBER)
816 {
817 return false;
818 }
819 if (!m_tokens->NextToken() || m_tokens->GetStringValue() != wxS("obj"))
820 {
821 return false;
822 }
823 wxPdfObject* object = ParseObject();
824 wxPdfStream* stm = NULL;
825 if (object->GetType() == OBJTYPE_STREAM)
826 {
827 stm = (wxPdfStream*) object;
828 if (((wxPdfName*) stm->Get(wxS("Type")))->GetName() != wxS("XRef"))
829 {
830 delete object;
831 return false;
832 }
833 }
834 int size = ((wxPdfNumber*) stm->Get(wxS("Size")))->GetInt();
835 bool indexAllocated = false;
836 wxPdfArray* index;
837 wxPdfObject* obj = stm->Get(wxS("Index"));
838 if (obj == NULL)
839 {
840 indexAllocated = true;
841 index = new wxPdfArray();
842 index->Add(0);
843 index->Add(size);
844 }
845 else
846 {
847 index = (wxPdfArray*) obj;
848 }
849 wxPdfArray* w = (wxPdfArray*) stm->Get(wxS("W"));
850 int prev = -1;
851 obj = stm->Get(wxS("Prev"));
852 if (obj != NULL)
853 {
854 prev = ((wxPdfNumber* )obj)->GetInt();
855 }
856 // Each xref pair is a position
857 // type 0 -> -1, 0
858 // type 1 -> offset, 0
859 // type 2 -> index, obj num
860 ReserveXRef(size);
861
862 GetStreamBytes(stm);
863 wxMemoryOutputStream* streamBuffer = stm->GetBuffer();
864 wxMemoryInputStream streamBytes(*streamBuffer);
865 size_t inLength = streamBytes.GetSize();
866 char* buffer = new char[inLength];
867 streamBytes.Read(buffer, inLength);
868
869 int bptr = 0;
870 int wc[3];
871 for (k = 0; k < 3; ++k)
872 {
873 wc[k] = ((wxPdfNumber*) (w->Get(k)))->GetInt();
874 }
875 for (idx = 0; (size_t) idx < index->GetSize(); idx += 2)
876 {
877 int start = ((wxPdfNumber*) (index->Get(idx)))->GetInt();
878 int length = ((wxPdfNumber*) (index->Get(idx + 1)))->GetInt();
879 ReserveXRef(start+length);
880 while (length-- > 0)
881 {
882 wxPdfXRefEntry& xrefEntry = m_xref[start];
883 int type = 1;
884 if (wc[0] > 0)
885 {
886 type = 0;
887 for (k = 0; k < wc[0]; ++k)
888 {
889 type = (type << 8) + (buffer[bptr++] & 0xff);
890 }
891 }
892 int field2 = 0;
893 for (k = 0; k < wc[1]; ++k)
894 {
895 field2 = (field2 << 8) + (buffer[bptr++] & 0xff);
896 }
897 int field3 = 0;
898 for (k = 0; k < wc[2]; ++k)
899 {
900 field3 = (field3 << 8) + (buffer[bptr++] & 0xff);
901 }
902 if (xrefEntry.m_ofs_idx == 0 && xrefEntry.m_gen_ref == 0)
903 {
904 switch (type)
905 {
906 case 0:
907 xrefEntry.m_type = 0;
908 xrefEntry.m_ofs_idx = -1;
909 xrefEntry.m_gen_ref = 0;
910 break;
911 case 1:
912 xrefEntry.m_type = 1;
913 xrefEntry.m_ofs_idx = field2;
914 xrefEntry.m_gen_ref = field3;
915 break;
916 case 2:
917 xrefEntry.m_type = 2;
918 xrefEntry.m_ofs_idx = field3;
919 xrefEntry.m_gen_ref = field2;
920 break;
921 }
922 }
923 start++;
924 }
925 }
926 delete [] buffer;
927 if ((size_t) streamRef < m_xref.GetCount())
928 {
929 m_xref[streamRef].m_ofs_idx = -1;
930 }
931 if (indexAllocated)
932 {
933 delete index;
934 }
935
936 // Set the first xref stream dictionary as the trailer dictionary
937 if (setTrailer && m_trailer == NULL)
938 {
939
940 m_trailer = stm->GetDictionary();
941 stm->SetDictionary(NULL);
942 }
943 delete stm;
944
945 if (prev == -1)
946 {
947 return true;
948 }
949 return ParseXRefStream(prev, false);
950 }
951
952 wxPdfDictionary*
ParseDictionary()953 wxPdfParser::ParseDictionary()
954 {
955 wxPdfDictionary* dic = new wxPdfDictionary();
956 while (true)
957 {
958 m_tokens->NextValidToken();
959 if (m_tokens->GetTokenType() == TOKEN_END_DICTIONARY)
960 break;
961 if (m_tokens->GetTokenType() != TOKEN_NAME)
962 {
963 wxLogError(wxString(wxS("wxPdfParser::ParseDictionary: ")) +
964 wxString(_("Dictionary key is not a name.")));
965 break;
966 }
967 wxPdfName* name = new wxPdfName(m_tokens->GetStringValue());
968 wxPdfObject* obj = ParseObject();
969 int type = obj->GetType();
970 if (-type == TOKEN_END_DICTIONARY)
971 {
972 wxLogError(wxString(wxS("wxPdfParser::ParseDictionary: ")) +
973 wxString(_("Unexpected '>>'.")));
974 delete obj;
975 delete name;
976 break;
977 }
978 if (-type == TOKEN_END_ARRAY)
979 {
980 wxLogError(wxString(wxS("wxPdfParser::ParseDictionary: ")) +
981 wxString(_("Unexpected ']'.")));
982 delete obj;
983 delete name;
984 break;
985 }
986 dic->Put(name, obj);
987 delete name;
988 }
989 return dic;
990 }
991
992 wxPdfArray*
ParseArray()993 wxPdfParser::ParseArray()
994 {
995 wxPdfArray* array = new wxPdfArray();
996 while (true)
997 {
998 wxPdfObject* obj = ParseObject();
999 int type = obj->GetType();
1000 if (-type == TOKEN_END_ARRAY)
1001 {
1002 delete obj;
1003 break;
1004 }
1005 if (-type == TOKEN_END_DICTIONARY)
1006 {
1007 wxLogError(wxString(wxS("wxPdfParser::ParseArray: ")) +
1008 wxString(_("Unexpected '>>'.")));
1009 delete obj;
1010 break;
1011 }
1012 array->Add(obj);
1013 }
1014 return array;
1015 }
1016
1017 wxPdfObject*
ParseObject()1018 wxPdfParser::ParseObject()
1019 {
1020 wxPdfObject* obj;
1021 m_tokens->NextValidToken();
1022 int type = m_tokens->GetTokenType();
1023 switch (type)
1024 {
1025 case TOKEN_START_DICTIONARY:
1026 {
1027 wxPdfDictionary* dic = ParseDictionary();
1028 int pos = m_tokens->Tell();
1029 // be careful in the trailer. May not be a "next" token.
1030 if (m_tokens->NextToken() && m_tokens->GetStringValue() == wxS("stream"))
1031 {
1032 int ch = m_tokens->ReadChar();
1033 if (ch != '\n')
1034 ch = m_tokens->ReadChar();
1035 if (ch != '\n')
1036 m_tokens->BackOnePosition(ch);
1037 wxPdfStream* stream = new wxPdfStream(m_tokens->Tell());
1038 stream->SetDictionary(dic);
1039 obj = stream;
1040 }
1041 else
1042 {
1043 m_tokens->Seek(pos);
1044 obj = dic;
1045 }
1046 }
1047 break;
1048
1049 case TOKEN_START_ARRAY:
1050 {
1051 obj = ParseArray();
1052 }
1053 break;
1054
1055 case TOKEN_NUMBER:
1056 {
1057 obj = new wxPdfNumber(m_tokens->GetStringValue());
1058 }
1059 break;
1060
1061 case TOKEN_STRING:
1062 {
1063 wxString token = m_tokens->GetStringValue();
1064 // Decrypt if necessary
1065 if (m_encrypted)
1066 {
1067 m_decryptor->Encrypt(m_objNum, m_objGen, token);
1068 }
1069
1070 wxPdfString* strObj = new wxPdfString(token);
1071 strObj->SetIsHexString(m_tokens->IsHexString());
1072 obj = strObj;
1073 }
1074 break;
1075
1076 case TOKEN_NAME:
1077 {
1078 obj = new wxPdfName(m_tokens->GetStringValue());
1079 }
1080 break;
1081
1082 case TOKEN_REFERENCE:
1083 {
1084 int num = m_tokens->GetReference();
1085 obj = new wxPdfIndirectReference(num, m_tokens->GetGeneration());
1086 }
1087 break;
1088
1089 case TOKEN_BOOLEAN:
1090 {
1091 obj = new wxPdfBoolean((m_tokens->GetStringValue() == wxS("true")));
1092 }
1093 break;
1094
1095 case TOKEN_NULL:
1096 {
1097 obj = new wxPdfNull();
1098 }
1099 break;
1100
1101 default:
1102 {
1103 wxString token = m_tokens->GetStringValue();
1104 obj = new wxPdfLiteral(-type, m_tokens->GetStringValue());
1105 }
1106 break;
1107 }
1108 return obj;
1109 }
1110
1111 wxPdfObject*
ResolveObject(wxPdfObject * obj)1112 wxPdfParser::ResolveObject(wxPdfObject* obj)
1113 {
1114 if (obj != NULL && obj->GetType() == OBJTYPE_INDIRECT)
1115 {
1116 wxPdfIndirectReference* ref = (wxPdfIndirectReference*)obj;
1117 int idx = ref->GetNumber();
1118 obj = ParseSpecificObject(idx);
1119 obj->SetCreatedIndirect(true);
1120 }
1121 return obj;
1122 }
1123
1124 wxPdfObject*
ParseSpecificObject(int idx)1125 wxPdfParser::ParseSpecificObject(int idx)
1126 {
1127 wxPdfObject* obj = NULL;
1128 if ((size_t)(idx) >= m_xref.GetCount())
1129 {
1130 return NULL;
1131 }
1132 obj = ParseDirectObject(idx);
1133 return obj;
1134 }
1135
1136 wxPdfObject*
ParseDirectObject(int k)1137 wxPdfParser::ParseDirectObject(int k)
1138 {
1139 int objIndex = 0;
1140 int objStreamIndex = 0;
1141 bool isCached = false;
1142 wxPdfObject* obj = NULL;
1143
1144 // Check for free object
1145 if (m_xref[k].m_type == 0)
1146 {
1147 return NULL;
1148 }
1149 int pos = m_xref[k].m_ofs_idx;
1150 if (m_xref[k].m_type == 2)
1151 {
1152 objIndex = m_xref[k].m_gen_ref;
1153 wxPdfObjStmMap::iterator objStm = m_objStmCache->find(objIndex);
1154 if (objStm != m_objStmCache->end())
1155 {
1156 obj = objStm->second;
1157 isCached = true;
1158 }
1159 else
1160 {
1161 objStreamIndex = m_xref[k].m_gen_ref;
1162 pos = m_xref[objStreamIndex].m_ofs_idx;
1163 }
1164 }
1165 if (!isCached)
1166 {
1167 m_tokens->Seek(pos);
1168 m_tokens->NextValidToken();
1169 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1170 {
1171 wxLogError(wxString(wxS("wxPdfParser::ParseSingleObject: ")) +
1172 wxString(_("Invalid object number.")));
1173 return NULL;
1174 }
1175 m_objNum = m_tokens->GetIntValue();
1176 m_tokens->NextValidToken();
1177 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1178 {
1179 wxLogError(wxString(wxS("wxPdfParser::ParseSingleObject: ")) +
1180 wxString(_("Invalid generation number.")));
1181 return NULL;
1182 }
1183 m_objGen = m_tokens->GetIntValue();
1184 m_tokens->NextValidToken();
1185 if (m_tokens->GetStringValue() != wxS("obj"))
1186 {
1187 wxLogError(wxString(wxS("wxPdfParser::ParseSingleObject: ")) +
1188 wxString(_("Token 'obj' expected.")));
1189 return NULL;
1190 }
1191 obj = ParseObject();
1192 }
1193
1194 // TODO: Check for valid 'endstream'
1195
1196 if (m_xref[k].m_type == 2)
1197 {
1198 m_objNum = k;
1199 m_objGen = 0;
1200 wxPdfStream* objStream = (wxPdfStream*) obj;
1201 obj = ParseObjectStream((wxPdfStream*) obj, m_xref[k].m_ofs_idx);
1202 if (m_cacheObjects)
1203 {
1204 if (!isCached)
1205 {
1206 (*m_objStmCache)[objIndex] = objStream;
1207 }
1208 }
1209 else
1210 {
1211 delete objStream;
1212 }
1213 }
1214
1215 if (obj != NULL)
1216 {
1217 obj->SetObjNum(m_objNum, m_objGen);
1218 }
1219 if (obj->GetType() == OBJTYPE_STREAM)
1220 {
1221 GetStreamBytes((wxPdfStream*) obj);
1222 }
1223 return obj;
1224 }
1225
1226 wxPdfObject*
ParseObjectStream(wxPdfStream * objStm,int idx)1227 wxPdfParser::ParseObjectStream(wxPdfStream* objStm, int idx)
1228 {
1229 wxPdfObject* obj = NULL;
1230
1231 wxPdfNumber* firstNumber = (wxPdfNumber*) ResolveObject(objStm->Get(wxS("First")));
1232 int first = firstNumber->GetInt();
1233 if (objStm->GetBuffer() == NULL)
1234 {
1235 bool saveUseRawStream = m_useRawStream;
1236 m_useRawStream = false;
1237 GetStreamBytes(objStm);
1238 m_useRawStream = saveUseRawStream;
1239 }
1240
1241 bool saveEncrypted = m_encrypted;
1242 m_encrypted = false;
1243 wxPdfTokenizer* saveTokens = m_tokens;
1244 wxMemoryInputStream objStream(*(objStm->GetBuffer()));
1245 m_tokens = new wxPdfTokenizer(&objStream);
1246
1247 int address = 0;
1248 bool ok = true;
1249 if (!objStm->HasObjOffsets())
1250 {
1251 // Read object offsets
1252 wxArrayInt* objOffsets = objStm->GetObjOffsets();
1253 int objCount = idx + 1;
1254 if (m_cacheObjects)
1255 {
1256 wxPdfNumber* objCountNumber = (wxPdfNumber*) ResolveObject(objStm->Get(wxS("N")));
1257 objCount = objCountNumber->GetInt();
1258 }
1259 int offset;
1260 int k;
1261 for (k = 0; k < objCount; ++k)
1262 {
1263 ok = m_tokens->NextToken();
1264 if (!ok)
1265 break;
1266 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1267 {
1268 ok = false;
1269 break;
1270 }
1271 ok = m_tokens->NextToken();
1272 if (!ok)
1273 break;
1274 if (m_tokens->GetTokenType() != TOKEN_NUMBER)
1275 {
1276 ok = false;
1277 break;
1278 }
1279 offset = m_tokens->GetIntValue() + first;
1280 if (m_cacheObjects)
1281 {
1282 objOffsets->Add(offset);
1283 }
1284 if (k == idx)
1285 {
1286 address = offset;
1287 }
1288 }
1289 if (ok)
1290 {
1291 objStm->SetHasObjOffsets(m_cacheObjects);
1292 }
1293 }
1294 else
1295 {
1296 address = objStm->GetObjOffset(idx);
1297 ok = (address > 0);
1298 }
1299 if (ok)
1300 {
1301 m_tokens->Seek(address);
1302 obj = ParseObject();
1303 }
1304 else
1305 {
1306 wxLogError(wxString(wxS("wxPdfParser::ParseOneObjStm: ")) +
1307 wxString(_("Error reading ObjStm.")));
1308 }
1309
1310 delete m_tokens;
1311 m_tokens = saveTokens;
1312 m_encrypted = saveEncrypted;
1313
1314 return obj;
1315 }
1316
1317 void
GetStreamBytes(wxPdfStream * stream)1318 wxPdfParser::GetStreamBytes(wxPdfStream* stream)
1319 {
1320 GetStreamBytesRaw(stream);
1321
1322 // Do not decode the content of resource object streams
1323 if (m_useRawStream) return;
1324
1325 // Check whether the stream buffer is empty
1326 wxMemoryOutputStream* osIn = stream->GetBuffer();
1327 if (osIn->GetLength() == 0) return;
1328
1329 size_t j;
1330 wxArrayPtrVoid filters;
1331 wxPdfObject* filter = ResolveObject(stream->Get(wxS("Filter")));
1332 if (filter != NULL)
1333 {
1334 int type = filter->GetType();
1335 if (type == OBJTYPE_NAME)
1336 {
1337 filters.Add(filter);
1338 }
1339 else if (type == OBJTYPE_ARRAY)
1340 {
1341 wxPdfArray* filterArray = (wxPdfArray*) filter;
1342 size_t size = filterArray->GetSize();
1343 for (j = 0; j < size; j++)
1344 {
1345 filters.Add(filterArray->Get(j));
1346 }
1347 }
1348
1349 // Read decode parameters if available
1350 wxArrayPtrVoid dp;
1351 wxPdfObject* dpo = ResolveObject(stream->Get(wxS("DecodeParms")));
1352 if (dpo == NULL || (dpo->GetType() != OBJTYPE_DICTIONARY && dpo->GetType() != OBJTYPE_ARRAY))
1353 {
1354 dpo = ResolveObject(stream->Get(wxS("DP")));
1355 }
1356 if (dpo != NULL)
1357 {
1358 if (dpo->GetType() == OBJTYPE_DICTIONARY)
1359 {
1360 dp.Add(dpo);
1361 }
1362 else if (dpo->GetType() == OBJTYPE_ARRAY)
1363 {
1364 wxPdfArray* dpArray = (wxPdfArray*) dpo;
1365 size_t size = dpArray->GetSize();
1366 for (j = 0; j < size; j++)
1367 {
1368 dp.Add(dpArray->Get(j));
1369 }
1370 }
1371 }
1372
1373 wxPdfObject* dicParam = NULL;
1374 wxMemoryOutputStream* osOut = NULL;
1375 for (j = 0; j < filters.GetCount(); j++)
1376 {
1377 osIn = stream->GetBuffer();
1378 wxPdfName* name = (wxPdfName*) filters[j];
1379 if (name->GetName() == wxS("FlateDecode") || name->GetName() == wxS("Fl"))
1380 {
1381 osOut = FlateDecode(osIn);
1382 if (j < dp.GetCount())
1383 {
1384 wxMemoryOutputStream* osIn2 = osOut;
1385 dicParam = (wxPdfObject*) dp[j];
1386 osOut = DecodePredictor(osIn2, dicParam);
1387 if (osOut != osIn2)
1388 {
1389 delete osIn2;
1390 }
1391 }
1392 }
1393 else if(name->GetName() == wxS("ASCIIHexDecode") || name->GetName() == wxS("AHx"))
1394 {
1395 osOut = ASCIIHexDecode(osIn);
1396 }
1397 else if(name->GetName() == wxS("ASCII85Decode") || name->GetName() == wxS("A85"))
1398 {
1399 osOut = ASCII85Decode(osIn);
1400 }
1401 else if(name->GetName() == wxS("LZWDecode"))
1402 {
1403 osOut = LZWDecode(osIn);
1404 if (j < dp.GetCount())
1405 {
1406 wxMemoryOutputStream* osIn2 = osOut;
1407 dicParam = (wxPdfObject*) dp[j];
1408 osOut = DecodePredictor(osIn2, dicParam);
1409 if (osOut != osIn2)
1410 {
1411 delete osIn2;
1412 }
1413 }
1414 }
1415 else
1416 {
1417 wxLogError(wxString(wxS("wxPdfParser::GetStreamBytes: ")) +
1418 wxString::Format(_("Filter '%s' not supported."), name->GetName().c_str()));
1419 }
1420 if (osOut != NULL)
1421 {
1422 stream->SetBuffer(osOut);
1423 if (osIn != osOut)
1424 {
1425 delete osIn;
1426 }
1427 }
1428 }
1429 }
1430 }
1431
1432 void
GetStreamBytesRaw(wxPdfStream * stream)1433 wxPdfParser::GetStreamBytesRaw(wxPdfStream* stream)
1434 {
1435 wxPdfNumber* streamLength = (wxPdfNumber*) ResolveObject(stream->Get(wxS("Length")));
1436 size_t size = streamLength->GetInt();
1437 m_tokens->Seek(stream->GetOffset());
1438 wxMemoryOutputStream* memoryBuffer = NULL;
1439 wxMemoryOutputStream* streamBuffer = m_tokens->ReadBuffer(size);
1440
1441 if (m_encrypted && size > 0)
1442 {
1443 wxMemoryInputStream inData(*streamBuffer);
1444 delete streamBuffer;
1445 memoryBuffer = new wxMemoryOutputStream();
1446 unsigned char* buffer = new unsigned char[size];
1447 inData.Read(buffer, size);
1448 if (inData.LastRead() == size)
1449 {
1450 m_decryptor->Encrypt(stream->GetNumber(), stream->GetGeneration(), buffer, (unsigned int) size);
1451 memoryBuffer->Write(buffer, size);
1452 }
1453 delete [] buffer;
1454 memoryBuffer->Close();
1455 }
1456 else
1457 {
1458 memoryBuffer = streamBuffer;
1459 }
1460
1461 stream->SetBuffer(memoryBuffer);
1462 if (streamLength->IsCreatedIndirect())
1463 {
1464 delete streamLength;
1465 }
1466 }
1467
1468 // --- Tokenizer
1469
wxPdfTokenizer(wxInputStream * inputStream)1470 wxPdfTokenizer::wxPdfTokenizer(wxInputStream* inputStream)
1471 {
1472 m_inputStream = inputStream;
1473 }
1474
~wxPdfTokenizer()1475 wxPdfTokenizer::~wxPdfTokenizer()
1476 {
1477 }
1478
1479 off_t
Seek(off_t pos)1480 wxPdfTokenizer::Seek(off_t pos)
1481 {
1482 return m_inputStream->SeekI(pos);
1483 }
1484
1485 off_t
Tell()1486 wxPdfTokenizer::Tell()
1487 {
1488 return m_inputStream->TellI();
1489 }
1490
1491 void
BackOnePosition(int ch)1492 wxPdfTokenizer::BackOnePosition(int ch)
1493 {
1494 if (ch != -1)
1495 {
1496 off_t pos = Tell();
1497 if (pos > 0) pos--;
1498 Seek(pos);
1499 }
1500 }
1501
1502 off_t
GetLength()1503 wxPdfTokenizer::GetLength()
1504 {
1505 return m_inputStream->GetLength();
1506 }
1507
1508 int
ReadChar()1509 wxPdfTokenizer::ReadChar()
1510 {
1511 int readChar;
1512 char ch = m_inputStream->GetC();
1513 readChar = (m_inputStream->LastRead() > 0) ? (unsigned char) ch : -1;
1514 return readChar;
1515 }
1516
1517 wxMemoryOutputStream*
ReadBuffer(size_t size)1518 wxPdfTokenizer::ReadBuffer(size_t size)
1519 {
1520 wxMemoryOutputStream* memoryBuffer = new wxMemoryOutputStream();
1521 if (size > 0)
1522 {
1523 char* buffer = new char[size];
1524 m_inputStream->Read(buffer, size);
1525 if (m_inputStream->LastRead() == size)
1526 {
1527 memoryBuffer->Write(buffer, size);
1528 }
1529 delete [] buffer;
1530 }
1531 memoryBuffer->Close();
1532 return memoryBuffer;
1533 }
1534
1535 off_t
GetStartXRef()1536 wxPdfTokenizer::GetStartXRef()
1537 {
1538 char buffer[1024];
1539 int idx, found;
1540 off_t size = GetLength();
1541 if (size > 1024) size = 1024;
1542 off_t pos = GetLength() - size;
1543 do
1544 {
1545 m_inputStream->SeekI(pos);
1546 m_inputStream->Read(buffer, size);
1547 idx = size - 9;
1548 do
1549 {
1550 found = memcmp(buffer + idx, "startxref", 9);
1551 --idx;
1552 }
1553 while (found != 0 && idx >= 0);
1554 if (found == 0) break;
1555 pos = (pos > 1) ? (pos > (size - 9)) ? pos - size + 9 : 1 : 0;
1556 }
1557 while (pos > 0);
1558 if (found == 0)
1559 {
1560 pos = pos + idx + 1;
1561 }
1562 else
1563 {
1564 wxLogError(wxString(wxS("wxPdfTokenizer::GetStartXRef: ")) +
1565 wxString(_("PDF startxref not found.")));
1566 }
1567 return pos;
1568 }
1569
1570 wxString
CheckPdfHeader()1571 wxPdfTokenizer::CheckPdfHeader()
1572 {
1573 wxString version = wxEmptyString;
1574 m_inputStream->SeekI(0);
1575 wxString str = ReadString(1024);
1576 int idx = str.Find(wxS("%PDF-1."));
1577 if (idx >= 0)
1578 {
1579 m_inputStream->SeekI(idx);
1580 version = str.Mid(idx + 5, 3);
1581 }
1582 else
1583 {
1584 m_inputStream->SeekI(0);
1585 wxLogError(wxString(wxS("wxPdfTokenizer::GetStartXref: ")) +
1586 wxString(_("PDF header signature not found.")));
1587 }
1588 return version;
1589 }
1590
1591 wxString
ReadString(int size)1592 wxPdfTokenizer::ReadString(int size)
1593 {
1594 wxString buf;
1595 int ch;
1596 while (size > 0)
1597 {
1598 size--;
1599 ch = ReadChar();
1600 if (ch == -1)
1601 break;
1602 buf += ch;
1603 }
1604 return buf;
1605 }
1606
1607 bool
NextToken()1608 wxPdfTokenizer::NextToken()
1609 {
1610 wxString buffer = wxEmptyString;
1611 m_stringValue = wxEmptyString;
1612 int ch = 0;
1613 do
1614 {
1615 ch = ReadChar();
1616 }
1617 while (ch != -1 && IsWhitespace(ch));
1618
1619 if (ch == -1)
1620 return false;
1621
1622 switch (ch)
1623 {
1624 case '[':
1625 m_type = TOKEN_START_ARRAY;
1626 break;
1627 case ']':
1628 m_type = TOKEN_END_ARRAY;
1629 break;
1630 case '/':
1631 {
1632 m_type = TOKEN_NAME;
1633 // The slash is not part of the name
1634 // buffer += ch;
1635 while (true)
1636 {
1637 ch = ReadChar();
1638 if (IsDelimiterOrWhitespace(ch))
1639 break;
1640 buffer += ch;
1641 }
1642 BackOnePosition(ch);
1643 break;
1644 }
1645 case '>':
1646 ch = ReadChar();
1647 if (ch != '>')
1648 {
1649 wxLogError(wxString(wxS("wxPdfTokenizer::NextToken: ")) +
1650 wxString(_("'>' not expected.")));
1651 return false;
1652 }
1653 m_type = TOKEN_END_DICTIONARY;
1654 break;
1655 case '<':
1656 {
1657 int v1 = ReadChar();
1658 if (v1 == '<')
1659 {
1660 m_type = TOKEN_START_DICTIONARY;
1661 break;
1662 }
1663 m_type = TOKEN_STRING;
1664 m_hexString = true;
1665 int v2 = 0;
1666 while (true)
1667 {
1668 while (IsWhitespace(v1))
1669 {
1670 v1 = ReadChar();
1671 }
1672 if (v1 == '>')
1673 break;
1674 v1 = GetHex(v1);
1675 if (v1 < 0)
1676 break;
1677 v2 = ReadChar();
1678 while (IsWhitespace(v2))
1679 {
1680 v2 = ReadChar();
1681 }
1682 if (v2 == '>')
1683 {
1684 ch = v1 << 4;
1685 buffer += ch;
1686 break;
1687 }
1688 v2 = GetHex(v2);
1689 if (v2 < 0)
1690 break;
1691 ch = (v1 << 4) + v2;
1692 buffer += ch;
1693 v1 = ReadChar();
1694 }
1695 if (v1 < 0 || v2 < 0)
1696 {
1697 wxLogError(wxString(wxS("wxPdfTokenizer::NextToken: ")) +
1698 wxString(_("Error reading string.")));
1699 return false;
1700 }
1701 break;
1702 }
1703 case '%':
1704 m_type = TOKEN_COMMENT;
1705 do
1706 {
1707 ch = ReadChar();
1708 }
1709 while (ch != -1 && ch != '\r' && ch != '\n');
1710 break;
1711 case '(':
1712 {
1713 m_type = TOKEN_STRING;
1714 m_hexString = false;
1715 int nesting = 0;
1716 while (true)
1717 {
1718 ch = ReadChar();
1719 if (ch == -1)
1720 break;
1721 if (ch == '(')
1722 {
1723 ++nesting;
1724 }
1725 else if (ch == ')')
1726 {
1727 --nesting;
1728 }
1729 else if (ch == '\\')
1730 {
1731 bool lineBreak = false;
1732 ch = ReadChar();
1733 switch (ch)
1734 {
1735 case 'n':
1736 ch = '\n';
1737 break;
1738 case 'r':
1739 ch = '\r';
1740 break;
1741 case 't':
1742 ch = '\t';
1743 break;
1744 case 'b':
1745 ch = '\b';
1746 break;
1747 case 'f':
1748 ch = '\f';
1749 break;
1750 case '(':
1751 case ')':
1752 case '\\':
1753 break;
1754 case '\r':
1755 lineBreak = true;
1756 ch = ReadChar();
1757 if (ch != '\n')
1758 BackOnePosition(ch);
1759 break;
1760 case '\n':
1761 lineBreak = true;
1762 break;
1763 default:
1764 {
1765 if (ch < '0' || ch > '7')
1766 {
1767 break;
1768 }
1769 int octal = ch - '0';
1770 ch = ReadChar();
1771 if (ch < '0' || ch > '7')
1772 {
1773 BackOnePosition(ch);
1774 ch = octal;
1775 break;
1776 }
1777 octal = (octal << 3) + ch - '0';
1778 ch = ReadChar();
1779 if (ch < '0' || ch > '7')
1780 {
1781 BackOnePosition(ch);
1782 ch = octal;
1783 break;
1784 }
1785 octal = (octal << 3) + ch - '0';
1786 ch = octal & 0xff;
1787 break;
1788 }
1789 }
1790 if (lineBreak)
1791 continue;
1792 if (ch < 0)
1793 break;
1794 }
1795 else if (ch == '\r')
1796 {
1797 ch = ReadChar();
1798 if (ch < 0)
1799 break;
1800 if (ch != '\n')
1801 {
1802 BackOnePosition(ch);
1803 ch = '\n';
1804 }
1805 }
1806 if (nesting == -1)
1807 break;
1808 buffer += ch;
1809 }
1810 if (ch == -1)
1811 {
1812 wxLogError(wxString(wxS("wxPdfTokenizer::NextToken: ")) +
1813 wxString(_("Error reading string.")));
1814 return false;
1815 }
1816 break;
1817 }
1818 default:
1819 {
1820 if (ch == '-' || ch == '+' || ch == '.' || (ch >= '0' && ch <= '9'))
1821 {
1822 m_type = TOKEN_NUMBER;
1823 do
1824 {
1825 buffer += ch;
1826 ch = ReadChar();
1827 }
1828 while (ch != -1 && ((ch >= '0' && ch <= '9') || ch == '.'));
1829 }
1830 else
1831 {
1832 m_type = TOKEN_OTHER;
1833 do
1834 {
1835 buffer += ch;
1836 ch = ReadChar();
1837 }
1838 while (!IsDelimiterOrWhitespace(ch));
1839 }
1840 BackOnePosition(ch);
1841 break;
1842 }
1843 }
1844 if (buffer != wxEmptyString)
1845 {
1846 m_stringValue.Append(buffer);
1847 if (m_type == TOKEN_OTHER && (m_stringValue == wxS("true") || m_stringValue == wxS("false")))
1848 {
1849 m_type = TOKEN_BOOLEAN;
1850 }
1851 }
1852 return true;
1853 }
1854
1855 void
NextValidToken()1856 wxPdfTokenizer::NextValidToken()
1857 {
1858 int level = 0;
1859 wxString n1 = wxEmptyString;
1860 wxString n2 = wxEmptyString;
1861 int ptr = 0;
1862 while (NextToken())
1863 {
1864 if (m_type == TOKEN_COMMENT)
1865 continue;
1866 switch (level)
1867 {
1868 case 0:
1869 {
1870 if (m_type != TOKEN_NUMBER)
1871 return;
1872 ptr = Tell();
1873 n1 = m_stringValue;
1874 ++level;
1875 break;
1876 }
1877 case 1:
1878 {
1879 if (m_type != TOKEN_NUMBER) {
1880 Seek(ptr);
1881 m_type = TOKEN_NUMBER;
1882 m_stringValue = n1;
1883 return;
1884 }
1885 n2 = m_stringValue;
1886 ++level;
1887 break;
1888 }
1889 default:
1890 {
1891 if (m_type != TOKEN_OTHER || m_stringValue != wxS("R"))
1892 {
1893 Seek(ptr);
1894 m_type = TOKEN_NUMBER;
1895 m_stringValue = n1;
1896 return;
1897 }
1898 m_type = TOKEN_REFERENCE;
1899 long value;
1900 n1.ToLong(&value);
1901 m_reference = value;
1902 n2.ToLong(&value);
1903 m_generation = value;
1904 return;
1905 }
1906 }
1907 }
1908 wxLogError(wxString(wxS("wxPdfTokenizer::NextValidToken: ")) +
1909 wxString(_("Unexpected end of file.")));
1910 }
1911
1912 int
GetTokenType()1913 wxPdfTokenizer::GetTokenType()
1914 {
1915 return m_type;
1916 }
1917
1918 wxString
GetStringValue()1919 wxPdfTokenizer::GetStringValue()
1920 {
1921 return m_stringValue;
1922 }
1923
1924 int
GetIntValue()1925 wxPdfTokenizer::GetIntValue()
1926 {
1927 long value;
1928 m_stringValue.ToLong(&value);
1929 return value;
1930 }
1931
1932 int
GetReference()1933 wxPdfTokenizer::GetReference()
1934 {
1935 return m_reference;
1936 }
1937
1938 int
GetGeneration()1939 wxPdfTokenizer::GetGeneration()
1940 {
1941 return m_generation;
1942 }
1943
1944 bool
IsWhitespace(int ch)1945 wxPdfTokenizer::IsWhitespace(int ch)
1946 {
1947 return (ch == 0 || ch == 9 || ch == 10 || ch == 12 || ch == 13 || ch == 32);
1948 }
1949
1950 bool
IsDelimiter(int ch)1951 wxPdfTokenizer::IsDelimiter(int ch)
1952 {
1953 return (ch == '(' || ch == ')' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '/' || ch == '%');
1954 }
1955
1956 bool
IsDelimiterOrWhitespace(int ch)1957 wxPdfTokenizer::IsDelimiterOrWhitespace(int ch)
1958 {
1959 return IsWhitespace(ch) || IsDelimiter(ch) || (ch == -1);
1960 }
1961
1962 int
GetHex(int v)1963 wxPdfTokenizer::GetHex(int v)
1964 {
1965 if (v >= '0' && v <= '9')
1966 return v - '0';
1967 if (v >= 'A' && v <= 'F')
1968 return v - 'A' + 10;
1969 if (v >= 'a' && v <= 'f')
1970 return v - 'a' + 10;
1971 return -1;
1972 }
1973