1 /***************************************************************************
2  *   Copyright (C) 2007 by Pierre Marchand   *
3  *   pierre@moulindetouvois.com   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "pdftranslator.h"
21 // #include "charpainter.h"
22 #include "planreader_legacy.h"
23 
24 #ifdef PODOFO_HAVE_LUA
25 #include "planreader_lua.h"
26 #endif
27 
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <istream>
#include <map>
#include <ostream>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
35 using std::ostringstream;
36 using std::map;
37 using std::vector;
38 using std::string;
39 using std::ifstream;
40 using std::istream;
41 using std::ostream;
42 using std::endl;
43 using std::runtime_error;
44 
45 #include <iostream> //XXX
46 namespace PoDoFo
47 {
48 	namespace Impose
49 	{
50 
51 #define MAX_SOURCE_PAGES 5000
52 #define MAX_RECORD_SIZE 2048
53 
54 
55 
checkIsPDF(std::string path)56 		bool PdfTranslator::checkIsPDF ( std::string path )
57 		{
58 			ifstream in ( path.c_str(), ifstream::in );
59 			if ( !in.good() )
60 				throw runtime_error ( "setSource() failed to open input file" );
61 
62 			const int magicBufferLen = 5;
63 			char magicBuffer[magicBufferLen ];
64 			in.read ( magicBuffer, magicBufferLen );
65 			std::string magic ( magicBuffer , magicBufferLen );
66 
67 			in.close();
68 			if ( magic.find ( "%PDF" ) < 5 )
69 				return true;
70 // 			throw runtime_error("First bytes of the file tend to indicate it is not a PDF file");
71 			return false;
72 		}
73 
PdfTranslator()74 		PdfTranslator::PdfTranslator ( )
75 		{
76 			std::cerr<<"PdfTranslator::PdfTranslator"<<std::endl;
77 			sourceDoc = NULL;
78 			targetDoc = NULL;
79 			planImposition = NULL;
80 			duplicate = 0;
81 			extraSpace = 0;
82 			scaleFactor = 1.0;
83 			pcount = 0;
84 			sourceWidth = 0.0;
85 			sourceHeight = 0.0;
86 			destWidth = 0.0;
87 			destHeight = 0.0;
88 		}
89 
setSource(const std::string & source)90 		void PdfTranslator::setSource ( const std::string & source )
91 		{
92 			int dbg(0);
93 // 			std::cerr<<"PdfTranslator::setSource "<<source<<std::endl;
94 			std::cerr<< ++dbg <<std::endl;
95 			if ( checkIsPDF ( source ) )
96 			{
97 // 		std::cerr << "Appending "<<source<<" to source" << endl;
98 				multiSource.push_back ( source );
99 			}
100 			else
101 			{
102 
103 				ifstream in ( source.c_str(), ifstream::in );
104 				if ( !in.good() )
105 					throw runtime_error ( "setSource() failed to open input file" );
106 
107 
108 				char *filenameBuffer = new char[1000];
109 				do
110 				{
111 					if ( !in.getline ( filenameBuffer, 1000 ) )
112 						throw runtime_error ( "failed reading line from input file" );
113 
114 					std::string ts ( filenameBuffer, in.gcount() );
115 					if ( ts.size() > 4 ) // at least ".pdf" because just test if ts is empty doesn't work.
116 					{
117 						multiSource.push_back ( ts );
118 						std::cerr << "Appending "<< ts <<" to source" << endl;
119 					}
120 				}
121 				while ( !in.eof() );
122 				in.close();
123 				delete [] filenameBuffer;
124 			}
125 			std::cerr<< ++dbg <<std::endl;
126 
127 			if (multiSource.empty())
128 				throw runtime_error( "No recognized source given" );
129 
130 			for ( std::vector<std::string>::const_iterator ms = multiSource.begin(); ms != multiSource.end(); ++ms )
131 			{
132 				if ( ms == multiSource.begin() )
133 				{
134 // 					std::cerr << "First doc is "<< (*ms).c_str()   << endl;
135 					try{
136 						sourceDoc = new PdfMemDocument ( ( *ms ).c_str() );
137 					}
138 					catch(PdfError& e)
139 					{
140                         std::cerr << "Unable to create Document: " << PdfError::ErrorMessage( e.GetError() ) << std::endl;
141 						return;
142 					}
143 				}
144 				else
145 				{
146 					PdfMemDocument mdoc ( ( *ms ).c_str() );
147 // 			std::cerr << "Appending "<< mdoc.GetPageCount() << " page(s) of " << *ms  << endl;
148 					sourceDoc->InsertPages ( mdoc, 0, mdoc.GetPageCount() );
149 				}
150 			}
151 
152 			pcount = sourceDoc->GetPageCount();
153 // 	std::cerr << "Document has "<< pcount << " page(s) " << endl;
154 			if ( pcount > 0 ) // only here to avoid possible segfault, but PDF without page is not conform IIRC
155 			{
156                 PoDoFo::PdfPage* pFirstPage = sourceDoc->GetPage ( 0 );
157                 if ( NULL == pFirstPage ) // Fixes CVE-2019-9199 (issue #40)
158                 {
159                     PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, "First page (0) of source document not found" );
160                 }
161                 PoDoFo::PdfRect rect ( pFirstPage->GetMediaBox() );
162 				// keep in mind it’s just a hint since PDF can have different page sizes in a same doc
163 				sourceWidth =  rect.GetWidth() - rect.GetLeft();
164 				sourceHeight =  rect.GetHeight() - rect.GetBottom() ;
165 			}
166 		}
167 
addToSource(const std::string & source)168 		void PdfTranslator::addToSource ( const std::string & source )
169 		{
170 // 			std::cerr<<"PdfTranslator::addToSource "<< source<<std::endl;
171 			if ( !sourceDoc )
172 				return;
173 
174 			PdfMemDocument extraDoc ( source.c_str() );
175 			sourceDoc->InsertPages ( extraDoc, 0,  extraDoc.GetPageCount() );
176 			multiSource.push_back ( source );
177 
178 		}
179 
migrateResource(const PdfObject * obj)180 		PdfObject* PdfTranslator::migrateResource ( const PdfObject * obj )
181 		{
182 // 			std::cerr<<"PdfTranslator::migrateResource"<<std::endl;
183 			PdfObject *ret ( 0 );
184 
185 			if ( !obj )
186 				PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle, "migrateResource called"
187                                          " with NULL object" );
188 
189 			if ( obj->IsDictionary() )
190 			{
191 				if ( obj->Reference().IsIndirect() )
192 				{
193 					ret = targetDoc->GetObjects().CreateObject ( *obj );
194 				}
195 				else
196 				{
197 					ret = new PdfObject( *obj );
198 				}
199 				TKeyMap resmap = obj->GetDictionary().GetKeys();
200 				for ( TCIKeyMap itres = resmap.begin(); itres != resmap.end(); ++itres )
201 				{
202 					PdfObject *o = itres->second;
203 					std::pair<std::set<PdfObject*>::iterator,bool> res = setMigrationPending.insert( o );
204 					if (!res.second)
205 					{
206 						std::ostringstream oss;
207 						oss << "Cycle detected: Object with ref " << o->Reference().ToString()
208 							<< " is already pending migration to the target.\n";
209 						PdfError::LogMessage( eLogSeverity_Warning, oss.str().c_str() );
210 						continue;
211 					}
212 					PdfObject *migrated = migrateResource ( o );
213 					if (NULL != migrated)
214 					{
215 						ret->GetDictionary().AddKey ( itres->first, migrated );
216 						if ( !(migrated->Reference().IsIndirect()) )
217 						{
218 							delete migrated;
219 						}
220 					}
221 				}
222 
223 				if ( obj->HasStream() )
224 				{
225 					* ( ret->GetStream() ) = * ( obj->GetStream() );
226 				}
227 			}
228 			else if ( obj->IsArray() )
229 			{
230 				PdfArray carray ( obj->GetArray() );
231 				PdfArray narray;
232 				for ( unsigned int ci = 0; ci < carray.GetSize(); ++ci )
233 				{
234 					PdfObject *co ( migrateResource ( &carray[ci] ) );
235 					if ( NULL == co )
236 						continue;
237 					narray.push_back ( *co );
238 
239 					if ( !(co->Reference().IsIndirect()) )
240 					{
241 						delete co;
242 					}
243 				}
244 				if ( obj->Reference().IsIndirect() )
245 				{
246 					ret = targetDoc->GetObjects().CreateObject ( narray );
247 				}
248 				else
249 				{
250 					ret = new PdfObject( narray );
251 				}
252 			}
253 			else if ( obj->IsReference() )
254 			{
255 				if ( migrateMap.find ( obj->GetReference().ToString() ) != migrateMap.end() )
256 				{
257 					std::ostringstream oss;
258 					oss << "Referenced object " << obj->GetReference().ToString()
259 					    << " already migrated." << std::endl;
260 					PdfError::DebugMessage( oss.str().c_str() );
261 
262 					const PdfObject* const found = migrateMap[ obj->GetReference().ToString() ];
263 					return new PdfObject( found->Reference() );
264 				}
265 
266 				PdfObject *to_migrate = sourceDoc->GetObjects().GetObject ( obj->GetReference() );
267 
268 				std::pair<std::set<PdfObject*>::iterator, bool> res
269 						= setMigrationPending.insert( to_migrate );
270 				if (!res.second)
271 				{
272 					std::ostringstream oss;
273 					oss << "Cycle detected: Object with ref " << obj->GetReference().ToString()
274 						<< " is already pending migration to the target.\n";
275 					PdfError::LogMessage( eLogSeverity_Warning, oss.str().c_str() );
276 					return NULL; // skip this migration
277 				}
278 				PdfObject * o ( migrateResource ( to_migrate ) );
279 				if ( NULL != o )
280 					ret  = new PdfObject ( o->Reference() );
281 				else
282 					return NULL; // avoid going through rest of method
283 			}
284 			else
285 			{
286 				ret = new PdfObject ( *obj );//targetDoc->GetObjects().CreateObject(*obj);
287 			}
288 
289 			if ( obj->Reference().IsIndirect() )
290 			{
291 				migrateMap.insert ( std::pair<std::string, PdfObject*> ( obj->Reference().ToString(), ret ) );
292 			}
293 
294 			return ret;
295 		}
296 
getInheritedResources(PdfPage * page)297 		PdfObject* PdfTranslator::getInheritedResources ( PdfPage* page )
298 		{
299 // 			std::cerr<<"PdfTranslator::getInheritedResources"<<std::endl;
300 			PdfObject *res ( 0 );
301 			// mabri: resources are inherited as whole dict, not at all if the page has the dict
302 			// mabri: specified in PDF32000_2008.pdf section 7.7.3.4 Inheritance of Page Attributes
303 			// mabri: and in section 7.8.3 Resource Dictionaries
304 			const PdfObject *sourceRes = page->GetInheritedKey( PdfName ( "Resources" ) );
305 			if ( sourceRes )
306 			{
307 			    res = migrateResource( sourceRes );
308 			}
309 			return res;
310 		}
311 
setTarget(const std::string & target)312 		void PdfTranslator::setTarget ( const std::string & target )
313 		{
314 // 			std::cerr<<"PdfTranslator::setTarget "<<target<<std::endl;
315 			if ( !sourceDoc )
316 				throw std::logic_error ( "setTarget() called before setSource()" );
317 
318 			targetDoc = new PdfMemDocument;
319 			outFilePath  = target;
320 
321 			for ( int i = 0; i < pcount ; ++i )
322 			{
323 				PdfPage * page = sourceDoc->GetPage ( i );
324 				PdfMemoryOutputStream outMemStream ( 1 );
325 
326 				if (!page) // Fix issue #32
327                 {
328                     std::ostringstream oss;
329                     oss << "Page " << i << " (0-based) of " << pcount << " in source doc not found!";
330                     PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() );
331                 }
332                 PdfXObject *xobj = new PdfXObject ( page->GetMediaBox(), targetDoc );
333 				if ( page->GetContents()->HasStream() )
334 				{
335 					page->GetContents()->GetStream()->GetFilteredCopy ( &outMemStream );
336 				}
337 				else if ( page->GetContents()->IsArray() )
338 				{
339 					PdfArray carray ( page->GetContents()->GetArray() );
340 					for ( unsigned int ci = 0; ci < carray.GetSize(); ++ci )
341 					{
342 						if ( carray[ci].HasStream() )
343 						{
344 							carray[ci].GetStream()->GetFilteredCopy ( &outMemStream );
345 						}
346 						else if ( carray[ci].IsReference() )
347 						{
348 							PdfObject *co = sourceDoc->GetObjects().GetObject ( carray[ci].GetReference() );
349 
350 							while ( co != NULL )
351 							{
352 								if ( co->IsReference() )
353 								{
354 									co = sourceDoc->GetObjects().GetObject ( co->GetReference() );
355 								}
356 								else if ( co->HasStream() )
357 								{
358 									co->GetStream()->GetFilteredCopy ( &outMemStream );
359 									break;
360 								}
361 							}
362 
363 						}
364 
365 					}
366 				}
367 
368 				/// Its time to manage other keys of the page dictionary.
369 				std::vector<std::string> pageKeys;
370 				std::vector<std::string>::const_iterator itKey;
371 				pageKeys.push_back ( "Group" );
372 				for ( itKey = pageKeys.begin(); itKey != pageKeys.end(); ++itKey )
373 				{
374 					PoDoFo::PdfName keyname ( *itKey );
375 					if ( page->GetObject()->GetDictionary().HasKey ( keyname ) )
376 					{
377 						PdfObject* migObj = migrateResource ( page->GetObject()->GetDictionary().GetKey ( keyname ) );
378 						if ( NULL == migObj )
379 							continue;
380 						xobj->GetObject()->GetDictionary().AddKey ( keyname, migObj );
381 					}
382 				}
383 
384 				outMemStream.Close();
385 
386 				PdfMemoryInputStream inStream ( outMemStream.TakeBuffer(),outMemStream.GetLength() );
387 				xobj->GetContents()->GetStream()->Set ( &inStream );
388 
389 				resources[i+1] = getInheritedResources ( page );
390 				xobjects[i+1] = xobj;
391 				cropRect[i+1] = page->GetCropBox();
392 				bleedRect[i+1] = page->GetBleedBox();
393 				trimRect[i+1] = page->GetTrimBox();
394 				artRect[i+1] = page->GetArtBox();
395 
396 			}
397 
398 
399 			targetDoc->SetPdfVersion ( sourceDoc->GetPdfVersion() );
400 
401 			PdfInfo *sInfo ( sourceDoc->GetInfo() );
402 			PdfInfo *tInfo ( targetDoc->GetInfo() );
403 
404 			if ( sInfo->GetAuthor() != PdfString::StringNull )
405 				tInfo->SetAuthor ( sInfo->GetAuthor() );
406 			if ( sInfo->GetCreator() != PdfString::StringNull )
407 				tInfo->SetCreator ( sInfo->GetCreator() );
408 			if ( sInfo->GetSubject() != PdfString::StringNull )
409 				tInfo->SetSubject ( sInfo->GetSubject() );
410 			if ( sInfo->GetTitle() != PdfString::StringNull )
411 				tInfo->SetTitle ( sInfo->GetTitle() );
412 			if ( sInfo->GetKeywords() != PdfString::StringNull )
413 				tInfo->SetKeywords ( sInfo->GetKeywords() );
414 
415 			if ( sInfo->GetTrapped() != PdfName::KeyNull )
416 				tInfo->SetTrapped ( sInfo->GetTrapped() );
417 
418 
419 // 	PdfObject *scat( sourceDoc->GetCatalog() );
420 // 	PdfObject *tcat( targetDoc->GetCatalog() );
421 // 	TKeyMap catmap = scat->GetDictionary().GetKeys();
422 // 	for ( TCIKeyMap itc = catmap.begin(); itc != catmap.end(); ++itc )
423 // 	{
424 // 		if(tcat->GetDictionary().GetKey(itc->first) == 0)
425 // 		{
426 // 			PdfObject *o = itc->second;
427 // 			tcat->GetDictionary().AddKey (itc->first , migrateResource( o ) );
428 // 		}
429 // 	}
430 
431 // 	delete sourceDoc;
432 		}
433 
loadPlan(const std::string & planFile,PoDoFo::Impose::PlanReader loader)434 		void PdfTranslator::loadPlan ( const std::string & planFile , PoDoFo::Impose::PlanReader loader )
435 		{
436 // 			std::cerr<< "loadPlan" << planFile<<std::endl;
437 			SourceVars sv;
438 			sv.PageCount = pcount;
439 			sv.PageHeight = sourceHeight;
440 			sv.PageWidth = sourceWidth;
441 			planImposition = new ImpositionPlan ( sv );
442 			if ( loader == PoDoFo::Impose::Legacy )
443 			{
444 				PlanReader_Legacy ( planFile, planImposition );
445 			}
446 #if defined(PODOFO_HAVE_LUA)
447 			else if ( loader == PoDoFo::Impose::Lua )
448 			{
449 				PlanReader_Lua ( planFile, planImposition );
450 			}
451 #endif
452 
453 			if ( !planImposition->valid() )
454 				throw std::runtime_error ( "Unable to build a valid imposition plan" );
455 
456 			destWidth = planImposition->destWidth();
457 			destHeight = planImposition->destHeight();
458 			scaleFactor = planImposition->scale();
459 			boundingBox = planImposition->boundingBox();
460 // 	std::cerr <<"Plan completed "<< planImposition.size() <<endl;
461 
462 		}
463 
impose()464 		void PdfTranslator::impose()
465 		{
466 // 			std::cerr<<"PdfTranslator::impose"<<std::endl;
467 			if ( !targetDoc )
468 				throw std::invalid_argument ( "impose() called with empty target" );
469 
470 //			PdfObject trimbox;
471 //			PdfRect trim ( 0, 0, destWidth, destHeight );
472 //			trim.ToVariant ( trimbox );
473 			std::map<int, PdfRect>* bbIndex = NULL;
474 			if(boundingBox.size() > 0)
475 			{
476 				if(boundingBox.find("crop") != std::string::npos)
477 				{
478 					bbIndex = &cropRect;
479 				}
480 				else if(boundingBox.find("bleed") != std::string::npos)
481 				{
482 					bbIndex = &bleedRect;
483 				}
484 				else if(boundingBox.find("trim") != std::string::npos)
485 				{
486 					bbIndex = &trimRect;
487 				}
488 				else if(boundingBox.find("art") != std::string::npos)
489 				{
490 					bbIndex = &artRect;
491 				}
492 			}
493 
494 			typedef map<int, vector<PageRecord> > groups_t;
495 			groups_t groups;
496 			for ( unsigned int i = 0; i < planImposition->size(); ++i )
497 			{
498 				groups[ ( *planImposition ) [i].destPage].push_back ( ( *planImposition ) [i] );
499 			}
500 
501 			unsigned int lastPlate(0);
502 			groups_t::const_iterator  git = groups.begin();
503 			const groups_t::const_iterator gitEnd = groups.end();
504 			while ( git != gitEnd )
505 			{
506 				PdfPage * newpage = NULL;
507 				// Allow "holes" in dest. pages sequence.
508 				unsigned int curPlate(git->first);
509 				while(lastPlate != curPlate)
510 				{
511 					newpage = targetDoc->CreatePage ( PdfRect ( 0.0, 0.0, destWidth, destHeight ) );
512 					++lastPlate;
513 				}
514 // 		newpage->GetObject()->GetDictionary().AddKey ( PdfName ( "TrimBox" ), trimbox );
515 				PdfDictionary xdict;
516 
517 				ostringstream buffer;
518 				// Scale
519 				buffer << std::fixed << scaleFactor <<" 0 0 "<< scaleFactor <<" 0 0 cm\n";
520 
521 				for ( unsigned int i = 0; i < git->second.size(); ++i )
522 				{
523 					PageRecord curRecord ( git->second[i] );
524 // 					std::cerr<<curRecord.sourcePage<< " " << curRecord.destPage<<std::endl;
525 					if(curRecord.sourcePage <= pcount)
526 					{
527 						double cosR = cos ( curRecord.rotate  *  3.14159 / 180.0 );
528 						double sinR = sin ( curRecord.rotate  *  3.14159 / 180.0 );
529 						double tx = curRecord.transX ;
530 						double ty = curRecord.transY ;
531 
532 						int resourceIndex ( /*(curRecord.duplicateOf > 0) ? curRecord.duplicateOf : */curRecord.sourcePage );
533 						PdfXObject *xo = xobjects[resourceIndex];
534 						if(NULL != bbIndex)
535 						{
536 							PdfObject bb;
537 							// DominikS: Fix compilation using Visual Studio on Windows
538 							// mabri: ML post archive URL is https://sourceforge.net/p/podofo/mailman/message/24609746/
539 							// bbIndex->at(resourceIndex).ToVariant( bb );
540 							((*bbIndex)[resourceIndex]).ToVariant( bb );
541 							xo->GetObject()->GetDictionary().AddKey ( PdfName ( "BBox" ), bb );
542 						}
543 						ostringstream op;
544 						op << "OriginalPage" << resourceIndex;
545 						xdict.AddKey ( PdfName ( op.str() ) , xo->GetObjectReference() );
546 
547 						if ( resources[resourceIndex] )
548 						{
549 							if ( resources[resourceIndex]->IsDictionary() )
550 							{
551 								TKeyMap resmap = resources[resourceIndex]->GetDictionary().GetKeys();
552 								TCIKeyMap itres;
553 								for ( itres = resmap.begin(); itres != resmap.end(); ++itres )
554 								{
555 									xo->GetResources()->GetDictionary().AddKey ( itres->first, itres->second );
556 								}
557 							}
558 							else if ( resources[resourceIndex]->IsReference() )
559 							{
560 								xo->GetObject()->GetDictionary().AddKey ( PdfName ( "Resources" ), resources[resourceIndex] );
561 							}
562 							else
563 								std::cerr<<"ERROR Unknown type resource "<<resources[resourceIndex]->GetDataTypeString()  <<  std::endl;
564 
565 						}
566 						// Very primitive but it makes it easy to track down imposition plan into content stream.
567 						buffer << "q\n";
568 						buffer << std::fixed << cosR <<" "<< sinR<<" "<<-sinR<<" "<< cosR<<" "<< tx <<" "<<  ty << " cm\n";
569 						buffer << "/OriginalPage" << resourceIndex << " Do\n";
570 						buffer << "Q\n";
571 					}
572 				}
573 				if (!newpage)
574 					PODOFO_RAISE_ERROR (ePdfError_ValueOutOfRange);
575 				string bufStr = buffer.str();
576 				newpage->GetContentsForAppending()->GetStream()->Set ( bufStr.data(), bufStr.size() );
577 				newpage->GetResources()->GetDictionary().AddKey ( PdfName ( "XObject" ), xdict );
578 				++git;
579 			}
580 
581 			targetDoc->Write ( outFilePath.c_str() );
582 
583 			// The following is necessary to avoid line 195 being detected as allocation having a memory leak
584 			// without changing other files than this one (thorough leak prevention shall be applied later).
585 			for (std::map<int, PdfObject*>::iterator it = resources.begin(); it != resources.end(); it++)
586 			{
587 				delete (*it).second;
588 			}
589 			resources.clear();
590 		}
591 
592 
593 	};
594 }; // end of namespace
595