1 /*************************************************************************** 2 * Copyright (C) 2007 by Pierre Marchand * 3 * pierre@moulindetouvois.com * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 2 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write to the * 17 * Free Software Foundation, Inc., * 18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 19 ***************************************************************************/ 20 #include "pdftranslator.h" 21 // #include "charpainter.h" 22 #include "planreader_legacy.h" 23 24 #ifdef PODOFO_HAVE_LUA 25 #include "planreader_lua.h" 26 #endif 27 28 #include <fstream> 29 #include <stdexcept> 30 #include <algorithm> 31 #include <cmath> 32 #include <istream> 33 #include <ostream> 34 #include <cstdlib> 35 using std::ostringstream; 36 using std::map; 37 using std::vector; 38 using std::string; 39 using std::ifstream; 40 using std::istream; 41 using std::ostream; 42 using std::endl; 43 using std::runtime_error; 44 45 #include <iostream> //XXX 46 namespace PoDoFo 47 { 48 namespace Impose 49 { 50 51 #define MAX_SOURCE_PAGES 5000 52 #define MAX_RECORD_SIZE 2048 53 54 55 checkIsPDF(std::string path)56 bool PdfTranslator::checkIsPDF ( std::string path ) 57 { 58 ifstream in ( path.c_str(), ifstream::in ); 59 if ( !in.good() ) 60 throw runtime_error ( "setSource() failed to open input file" ); 61 62 const int magicBufferLen = 5; 63 char magicBuffer[magicBufferLen ]; 64 in.read ( magicBuffer, magicBufferLen ); 65 std::string magic ( magicBuffer , magicBufferLen ); 66 67 in.close(); 68 if ( magic.find ( "%PDF" ) < 5 ) 69 return true; 70 // throw runtime_error("First bytes of the file tend to indicate it is not a PDF file"); 71 return false; 72 } 73 PdfTranslator()74 PdfTranslator::PdfTranslator ( ) 75 { 76 std::cerr<<"PdfTranslator::PdfTranslator"<<std::endl; 77 sourceDoc = NULL; 78 targetDoc = NULL; 79 planImposition = NULL; 80 duplicate = 0; 81 extraSpace = 0; 82 scaleFactor = 1.0; 83 pcount = 0; 84 sourceWidth = 0.0; 85 sourceHeight = 0.0; 86 destWidth = 0.0; 87 destHeight = 0.0; 88 } 89 setSource(const std::string & source)90 void PdfTranslator::setSource ( const std::string & source ) 91 { 92 int dbg(0); 93 // std::cerr<<"PdfTranslator::setSource "<<source<<std::endl; 94 std::cerr<< ++dbg <<std::endl; 95 if ( checkIsPDF ( source ) ) 96 { 97 // std::cerr << "Appending "<<source<<" to source" << endl; 98 multiSource.push_back ( source ); 99 } 100 else 101 { 102 103 ifstream in ( source.c_str(), ifstream::in ); 104 if ( !in.good() ) 105 throw runtime_error ( "setSource() failed to open input file" ); 106 107 108 char *filenameBuffer = new char[1000]; 109 do 110 { 111 if ( !in.getline ( filenameBuffer, 1000 ) ) 112 throw runtime_error ( "failed reading line from input file" ); 113 114 std::string ts ( filenameBuffer, in.gcount() ); 115 if ( ts.size() > 4 ) // at least ".pdf" because just test if ts is empty doesn't work. 116 { 117 multiSource.push_back ( ts ); 118 std::cerr << "Appending "<< ts <<" to source" << endl; 119 } 120 } 121 while ( !in.eof() ); 122 in.close(); 123 delete [] filenameBuffer; 124 } 125 std::cerr<< ++dbg <<std::endl; 126 127 if (multiSource.empty()) 128 throw runtime_error( "No recognized source given" ); 129 130 for ( std::vector<std::string>::const_iterator ms = multiSource.begin(); ms != multiSource.end(); ++ms ) 131 { 132 if ( ms == multiSource.begin() ) 133 { 134 // std::cerr << "First doc is "<< (*ms).c_str() << endl; 135 try{ 136 sourceDoc = new PdfMemDocument ( ( *ms ).c_str() ); 137 } 138 catch(PdfError& e) 139 { 140 std::cerr << "Unable to create Document: " << PdfError::ErrorMessage( e.GetError() ) << std::endl; 141 return; 142 } 143 } 144 else 145 { 146 PdfMemDocument mdoc ( ( *ms ).c_str() ); 147 // std::cerr << "Appending "<< mdoc.GetPageCount() << " page(s) of " << *ms << endl; 148 sourceDoc->InsertPages ( mdoc, 0, mdoc.GetPageCount() ); 149 } 150 } 151 152 pcount = sourceDoc->GetPageCount(); 153 // std::cerr << "Document has "<< pcount << " page(s) " << endl; 154 if ( pcount > 0 ) // only here to avoid possible segfault, but PDF without page is not conform IIRC 155 { 156 PoDoFo::PdfPage* pFirstPage = sourceDoc->GetPage ( 0 ); 157 if ( NULL == pFirstPage ) // Fixes CVE-2019-9199 (issue #40) 158 { 159 PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, "First page (0) of source document not found" ); 160 } 161 PoDoFo::PdfRect rect ( pFirstPage->GetMediaBox() ); 162 // keep in mind it’s just a hint since PDF can have different page sizes in a same doc 163 sourceWidth = rect.GetWidth() - rect.GetLeft(); 164 sourceHeight = rect.GetHeight() - rect.GetBottom() ; 165 } 166 } 167 addToSource(const std::string & source)168 void PdfTranslator::addToSource ( const std::string & source ) 169 { 170 // std::cerr<<"PdfTranslator::addToSource "<< source<<std::endl; 171 if ( !sourceDoc ) 172 return; 173 174 PdfMemDocument extraDoc ( source.c_str() ); 175 sourceDoc->InsertPages ( extraDoc, 0, extraDoc.GetPageCount() ); 176 multiSource.push_back ( source ); 177 178 } 179 migrateResource(const PdfObject * obj)180 PdfObject* PdfTranslator::migrateResource ( const PdfObject * obj ) 181 { 182 // std::cerr<<"PdfTranslator::migrateResource"<<std::endl; 183 PdfObject *ret ( 0 ); 184 185 if ( !obj ) 186 PODOFO_RAISE_ERROR_INFO( ePdfError_InvalidHandle, "migrateResource called" 187 " with NULL object" ); 188 189 if ( obj->IsDictionary() ) 190 { 191 if ( obj->Reference().IsIndirect() ) 192 { 193 ret = targetDoc->GetObjects().CreateObject ( *obj ); 194 } 195 else 196 { 197 ret = new PdfObject( *obj ); 198 } 199 TKeyMap resmap = obj->GetDictionary().GetKeys(); 200 for ( TCIKeyMap itres = resmap.begin(); itres != resmap.end(); ++itres ) 201 { 202 PdfObject *o = itres->second; 203 std::pair<std::set<PdfObject*>::iterator,bool> res = setMigrationPending.insert( o ); 204 if (!res.second) 205 { 206 std::ostringstream oss; 207 oss << "Cycle detected: Object with ref " << o->Reference().ToString() 208 << " is already pending migration to the target.\n"; 209 PdfError::LogMessage( eLogSeverity_Warning, oss.str().c_str() ); 210 continue; 211 } 212 PdfObject *migrated = migrateResource ( o ); 213 if (NULL != migrated) 214 { 215 ret->GetDictionary().AddKey ( itres->first, migrated ); 216 if ( !(migrated->Reference().IsIndirect()) ) 217 { 218 delete migrated; 219 } 220 } 221 } 222 223 if ( obj->HasStream() ) 224 { 225 * ( ret->GetStream() ) = * ( obj->GetStream() ); 226 } 227 } 228 else if ( obj->IsArray() ) 229 { 230 PdfArray carray ( obj->GetArray() ); 231 PdfArray narray; 232 for ( unsigned int ci = 0; ci < carray.GetSize(); ++ci ) 233 { 234 PdfObject *co ( migrateResource ( &carray[ci] ) ); 235 if ( NULL == co ) 236 continue; 237 narray.push_back ( *co ); 238 239 if ( !(co->Reference().IsIndirect()) ) 240 { 241 delete co; 242 } 243 } 244 if ( obj->Reference().IsIndirect() ) 245 { 246 ret = targetDoc->GetObjects().CreateObject ( narray ); 247 } 248 else 249 { 250 ret = new PdfObject( narray ); 251 } 252 } 253 else if ( obj->IsReference() ) 254 { 255 if ( migrateMap.find ( obj->GetReference().ToString() ) != migrateMap.end() ) 256 { 257 std::ostringstream oss; 258 oss << "Referenced object " << obj->GetReference().ToString() 259 << " already migrated." << std::endl; 260 PdfError::DebugMessage( oss.str().c_str() ); 261 262 const PdfObject* const found = migrateMap[ obj->GetReference().ToString() ]; 263 return new PdfObject( found->Reference() ); 264 } 265 266 PdfObject *to_migrate = sourceDoc->GetObjects().GetObject ( obj->GetReference() ); 267 268 std::pair<std::set<PdfObject*>::iterator, bool> res 269 = setMigrationPending.insert( to_migrate ); 270 if (!res.second) 271 { 272 std::ostringstream oss; 273 oss << "Cycle detected: Object with ref " << obj->GetReference().ToString() 274 << " is already pending migration to the target.\n"; 275 PdfError::LogMessage( eLogSeverity_Warning, oss.str().c_str() ); 276 return NULL; // skip this migration 277 } 278 PdfObject * o ( migrateResource ( to_migrate ) ); 279 if ( NULL != o ) 280 ret = new PdfObject ( o->Reference() ); 281 else 282 return NULL; // avoid going through rest of method 283 } 284 else 285 { 286 ret = new PdfObject ( *obj );//targetDoc->GetObjects().CreateObject(*obj); 287 } 288 289 if ( obj->Reference().IsIndirect() ) 290 { 291 migrateMap.insert ( std::pair<std::string, PdfObject*> ( obj->Reference().ToString(), ret ) ); 292 } 293 294 return ret; 295 } 296 getInheritedResources(PdfPage * page)297 PdfObject* PdfTranslator::getInheritedResources ( PdfPage* page ) 298 { 299 // std::cerr<<"PdfTranslator::getInheritedResources"<<std::endl; 300 PdfObject *res ( 0 ); 301 // mabri: resources are inherited as whole dict, not at all if the page has the dict 302 // mabri: specified in PDF32000_2008.pdf section 7.7.3.4 Inheritance of Page Attributes 303 // mabri: and in section 7.8.3 Resource Dictionaries 304 const PdfObject *sourceRes = page->GetInheritedKey( PdfName ( "Resources" ) ); 305 if ( sourceRes ) 306 { 307 res = migrateResource( sourceRes ); 308 } 309 return res; 310 } 311 setTarget(const std::string & target)312 void PdfTranslator::setTarget ( const std::string & target ) 313 { 314 // std::cerr<<"PdfTranslator::setTarget "<<target<<std::endl; 315 if ( !sourceDoc ) 316 throw std::logic_error ( "setTarget() called before setSource()" ); 317 318 targetDoc = new PdfMemDocument; 319 outFilePath = target; 320 321 for ( int i = 0; i < pcount ; ++i ) 322 { 323 PdfPage * page = sourceDoc->GetPage ( i ); 324 PdfMemoryOutputStream outMemStream ( 1 ); 325 326 if (!page) // Fix issue #32 327 { 328 std::ostringstream oss; 329 oss << "Page " << i << " (0-based) of " << pcount << " in source doc not found!"; 330 PODOFO_RAISE_ERROR_INFO( ePdfError_PageNotFound, oss.str() ); 331 } 332 PdfXObject *xobj = new PdfXObject ( page->GetMediaBox(), targetDoc ); 333 if ( page->GetContents()->HasStream() ) 334 { 335 page->GetContents()->GetStream()->GetFilteredCopy ( &outMemStream ); 336 } 337 else if ( page->GetContents()->IsArray() ) 338 { 339 PdfArray carray ( page->GetContents()->GetArray() ); 340 for ( unsigned int ci = 0; ci < carray.GetSize(); ++ci ) 341 { 342 if ( carray[ci].HasStream() ) 343 { 344 carray[ci].GetStream()->GetFilteredCopy ( &outMemStream ); 345 } 346 else if ( carray[ci].IsReference() ) 347 { 348 PdfObject *co = sourceDoc->GetObjects().GetObject ( carray[ci].GetReference() ); 349 350 while ( co != NULL ) 351 { 352 if ( co->IsReference() ) 353 { 354 co = sourceDoc->GetObjects().GetObject ( co->GetReference() ); 355 } 356 else if ( co->HasStream() ) 357 { 358 co->GetStream()->GetFilteredCopy ( &outMemStream ); 359 break; 360 } 361 } 362 363 } 364 365 } 366 } 367 368 /// Its time to manage other keys of the page dictionary. 369 std::vector<std::string> pageKeys; 370 std::vector<std::string>::const_iterator itKey; 371 pageKeys.push_back ( "Group" ); 372 for ( itKey = pageKeys.begin(); itKey != pageKeys.end(); ++itKey ) 373 { 374 PoDoFo::PdfName keyname ( *itKey ); 375 if ( page->GetObject()->GetDictionary().HasKey ( keyname ) ) 376 { 377 PdfObject* migObj = migrateResource ( page->GetObject()->GetDictionary().GetKey ( keyname ) ); 378 if ( NULL == migObj ) 379 continue; 380 xobj->GetObject()->GetDictionary().AddKey ( keyname, migObj ); 381 } 382 } 383 384 outMemStream.Close(); 385 386 PdfMemoryInputStream inStream ( outMemStream.TakeBuffer(),outMemStream.GetLength() ); 387 xobj->GetContents()->GetStream()->Set ( &inStream ); 388 389 resources[i+1] = getInheritedResources ( page ); 390 xobjects[i+1] = xobj; 391 cropRect[i+1] = page->GetCropBox(); 392 bleedRect[i+1] = page->GetBleedBox(); 393 trimRect[i+1] = page->GetTrimBox(); 394 artRect[i+1] = page->GetArtBox(); 395 396 } 397 398 399 targetDoc->SetPdfVersion ( sourceDoc->GetPdfVersion() ); 400 401 PdfInfo *sInfo ( sourceDoc->GetInfo() ); 402 PdfInfo *tInfo ( targetDoc->GetInfo() ); 403 404 if ( sInfo->GetAuthor() != PdfString::StringNull ) 405 tInfo->SetAuthor ( sInfo->GetAuthor() ); 406 if ( sInfo->GetCreator() != PdfString::StringNull ) 407 tInfo->SetCreator ( sInfo->GetCreator() ); 408 if ( sInfo->GetSubject() != PdfString::StringNull ) 409 tInfo->SetSubject ( sInfo->GetSubject() ); 410 if ( sInfo->GetTitle() != PdfString::StringNull ) 411 tInfo->SetTitle ( sInfo->GetTitle() ); 412 if ( sInfo->GetKeywords() != PdfString::StringNull ) 413 tInfo->SetKeywords ( sInfo->GetKeywords() ); 414 415 if ( sInfo->GetTrapped() != PdfName::KeyNull ) 416 tInfo->SetTrapped ( sInfo->GetTrapped() ); 417 418 419 // PdfObject *scat( sourceDoc->GetCatalog() ); 420 // PdfObject *tcat( targetDoc->GetCatalog() ); 421 // TKeyMap catmap = scat->GetDictionary().GetKeys(); 422 // for ( TCIKeyMap itc = catmap.begin(); itc != catmap.end(); ++itc ) 423 // { 424 // if(tcat->GetDictionary().GetKey(itc->first) == 0) 425 // { 426 // PdfObject *o = itc->second; 427 // tcat->GetDictionary().AddKey (itc->first , migrateResource( o ) ); 428 // } 429 // } 430 431 // delete sourceDoc; 432 } 433 loadPlan(const std::string & planFile,PoDoFo::Impose::PlanReader loader)434 void PdfTranslator::loadPlan ( const std::string & planFile , PoDoFo::Impose::PlanReader loader ) 435 { 436 // std::cerr<< "loadPlan" << planFile<<std::endl; 437 SourceVars sv; 438 sv.PageCount = pcount; 439 sv.PageHeight = sourceHeight; 440 sv.PageWidth = sourceWidth; 441 planImposition = new ImpositionPlan ( sv ); 442 if ( loader == PoDoFo::Impose::Legacy ) 443 { 444 PlanReader_Legacy ( planFile, planImposition ); 445 } 446 #if defined(PODOFO_HAVE_LUA) 447 else if ( loader == PoDoFo::Impose::Lua ) 448 { 449 PlanReader_Lua ( planFile, planImposition ); 450 } 451 #endif 452 453 if ( !planImposition->valid() ) 454 throw std::runtime_error ( "Unable to build a valid imposition plan" ); 455 456 destWidth = planImposition->destWidth(); 457 destHeight = planImposition->destHeight(); 458 scaleFactor = planImposition->scale(); 459 boundingBox = planImposition->boundingBox(); 460 // std::cerr <<"Plan completed "<< planImposition.size() <<endl; 461 462 } 463 impose()464 void PdfTranslator::impose() 465 { 466 // std::cerr<<"PdfTranslator::impose"<<std::endl; 467 if ( !targetDoc ) 468 throw std::invalid_argument ( "impose() called with empty target" ); 469 470 // PdfObject trimbox; 471 // PdfRect trim ( 0, 0, destWidth, destHeight ); 472 // trim.ToVariant ( trimbox ); 473 std::map<int, PdfRect>* bbIndex = NULL; 474 if(boundingBox.size() > 0) 475 { 476 if(boundingBox.find("crop") != std::string::npos) 477 { 478 bbIndex = &cropRect; 479 } 480 else if(boundingBox.find("bleed") != std::string::npos) 481 { 482 bbIndex = &bleedRect; 483 } 484 else if(boundingBox.find("trim") != std::string::npos) 485 { 486 bbIndex = &trimRect; 487 } 488 else if(boundingBox.find("art") != std::string::npos) 489 { 490 bbIndex = &artRect; 491 } 492 } 493 494 typedef map<int, vector<PageRecord> > groups_t; 495 groups_t groups; 496 for ( unsigned int i = 0; i < planImposition->size(); ++i ) 497 { 498 groups[ ( *planImposition ) [i].destPage].push_back ( ( *planImposition ) [i] ); 499 } 500 501 unsigned int lastPlate(0); 502 groups_t::const_iterator git = groups.begin(); 503 const groups_t::const_iterator gitEnd = groups.end(); 504 while ( git != gitEnd ) 505 { 506 PdfPage * newpage = NULL; 507 // Allow "holes" in dest. pages sequence. 508 unsigned int curPlate(git->first); 509 while(lastPlate != curPlate) 510 { 511 newpage = targetDoc->CreatePage ( PdfRect ( 0.0, 0.0, destWidth, destHeight ) ); 512 ++lastPlate; 513 } 514 // newpage->GetObject()->GetDictionary().AddKey ( PdfName ( "TrimBox" ), trimbox ); 515 PdfDictionary xdict; 516 517 ostringstream buffer; 518 // Scale 519 buffer << std::fixed << scaleFactor <<" 0 0 "<< scaleFactor <<" 0 0 cm\n"; 520 521 for ( unsigned int i = 0; i < git->second.size(); ++i ) 522 { 523 PageRecord curRecord ( git->second[i] ); 524 // std::cerr<<curRecord.sourcePage<< " " << curRecord.destPage<<std::endl; 525 if(curRecord.sourcePage <= pcount) 526 { 527 double cosR = cos ( curRecord.rotate * 3.14159 / 180.0 ); 528 double sinR = sin ( curRecord.rotate * 3.14159 / 180.0 ); 529 double tx = curRecord.transX ; 530 double ty = curRecord.transY ; 531 532 int resourceIndex ( /*(curRecord.duplicateOf > 0) ? curRecord.duplicateOf : */curRecord.sourcePage ); 533 PdfXObject *xo = xobjects[resourceIndex]; 534 if(NULL != bbIndex) 535 { 536 PdfObject bb; 537 // DominikS: Fix compilation using Visual Studio on Windows 538 // mabri: ML post archive URL is https://sourceforge.net/p/podofo/mailman/message/24609746/ 539 // bbIndex->at(resourceIndex).ToVariant( bb ); 540 ((*bbIndex)[resourceIndex]).ToVariant( bb ); 541 xo->GetObject()->GetDictionary().AddKey ( PdfName ( "BBox" ), bb ); 542 } 543 ostringstream op; 544 op << "OriginalPage" << resourceIndex; 545 xdict.AddKey ( PdfName ( op.str() ) , xo->GetObjectReference() ); 546 547 if ( resources[resourceIndex] ) 548 { 549 if ( resources[resourceIndex]->IsDictionary() ) 550 { 551 TKeyMap resmap = resources[resourceIndex]->GetDictionary().GetKeys(); 552 TCIKeyMap itres; 553 for ( itres = resmap.begin(); itres != resmap.end(); ++itres ) 554 { 555 xo->GetResources()->GetDictionary().AddKey ( itres->first, itres->second ); 556 } 557 } 558 else if ( resources[resourceIndex]->IsReference() ) 559 { 560 xo->GetObject()->GetDictionary().AddKey ( PdfName ( "Resources" ), resources[resourceIndex] ); 561 } 562 else 563 std::cerr<<"ERROR Unknown type resource "<<resources[resourceIndex]->GetDataTypeString() << std::endl; 564 565 } 566 // Very primitive but it makes it easy to track down imposition plan into content stream. 567 buffer << "q\n"; 568 buffer << std::fixed << cosR <<" "<< sinR<<" "<<-sinR<<" "<< cosR<<" "<< tx <<" "<< ty << " cm\n"; 569 buffer << "/OriginalPage" << resourceIndex << " Do\n"; 570 buffer << "Q\n"; 571 } 572 } 573 if (!newpage) 574 PODOFO_RAISE_ERROR (ePdfError_ValueOutOfRange); 575 string bufStr = buffer.str(); 576 newpage->GetContentsForAppending()->GetStream()->Set ( bufStr.data(), bufStr.size() ); 577 newpage->GetResources()->GetDictionary().AddKey ( PdfName ( "XObject" ), xdict ); 578 ++git; 579 } 580 581 targetDoc->Write ( outFilePath.c_str() ); 582 583 // The following is necessary to avoid line 195 being detected as allocation having a memory leak 584 // without changing other files than this one (thorough leak prevention shall be applied later). 585 for (std::map<int, PdfObject*>::iterator it = resources.begin(); it != resources.end(); it++) 586 { 587 delete (*it).second; 588 } 589 resources.clear(); 590 } 591 592 593 }; 594 }; // end of namespace 595