1 //========================================================================
2 //
3 // pdfseparate.cc
4 //
5 // This file is licensed under the GPLv2 or later
6 //
7 // Copyright (C) 2011, 2012, 2015 Thomas Freitag <Thomas.Freitag@alfa.de>
8 // Copyright (C) 2012-2014, 2017, 2018, 2021 Albert Astals Cid <aacid@kde.org>
9 // Copyright (C) 2013, 2016 Pino Toscano <pino@kde.org>
10 // Copyright (C) 2013 Daniel Kahn Gillmor <dkg@fifthhorseman.net>
11 // Copyright (C) 2013 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
12 // Copyright (C) 2017 Léonard Michelet <leonard.michelet@smile.fr>
13 // Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
14 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
15 // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
16 //
17 //========================================================================
#include "config.h"
#include <poppler-config.h>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include "parseargs.h"
#include "goo/GooString.h"
#include "PDFDoc.h"
#include "ErrorCodes.h"
#include "GlobalParams.h"
#include "Win32Console.h"
31 
32 static int firstPage = 0;
33 static int lastPage = 0;
34 static bool printVersion = false;
35 static bool printHelp = false;
36 
37 static const ArgDesc argDesc[] = { { "-f", argInt, &firstPage, 0, "first page to extract" },
38                                    { "-l", argInt, &lastPage, 0, "last page to extract" },
39                                    { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
40                                    { "-h", argFlag, &printHelp, 0, "print usage information" },
41                                    { "-help", argFlag, &printHelp, 0, "print usage information" },
42                                    { "--help", argFlag, &printHelp, 0, "print usage information" },
43                                    { "-?", argFlag, &printHelp, 0, "print usage information" },
44                                    {} };
45 
extractPages(const char * srcFileName,const char * destFileName)46 static bool extractPages(const char *srcFileName, const char *destFileName)
47 {
48     char pathName[4096];
49     GooString *gfileName = new GooString(srcFileName);
50     PDFDoc *doc = new PDFDoc(gfileName, nullptr, nullptr, nullptr);
51 
52     if (!doc->isOk()) {
53         error(errSyntaxError, -1, "Could not extract page(s) from damaged file ('{0:s}')", srcFileName);
54         delete doc;
55         return false;
56     }
57 
58     // destFileName can have multiple %% and one %d
59     // We use auxDestFileName to replace all the valid % appearances
60     // by 'A' (random char that is not %), if at the end of replacing
61     // any of the valid appearances there is still any % around, the
62     // pattern is wrong
63     if (firstPage == 0 && lastPage == 0) {
64         firstPage = 1;
65         lastPage = doc->getNumPages();
66     }
67     if (lastPage == 0)
68         lastPage = doc->getNumPages();
69     if (firstPage == 0)
70         firstPage = 1;
71     if (lastPage < firstPage) {
72         error(errCommandLine, -1, "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage);
73         delete doc;
74         return false;
75     }
76     bool foundmatch = false;
77     char *auxDestFileName = strdup(destFileName);
78     char *p = strstr(auxDestFileName, "%d");
79     if (p != nullptr) {
80         foundmatch = true;
81         *p = 'A';
82     } else {
83         char pattern[6];
84         for (int i = 2; i < 10; i++) {
85             sprintf(pattern, "%%0%dd", i);
86             p = strstr(auxDestFileName, pattern);
87             if (p != nullptr) {
88                 foundmatch = true;
89                 *p = 'A';
90                 break;
91             }
92         }
93     }
94     if (!foundmatch && firstPage != lastPage) {
95         error(errSyntaxError, -1, "'{0:s}' must contain '%d' (or any variant respecting printf format) if more than one page should be extracted, in order to print the page number", destFileName);
96         free(auxDestFileName);
97         delete doc;
98         return false;
99     }
100 
101     // at this point auxDestFileName can only contain %%
102     p = strstr(auxDestFileName, "%%");
103     while (p != nullptr) {
104         *p = 'A';
105         *(p + 1) = 'A';
106         p = strstr(p, "%%");
107     }
108 
109     // at this point any other % is wrong
110     p = strstr(auxDestFileName, "%");
111     if (p != nullptr) {
112         error(errSyntaxError, -1, "'{0:s}' can only contain one '%d' pattern", destFileName);
113         free(auxDestFileName);
114         delete doc;
115         return false;
116     }
117     free(auxDestFileName);
118 
119     for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
120         snprintf(pathName, sizeof(pathName) - 1, destFileName, pageNo);
121         GooString *gpageName = new GooString(pathName);
122         PDFDoc *pagedoc = new PDFDoc(new GooString(srcFileName), nullptr, nullptr, nullptr);
123         int errCode = pagedoc->savePageAs(gpageName, pageNo);
124         if (errCode != errNone) {
125             delete gpageName;
126             delete doc;
127             delete pagedoc;
128             return false;
129         }
130         delete pagedoc;
131         delete gpageName;
132     }
133     delete doc;
134     return true;
135 }
136 
137 static constexpr int kOtherError = 99;
138 
main(int argc,char * argv[])139 int main(int argc, char *argv[])
140 {
141     // parse args
142     Win32Console win32console(&argc, &argv);
143     const bool parseOK = parseArgs(argDesc, &argc, argv);
144     if (!parseOK || argc != 3 || printVersion || printHelp) {
145         fprintf(stderr, "pdfseparate version %s\n", PACKAGE_VERSION);
146         fprintf(stderr, "%s\n", popplerCopyright);
147         fprintf(stderr, "%s\n", xpdfCopyright);
148         if (!printVersion) {
149             printUsage("pdfseparate", "<PDF-sourcefile> <PDF-pattern-destfile>", argDesc);
150         }
151         if (printVersion || printHelp) {
152             return 0;
153         } else {
154             return kOtherError;
155         }
156     }
157     globalParams = std::make_unique<GlobalParams>();
158     const bool extractOK = extractPages(argv[1], argv[2]);
159     return extractOK ? 0 : kOtherError;
160 }
161