1 //========================================================================
2 //
3 // pdfdetach.cc
4 //
5 // Copyright 2010 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
17 // Copyright (C) 2013 Yury G. Kudryashov <urkud.urkud@gmail.com>
18 // Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
19 // Copyright (C) 2018, 2020 Albert Astals Cid <aacid@kde.org>
20 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21 // Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de>
22 // Copyright (C) 2020 <r.coeffier@bee-buzziness.com>
23 //
24 // To see a description of the changes please see the Changelog file that
25 // came with your tarball or type make ChangeLog if you are building from git
26 //
27 //========================================================================
28 
29 #include "config.h"
30 #include <poppler-config.h>
31 #include <cstdio>
32 #include "goo/gmem.h"
33 #include "parseargs.h"
34 #include "Annot.h"
35 #include "GlobalParams.h"
36 #include "Page.h"
37 #include "PDFDoc.h"
38 #include "PDFDocFactory.h"
39 #include "FileSpec.h"
40 #include "CharTypes.h"
41 #include "Catalog.h"
42 #include "UnicodeMap.h"
43 #include "PDFDocEncoding.h"
44 #include "Error.h"
45 #include "Win32Console.h"
46 
// Storage for the command-line options. Every variable below is referenced
// from the argDesc table and read back by main() after argument parsing.

// Operating mode: main() requires exactly one of these four to be selected.
static bool doList{ false };
static int saveNum{ 0 }; // 1-based attachment number; 0 means "not given"
static char saveFile[128]{}; // attachment name; empty means "not given"
static bool saveAll{ false };

// Output location and text encoding; an empty string means "not given".
static char savePath[1024]{};
static char textEncName[128]{};

// Passwords. A leading '\001' byte is the "not supplied" sentinel that main()
// tests for, which keeps an explicitly empty password distinguishable.
static char ownerPassword[33]{ '\001' };
static char userPassword[33]{ '\001' };

// Informational switches.
static bool printVersion{ false };
static bool printHelp{ false };
57 
58 static const ArgDesc argDesc[] = { { "-list", argFlag, &doList, 0, "list all embedded files" },
59                                    { "-save", argInt, &saveNum, 0, "save the specified embedded file (file number)" },
60                                    { "-savefile", argString, &saveFile, sizeof(saveFile), "save the specified embedded file (file name)" },
61                                    { "-saveall", argFlag, &saveAll, 0, "save all embedded files" },
62                                    { "-o", argString, savePath, sizeof(savePath), "file name for the saved embedded file" },
63                                    { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
64                                    { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
65                                    { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
66                                    { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
67                                    { "-h", argFlag, &printHelp, 0, "print usage information" },
68                                    { "-help", argFlag, &printHelp, 0, "print usage information" },
69                                    { "--help", argFlag, &printHelp, 0, "print usage information" },
70                                    { "-?", argFlag, &printHelp, 0, "print usage information" },
71                                    {} };
72 
main(int argc,char * argv[])73 int main(int argc, char *argv[])
74 {
75     std::unique_ptr<PDFDoc> doc;
76     GooString *fileName;
77     const UnicodeMap *uMap;
78     GooString *ownerPW, *userPW;
79     char uBuf[8];
80     char path[1024];
81     char *p;
82     bool ok;
83     bool hasSaveFile;
84     int exitCode;
85     std::vector<FileSpec *> embeddedFiles;
86     int nFiles, nPages, n, i, j;
87     FileSpec *fileSpec;
88     Page *page;
89     Annots *annots;
90     Annot *annot;
91     const GooString *s1;
92     Unicode u;
93     bool isUnicode;
94 
95     Win32Console win32Console(&argc, &argv);
96     exitCode = 99;
97 
98     // parse args
99     ok = parseArgs(argDesc, &argc, argv);
100     hasSaveFile = strlen(saveFile) > 0;
101     if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) {
102         ok = false;
103     }
104     if (!ok || argc != 2 || printVersion || printHelp) {
105         fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
106         fprintf(stderr, "%s\n", popplerCopyright);
107         fprintf(stderr, "%s\n", xpdfCopyright);
108         if (!printVersion) {
109             printUsage("pdfdetach", "<PDF-file>", argDesc);
110         }
111         goto err0;
112     }
113     fileName = new GooString(argv[1]);
114 
115     // read config file
116     globalParams = std::make_unique<GlobalParams>();
117     if (textEncName[0]) {
118         globalParams->setTextEncoding(textEncName);
119     }
120 
121     // get mapping to output encoding
122     if (!(uMap = globalParams->getTextEncoding())) {
123         error(errConfig, -1, "Couldn't get text encoding");
124         delete fileName;
125         goto err0;
126     }
127 
128     // open PDF file
129     if (ownerPassword[0] != '\001') {
130         ownerPW = new GooString(ownerPassword);
131     } else {
132         ownerPW = nullptr;
133     }
134     if (userPassword[0] != '\001') {
135         userPW = new GooString(userPassword);
136     } else {
137         userPW = nullptr;
138     }
139 
140     doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
141 
142     if (userPW) {
143         delete userPW;
144     }
145     if (ownerPW) {
146         delete ownerPW;
147     }
148     if (!doc->isOk()) {
149         exitCode = 1;
150         goto err2;
151     }
152 
153     for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
154         embeddedFiles.push_back(doc->getCatalog()->embeddedFile(i));
155 
156     nPages = doc->getCatalog()->getNumPages();
157     for (i = 0; i < nPages; ++i) {
158         page = doc->getCatalog()->getPage(i + 1);
159         if (!page)
160             continue;
161         annots = page->getAnnots();
162         if (!annots)
163             break;
164 
165         for (j = 0; j < annots->getNumAnnots(); ++j) {
166             annot = annots->getAnnot(j);
167             if (annot->getType() != Annot::typeFileAttachment)
168                 continue;
169             embeddedFiles.push_back(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
170         }
171     }
172 
173     nFiles = embeddedFiles.size();
174 
175     // list embedded files
176     if (doList) {
177         printf("%d embedded files\n", nFiles);
178         for (i = 0; i < nFiles; ++i) {
179             fileSpec = embeddedFiles[i];
180             printf("%d: ", i + 1);
181             s1 = fileSpec->getFileName();
182             if (!s1) {
183                 exitCode = 3;
184                 goto err2;
185             }
186             if (s1->hasUnicodeMarker()) {
187                 isUnicode = true;
188                 j = 2;
189             } else {
190                 isUnicode = false;
191                 j = 0;
192             }
193             while (j < s1->getLength()) {
194                 if (isUnicode) {
195                     u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
196                     j += 2;
197                 } else {
198                     u = pdfDocEncoding[s1->getChar(j) & 0xff];
199                     ++j;
200                 }
201                 n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
202                 fwrite(uBuf, 1, n, stdout);
203             }
204             fputc('\n', stdout);
205         }
206 
207         // save all embedded files
208     } else if (saveAll) {
209         for (i = 0; i < nFiles; ++i) {
210             fileSpec = embeddedFiles[i];
211             if (savePath[0]) {
212                 n = strlen(savePath);
213                 if (n > (int)sizeof(path) - 2) {
214                     n = sizeof(path) - 2;
215                 }
216                 memcpy(path, savePath, n);
217                 path[n] = '/';
218                 p = path + n + 1;
219             } else {
220                 p = path;
221             }
222             s1 = fileSpec->getFileName();
223             if (!s1) {
224                 exitCode = 3;
225                 goto err2;
226             }
227             if (s1->hasUnicodeMarker()) {
228                 isUnicode = true;
229                 j = 2;
230             } else {
231                 isUnicode = false;
232                 j = 0;
233             }
234             while (j < s1->getLength()) {
235                 if (isUnicode) {
236                     u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
237                     j += 2;
238                 } else {
239                     u = pdfDocEncoding[s1->getChar(j) & 0xff];
240                     ++j;
241                 }
242                 n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
243                 if (p + n >= path + sizeof(path))
244                     break;
245                 memcpy(p, uBuf, n);
246                 p += n;
247             }
248             *p = '\0';
249 
250             auto *embFile = fileSpec->getEmbeddedFile();
251             if (!embFile || !embFile->isOk()) {
252                 exitCode = 3;
253                 goto err2;
254             }
255             if (!embFile->save(path)) {
256                 error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
257                 exitCode = 2;
258                 goto err2;
259             }
260         }
261 
262         // save an embedded file
263     } else {
264         if (hasSaveFile) {
265             for (i = 0; i < nFiles; ++i) {
266                 fileSpec = embeddedFiles[i];
267                 s1 = fileSpec->getFileName();
268                 if (strcmp(s1->c_str(), saveFile) == 0) {
269                     saveNum = i + 1;
270                     break;
271                 }
272             }
273         }
274         if (saveNum < 1 || saveNum > nFiles) {
275             error(errCommandLine, -1, hasSaveFile ? "Invalid file name" : "Invalid file number");
276             goto err2;
277         }
278 
279         fileSpec = embeddedFiles[saveNum - 1];
280         if (savePath[0]) {
281             p = savePath;
282         } else {
283             p = path;
284             s1 = fileSpec->getFileName();
285             if (!s1) {
286                 exitCode = 3;
287                 goto err2;
288             }
289             if (s1->hasUnicodeMarker()) {
290                 isUnicode = true;
291                 j = 2;
292             } else {
293                 isUnicode = false;
294                 j = 0;
295             }
296             while (j < s1->getLength()) {
297                 if (isUnicode) {
298                     u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
299                     j += 2;
300                 } else {
301                     u = pdfDocEncoding[s1->getChar(j) & 0xff];
302                     ++j;
303                 }
304                 n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
305                 if (p + n >= path + sizeof(path))
306                     break;
307                 memcpy(p, uBuf, n);
308                 p += n;
309             }
310             *p = '\0';
311             p = path;
312         }
313 
314         auto *embFile = fileSpec->getEmbeddedFile();
315         if (!embFile || !embFile->isOk()) {
316             exitCode = 3;
317             goto err2;
318         }
319         if (!embFile->save(p)) {
320             error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
321             exitCode = 2;
322             goto err2;
323         }
324     }
325 
326     exitCode = 0;
327 
328     // clean up
329 err2:
330     for (auto &file : embeddedFiles)
331         delete file;
332 err0:
333 
334     return exitCode;
335 }
336