1 //========================================================================
2 //
3 // pdfdetach.cc
4 //
5 // Copyright 2010 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 //========================================================================
10 //
11 // Modified under the Poppler project - http://poppler.freedesktop.org
12 //
13 // All changes made under the Poppler project to this file are licensed
14 // under GPL version 2 or later
15 //
16 // Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
17 // Copyright (C) 2013 Yury G. Kudryashov <urkud.urkud@gmail.com>
18 // Copyright (C) 2014, 2017 Adrian Johnson <ajohnson@redneon.com>
19 // Copyright (C) 2018, 2020 Albert Astals Cid <aacid@kde.org>
20 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
21 // Copyright (C) 2019, 2021 Oliver Sander <oliver.sander@tu-dresden.de>
22 // Copyright (C) 2020 <r.coeffier@bee-buzziness.com>
23 //
24 // To see a description of the changes please see the Changelog file that
25 // came with your tarball or type make ChangeLog if you are building from git
26 //
27 //========================================================================
28
29 #include "config.h"
30 #include <poppler-config.h>
31 #include <cstdio>
32 #include "goo/gmem.h"
33 #include "parseargs.h"
34 #include "Annot.h"
35 #include "GlobalParams.h"
36 #include "Page.h"
37 #include "PDFDoc.h"
38 #include "PDFDocFactory.h"
39 #include "FileSpec.h"
40 #include "CharTypes.h"
41 #include "Catalog.h"
42 #include "UnicodeMap.h"
43 #include "PDFDocEncoding.h"
44 #include "Error.h"
45 #include "Win32Console.h"
46
47 static bool doList = false;
48 static int saveNum = 0;
49 static char saveFile[128] = "";
50 static bool saveAll = false;
51 static char savePath[1024] = "";
52 static char textEncName[128] = "";
53 static char ownerPassword[33] = "\001";
54 static char userPassword[33] = "\001";
55 static bool printVersion = false;
56 static bool printHelp = false;
57
58 static const ArgDesc argDesc[] = { { "-list", argFlag, &doList, 0, "list all embedded files" },
59 { "-save", argInt, &saveNum, 0, "save the specified embedded file (file number)" },
60 { "-savefile", argString, &saveFile, sizeof(saveFile), "save the specified embedded file (file name)" },
61 { "-saveall", argFlag, &saveAll, 0, "save all embedded files" },
62 { "-o", argString, savePath, sizeof(savePath), "file name for the saved embedded file" },
63 { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
64 { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
65 { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
66 { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
67 { "-h", argFlag, &printHelp, 0, "print usage information" },
68 { "-help", argFlag, &printHelp, 0, "print usage information" },
69 { "--help", argFlag, &printHelp, 0, "print usage information" },
70 { "-?", argFlag, &printHelp, 0, "print usage information" },
71 {} };
72
main(int argc,char * argv[])73 int main(int argc, char *argv[])
74 {
75 std::unique_ptr<PDFDoc> doc;
76 GooString *fileName;
77 const UnicodeMap *uMap;
78 GooString *ownerPW, *userPW;
79 char uBuf[8];
80 char path[1024];
81 char *p;
82 bool ok;
83 bool hasSaveFile;
84 int exitCode;
85 std::vector<FileSpec *> embeddedFiles;
86 int nFiles, nPages, n, i, j;
87 FileSpec *fileSpec;
88 Page *page;
89 Annots *annots;
90 Annot *annot;
91 const GooString *s1;
92 Unicode u;
93 bool isUnicode;
94
95 Win32Console win32Console(&argc, &argv);
96 exitCode = 99;
97
98 // parse args
99 ok = parseArgs(argDesc, &argc, argv);
100 hasSaveFile = strlen(saveFile) > 0;
101 if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) {
102 ok = false;
103 }
104 if (!ok || argc != 2 || printVersion || printHelp) {
105 fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
106 fprintf(stderr, "%s\n", popplerCopyright);
107 fprintf(stderr, "%s\n", xpdfCopyright);
108 if (!printVersion) {
109 printUsage("pdfdetach", "<PDF-file>", argDesc);
110 }
111 goto err0;
112 }
113 fileName = new GooString(argv[1]);
114
115 // read config file
116 globalParams = std::make_unique<GlobalParams>();
117 if (textEncName[0]) {
118 globalParams->setTextEncoding(textEncName);
119 }
120
121 // get mapping to output encoding
122 if (!(uMap = globalParams->getTextEncoding())) {
123 error(errConfig, -1, "Couldn't get text encoding");
124 delete fileName;
125 goto err0;
126 }
127
128 // open PDF file
129 if (ownerPassword[0] != '\001') {
130 ownerPW = new GooString(ownerPassword);
131 } else {
132 ownerPW = nullptr;
133 }
134 if (userPassword[0] != '\001') {
135 userPW = new GooString(userPassword);
136 } else {
137 userPW = nullptr;
138 }
139
140 doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
141
142 if (userPW) {
143 delete userPW;
144 }
145 if (ownerPW) {
146 delete ownerPW;
147 }
148 if (!doc->isOk()) {
149 exitCode = 1;
150 goto err2;
151 }
152
153 for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
154 embeddedFiles.push_back(doc->getCatalog()->embeddedFile(i));
155
156 nPages = doc->getCatalog()->getNumPages();
157 for (i = 0; i < nPages; ++i) {
158 page = doc->getCatalog()->getPage(i + 1);
159 if (!page)
160 continue;
161 annots = page->getAnnots();
162 if (!annots)
163 break;
164
165 for (j = 0; j < annots->getNumAnnots(); ++j) {
166 annot = annots->getAnnot(j);
167 if (annot->getType() != Annot::typeFileAttachment)
168 continue;
169 embeddedFiles.push_back(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
170 }
171 }
172
173 nFiles = embeddedFiles.size();
174
175 // list embedded files
176 if (doList) {
177 printf("%d embedded files\n", nFiles);
178 for (i = 0; i < nFiles; ++i) {
179 fileSpec = embeddedFiles[i];
180 printf("%d: ", i + 1);
181 s1 = fileSpec->getFileName();
182 if (!s1) {
183 exitCode = 3;
184 goto err2;
185 }
186 if (s1->hasUnicodeMarker()) {
187 isUnicode = true;
188 j = 2;
189 } else {
190 isUnicode = false;
191 j = 0;
192 }
193 while (j < s1->getLength()) {
194 if (isUnicode) {
195 u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
196 j += 2;
197 } else {
198 u = pdfDocEncoding[s1->getChar(j) & 0xff];
199 ++j;
200 }
201 n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
202 fwrite(uBuf, 1, n, stdout);
203 }
204 fputc('\n', stdout);
205 }
206
207 // save all embedded files
208 } else if (saveAll) {
209 for (i = 0; i < nFiles; ++i) {
210 fileSpec = embeddedFiles[i];
211 if (savePath[0]) {
212 n = strlen(savePath);
213 if (n > (int)sizeof(path) - 2) {
214 n = sizeof(path) - 2;
215 }
216 memcpy(path, savePath, n);
217 path[n] = '/';
218 p = path + n + 1;
219 } else {
220 p = path;
221 }
222 s1 = fileSpec->getFileName();
223 if (!s1) {
224 exitCode = 3;
225 goto err2;
226 }
227 if (s1->hasUnicodeMarker()) {
228 isUnicode = true;
229 j = 2;
230 } else {
231 isUnicode = false;
232 j = 0;
233 }
234 while (j < s1->getLength()) {
235 if (isUnicode) {
236 u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
237 j += 2;
238 } else {
239 u = pdfDocEncoding[s1->getChar(j) & 0xff];
240 ++j;
241 }
242 n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
243 if (p + n >= path + sizeof(path))
244 break;
245 memcpy(p, uBuf, n);
246 p += n;
247 }
248 *p = '\0';
249
250 auto *embFile = fileSpec->getEmbeddedFile();
251 if (!embFile || !embFile->isOk()) {
252 exitCode = 3;
253 goto err2;
254 }
255 if (!embFile->save(path)) {
256 error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
257 exitCode = 2;
258 goto err2;
259 }
260 }
261
262 // save an embedded file
263 } else {
264 if (hasSaveFile) {
265 for (i = 0; i < nFiles; ++i) {
266 fileSpec = embeddedFiles[i];
267 s1 = fileSpec->getFileName();
268 if (strcmp(s1->c_str(), saveFile) == 0) {
269 saveNum = i + 1;
270 break;
271 }
272 }
273 }
274 if (saveNum < 1 || saveNum > nFiles) {
275 error(errCommandLine, -1, hasSaveFile ? "Invalid file name" : "Invalid file number");
276 goto err2;
277 }
278
279 fileSpec = embeddedFiles[saveNum - 1];
280 if (savePath[0]) {
281 p = savePath;
282 } else {
283 p = path;
284 s1 = fileSpec->getFileName();
285 if (!s1) {
286 exitCode = 3;
287 goto err2;
288 }
289 if (s1->hasUnicodeMarker()) {
290 isUnicode = true;
291 j = 2;
292 } else {
293 isUnicode = false;
294 j = 0;
295 }
296 while (j < s1->getLength()) {
297 if (isUnicode) {
298 u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff);
299 j += 2;
300 } else {
301 u = pdfDocEncoding[s1->getChar(j) & 0xff];
302 ++j;
303 }
304 n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
305 if (p + n >= path + sizeof(path))
306 break;
307 memcpy(p, uBuf, n);
308 p += n;
309 }
310 *p = '\0';
311 p = path;
312 }
313
314 auto *embFile = fileSpec->getEmbeddedFile();
315 if (!embFile || !embFile->isOk()) {
316 exitCode = 3;
317 goto err2;
318 }
319 if (!embFile->save(p)) {
320 error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
321 exitCode = 2;
322 goto err2;
323 }
324 }
325
326 exitCode = 0;
327
328 // clean up
329 err2:
330 for (auto &file : embeddedFiles)
331 delete file;
332 err0:
333
334 return exitCode;
335 }
336