1 /*
2 *
3 * Copyright (C) 2007-2019, OFFIS e.V.
4 * All rights reserved. See COPYRIGHT file for details.
5 *
6 * This software and supporting documentation were developed by
7 *
8 * OFFIS e.V.
9 * R&D Division Health
10 * Escherweg 2
11 * D-26121 Oldenburg, Germany
12 *
13 *
14 * Module: dcmdata
15 *
16 * Author: Marco Eichelberg
17 *
18 * Purpose: Extract PDF file from DICOM encapsulated PDF storage object
19 *
20 */
21
22 #include "dcmtk/config/osconfig.h" /* make sure OS specific configuration is included first */
23
24 #define INCLUDE_CSTDLIB
25 #define INCLUDE_CSTDIO
26 #define INCLUDE_CSTRING
27 #include "dcmtk/ofstd/ofstdinc.h"
28
29 BEGIN_EXTERN_C
30 #ifdef HAVE_FCNTL_H
31 #include <fcntl.h> /* for O_RDONLY */
32 #endif
33 #ifdef HAVE_SYS_TYPES_H
34 #include <sys/types.h> /* required for sys/stat.h */
35 #endif
36 #ifdef HAVE_SYS_STAT_H
37 #include <sys/stat.h> /* for stat, fstat */
38 #endif
39 END_EXTERN_C
40
41 #include "dcmtk/dcmdata/dctk.h"
42 #include "dcmtk/dcmdata/cmdlnarg.h"
43 #include "dcmtk/ofstd/ofconapp.h"
44 #include "dcmtk/dcmdata/dcuid.h" /* for dcmtk version name */
45 #include "dcmtk/ofstd/ofstd.h"
46 #include "dcmtk/dcmdata/dcistrmz.h" /* for dcmZlibExpectRFC1950Encoding */
47
48 #ifdef WITH_ZLIB
49 #include <zlib.h> /* for zlibVersion() */
50 #endif
51
/* name of this console application; used below to build the logger name and
 * the resource identifier, and passed to the command line handling in main() */
#define OFFIS_CONSOLE_APPLICATION "dcm2pdf"

/* logger through which all log messages of this application are emitted */
static OFLogger dcm2pdfLogger = OFLog::getLogger("dcmtk.apps." OFFIS_CONSOLE_APPLICATION);

/* resource identifier composed of the application name, the DCMTK version
 * and the DCMTK release date */
static char rcsid[] = "$dcmtk: " OFFIS_CONSOLE_APPLICATION " v"
  OFFIS_DCMTK_VERSION " " OFFIS_DCMTK_RELEASEDATE " $";

/* placeholder in the --exec command string that main() replaces with the
 * PDF output filename before the command is executed */
#define FILENAME_PLACEHOLDER "#f"
60
/*
 * This function replaces all occurrences of pattern in srcstr with substitute and returns
 * the result as a new OFString variable. Note that srcstr itself will not be changed.
 * The search is resumed behind each inserted substitute, so a substitute that itself
 * contains pattern is not expanded again.
 *
 * Parameters:
 *   srcstr     - [in] The source string.
 *   pattern    - [in] The pattern string which shall be substituted. If it is empty,
 *                     nothing is replaced.
 *   substitute - [in] The substitute for pattern in srcstr.
 *
 * Return value:
 *   A copy of srcstr in which every occurrence of pattern has been replaced.
 */
static OFString replaceChars(const OFString &srcstr, const OFString &pattern, const OFString &substitute)
{
  OFString result = srcstr;

  /* an empty pattern matches at every position, which would keep the loop
   * below running forever; treat it as "nothing to replace" instead */
  if (pattern.empty())
    return result;

  size_t pos = result.find(pattern, 0);
  while (pos != OFString_npos)
  {
    result.replace(pos, pattern.size(), substitute);
    /* continue searching behind the text that has just been inserted */
    pos = result.find(pattern, pos + substitute.size());
  }

  return result;
}
88
89
/* column values handed to OFCommandLine::setOptionColumns(), setParamColumn(),
 * addGroup() and addSubGroup() in main() */
#define SHORTCOL 3
#define LONGCOL 20
92
main(int argc,char * argv[])93 int main(int argc, char *argv[])
94 {
95 const char *opt_ifname = NULL;
96 const char *opt_ofname = NULL;
97 const char *opt_execString = NULL;
98 E_FileReadMode opt_readMode = ERM_autoDetect;
99 E_TransferSyntax opt_ixfer = EXS_Unknown;
100
101 OFConsoleApplication app(OFFIS_CONSOLE_APPLICATION, "Extract PDF file from DICOM encapsulated PDF", rcsid);
102 OFCommandLine cmd;
103 cmd.setOptionColumns(LONGCOL, SHORTCOL);
104 cmd.setParamColumn(LONGCOL + SHORTCOL + 4);
105
106 cmd.addParam("dcmfile-in", "DICOM input filename");
107 cmd.addParam("pdffile-out", "PDF output filename");
108
109 cmd.addGroup("general options:", LONGCOL, SHORTCOL + 2);
110 cmd.addOption("--help", "-h", "print this help text and exit", OFCommandLine::AF_Exclusive);
111 cmd.addOption("--version", "print version information and exit", OFCommandLine::AF_Exclusive);
112 OFLog::addOptions(cmd);
113
114 cmd.addGroup("input options:");
115 cmd.addSubGroup("input file format:");
116 cmd.addOption("--read-file", "+f", "read file format or data set (default)");
117 cmd.addOption("--read-file-only", "+fo", "read file format only");
118 cmd.addOption("--read-dataset", "-f", "read data set without file meta information");
119 cmd.addSubGroup("input transfer syntax:", LONGCOL, SHORTCOL);
120 cmd.addOption("--read-xfer-auto", "-t=", "use TS recognition (default)");
121 cmd.addOption("--read-xfer-detect", "-td", "ignore TS specified in the file meta header");
122 cmd.addOption("--read-xfer-little", "-te", "read with explicit VR little endian TS");
123 cmd.addOption("--read-xfer-big", "-tb", "read with explicit VR big endian TS");
124 cmd.addOption("--read-xfer-implicit", "-ti", "read with implicit VR little endian TS");
125 cmd.addSubGroup("parsing of odd-length attributes:");
126 cmd.addOption("--accept-odd-length", "+ao", "accept odd length attributes (default)");
127 cmd.addOption("--assume-even-length", "+ae", "assume real length is one byte larger");
128 cmd.addSubGroup("handling of undefined length UN elements:");
129 cmd.addOption("--enable-cp246", "+ui", "read undefined len UN as implicit VR (default)");
130 cmd.addOption("--disable-cp246", "-ui", "read undefined len UN as explicit VR");
131 cmd.addSubGroup("handling of defined length UN elements:");
132 cmd.addOption("--retain-un", "-uc", "retain elements as UN (default)");
133 cmd.addOption("--convert-un", "+uc", "convert to real VR if known");
134 cmd.addSubGroup("automatic data correction:");
135 cmd.addOption("--enable-correction", "+dc", "enable automatic data correction (default)");
136 cmd.addOption("--disable-correction", "-dc", "disable automatic data correction");
137 #ifdef WITH_ZLIB
138 cmd.addSubGroup("bitstream format of deflated input:");
139 cmd.addOption("--bitstream-deflated", "+bd", "expect deflated bitstream (default)");
140 cmd.addOption("--bitstream-zlib", "+bz", "expect deflated zlib bitstream");
141 #endif
142
143 cmd.addGroup("execution options:", LONGCOL, SHORTCOL + 2);
144 cmd.addOption("--exec", "-x", 1, "[c]ommand: string",
145 "execute command c after PDF extraction");
146 /* evaluate command line */
147 prepareCmdLineArgs(argc, argv, OFFIS_CONSOLE_APPLICATION);
148 if (app.parseCommandLine(cmd, argc, argv))
149 {
150 /* check exclusive options first */
151 if (cmd.hasExclusiveOption())
152 {
153 if (cmd.findOption("--version"))
154 {
155 app.printHeader(OFTrue /*print host identifier*/);
156 COUT << OFendl << "External libraries used:";
157 #ifdef WITH_ZLIB
158 COUT << OFendl << "- ZLIB, Version " << zlibVersion() << OFendl;
159 #else
160 COUT << " none" << OFendl;
161 #endif
162 return 0;
163 }
164 }
165
166 /* command line parameters and options */
167 cmd.getParam(1, opt_ifname);
168 cmd.getParam(2, opt_ofname);
169
170 OFLog::configureFromCommandLine(cmd, app);
171
172 cmd.beginOptionBlock();
173 if (cmd.findOption("--read-file")) opt_readMode = ERM_autoDetect;
174 if (cmd.findOption("--read-file-only")) opt_readMode = ERM_fileOnly;
175 if (cmd.findOption("--read-dataset")) opt_readMode = ERM_dataset;
176 cmd.endOptionBlock();
177
178 cmd.beginOptionBlock();
179 if (cmd.findOption("--read-xfer-auto"))
180 opt_ixfer = EXS_Unknown;
181 if (cmd.findOption("--read-xfer-detect"))
182 dcmAutoDetectDatasetXfer.set(OFTrue);
183 if (cmd.findOption("--read-xfer-little"))
184 {
185 app.checkDependence("--read-xfer-little", "--read-dataset", opt_readMode == ERM_dataset);
186 opt_ixfer = EXS_LittleEndianExplicit;
187 }
188 if (cmd.findOption("--read-xfer-big"))
189 {
190 app.checkDependence("--read-xfer-big", "--read-dataset", opt_readMode == ERM_dataset);
191 opt_ixfer = EXS_BigEndianExplicit;
192 }
193 if (cmd.findOption("--read-xfer-implicit"))
194 {
195 app.checkDependence("--read-xfer-implicit", "--read-dataset", opt_readMode == ERM_dataset);
196 opt_ixfer = EXS_LittleEndianImplicit;
197 }
198 cmd.endOptionBlock();
199
200 cmd.beginOptionBlock();
201 if (cmd.findOption("--accept-odd-length"))
202 {
203 dcmAcceptOddAttributeLength.set(OFTrue);
204 }
205 if (cmd.findOption("--assume-even-length"))
206 {
207 dcmAcceptOddAttributeLength.set(OFFalse);
208 }
209 cmd.endOptionBlock();
210
211 cmd.beginOptionBlock();
212 if (cmd.findOption("--enable-cp246"))
213 {
214 dcmEnableCP246Support.set(OFTrue);
215 }
216 if (cmd.findOption("--disable-cp246"))
217 {
218 dcmEnableCP246Support.set(OFFalse);
219 }
220 cmd.endOptionBlock();
221
222 cmd.beginOptionBlock();
223 if (cmd.findOption("--retain-un"))
224 {
225 dcmEnableUnknownVRConversion.set(OFFalse);
226 }
227 if (cmd.findOption("--convert-un"))
228 {
229 dcmEnableUnknownVRConversion.set(OFTrue);
230 }
231 cmd.endOptionBlock();
232
233 cmd.beginOptionBlock();
234 if (cmd.findOption("--enable-correction"))
235 {
236 dcmEnableAutomaticInputDataCorrection.set(OFTrue);
237 }
238 if (cmd.findOption("--disable-correction"))
239 {
240 dcmEnableAutomaticInputDataCorrection.set(OFFalse);
241 }
242 cmd.endOptionBlock();
243
244 #ifdef WITH_ZLIB
245 cmd.beginOptionBlock();
246 if (cmd.findOption("--bitstream-deflated"))
247 {
248 dcmZlibExpectRFC1950Encoding.set(OFFalse);
249 }
250 if (cmd.findOption("--bitstream-zlib"))
251 {
252 dcmZlibExpectRFC1950Encoding.set(OFTrue);
253 }
254 cmd.endOptionBlock();
255 #endif
256
257 if (cmd.findOption("--exec")) app.checkValue(cmd.getValue(opt_execString));
258 }
259
260 /* print resource identifier */
261 OFLOG_DEBUG(dcm2pdfLogger, rcsid << OFendl);
262
263 /* make sure data dictionary is loaded */
264 if (!dcmDataDict.isDictionaryLoaded())
265 {
266 OFLOG_WARN(dcm2pdfLogger, "no data dictionary loaded, check environment variable: "
267 << DCM_DICT_ENVIRONMENT_VARIABLE);
268 }
269
270 // open inputfile
271 if ((opt_ifname == NULL) || (strlen(opt_ifname) == 0))
272 {
273 OFLOG_FATAL(dcm2pdfLogger, "invalid filename: <empty string>");
274 return 1;
275 }
276
277 DcmFileFormat fileformat;
278 DcmDataset * dataset = fileformat.getDataset();
279
280 OFLOG_INFO(dcm2pdfLogger, "open input file " << opt_ifname);
281
282 OFCondition error = fileformat.loadFile(opt_ifname, opt_ixfer, EGL_noChange, DCM_MaxReadLength, opt_readMode);
283
284 if (error.bad())
285 {
286 OFLOG_FATAL(dcm2pdfLogger, error.text() << ": reading file: " << opt_ifname);
287 return 1;
288 }
289
290 OFString sopClass;
291 error = dataset->findAndGetOFString(DCM_SOPClassUID, sopClass);
292 if (error.bad() || sopClass != UID_EncapsulatedPDFStorage)
293 {
294 OFLOG_FATAL(dcm2pdfLogger, "not an Encapsulated PDF Storage object: " << opt_ifname);
295 return 1;
296 }
297
298 DcmElement *delem = NULL;
299 error = dataset->findAndGetElement(DCM_EncapsulatedDocument, delem);
300 if (error.bad() || delem == NULL)
301 {
302 OFLOG_FATAL(dcm2pdfLogger, "attribute (0042,0011) Encapsulated Document missing.");
303 return 1;
304 }
305
306 Uint32 len = delem->getLength();
307 Uint8 *pdfDocument = NULL;
308 error = delem->getUint8Array(pdfDocument);
309 if (error.bad() || pdfDocument == NULL || len == 0)
310 {
311 OFLOG_FATAL(dcm2pdfLogger, "attribute (0042,0011) Encapsulated Document empty or wrong VR.");
312 return 1;
313 }
314
315 /* strip pad byte at end of file, if there is one. The PDF format expects
316 * files to end with %%EOF followed by CR/LF (although in some cases the
317 * CR/LF may be missing or you might only find CR or LF).
318 * If the last character of the file is not a CR or LF, and not the
319 * letter 'F', we assume it is either trailing garbage or a pad byte, and remove it.
320 */
321 if (pdfDocument[len-1] != 10 && pdfDocument[len-1] != 13 && pdfDocument[len-1] != 'F')
322 {
323 --len;
324 }
325
326 FILE *pdffile = fopen(opt_ofname, "wb");
327 if (pdffile == NULL)
328 {
329 OFLOG_FATAL(dcm2pdfLogger, "unable to create file " << opt_ofname);
330 return 1;
331 }
332
333 if (len != fwrite(pdfDocument, 1, len, pdffile))
334 {
335 OFLOG_FATAL(dcm2pdfLogger, "write error in file " << opt_ofname);
336 fclose(pdffile);
337 return 1;
338 }
339
340 fclose(pdffile);
341
342 OFLOG_INFO(dcm2pdfLogger, "conversion successful");
343
344 if (opt_execString)
345 {
346 OFString cmdStr = opt_execString;
347 cmdStr = replaceChars(cmdStr, OFString(FILENAME_PLACEHOLDER), opt_ofname);
348
349 // Execute command and return result
350 return system(cmdStr.c_str());
351 }
352
353 return 0;
354 }
355