1 /*
2  *
3  *  Copyright (C) 2007-2019, OFFIS e.V.
4  *  All rights reserved.  See COPYRIGHT file for details.
5  *
6  *  This software and supporting documentation were developed by
7  *
8  *    OFFIS e.V.
9  *    R&D Division Health
10  *    Escherweg 2
11  *    D-26121 Oldenburg, Germany
12  *
13  *
14  *  Module:  dcmdata
15  *
16  *  Author:  Marco Eichelberg
17  *
 *  Purpose: Extract PDF file from DICOM encapsulated PDF storage object
19  *
20  */
21 
22 #include "dcmtk/config/osconfig.h"    /* make sure OS specific configuration is included first */
23 
24 #define INCLUDE_CSTDLIB
25 #define INCLUDE_CSTDIO
26 #define INCLUDE_CSTRING
27 #include "dcmtk/ofstd/ofstdinc.h"
28 
29 BEGIN_EXTERN_C
30 #ifdef HAVE_FCNTL_H
31 #include <fcntl.h>       /* for O_RDONLY */
32 #endif
33 #ifdef HAVE_SYS_TYPES_H
34 #include <sys/types.h>   /* required for sys/stat.h */
35 #endif
36 #ifdef HAVE_SYS_STAT_H
37 #include <sys/stat.h>    /* for stat, fstat */
38 #endif
39 END_EXTERN_C
40 
41 #include "dcmtk/dcmdata/dctk.h"
42 #include "dcmtk/dcmdata/cmdlnarg.h"
43 #include "dcmtk/ofstd/ofconapp.h"
44 #include "dcmtk/dcmdata/dcuid.h"       /* for dcmtk version name */
45 #include "dcmtk/ofstd/ofstd.h"
46 #include "dcmtk/dcmdata/dcistrmz.h"    /* for dcmZlibExpectRFC1950Encoding */
47 
48 #ifdef WITH_ZLIB
49 #include <zlib.h>        /* for zlibVersion() */
50 #endif
51 
/* name of this command line tool; used below for the logger name, the rcsid
 * string, the console application object and prepareCmdLineArgs() */
#define OFFIS_CONSOLE_APPLICATION "dcm2pdf"

/* logger through which all messages of this application are emitted */
static OFLogger dcm2pdfLogger = OFLog::getLogger("dcmtk.apps." OFFIS_CONSOLE_APPLICATION);

/* identification string built from the tool name, the DCMTK version and the
 * DCMTK release date; handed to OFConsoleApplication and logged at debug level */
static char rcsid[] = "$dcmtk: " OFFIS_CONSOLE_APPLICATION " v"
  OFFIS_DCMTK_VERSION " " OFFIS_DCMTK_RELEASEDATE " $";

/* placeholder in the --exec command string that main() replaces with the
 * PDF output filename before the command is executed */
#define FILENAME_PLACEHOLDER "#f"
60 
static OFString replaceChars(const OFString &srcstr, const OFString &pattern, const OFString &substitute)
    /*
     * This function replaces all occurrences of pattern in srcstr with substitute and returns
     * the result as a new OFString variable. Note that srcstr itself will not be changed.
     * The search continues behind each inserted substitute, i.e. text introduced by a
     * replacement is never scanned again (a substitute containing pattern is safe).
     * If pattern is empty, there is nothing to replace and a copy of srcstr is returned.
     *
     * Parameters:
     *   srcstr     - [in] The source string.
     *   pattern    - [in] The pattern string which shall be substituted.
     *   substitute - [in] The substitute for pattern in srcstr.
     *
     * Return Value:
     *   Copy of srcstr in which every occurrence of pattern has been replaced.
     */
{
  OFString result = srcstr;

  /* With a std::string-like find(), an empty pattern matches at every position,
   * so the loop below would insert substitute forever. Bail out early instead.
   */
  if (pattern.size() == 0)
    return result;

  size_t pos = result.find(pattern, 0);
  while (pos != OFString_npos)
  {
    result.replace(pos, pattern.size(), substitute);

    /* continue searching behind the text that has just been inserted */
    pos = result.find(pattern, pos + substitute.size());
  }

  return result;
}
88 
89 
/* column settings handed to OFCommandLine in main()
 * (setOptionColumns(), setParamColumn() and the addGroup()/addSubGroup() calls) */
#define SHORTCOL 3
#define LONGCOL 20
92 
main(int argc,char * argv[])93 int main(int argc, char *argv[])
94 {
95   const char *opt_ifname = NULL;
96   const char *opt_ofname = NULL;
97   const char    *opt_execString = NULL;
98   E_FileReadMode opt_readMode = ERM_autoDetect;
99   E_TransferSyntax opt_ixfer = EXS_Unknown;
100 
101   OFConsoleApplication app(OFFIS_CONSOLE_APPLICATION, "Extract PDF file from DICOM encapsulated PDF", rcsid);
102   OFCommandLine cmd;
103   cmd.setOptionColumns(LONGCOL, SHORTCOL);
104   cmd.setParamColumn(LONGCOL + SHORTCOL + 4);
105 
106   cmd.addParam("dcmfile-in",  "DICOM input filename");
107   cmd.addParam("pdffile-out", "PDF output filename");
108 
109   cmd.addGroup("general options:", LONGCOL, SHORTCOL + 2);
110     cmd.addOption("--help",                 "-h",     "print this help text and exit", OFCommandLine::AF_Exclusive);
111     cmd.addOption("--version",                        "print version information and exit", OFCommandLine::AF_Exclusive);
112     OFLog::addOptions(cmd);
113 
114   cmd.addGroup("input options:");
115     cmd.addSubGroup("input file format:");
116       cmd.addOption("--read-file",          "+f",     "read file format or data set (default)");
117       cmd.addOption("--read-file-only",     "+fo",    "read file format only");
118       cmd.addOption("--read-dataset",       "-f",     "read data set without file meta information");
119     cmd.addSubGroup("input transfer syntax:", LONGCOL, SHORTCOL);
120       cmd.addOption("--read-xfer-auto",     "-t=",    "use TS recognition (default)");
121       cmd.addOption("--read-xfer-detect",   "-td",    "ignore TS specified in the file meta header");
122       cmd.addOption("--read-xfer-little",   "-te",    "read with explicit VR little endian TS");
123       cmd.addOption("--read-xfer-big",      "-tb",    "read with explicit VR big endian TS");
124       cmd.addOption("--read-xfer-implicit", "-ti",    "read with implicit VR little endian TS");
125     cmd.addSubGroup("parsing of odd-length attributes:");
126       cmd.addOption("--accept-odd-length",  "+ao",    "accept odd length attributes (default)");
127       cmd.addOption("--assume-even-length", "+ae",    "assume real length is one byte larger");
128     cmd.addSubGroup("handling of undefined length UN elements:");
129       cmd.addOption("--enable-cp246",       "+ui",    "read undefined len UN as implicit VR (default)");
130       cmd.addOption("--disable-cp246",      "-ui",    "read undefined len UN as explicit VR");
131     cmd.addSubGroup("handling of defined length UN elements:");
132       cmd.addOption("--retain-un",          "-uc",    "retain elements as UN (default)");
133       cmd.addOption("--convert-un",         "+uc",    "convert to real VR if known");
134     cmd.addSubGroup("automatic data correction:");
135       cmd.addOption("--enable-correction",  "+dc",    "enable automatic data correction (default)");
136       cmd.addOption("--disable-correction", "-dc",    "disable automatic data correction");
137 #ifdef WITH_ZLIB
138     cmd.addSubGroup("bitstream format of deflated input:");
139       cmd.addOption("--bitstream-deflated", "+bd",    "expect deflated bitstream (default)");
140       cmd.addOption("--bitstream-zlib",     "+bz",    "expect deflated zlib bitstream");
141 #endif
142 
143    cmd.addGroup("execution options:", LONGCOL, SHORTCOL + 2);
144      cmd.addOption("--exec",                "-x",  1, "[c]ommand: string",
145                                                       "execute command c after PDF extraction");
146     /* evaluate command line */
147     prepareCmdLineArgs(argc, argv, OFFIS_CONSOLE_APPLICATION);
148     if (app.parseCommandLine(cmd, argc, argv))
149     {
150       /* check exclusive options first */
151       if (cmd.hasExclusiveOption())
152       {
153           if (cmd.findOption("--version"))
154           {
155               app.printHeader(OFTrue /*print host identifier*/);
156               COUT << OFendl << "External libraries used:";
157 #ifdef WITH_ZLIB
158               COUT << OFendl << "- ZLIB, Version " << zlibVersion() << OFendl;
159 #else
160               COUT << " none" << OFendl;
161 #endif
162               return 0;
163           }
164       }
165 
166       /* command line parameters and options */
167       cmd.getParam(1, opt_ifname);
168       cmd.getParam(2, opt_ofname);
169 
170       OFLog::configureFromCommandLine(cmd, app);
171 
172       cmd.beginOptionBlock();
173       if (cmd.findOption("--read-file")) opt_readMode = ERM_autoDetect;
174       if (cmd.findOption("--read-file-only")) opt_readMode = ERM_fileOnly;
175       if (cmd.findOption("--read-dataset")) opt_readMode = ERM_dataset;
176       cmd.endOptionBlock();
177 
178       cmd.beginOptionBlock();
179       if (cmd.findOption("--read-xfer-auto"))
180           opt_ixfer = EXS_Unknown;
181       if (cmd.findOption("--read-xfer-detect"))
182           dcmAutoDetectDatasetXfer.set(OFTrue);
183       if (cmd.findOption("--read-xfer-little"))
184       {
185           app.checkDependence("--read-xfer-little", "--read-dataset", opt_readMode == ERM_dataset);
186           opt_ixfer = EXS_LittleEndianExplicit;
187       }
188       if (cmd.findOption("--read-xfer-big"))
189       {
190           app.checkDependence("--read-xfer-big", "--read-dataset", opt_readMode == ERM_dataset);
191           opt_ixfer = EXS_BigEndianExplicit;
192       }
193       if (cmd.findOption("--read-xfer-implicit"))
194       {
195           app.checkDependence("--read-xfer-implicit", "--read-dataset", opt_readMode == ERM_dataset);
196           opt_ixfer = EXS_LittleEndianImplicit;
197       }
198       cmd.endOptionBlock();
199 
200       cmd.beginOptionBlock();
201       if (cmd.findOption("--accept-odd-length"))
202       {
203           dcmAcceptOddAttributeLength.set(OFTrue);
204       }
205       if (cmd.findOption("--assume-even-length"))
206       {
207           dcmAcceptOddAttributeLength.set(OFFalse);
208       }
209       cmd.endOptionBlock();
210 
211       cmd.beginOptionBlock();
212       if (cmd.findOption("--enable-cp246"))
213       {
214           dcmEnableCP246Support.set(OFTrue);
215       }
216       if (cmd.findOption("--disable-cp246"))
217       {
218           dcmEnableCP246Support.set(OFFalse);
219       }
220       cmd.endOptionBlock();
221 
222       cmd.beginOptionBlock();
223       if (cmd.findOption("--retain-un"))
224       {
225           dcmEnableUnknownVRConversion.set(OFFalse);
226       }
227       if (cmd.findOption("--convert-un"))
228       {
229           dcmEnableUnknownVRConversion.set(OFTrue);
230       }
231       cmd.endOptionBlock();
232 
233       cmd.beginOptionBlock();
234       if (cmd.findOption("--enable-correction"))
235       {
236           dcmEnableAutomaticInputDataCorrection.set(OFTrue);
237       }
238       if (cmd.findOption("--disable-correction"))
239       {
240           dcmEnableAutomaticInputDataCorrection.set(OFFalse);
241       }
242       cmd.endOptionBlock();
243 
244 #ifdef WITH_ZLIB
245       cmd.beginOptionBlock();
246       if (cmd.findOption("--bitstream-deflated"))
247       {
248           dcmZlibExpectRFC1950Encoding.set(OFFalse);
249       }
250       if (cmd.findOption("--bitstream-zlib"))
251       {
252           dcmZlibExpectRFC1950Encoding.set(OFTrue);
253       }
254       cmd.endOptionBlock();
255 #endif
256 
257       if (cmd.findOption("--exec")) app.checkValue(cmd.getValue(opt_execString));
258     }
259 
260     /* print resource identifier */
261     OFLOG_DEBUG(dcm2pdfLogger, rcsid << OFendl);
262 
263     /* make sure data dictionary is loaded */
264     if (!dcmDataDict.isDictionaryLoaded())
265     {
266         OFLOG_WARN(dcm2pdfLogger, "no data dictionary loaded, check environment variable: "
267             << DCM_DICT_ENVIRONMENT_VARIABLE);
268     }
269 
270     // open inputfile
271     if ((opt_ifname == NULL) || (strlen(opt_ifname) == 0))
272     {
273         OFLOG_FATAL(dcm2pdfLogger, "invalid filename: <empty string>");
274         return 1;
275     }
276 
277     DcmFileFormat fileformat;
278     DcmDataset * dataset = fileformat.getDataset();
279 
280     OFLOG_INFO(dcm2pdfLogger, "open input file " << opt_ifname);
281 
282     OFCondition error = fileformat.loadFile(opt_ifname, opt_ixfer, EGL_noChange, DCM_MaxReadLength, opt_readMode);
283 
284     if (error.bad())
285     {
286         OFLOG_FATAL(dcm2pdfLogger, error.text() << ": reading file: " << opt_ifname);
287         return 1;
288     }
289 
290     OFString sopClass;
291     error = dataset->findAndGetOFString(DCM_SOPClassUID, sopClass);
292     if (error.bad() || sopClass != UID_EncapsulatedPDFStorage)
293     {
294         OFLOG_FATAL(dcm2pdfLogger, "not an Encapsulated PDF Storage object: " << opt_ifname);
295         return 1;
296     }
297 
298     DcmElement *delem = NULL;
299     error = dataset->findAndGetElement(DCM_EncapsulatedDocument, delem);
300     if (error.bad() || delem == NULL)
301     {
302         OFLOG_FATAL(dcm2pdfLogger, "attribute (0042,0011) Encapsulated Document missing.");
303         return 1;
304     }
305 
306     Uint32 len = delem->getLength();
307     Uint8 *pdfDocument = NULL;
308     error = delem->getUint8Array(pdfDocument);
309     if (error.bad() || pdfDocument == NULL || len == 0)
310     {
311         OFLOG_FATAL(dcm2pdfLogger, "attribute (0042,0011) Encapsulated Document empty or wrong VR.");
312         return 1;
313     }
314 
315     /* strip pad byte at end of file, if there is one. The PDF format expects
316      * files to end with %%EOF followed by CR/LF (although in some cases the
317      * CR/LF may be missing or you might only find CR or LF).
318      * If the last character of the file is not a CR or LF, and not the
319      * letter 'F', we assume it is either trailing garbage or a pad byte, and remove it.
320      */
321     if (pdfDocument[len-1] != 10 && pdfDocument[len-1] != 13 && pdfDocument[len-1] != 'F')
322     {
323         --len;
324     }
325 
326     FILE *pdffile = fopen(opt_ofname, "wb");
327     if (pdffile == NULL)
328     {
329         OFLOG_FATAL(dcm2pdfLogger, "unable to create file " << opt_ofname);
330         return 1;
331     }
332 
333     if (len != fwrite(pdfDocument, 1, len, pdffile))
334     {
335         OFLOG_FATAL(dcm2pdfLogger, "write error in file " << opt_ofname);
336         fclose(pdffile);
337         return 1;
338     }
339 
340     fclose(pdffile);
341 
342     OFLOG_INFO(dcm2pdfLogger, "conversion successful");
343 
344     if (opt_execString)
345     {
346         OFString cmdStr = opt_execString;
347         cmdStr = replaceChars(cmdStr, OFString(FILENAME_PLACEHOLDER), opt_ofname);
348 
349         // Execute command and return result
350         return system(cmdStr.c_str());
351     }
352 
353     return 0;
354 }
355