1# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
2"""
3Produce runnable python code which can recreate DICOM objects or files.
4
Can be run as a script to produce code for an entire file, or imported to
use specific functions that produce code for individual pydicom DICOM classes.
7
8"""
9
10# Run this from the same directory as a "base" dicom file and
11# this code will output to screen the dicom parameters like:
12#    ds.PatientName = 'TEST'
13# etc for all parameters in the file.
14# This can then be pasted into a python file and parameters edited as necessary
15# to create a DICOM file from scratch
16
17import argparse
18import os.path
19import re
20import sys
21from typing import Optional, List, Callable
22
23import pydicom
24from pydicom.datadict import dictionary_keyword
25from pydicom.dataelem import DataElement, BINARY_VR_VALUES
26from pydicom.dataset import Dataset
27from pydicom.tag import BaseTag
28from pydicom.cli.main import filespec_help, filespec_parser
29
30
# Separator used when joining the generated lines of code into one string
line_term = "\n"

# Precompiled search patterns for camel_to_underscore()
# first_cap_re: any character followed by a capital letter that starts a run
#   of lowercase letters
# all_cap_re: a lowercase letter or digit immediately followed by a capital
first_cap_re = re.compile("(.)([A-Z][a-z]+)")
all_cap_re = re.compile("([a-z0-9])([A-Z])")
36
37
def camel_to_underscore(name: str) -> str:
    """Return `name` converted from CamelCase to lower_case_with_underscores.

    Two substitution passes insert the underscores, then the whole result is
    lower-cased. Recipe from http://stackoverflow.com/questions/1175208
    """
    # Pass 1: underscore before a capital that begins a run of lowercase
    # letters
    after_first_pass = first_cap_re.sub(r"\1_\2", name)
    # Pass 2: underscore between a lowercase letter or digit and a capital
    after_second_pass = all_cap_re.sub(r"\1_\2", after_first_pass)
    return after_second_pass.lower()
43
44
def tag_repr(tag: BaseTag) -> str:
    """Return `tag` formatted as the string ``(0xgggg, 0xeeee)``.

    The group and element numbers are each written as four lowercase
    hexadecimal digits, zero-padded on the left.
    """
    group_hex = format(tag.group, "04x")
    element_hex = format(tag.element, "04x")
    return "(0x" + group_hex + ", 0x" + element_hex + ")"
48
49
def default_name_filter(name: str) -> str:
    """Shorten some names used in the generated code to a more readable form

    The name is first converted to lower_case_with_underscores, then each
    known long form it contains is replaced by its abbreviation.

    :arg name: a sequence variable name or sequence item name
    :return: a shorter version of name if a known conversion applies,
             else the underscore form of the original name

    """
    # (long form, abbreviation) pairs, applied in this order
    abbreviations = (
        ("control_point", "cp"),
        ("reference", "ref"),
        ("fraction_group", "frxn_gp"),
    )
    shortened = camel_to_underscore(name)
    for long_form, short_form in abbreviations:
        shortened = shortened.replace(long_form, short_form)
    return shortened
63
64
65# Functions to produce python code
def code_imports() -> str:
    """Return the import statements required by the other codify output

    :return: the import statement lines joined into a single string

    """
    import_lines = [
        "import pydicom",
        "from pydicom.dataset import Dataset, FileMetaDataset",
        "from pydicom.sequence import Sequence",
    ]
    return line_term.join(import_lines)
76
77
def _at_valuerep(value: object) -> Optional[str]:
    """Return code for the value of an 'AT' element, or None if not handled.

    A single tag is written with tag_repr(); several tags are written as a
    list of tag_repr() strings. Anything else (e.g. an empty value) returns
    ``None`` so the caller can fall back to ``repr()``.
    """
    if isinstance(value, BaseTag):
        return tag_repr(value)
    if value is None or isinstance(value, (str, bytes)):
        return None
    try:
        tags = list(value)
    except TypeError:  # not iterable, so not a multi-valued element
        return None
    if tags and all(isinstance(tag, BaseTag) for tag in tags):
        return "[" + ", ".join(tag_repr(tag) for tag in tags) + "]"
    return None


def code_dataelem(
    dataelem: DataElement,
    dataset_name: str = "ds",
    exclude_size: Optional[int] = None,
    include_private: bool = False
) -> str:
    """Code lines for a single DICOM data element

    Parameters
    ----------

    dataelem : DataElement
        The DataElement instance to turn into code
    dataset_name : str
        The variable name of the Dataset containing `dataelem`
    exclude_size : Union[int, None]
        If specified (and non-zero), binary values longer than this
        (in bytes) will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        Only used when `dataelem` is a sequence, in which case it is passed
        on to code_sequence.

    Returns
    -------
    str
        A string containing code to recreate the data element
        If the data element is a sequence, calls code_sequence
    """

    if dataelem.VR == "SQ":
        return code_sequence(
            dataelem, dataset_name, exclude_size, include_private
        )

    # If in DICOM dictionary, set using the keyword
    # If not (e.g. is private element), set using add_new method
    have_keyword = True
    try:
        keyword = dictionary_keyword(dataelem.tag)
    except KeyError:
        have_keyword = False

    # Values of VR 'AT' are themselves tags; write them with tag_repr() so
    # the output uses 0x-prefixed hex literals.
    # NOTE(review): repr() of a tag is expected to look like (gggg, eeee),
    # which is not runnable Python - confirm against BaseTag.__repr__
    valuerep = None
    if dataelem.VR == "AT":
        valuerep = _at_valuerep(dataelem.value)
    if valuerep is None:
        valuerep = repr(dataelem.value)

    if exclude_size:
        if (
            dataelem.VR in BINARY_VR_VALUES
            and not isinstance(dataelem.value, (int, float))
            and len(dataelem.value) > exclude_size
        ):
            # Deliberately not valid code: forces the user to deal with it
            valuerep = f"# XXX Array of {len(dataelem.value)} bytes excluded"

    if have_keyword:
        line = f"{dataset_name}.{keyword} = {valuerep}"
    else:
        tag = tag_repr(dataelem.tag)
        VR = dataelem.VR
        line = f"{dataset_name}.add_new({tag}, '{VR}', {valuerep})"

    return line
137
138
def code_sequence(
    dataelem: DataElement,
    dataset_name: str = "ds",
    exclude_size: Optional[int] = None,
    include_private: bool = False,
    name_filter: Callable[[str], str] = default_name_filter,
) -> str:
    """Code lines for recreating a Sequence data element

    Parameters
    ----------
    dataelem : DataElement
        The DataElement instance whose value is the Sequence
    dataset_name : str
        Variable name of the dataset containing the Sequence
    exclude_size : int, optional
        If not ``None``, values longer than this (in bytes) will only have a
        commented string for a value, causing a syntax error when the code is
        run, and thus prompting the user to remove or fix that line.
    include_private: bool
        If ``False`` (default) private elements are skipped, otherwise private
        data elements will be coded.
    name_filter: Callable[[str], str]
        A callable taking a sequence name or sequence item name, and returning
        a shorter name for easier code reading. If ``None`` (or otherwise
        falsy), the keyword is only converted to lower_case_with_underscores.
        The filter is applied to this sequence only; sequences nested in
        its items are coded through code_dataset, which does not take a
        filter.

    Returns
    -------
    str
        A string containing code lines to recreate a DICOM sequence
    """

    lines = []
    seq = dataelem.value
    seq_name = dataelem.name
    seq_item_name = seq_name.replace(" Sequence", "")
    have_keyword = True
    try:
        seq_keyword = dictionary_keyword(dataelem.tag)
    except KeyError:
        # Not in the DICOM dictionary (e.g. private): make up a name that is
        # only used to derive the Python variable names
        have_keyword = False
        seq_keyword = f"Tag{dataelem.tag:08x}"

    # Create comment line to document the start of Sequence
    lines.append("")
    lines.append("# " + seq_name)

    # Code line to create a new Sequence object
    if name_filter:
        seq_var = name_filter(seq_keyword)
    else:
        # Without a filter the variable name would otherwise be unbound
        seq_var = camel_to_underscore(seq_keyword)
    lines.append(seq_var + " = Sequence()")

    # Code line to add the sequence to its parent.
    # A made-up keyword cannot be used for attribute assignment, so such a
    # sequence is added by tag, as code_dataelem does for other elements
    if have_keyword:
        lines.append(dataset_name + "." + seq_keyword + " = " + seq_var)
    else:
        tag = tag_repr(dataelem.tag)
        lines.append(f"{dataset_name}.add_new({tag}, 'SQ', {seq_var})")

    # Names that do not change from one sequence item to the next
    item_keyword = seq_keyword.replace("Sequence", "")
    index_keyword = item_keyword + "Index"
    number_keyword = item_keyword + "Number"
    item_var = seq_var.replace("_sequence", "")

    # Code lines to add sequence items to the Sequence
    for i, ds in enumerate(seq):
        # Determine index to use. If seq item has a data element with 'Index',
        #    use that; if one with 'Number', use that, else start at 1
        if hasattr(ds, index_keyword):
            index_str = str(getattr(ds, index_keyword))
        elif hasattr(ds, number_keyword):
            index_str = str(getattr(ds, number_keyword))
        else:
            index_str = str(i + 1)

        # Code comment line to mark start of sequence item
        lines.append("")
        lines.append("# " + seq_name + ": " + seq_item_name + " " + index_str)

        # Determine the variable name to use for the sequence item (dataset)
        ds_name = item_var + index_str

        # Code the sequence item
        code_item = code_dataset(ds, ds_name, exclude_size, include_private)
        lines.append(code_item)

        # Code the line to append the item to its parent sequence
        lines.append(seq_var + ".append(" + ds_name + ")")

    # Join the lines and return a single string
    return line_term.join(lines)
221
222
def code_dataset(
    ds: Dataset,
    dataset_name: str = "ds",
    exclude_size: Optional[int] = None,
    include_private: bool = False,
    is_file_meta: bool = False,
) -> str:
    """Return Python code for creating `ds`.

    The first line of the code creates an empty dataset, and is followed by
    the code for each data element of `ds` in iteration order.

    Parameters
    ----------
    ds : pydicom.dataset.Dataset
        The dataset to codify.
    dataset_name : str, optional
        The Python variable name to use for the dataset, default ``'ds'``.
    exclude_size : int, optional
        If not ``None``, values longer than this (in bytes) will only have a
        commented string for a value, causing a syntax error when the code is
        run, and thus prompting the user to remove or fix that line.
    include_private : bool, optional
        If ``False`` (default) private elements are skipped, otherwise private
        data elements will be coded.
    is_file_meta : bool, optional
        ``True`` if `ds` contains file meta information elements, in which
        case the code creates a ``FileMetaDataset`` rather than a ``Dataset``.

    Returns
    -------
    str
        The codified dataset.
    """

    if is_file_meta:
        constructor = " = FileMetaDataset()"
    else:
        constructor = " = Dataset()"
    code_lines = [dataset_name + constructor]

    for elem in ds:
        # Private elements are only coded when explicitly requested
        if not include_private and elem.tag.is_private:
            continue

        code_lines.append(
            code_dataelem(elem, dataset_name, exclude_size, include_private)
        )
        # The code for a sequence is followed by a blank line
        if elem.VR == "SQ":
            code_lines.append("")

    # No blank line is wanted when a sequence is the last thing coded
    if code_lines and code_lines[-1] == "":
        del code_lines[-1]

    return line_term.join(code_lines)
274
275
def code_file(
    filename: str,
    exclude_size: Optional[int] = None,
    include_private: bool = False
) -> str:
    """Write a complete source code file to recreate a DICOM file

    The file is read with ``pydicom.dcmread(filename, force=True)`` and the
    resulting dataset is handed to code_file_from_dataset.

    Parameters
    ----------
    filename : str
        Complete path and filename of a DICOM file to convert
    exclude_size : Union[int,None]
        If not None, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        If ``False`` (default), private elements are skipped
        If ``True``, private data elements will be coded.

    Returns
    -------
    str
        A string containing code lines to recreate the entire DICOM file

    """
    dataset = pydicom.dcmread(filename, force=True)
    return code_file_from_dataset(
        dataset, exclude_size=exclude_size, include_private=include_private
    )
304
305
def code_file_from_dataset(
    ds: Dataset,
    exclude_size: Optional[int] = None,
    include_private: bool = False
) -> str:
    """Write a complete source code file to recreate a DICOM dataset

    The code consists of a comment header, the imports, the file meta
    information (only if `ds` has a ``file_meta`` attribute), the main data
    elements, and finally the lines setting ``is_implicit_VR`` and
    ``is_little_endian``.

    Parameters
    ----------
    ds : Dataset
        The dataset to convert. Its ``filename``, if it has one, is quoted
        in the comment header.
    exclude_size : Union[int,None]
        If not None, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        If ``False`` (default), private elements are skipped
        If ``True``, private data elements will be coded.

    Returns
    -------
    str
        A string containing code lines to recreate the entire dataset

    """
    # Header comment naming where the dataset came from
    source_name = ds.get("filename")
    if source_name:
        identifier = f"DICOM file '{source_name}'"
    else:
        identifier = "non-file dataset"

    code_lines = [
        f"# Coded version of {identifier}",
        "# Produced by pydicom codify utility script",
        code_imports(),
        "",
    ]

    # File meta information, coded into its own `file_meta` variable
    if hasattr(ds, 'file_meta'):
        code_lines += [
            "# File meta info data elements",
            code_dataset(
                ds.file_meta,
                "file_meta",
                exclude_size,
                include_private,
                is_file_meta=True,
            ),
            "",
        ]

    # The main dataset
    code_lines += [
        "# Main data elements",
        code_dataset(
            ds, exclude_size=exclude_size, include_private=include_private
        ),
        "",
    ]

    # Attach the file meta to the dataset, and set the transfer syntax flags
    if hasattr(ds, 'file_meta'):
        code_lines.append("ds.file_meta = file_meta")
    code_lines.append("ds.is_implicit_VR = " + str(ds.is_implicit_VR))
    code_lines.append("ds.is_little_endian = " + str(ds.is_little_endian))

    return line_term.join(code_lines)
374
375
def set_parser_arguments(
    parser: argparse.ArgumentParser, default_exclude_size: int
) -> None:
    """Add the codify command-line arguments to `parser`.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        The parser to add the arguments to.
    default_exclude_size : int
        The default for the ``--exclude-size`` option, also quoted in that
        option's help text.
    """
    add_argument = parser.add_argument

    # Positional arguments: the input file specification and, optionally,
    # the file to write the code to
    add_argument("filespec", help=filespec_help, type=filespec_parser)

    outfile_help = (
        "Filename to write Python code to, if not specified then code is "
        "written to stdout"
    )
    add_argument(
        "outfile",
        nargs="?",
        type=argparse.FileType("w"),
        help=outfile_help,
        default=sys.stdout,
    )

    # Options
    exclude_size_help = (
        "Exclude binary data larger than specified (default: "
        f"{default_exclude_size} bytes)"
    )
    add_argument(
        "-e",
        "--exclude-size",
        type=int,
        default=default_exclude_size,
        help=exclude_size_help,
    )

    add_argument(
        "-p",
        "--include-private",
        action="store_true",
        help="Include private data elements (default is to exclude them)",
    )

    save_as_help = (
        "Specify the filename for ds.save_as(save_filename); "
        "otherwise the input name + '_from_codify' will be used"
    )
    add_argument("-s", "--save-as", help=save_as_help)
418
419
def do_codify(args: argparse.Namespace) -> None:
    """Convert the requested dataset to code and write it to ``args.outfile``.

    Parameters
    ----------
    args : argparse.Namespace
        The parsed arguments; ``filespec``, ``exclude_size``,
        ``include_private``, ``save_as`` and ``outfile`` are used.

    Raises
    ------
    NotImplementedError
        If ``args.filespec`` does not hold exactly one entry, or if the
        element selected by that entry is not a Dataset.
    """
    filespecs = args.filespec
    if len(filespecs) != 1:
        raise NotImplementedError(
            "Codify can only work on a single DICOM file input"
        )

    ds, element = filespecs[0]
    filename = ds.filename

    if element and not isinstance(element, Dataset):
        raise NotImplementedError(
            f"Codify can only code a Dataset, not a {type(element)}"
        )

    # Code the selected element if there is one, else the whole dataset
    to_codify = element or ds
    code_str = code_file_from_dataset(
        to_codify, args.exclude_size, args.include_private
    )

    # A line saving the dataset is always added; without --save-as the
    # name is derived from the input filename
    save_as_filename = args.save_as
    if not save_as_filename:
        base = os.path.splitext(filename)[0]
        save_as_filename = base + "_from_codify" + ".dcm"
    code_str += (
        f"\nds.save_as(r'{save_as_filename}', write_like_original=False)"
    )

    # Write the code lines to specified file or to standard output
    # For test_util, captured output .name throws error, ignore it:
    try:
        if args.outfile.name != "<stdout>":
            print(f"Writing code to file '{args.outfile.name}'")
    except AttributeError:
        pass
    args.outfile.write(code_str)
458
459
def main(default_exclude_size: int, args: Optional[List[str]] = None) -> None:
    """Create Python code according to user options

    Parameters
    ----------
    default_exclude_size : int
        Values longer than this will be coded as a commented syntax error
    args : List[str], optional
        Command-line arguments to parse.  If ``None`` then :attr:`sys.argv` is
        used.
    """
    epilog = (
        "Binary data (e.g. pixels) larger than --exclude-size "
        f"(default {default_exclude_size} bytes) is not included. A "
        "dummy line with a syntax error is produced. "
        "Private data elements are not included by default."
    )
    parser = argparse.ArgumentParser(
        description="Produce python/pydicom code from a DICOM file",
        epilog=epilog,
    )
    set_parser_arguments(parser, default_exclude_size)

    parsed_args = parser.parse_args(args)
    do_codify(parsed_args)
482
483
# When run as a script, codify using the command-line arguments, with a
# default exclude size of 100
if __name__ == "__main__":  # pragma: no cover
    main(default_exclude_size=100)
486