# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
"""
Produce runnable python code which can recreate DICOM objects or files.

Can run as a script to produce code for an entire file,
or import and use specific functions to provide code for pydicom DICOM classes

"""

# Run this from the same directory as a "base" dicom file and
# this code will output to screen the dicom parameters like:
#    ds.PatientName = 'TEST'
# etc for all parameters in the file.
# This can then be pasted into a python file and parameters edited as necessary
# to create a DICOM file from scratch

import argparse
import os.path
import re
import sys
from typing import Optional, List, Callable

import pydicom
from pydicom.datadict import dictionary_keyword
from pydicom.dataelem import DataElement, BINARY_VR_VALUES
from pydicom.dataset import Dataset
from pydicom.tag import BaseTag
from pydicom.cli.main import filespec_help, filespec_parser


# Separator used when joining generated code lines into one string
line_term = "\n"

# Precompiled search patterns for camel_to_underscore()
first_cap_re = re.compile("(.)([A-Z][a-z]+)")
all_cap_re = re.compile("([a-z0-9])([A-Z])")


def camel_to_underscore(name: str) -> str:
    """Convert name from CamelCase to lower_case_with_underscores"""
    # From http://stackoverflow.com/questions/1175208
    s1 = first_cap_re.sub(r"\1_\2", name)
    return all_cap_re.sub(r"\1_\2", s1).lower()


def tag_repr(tag: BaseTag) -> str:
    """String of tag value as (0xgggg, 0xeeee)"""
    return f"(0x{tag.group:04x}, 0x{tag.element:04x})"


def default_name_filter(name: str) -> str:
    """Callable to reduce some names in code to more readable short form

    The name is first converted to lower_case_with_underscores, then a few
    known long fragments are abbreviated.

    Parameters
    ----------
    name : str
        A sequence variable name or sequence item name

    Returns
    -------
    str
        A shorter version of `name` if a known conversion applies, else the
        underscore form of the original name
    """
    name = camel_to_underscore(name)
    name = name.replace("control_point", "cp")
    name = name.replace("reference", "ref")
    name = name.replace("fraction_group", "frxn_gp")
    return name


# Functions to produce python code
def code_imports() -> str:
    """Code the import statements needed by other codify results

    Returns
    -------
    str
        A string of import statement lines, joined with `line_term`
    """
    line1 = "import pydicom"
    line2 = "from pydicom.dataset import Dataset, FileMetaDataset"
    line3 = "from pydicom.sequence import Sequence"
    return line_term.join((line1, line2, line3))


def code_dataelem(
    dataelem: DataElement,
    dataset_name: str = "ds",
    exclude_size: Optional[int] = None,
    include_private: bool = False
) -> str:
    """Code lines for a single DICOM data element

    Parameters
    ----------
    dataelem : DataElement
        The DataElement instance to turn into code
    dataset_name : str
        The variable name of the Dataset containing `dataelem`
    exclude_size : Union[int, None]
        If specified (and non-zero), binary values longer than this
        (in bytes) will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        Only used when `dataelem` is a sequence, in which case it is passed
        through to :func:`code_sequence`.

    Returns
    -------
    str
        A string containing code to recreate the data element
        If the data element is a sequence, calls code_sequence
    """

    if dataelem.VR == "SQ":
        return code_sequence(
            dataelem, dataset_name, exclude_size, include_private
        )

    # If in DICOM dictionary, set using the keyword
    # If not (e.g. is private element), set using add_new method
    try:
        keyword: Optional[str] = dictionary_keyword(dataelem.tag)
    except KeyError:
        keyword = None
        have_keyword = False
    else:
        have_keyword = True

    valuerep = repr(dataelem.value)

    # A falsy exclude_size (None or 0) disables the size-based exclusion
    if exclude_size:
        if (
            dataelem.VR in BINARY_VR_VALUES
            and not isinstance(dataelem.value, (int, float))
            and len(dataelem.value) > exclude_size
        ):
            # Deliberately not valid Python on the right-hand side, so the
            # generated line fails loudly until the user deals with it
            valuerep = f"# XXX Array of {len(dataelem.value)} bytes excluded"

    if have_keyword:
        line = f"{dataset_name}.{keyword} = {valuerep}"
    else:
        tag = tag_repr(dataelem.tag)
        VR = dataelem.VR
        line = f"{dataset_name}.add_new({tag}, '{VR}', {valuerep})"

    return line


def code_sequence(
    dataelem: DataElement,
    dataset_name: str = "ds",
    exclude_size: Optional[int] = None,
    include_private: bool = False,
    name_filter: Callable[[str], str] = default_name_filter,
) -> str:
    """Code lines for recreating a Sequence data element

    Parameters
    ----------
    dataelem : DataElement
        The DataElement instance whose value is the Sequence
    dataset_name : str
        Variable name of the dataset containing the Sequence
    exclude_size : int, optional
        If not ``None``, values longer than this (in bytes) will only have a
        commented string for a value, causing a syntax error when the code is
        run, and thus prompting the user to remove or fix that line.
    include_private: bool
        If ``False`` (default) private elements are skipped, otherwise private
        data elements will be coded.
    name_filter: Callable[[str], str]
        A callable taking a sequence name or sequence item name, and returning
        a shorter name for easier code reading. If a falsy value is passed,
        the plain lower_case_with_underscores form of the keyword is used.

    Returns
    -------
    str
        A string containing code lines to recreate a DICOM sequence
    """

    lines = []
    seq = dataelem.value
    seq_name = dataelem.name
    seq_item_name = seq_name.replace(" Sequence", "")
    try:
        seq_keyword = dictionary_keyword(dataelem.tag)
    except KeyError:
        # No dictionary keyword (e.g. private tag): build a name from the tag
        have_keyword = False
        seq_keyword = f"Tag{dataelem.tag:08x}"
    else:
        have_keyword = True

    # Create comment line to document the start of Sequence
    lines.append("")
    lines.append("# " + seq_name)

    # Code line to create a new Sequence object
    # (always bind seq_var, even when no name_filter was supplied)
    if name_filter:
        seq_var = name_filter(seq_keyword)
    else:
        seq_var = camel_to_underscore(seq_keyword)
    lines.append(seq_var + " = Sequence()")

    # Code line to add the sequence to its parent
    if have_keyword:
        lines.append(dataset_name + "." + seq_keyword + " = " + seq_var)
    else:
        # Attribute assignment only works for dictionary keywords; an element
        # without one has to be added by tag, as code_dataelem() does
        tag = tag_repr(dataelem.tag)
        lines.append(f"{dataset_name}.add_new({tag}, 'SQ', {seq_var})")

    # These attribute names depend only on the sequence, not on the item
    index_keyword = seq_keyword.replace("Sequence", "") + "Index"
    number_keyword = seq_keyword.replace("Sequence", "") + "Number"

    # Code lines to add sequence items to the Sequence
    for i, ds in enumerate(seq):
        # Determine index to use. If seq item has a data element with 'Index',
        # use that; if one with 'Number', use that, else start at 1
        if hasattr(ds, index_keyword):
            index_str = str(getattr(ds, index_keyword))
        elif hasattr(ds, number_keyword):
            index_str = str(getattr(ds, number_keyword))
        else:
            index_str = str(i + 1)

        # Code comment line to mark start of sequence item
        lines.append("")
        lines.append("# " + seq_name + ": " + seq_item_name + " " + index_str)

        # Determine the variable name to use for the sequence item (dataset)
        ds_name = seq_var.replace("_sequence", "") + index_str

        # Code the sequence item
        code_item = code_dataset(ds, ds_name, exclude_size, include_private)
        lines.append(code_item)

        # Code the line to append the item to its parent sequence
        lines.append(seq_var + ".append(" + ds_name + ")")

    # Join the lines and return a single string
    return line_term.join(lines)


def code_dataset(
    ds: Dataset,
    dataset_name: str = "ds",
    exclude_size: Optional[int] = None,
    include_private: bool = False,
    is_file_meta: bool = False,
) -> str:
    """Return Python code for creating `ds`.

    Parameters
    ----------
    ds : pydicom.dataset.Dataset
        The dataset to codify.
    dataset_name : str, optional
        The Python variable name to use for the dataset, default ``'ds'``.
    exclude_size : int, optional
        If not ``None``, values longer than this (in bytes) will only have a
        commented string for a value, causing a syntax error when the code is
        run, and thus prompting the user to remove or fix that line.
    include_private : bool, optional
        If ``False`` (default) private elements are skipped, otherwise private
        data elements will be coded.
    is_file_meta : bool, optional
        ``True`` if `ds` contains file meta information elements.

    Returns
    -------
    str
        The codified dataset.
    """

    lines = []
    ds_class = " = FileMetaDataset()" if is_file_meta else " = Dataset()"
    lines.append(dataset_name + ds_class)
    for dataelem in ds:
        # If a private data element and flag says so, skip it and go to next
        if not include_private and dataelem.tag.is_private:
            continue
        # Otherwise code the line and add it to the lines list
        code_line = code_dataelem(
            dataelem, dataset_name, exclude_size, include_private
        )
        lines.append(code_line)
        # Add blank line if just coded a sequence
        if dataelem.VR == "SQ":
            lines.append("")
    # If sequence was end of this dataset, remove the extra blank line
    if lines and lines[-1] == "":
        lines.pop()
    # Join all the code lines and return them
    return line_term.join(lines)


def code_file(
    filename: str,
    exclude_size: Optional[int] = None,
    include_private: bool = False
) -> str:
    """Write a complete source code file to recreate a DICOM file

    The file is read with ``pydicom.dcmread(filename, force=True)`` and the
    resulting dataset is passed to :func:`code_file_from_dataset`.

    Parameters
    ----------
    filename : str
        Complete path and filename of a DICOM file to convert
    exclude_size : Union[int,None]
        If not None, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        If ``False`` (default), private elements are skipped
        If ``True``, private data elements will be coded.

    Returns
    -------
    str
        A string containing code lines to recreate the entire DICOM file

    """
    ds = pydicom.dcmread(filename, force=True)
    return code_file_from_dataset(ds, exclude_size, include_private)


def code_file_from_dataset(
    ds: Dataset,
    exclude_size: Optional[int] = None,
    include_private: bool = False
) -> str:
    """Write a complete source code file to recreate a DICOM dataset

    Parameters
    ----------
    ds : Dataset
        The dataset to convert. If it has a ``filename`` the generated header
        comment names that file; if it has a ``file_meta`` attribute, code for
        the file meta information is generated as well.
    exclude_size : Union[int,None]
        If not None, values longer than this (in bytes)
        will only have a commented string for a value,
        causing a syntax error when the code is run,
        and thus prompting the user to remove or fix that line.
    include_private : bool
        If ``False`` (default), private elements are skipped
        If ``True``, private data elements will be coded.

    Returns
    -------
    str
        A string containing code lines to recreate the entire DICOM file

    """
    lines = []

    # Code a nice header for the python file
    filename = ds.get("filename")
    identifier = f"DICOM file '{filename}'" if filename else "non-file dataset"

    lines.append(f"# Coded version of {identifier}")
    lines.append("# Produced by pydicom codify utility script")

    # Code the necessary imports
    lines.append(code_imports())
    lines.append("")

    # Code the file_meta information
    if hasattr(ds, 'file_meta'):
        lines.append("# File meta info data elements")
        code_meta = code_dataset(
            ds.file_meta,
            "file_meta",
            exclude_size,
            include_private,
            is_file_meta=True,
        )
        lines.append(code_meta)
        lines.append("")

    # Code the main dataset
    lines.append("# Main data elements")
    code_ds = code_dataset(
        ds, exclude_size=exclude_size, include_private=include_private
    )
    lines.append(code_ds)
    lines.append("")

    # Add the file meta to the dataset, and set transfer syntax
    if hasattr(ds, 'file_meta'):
        lines.append("ds.file_meta = file_meta")
    lines.append("ds.is_implicit_VR = " + str(ds.is_implicit_VR))
    lines.append("ds.is_little_endian = " + str(ds.is_little_endian))

    # Return the complete code string
    return line_term.join(lines)


def set_parser_arguments(
    parser: argparse.ArgumentParser, default_exclude_size: int
) -> None:
    """Add the codify command-line arguments to `parser`

    Parameters
    ----------
    parser : argparse.ArgumentParser
        The parser to add the ``filespec``, ``outfile``, ``--exclude-size``,
        ``--include-private`` and ``--save-as`` arguments to.
    default_exclude_size : int
        Default for ``--exclude-size``; also shown in that option's help text.
    """
    parser.add_argument(
        "filespec",
        help=filespec_help,
        type=filespec_parser,
    )
    parser.add_argument(
        "outfile",
        nargs="?",
        type=argparse.FileType("w"),
        help=(
            "Filename to write Python code to, if not specified then code is "
            "written to stdout"
        ),
        default=sys.stdout,
    )
    parser.add_argument(
        "-e",
        "--exclude-size",
        type=int,
        default=default_exclude_size,
        help=(
            "Exclude binary data larger than specified (default: "
            f"{default_exclude_size} bytes)"
        ),
    )
    parser.add_argument(
        "-p",
        "--include-private",
        action="store_true",
        help="Include private data elements (default is to exclude them)",
    )
    parser.add_argument(
        "-s",
        "--save-as",
        help=(
            "Specify the filename for ds.save_as(save_filename); "
            "otherwise the input name + '_from_codify' will be used"
        ),
    )


def do_codify(args: argparse.Namespace) -> None:
    """Generate code for the parsed arguments and write it to ``args.outfile``

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments as configured by :func:`set_parser_arguments`.

    Raises
    ------
    NotImplementedError
        If ``args.filespec`` does not hold exactly one entry, or if the
        selected element is not a :class:`Dataset`.
    """
    # Convert the requested dataset to python/pydicom code lines
    if len(args.filespec) != 1:
        raise NotImplementedError(
            "Codify can only work on a single DICOM file input"
        )

    # Each filespec entry unpacks to the dataset plus an optional element
    ds, element = args.filespec[0]
    filename = ds.filename

    if element and not isinstance(element, Dataset):
        raise NotImplementedError(
            f"Codify can only code a Dataset, not a {type(element)}"
        )

    code_str = code_file_from_dataset(
        element or ds, args.exclude_size, args.include_private
    )

    # Always append a code line to save the dataset, using the --save-as name
    # if one was given, else a name derived from the input filename
    if args.save_as:
        save_as_filename = args.save_as
    else:
        base, _ = os.path.splitext(filename)
        save_as_filename = base + "_from_codify" + ".dcm"
    save_line = (
        f"\nds.save_as(r'{save_as_filename}', write_like_original=False)"
    )
    code_str += save_line

    # Write the code lines to specified file or to standard output
    # For test_util, captured output .name throws error, ignore it:
    try:
        if args.outfile.name != "<stdout>":
            print(f"Writing code to file '{args.outfile.name}'")
    except AttributeError:
        pass
    args.outfile.write(code_str)


def main(default_exclude_size: int, args: Optional[List[str]] = None) -> None:
    """Create Python code according to user options

    Parameters
    ----------
    default_exclude_size : int
        Values longer than this will be coded as a commented syntax error
    args : List[str], optional
        Command-line arguments to parse. If ``None`` then :attr:`sys.argv` is
        used.
    """
    parser = argparse.ArgumentParser(
        description="Produce python/pydicom code from a DICOM file",
        epilog=(
            "Binary data (e.g. pixels) larger than --exclude-size "
            f"(default {default_exclude_size} bytes) is not included. A "
            "dummy line with a syntax error is produced. "
            "Private data elements are not included by default."
        ),
    )
    set_parser_arguments(parser, default_exclude_size)
    do_codify(parser.parse_args(args))


if __name__ == "__main__":  # pragma: no cover
    main(default_exclude_size=100)