# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
"""Define the DataElement class.

A DataElement has a tag,
              a value representation (VR),
              a value multiplicity (VM)
              and a value.
"""

import base64
import json
from typing import (
    Optional, Any, Tuple, Callable, Union, TYPE_CHECKING, Dict, Type,
    List, NamedTuple, MutableSequence, cast
)
import warnings

from pydicom import config  # don't import datetime_conversion directly
from pydicom.config import logger
from pydicom.datadict import (dictionary_has_tag, dictionary_description,
                              dictionary_keyword, dictionary_is_retired,
                              private_dictionary_description, dictionary_VR,
                              repeater_has_tag, private_dictionary_VR)
from pydicom.errors import BytesLengthException
from pydicom.jsonrep import JsonDataElementConverter, BulkDataType
from pydicom.multival import MultiValue
from pydicom.tag import Tag, BaseTag
from pydicom.uid import UID
from pydicom import jsonrep
import pydicom.valuerep  # don't import DS directly as can be changed by config
from pydicom.valuerep import PersonName

if config.have_numpy:
    import numpy

if TYPE_CHECKING:  # pragma: no cover
    from pydicom.dataset import Dataset


BINARY_VR_VALUES = [
    'US', 'SS', 'UL', 'SL', 'OW', 'OB', 'OL', 'UN',
    'OB or OW', 'US or OW', 'US or SS or OW', 'FL', 'FD', 'OF', 'OD'
]


def empty_value_for_VR(
    VR: Optional[str], raw: bool = False
) -> Union[bytes, List[str], str, None, PersonName]:
    """Return the value for an empty element for `VR`.

    .. versionadded:: 1.4

    The behavior of this function depends on the setting of
    :attr:`config.use_none_as_empty_text_VR_value`. If that is set to
    ``True``, an empty value is represented by ``None`` (except for VR 'SQ'),
    otherwise it depends on `VR`. For text VRs (this includes 'AE', 'AS',
    'CS', 'DA', 'DT', 'LO', 'LT', 'PN', 'SH', 'ST', 'TM', 'UC', 'UI', 'UR'
    and 'UT') an empty string is used as empty value representation (an
    empty :class:`~pydicom.valuerep.PersonName` for 'PN'), for all other VRs
    except 'SQ', ``None``. For empty sequence values (VR 'SQ') an empty list
    is used in all cases.
    Note that this is used only if decoding the element - it is always
    possible to set the value to another empty value representation,
    which will be preserved during the element object lifetime.

    Parameters
    ----------
    VR : str or None
        The VR of the corresponding element.
    raw : bool, optional
        If ``True``, returns the value for a :class:`RawDataElement`
        (``b''`` wherever a non-``None`` empty value applies),
        otherwise for a :class:`DataElement`

    Returns
    -------
    str or bytes or None or list or PersonName
        The value a data element with `VR` is assigned on decoding
        if it is empty.
    """
    # Sequences are checked first: they never use None as empty value
    if VR == 'SQ':
        return b'' if raw else []

    if config.use_none_as_empty_text_VR_value:
        return None

    if VR == 'PN':
        return b'' if raw else PersonName('')

    if VR in (
        'AE', 'AS', 'CS', 'DA', 'DT', 'LO', 'LT', 'SH', 'ST', 'TM',
        'UC', 'UI', 'UR', 'UT'
    ):
        return b'' if raw else ''

    return None


def _is_bytes(val: object) -> bool:
    """Return True only if `val` is of type `bytes`."""
    return isinstance(val, bytes)


# double '\' because it is used as escape chr in Python
_backslash_str = "\\"
_backslash_byte = b"\\"


class DataElement:
    """Contain and manipulate a DICOM Element.

    Examples
    --------

    While it's possible to create a new :class:`DataElement` directly and add
    it to a :class:`~pydicom.dataset.Dataset`:

    >>> from pydicom import Dataset
    >>> elem = DataElement(0x00100010, 'PN', 'CITIZEN^Joan')
    >>> ds = Dataset()
    >>> ds.add(elem)

    It's far more convenient to use a :class:`~pydicom.dataset.Dataset`
    to add a new :class:`DataElement`, as the VR and tag are determined
    automatically from the DICOM dictionary:

    >>> ds = Dataset()
    >>> ds.PatientName = 'CITIZEN^Joan'

    Empty DataElement objects (e.g. with VM = 0) show an empty string as
    value for text VRs and `None` for non-text (binary) VRs:

    >>> ds = Dataset()
    >>> ds.PatientName = None
    >>> ds.PatientName
    ''

    >>> ds.BitsAllocated = None
    >>> ds.BitsAllocated

    >>> str(ds.BitsAllocated)
    'None'

    Attributes
    ----------
    descripWidth : int
        For string display, this is the maximum width of the description
        field (default ``35``).
    is_undefined_length : bool
        Indicates whether the length field for the element was ``0xFFFFFFFF``
        (ie undefined).
    maxBytesToDisplay : int
        For string display, elements with values containing data which is
        longer than this value will display ``"Array of # elements"``
        (default ``16``).
    showVR : bool
        For string display, include the element's VR just before its value
        (default ``True``).
    tag : pydicom.tag.BaseTag
        The element's tag.
    VR : str
        The element's Value Representation.
    """

    descripWidth = 35
    maxBytesToDisplay = 16
    showVR = True
    is_raw = False

    def __init__(
        self,
        tag: Union[int, str, Tuple[int, int]],
        VR: str,
        value: Any,
        file_value_tell: Optional[int] = None,
        is_undefined_length: bool = False,
        already_converted: bool = False
    ) -> None:
        """Create a new :class:`DataElement`.

        Parameters
        ----------
        tag : int or str or 2-tuple of int
            The DICOM (group, element) tag in any form accepted by
            :func:`~pydicom.tag.Tag` such as ``'PatientName'``,
            ``(0x10, 0x10)``, ``0x00100010``, etc.
        VR : str
            The 2 character DICOM value representation (see DICOM Standard,
            Part 5, :dcm:`Section 6.2<part05/sect_6.2.html>`).
        value
            The value of the data element. One of the following:

            * a single string value
            * a number
            * a :class:`list` or :class:`tuple` with all strings or all numbers
            * a multi-value string with backslash separator
        file_value_tell : int, optional
            The byte offset to the start of the encoded element value.
        is_undefined_length : bool
            Used internally to store whether the length field for this element
            was ``0xFFFFFFFF``, i.e. 'undefined length'. Default is ``False``.
        already_converted : bool
            Used to determine whether or not the element's value requires
            conversion to a value with VM > 1. Default is ``False``.
        """
        if not isinstance(tag, BaseTag):
            tag = Tag(tag)
        self.tag = tag

        # a known tag shall only have the VR 'UN' if it has a length that
        # exceeds the size that can be encoded in 16 bit - all other cases
        # can be seen as an encoding error and can be corrected
        if (
            VR == 'UN'
            and not tag.is_private
            and config.replace_un_with_known_vr
            and (is_undefined_length or value is None or len(value) < 0xffff)
        ):
            try:
                VR = dictionary_VR(tag)
            except KeyError:
                # tag not in the dictionary - keep 'UN'
                pass

        self.VR = VR  # Note: you must set VR before setting value
        if already_converted:
            # bypass the property setter so the value is stored untouched
            self._value = value
        else:
            self.value = value  # calls property setter which will convert
        self.file_tell = file_value_tell
        self.is_undefined_length = is_undefined_length
        self.private_creator: Optional[str] = None
        self.parent: Optional["Dataset"] = None

    @classmethod
    def from_json(
        cls: Type["DataElement"],
        dataset_class: Type["Dataset"],
        tag: str,
        vr: str,
        value: Any,
        value_key: Optional[str],
        bulk_data_uri_handler: Optional[
            Union[
                Callable[[str, str, str], BulkDataType],
                Callable[[str], BulkDataType]
            ]
        ] = None
    ) -> "DataElement":
        """Return a :class:`DataElement` from a DICOM JSON Model attribute
        object.

        .. versionadded:: 1.3

        Parameters
        ----------
        dataset_class : dataset.Dataset derived class
            The class object to use for **SQ** element items.
        tag : str
            The data element's tag as uppercase hex.
        vr : str
            The data element's value representation (VR).
        value : str or List[Union[None, str, int, float, bytes, dict]]
            The data element's value(s).
        value_key : str or None
            The attribute name for `value`, should be one of:
            ``{"Value", "InlineBinary", "BulkDataURI"}``. If the element's VM
            is ``0`` and none of the keys are used then will be ``None``.
        bulk_data_uri_handler: callable or None
            Callable function that accepts either the `tag`, `vr` and
            "BulkDataURI" `value` or just the "BulkDataURI" `value` of the JSON
            representation of a data element and returns the actual value of
            that data element (retrieved via DICOMweb WADO-RS). If no
            `bulk_data_uri_handler` is specified (default) then the
            corresponding element will have an "empty" value such as
            ``""``, ``b""`` or ``None`` depending on the `vr` (i.e. the
            Value Multiplicity will be 0).

        Returns
        -------
        DataElement

        Raises
        ------
        ValueError
            If the element cannot be created from the converted value; the
            original exception is attached as the cause.
        """
        # TODO: test wado-rs retrieve wrapper
        converter = JsonDataElementConverter(
            dataset_class, tag, vr, value, value_key, bulk_data_uri_handler
        )
        elem_value = converter.get_element_values()
        try:
            return cls(tag=tag, value=elem_value, VR=vr)
        except Exception as exc:
            raise ValueError(
                f"Data element '{tag}' could not be loaded from JSON: "
                f"{elem_value}"
            ) from exc

    def to_json_dict(
        self,
        bulk_data_element_handler: Optional[Callable[["DataElement"], str]],
        bulk_data_threshold: int
    ) -> Dict[str, Any]:
        """Return a dictionary representation of the :class:`DataElement`
        conforming to the DICOM JSON Model as described in the DICOM
        Standard, Part 18, :dcm:`Annex F<part18/chaptr_F.html>`.

        .. versionadded:: 1.4

        Parameters
        ----------
        bulk_data_element_handler : callable or None
            Callable that accepts a bulk :class:`data element
            <pydicom.dataelem.DataElement>` and returns the
            "BulkDataURI" as a :class:`str` for retrieving the value of the
            data element via DICOMweb WADO-RS.
        bulk_data_threshold : int
            Size of base64 encoded data element above which a value will be
            provided in form of a "BulkDataURI" rather than "InlineBinary".
            Ignored if no `bulk_data_element_handler` is given.

        Returns
        -------
        dict
            Mapping representing a JSON encoded data element as ``{str: Any}``.
        """
        json_element: Dict[str, Any] = {'vr': self.VR}
        if self.VR in jsonrep.BINARY_VR_VALUES:
            if not self.is_empty:
                binary_value = self.value
                encoded_value = base64.b64encode(binary_value).decode('utf-8')
                if (
                    bulk_data_element_handler is not None
                    and len(encoded_value) > bulk_data_threshold
                ):
                    json_element['BulkDataURI'] = (
                        bulk_data_element_handler(self)
                    )
                else:
                    logger.info(
                        f"encode bulk data element '{self.name}' inline"
                    )
                    json_element['InlineBinary'] = encoded_value
        elif self.VR == 'SQ':
            # recursive call to get sequence item JSON dicts
            # (the identity dump_handler keeps each item as a dict)
            value = [
                ds.to_json(
                    bulk_data_element_handler=bulk_data_element_handler,
                    bulk_data_threshold=bulk_data_threshold,
                    dump_handler=lambda d: d
                )
                for ds in self.value
            ]
            json_element['Value'] = value
        elif self.VR == 'PN':
            if not self.is_empty:
                elem_value = []
                if self.VM > 1:
                    value = self.value
                else:
                    value = [self.value]
                for v in value:
                    # component groups are emitted in the order
                    # Alphabetic, Ideographic, Phonetic
                    comps = {'Alphabetic': v.components[0]}
                    if len(v.components) > 1:
                        comps['Ideographic'] = v.components[1]
                    if len(v.components) > 2:
                        comps['Phonetic'] = v.components[2]
                    elem_value.append(comps)
                json_element['Value'] = elem_value
        elif self.VR == 'AT':
            if not self.is_empty:
                value = self.value
                if self.VM == 1:
                    value = [value]
                # tags are written as 8 digit uppercase hex strings
                json_element['Value'] = [format(v, '08X') for v in value]
        else:
            if not self.is_empty:
                if self.VM > 1:
                    value = self.value
                else:
                    value = [self.value]
                json_element['Value'] = list(value)
        if 'Value' in json_element:
            json_element['Value'] = jsonrep.convert_to_python_number(
                json_element['Value'], self.VR
            )
        return json_element

    def to_json(
        self,
        bulk_data_threshold: int = 1024,
        bulk_data_element_handler: Optional[
            Callable[["DataElement"], str]
        ] = None,
        dump_handler: Optional[
            Callable[[Dict[str, Any]], str]
        ] = None
    ) -> str:
        """Return a JSON representation of the :class:`DataElement`.

        .. versionadded:: 1.3

        Parameters
        ----------
        bulk_data_threshold : int, optional
            Size of base64 encoded data element above which a value will be
            provided in form of a "BulkDataURI" rather than "InlineBinary".
            Ignored if no `bulk_data_element_handler` is given.
        bulk_data_element_handler : callable, optional
            Callable that accepts a bulk :class:`data element
            <pydicom.dataelem.DataElement>` and returns the
            "BulkDataURI" as a :class:`str` for retrieving the value of the
            data element via DICOMweb WADO-RS.
        dump_handler : callable, optional
            Callable function that accepts a :class:`dict` of ``{str: Any}``
            and returns the serialized (dumped) JSON :class:`str` (by default
            uses :func:`json.dumps` with ``sort_keys=True``).

        Returns
        -------
        str
            The result of passing the element's JSON dict (see
            :meth:`to_json_dict`) to `dump_handler`.

        See also
        --------
        Dataset.to_json
        """
        def json_dump(d: Dict[str, Any]) -> str:
            return json.dumps(d, sort_keys=True)

        dump_handler = json_dump if dump_handler is None else dump_handler

        return dump_handler(
            self.to_json_dict(bulk_data_element_handler, bulk_data_threshold)
        )

    @property
    def value(self) -> Any:
        """Return the element's value."""
        return self._value

    @value.setter
    def value(self, val: Any) -> None:
        """Convert (if necessary) and set the value of the element."""
        # Ignore backslash characters in these VRs, based on:
        # * Which str VRs can have backslashes in Part 5, Section 6.2
        # * All byte VRs
        exclusions = [
            'LT', 'OB', 'OD', 'OF', 'OL', 'OV', 'OW', 'ST', 'UN', 'UT',
            'OB or OW',
            # Probably not needed
            'AT', 'FD', 'FL', 'SQ', 'SS', 'SL', 'UL',
        ]

        # Check if is a string with multiple values separated by '\'
        # If so, turn them into a list of separate strings
        # Last condition covers 'US or SS' etc
        if (
            isinstance(val, (str, bytes))
            and self.VR not in exclusions
            and 'US' not in self.VR
        ):
            try:
                if _backslash_str in val:
                    val = cast(str, val).split(_backslash_str)
            except TypeError:
                # `str in bytes` raises TypeError, so `val` is bytes here
                if _backslash_byte in val:
                    val = val.split(_backslash_byte)
        self._value = self._convert_value(val)

    @property
    def VM(self) -> int:
        """Return the value multiplicity of the element as :class:`int`.

        ``None`` and empty :class:`str`, :class:`bytes` or
        :class:`~pydicom.valuerep.PersonName` values have a VM of ``0``,
        non-iterable values a VM of ``1``, and all other values a VM
        equal to their length.
        """
        if self.value is None:
            return 0
        if isinstance(self.value, (str, bytes, PersonName)):
            return 1 if self.value else 0
        try:
            iter(self.value)
        except TypeError:
            return 1
        return len(self.value)

    @property
    def is_empty(self) -> bool:
        """Return ``True`` if the element has no value.

        .. versionadded:: 1.4
        """
        return self.VM == 0

    @property
    def empty_value(self) -> Union[bytes, List[str], None, str, PersonName]:
        """Return the value for an empty element.

        .. versionadded:: 1.4

        See :func:`empty_value_for_VR` for more information.

        Returns
        -------
        str or None or list or PersonName
            The value this data element is assigned on decoding if it is empty.
        """
        return empty_value_for_VR(self.VR)

    def clear(self) -> None:
        """Clears the value, e.g. sets it to the configured empty value.

        .. versionadded:: 1.4

        See :func:`empty_value_for_VR`.
        """
        self._value = self.empty_value

    def _convert_value(self, val: Any) -> Any:
        """Convert `val` to an appropriate type and return the result.

        Uses the element's VR in order to determine the conversion method and
        resulting type.
        """
        if self.VR == 'SQ':  # a sequence - leave it alone
            from pydicom.sequence import Sequence
            if isinstance(val, Sequence):
                return val
            else:
                return Sequence(val)

        # if the value is a list, convert each element
        # (anything exposing `append` is treated as a list)
        try:
            val.append
        except AttributeError:  # not a list
            return self._convert(val)
        else:
            return MultiValue(self._convert, val)

    def _convert(self, val: Any) -> Any:
        """Convert `val` to an appropriate type for the element's VR."""
        # If the value is a byte string and has a VR that can only be encoded
        # using the default character repertoire, we convert it to a string
        # here to allow for byte string input in these cases
        if _is_bytes(val) and self.VR in (
                'AE', 'AS', 'CS', 'DA', 'DS', 'DT', 'IS', 'TM', 'UI', 'UR'):
            val = val.decode()

        if self.VR == 'IS':
            return pydicom.valuerep.IS(val)
        elif self.VR == 'DA' and config.datetime_conversion:
            return pydicom.valuerep.DA(val)
        elif self.VR == 'DS':
            return pydicom.valuerep.DS(val)
        elif self.VR == 'DT' and config.datetime_conversion:
            return pydicom.valuerep.DT(val)
        elif self.VR == 'TM' and config.datetime_conversion:
            return pydicom.valuerep.TM(val)
        elif self.VR == "UI":
            return UID(val) if val is not None else None
        elif self.VR == "PN":
            return PersonName(val)
        elif self.VR == "AT" and (val == 0 or val):
            # `val == 0` keeps the tag (0000,0000) from being seen as empty
            return val if isinstance(val, BaseTag) else Tag(val)
        else:  # is either a string or a type 2 optionally blank string
            return val  # this means a "numeric" value could be empty string ""

    def __eq__(self, other: Any) -> Any:
        """Compare `self` and `other` for equality.

        Returns
        -------
        bool
            The result if `self` and `other` are the same class
        NotImplemented
            If `other` is not the same class as `self` then returning
            :class:`NotImplemented` delegates the result to
            ``superclass.__eq__(subclass)``.
        """
        # Faster result if same object
        if other is self:
            return True

        if isinstance(other, self.__class__):
            if self.tag != other.tag or self.VR != other.VR:
                return False

            # tag and VR match, now check the value
            if config.have_numpy and isinstance(self.value, numpy.ndarray):
                return (
                    len(self.value) == len(other.value)
                    and numpy.allclose(self.value, other.value)
                )

            return self.value == other.value

        return NotImplemented

    def __ne__(self, other: Any) -> Any:
        """Compare `self` and `other` for inequality."""
        return not (self == other)

    def __str__(self) -> str:
        """Return :class:`str` representation of the element."""
        repVal = self.repval or ''
        if self.showVR:
            s = "%s %-*s %s: %s" % (str(self.tag), self.descripWidth,
                                    self.description()[:self.descripWidth],
                                    self.VR, repVal)
        else:
            s = "%s %-*s %s" % (str(self.tag), self.descripWidth,
                                self.description()[:self.descripWidth], repVal)
        return s

    @property
    def repval(self) -> str:
        """Return a :class:`str` representation of the element's value.

        Values longer than :attr:`maxBytesToDisplay` are summarised as
        ``"Array of N elements"``; a :class:`~pydicom.uid.UID` value is
        shown by its name, anything else by its :func:`repr`.
        """
        long_VRs = {"OB", "OD", "OF", "OW", "UN", "UT"}
        # ambiguous VRs such as 'OB or OW' are split into their parts
        if set(self.VR.split(" or ")) & long_VRs:
            try:
                length = len(self.value)
            except TypeError:
                # unsized value - fall through to the generic handling
                pass
            else:
                if length > self.maxBytesToDisplay:
                    return "Array of %d elements" % length
        if self.VM > self.maxBytesToDisplay:
            repVal = "Array of %d elements" % self.VM
        elif isinstance(self.value, UID):
            repVal = self.value.name
        else:
            repVal = repr(self.value)  # will tolerate unicode too
        return repVal

    def __getitem__(self, key: int) -> Any:
        """Return the item at `key` if the element's value is indexable.

        Raises
        ------
        TypeError
            If indexing the value raises :class:`TypeError`; the original
            exception is attached as the cause.
        """
        try:
            return self.value[key]
        except TypeError as exc:
            raise TypeError("DataElement value is unscriptable "
                            "(not a Sequence)") from exc

    @property
    def name(self) -> str:
        """Return the DICOM dictionary name for the element as :class:`str`.

        For officially registered DICOM Data Elements this will be the *Name*
        as given in :dcm:`Table 6-1<part06/chapter_6.html#table_6-1>`.
        For private elements known to *pydicom*
        this will be the *Name* in the format ``'[name]'``. For private
        creator elements this will be ``'Private Creator'`` and for other
        unknown private elements ``'Private tag data'``. For unknown
        non-private elements this will be ``'Group Length'`` if the element
        number is 0, otherwise an empty string ``''``.
        """
        return self.description()

    def description(self) -> str:
        """Return the DICOM dictionary name for the element as :class:`str`."""
        if self.tag.is_private:
            name = "Private tag data"  # default
            if self.private_creator:
                try:
                    # If have name from private dictionary, use it, but
                    # but put in square brackets so is differentiated,
                    # and clear that cannot access it by name
                    name = private_dictionary_description(
                        self.tag, self.private_creator)
                    name = "[%s]" % (name)
                except KeyError:
                    # not in the private dictionary - keep the default
                    pass
            elif self.tag.element >> 8 == 0:
                name = "Private Creator"
        elif dictionary_has_tag(self.tag) or repeater_has_tag(self.tag):
            name = dictionary_description(self.tag)

        # implied Group Length dicom versions < 3
        elif self.tag.element == 0:
            name = "Group Length"
        else:
            name = ""
        return name

    @property
    def is_private(self) -> bool:
        """Return ``True`` if the element's tag is private.

        .. versionadded:: 2.1
        """
        return self.tag.is_private

    @property
    def is_retired(self) -> bool:
        """Return the element's retired status as :class:`bool`.

        For officially registered DICOM Data Elements this will be ``True`` if
        the retired status as given in the DICOM Standard, Part 6,
        :dcm:`Table 6-1<part06/chapter_6.html#table_6-1>` is 'RET'. For private
        or unknown elements this will always be ``False``.
        """
        if dictionary_has_tag(self.tag):
            return dictionary_is_retired(self.tag)

        return False

    @property
    def keyword(self) -> str:
        """Return the element's keyword (if known) as :class:`str`.

        For officially registered DICOM Data Elements this will be the
        *Keyword* as given in
        :dcm:`Table 6-1<part06/chapter_6.html#table_6-1>`. For private or
        unknown elements this will return an empty string ``''``.
        """
        if dictionary_has_tag(self.tag):
            return dictionary_keyword(self.tag)

        return ''

    def __repr__(self) -> str:
        """Return the representation of the element."""
        if self.VR == "SQ":
            return repr(self.value)

        return str(self)


class RawDataElement(NamedTuple):
    """Container for the data from a raw (mostly) undecoded element."""
    tag: BaseTag
    # None when the VR still has to be looked up (e.g. implicit VR)
    VR: Optional[str]
    # 0xFFFFFFFF is treated as 'undefined length' by DataElement_from_raw()
    length: int
    value: Optional[bytes]
    # passed on as the DataElement's `file_value_tell`
    value_tell: int
    is_implicit_VR: bool
    is_little_endian: bool
    is_raw: bool = True


# The first and third values of the following elements are always US
#   even if the VR is SS (PS3.3 C.7.6.3.1.5, C.11.1, C.11.2).
# (0028,1101-1103) RGB Palette Color LUT Descriptor
# (0028,3002) LUT Descriptor
_LUT_DESCRIPTOR_TAGS = (0x00281101, 0x00281102, 0x00281103, 0x00283002)


def _private_vr_for_tag(ds: Optional["Dataset"], tag: BaseTag) -> str:
    """Return the VR for a known private tag, otherwise "UN".

    Parameters
    ----------
    ds : Dataset, optional
        The dataset needed for the private creator lookup.
        If not given, "UN" is returned.
    tag : BaseTag
        The private tag to lookup. The caller has to ensure that the
        tag is private.

    Returns
    -------
    str
        "LO" if the tag is a private creator, the VR of the private tag if
        found in the private dictionary, or "UN".
    """
    if tag.is_private_creator:
        return "LO"
    # invalid private tags are handled as UN
    if ds is not None and (tag.element & 0xff00):
        # the high byte of the element number is the element number of
        # the private creator tag within the same group
        private_creator_tag = tag.group << 16 | (tag.element >> 8)
        private_creator = ds.get(private_creator_tag, "")
        if private_creator:
            try:
                return private_dictionary_VR(tag, private_creator.value)
            except KeyError:
                # not in the private dictionary - fall through to "UN"
                pass
    return "UN"


def DataElement_from_raw(
    raw_data_element: RawDataElement,
    encoding: Optional[Union[str, MutableSequence[str]]] = None,
    dataset: Optional["Dataset"] = None
) -> DataElement:
    """Return a :class:`DataElement` created from `raw_data_element`.

    Parameters
    ----------
    raw_data_element : RawDataElement
        The raw data to convert to a :class:`DataElement`.
    encoding : str or list of str, optional
        The character encoding of the raw data.
    dataset : Dataset, optional
        If given, used to resolve the VR for known private tags.

    Returns
    -------
    DataElement

    Raises
    ------
    KeyError
        If `raw_data_element` belongs to an unknown non-private tag and
        `config.enforce_valid_values` is set.
    NotImplementedError
        If the value conversion raises it; re-raised with the tag added to
        the message and the original exception attached as the cause.
    BytesLengthException
        If the value conversion raises it and
        `config.convert_wrong_length_to_UN` is not set; the original
        exception is attached as the cause.
    """
    # XXX buried here to avoid circular import
    # filereader->Dataset->convert_value->filereader
    # (for SQ parsing)

    from pydicom.values import convert_value
    raw = raw_data_element

    # If user has hooked into conversion of raw values, call his/her routine
    if config.data_element_callback:
        raw = config.data_element_callback(
            raw_data_element,
            encoding=encoding,
            **config.data_element_callback_kwargs
        )

    VR = raw.VR
    if VR is None:  # Can be if was implicit VR
        try:
            VR = dictionary_VR(raw.tag)
        except KeyError:
            # just read the bytes, no way to know what they mean
            if raw.tag.is_private:
                # for VR for private tags see PS3.5, 6.2.2
                VR = _private_vr_for_tag(dataset, raw.tag)

            # group length tag implied in versions < 3.0
            elif raw.tag.element == 0:
                VR = 'UL'
            else:
                msg = "Unknown DICOM tag {0:s}".format(str(raw.tag))
                if config.enforce_valid_values:
                    msg += " can't look up VR"
                    raise KeyError(msg)
                else:
                    VR = 'UN'
                    msg += " - setting VR to 'UN'"
                    warnings.warn(msg)
    elif VR == 'UN' and config.replace_un_with_known_vr:
        # handle rare case of incorrectly set 'UN' in explicit encoding
        # see also DataElement.__init__()
        if raw.tag.is_private:
            VR = _private_vr_for_tag(dataset, raw.tag)
        elif raw.value is None or len(raw.value) < 0xffff:
            try:
                VR = dictionary_VR(raw.tag)
            except KeyError:
                # tag not in the dictionary - keep 'UN'
                pass
    try:
        value = convert_value(VR, raw, encoding)
    except NotImplementedError as e:
        # chain explicitly so the original traceback stays attached
        raise NotImplementedError(
            "{0:s} in tag {1!r}".format(str(e), raw.tag)
        ) from e
    except BytesLengthException as e:
        message = (f"{e} This occurred while trying to parse "
                   f"{raw.tag} according to VR '{VR}'.")
        if config.convert_wrong_length_to_UN:
            warnings.warn(f"{message} Setting VR to 'UN'.")
            VR = "UN"
            value = raw.value
        else:
            raise BytesLengthException(
                f"{message} To replace this error with a warning set "
                "pydicom.config.convert_wrong_length_to_UN = True."
            ) from e

    if raw.tag in _LUT_DESCRIPTOR_TAGS and value:
        # We only fix the first value as the third value is 8 or 16
        try:
            if value[0] < 0:
                # reinterpret the negative 16-bit value as unsigned
                value[0] += 65536
        except TypeError:
            # value is not indexable or not numeric - leave it untouched
            pass

    return DataElement(raw.tag, VR, value, raw.value_tell,
                       raw.length == 0xFFFFFFFF, already_converted=True)