1# Copyright 2008-2018 pydicom authors. See LICENSE file for details.
2"""Define the DataElement class.
3
4A DataElement has a tag,
5              a value representation (VR),
6              a value multiplicity (VM)
7              and a value.
8"""
9
10import base64
11import json
12from typing import (
13    Optional, Any, Tuple, Callable, Union, TYPE_CHECKING, Dict, Type,
14    List, NamedTuple, MutableSequence, cast
15)
16import warnings
17
18from pydicom import config  # don't import datetime_conversion directly
19from pydicom.config import logger
20from pydicom.datadict import (dictionary_has_tag, dictionary_description,
21                              dictionary_keyword, dictionary_is_retired,
22                              private_dictionary_description, dictionary_VR,
23                              repeater_has_tag, private_dictionary_VR)
24from pydicom.errors import BytesLengthException
25from pydicom.jsonrep import JsonDataElementConverter, BulkDataType
26from pydicom.multival import MultiValue
27from pydicom.tag import Tag, BaseTag
28from pydicom.uid import UID
29from pydicom import jsonrep
30import pydicom.valuerep  # don't import DS directly as can be changed by config
31from pydicom.valuerep import PersonName
32
33if config.have_numpy:
34    import numpy
35
36if TYPE_CHECKING:  # pragma: no cover
37    from pydicom.dataset import Dataset
38
39
# Value representations (including the ambiguous "X or Y" forms) that this
# module classifies as binary.
# NOTE(review): nothing in this module's visible code reads this list -
# ``DataElement.to_json_dict`` checks ``jsonrep.BINARY_VR_VALUES`` instead -
# so it is presumably kept for code importing it from here; confirm before
# changing or removing it.
BINARY_VR_VALUES = [
    'US', 'SS', 'UL', 'SL', 'OW', 'OB', 'OL', 'UN',
    'OB or OW', 'US or OW', 'US or SS or OW', 'FL', 'FD', 'OF', 'OD'
]
44
45
def empty_value_for_VR(
    VR: Optional[str], raw: bool = False
) -> Union[bytes, List[str], str, None, PersonName]:
    """Return the value used to represent an empty element with `VR`.

    .. versionadded:: 1.4

    The result depends on
    :attr:`config.use_none_as_empty_text_VR_value`:

    * VR 'SQ' is independent of that setting: an empty sequence is always
      an empty list (or ``b''`` if `raw` is ``True``).
    * If the setting is ``True``, every other VR uses ``None``.
    * Otherwise 'PN' uses an empty :class:`~pydicom.valuerep.PersonName`,
      the other text VRs ('AE', 'AS', 'CS', 'DA', 'DT', 'LO', 'LT', 'SH',
      'ST', 'TM', 'UC', 'UI', 'UR' and 'UT') use an empty string, and all
      remaining VRs use ``None``. If `raw` is ``True`` the text VRs
      (including 'PN') use ``b''`` instead.

    Note that this is used only if decoding the element - it is always
    possible to set the value to another empty value representation,
    which will be preserved during the element object lifetime.

    Parameters
    ----------
    VR : str or None
        The VR of the corresponding element.
    raw : bool, optional
        If ``True``, returns the value for a :class:`RawDataElement`,
        otherwise for a :class:`DataElement`

    Returns
    -------
    str or bytes or None or list or PersonName
        The value a data element with `VR` is assigned on decoding
        if it is empty.
    """
    # Sequences are handled first: they never use None, whatever the
    # configuration says.
    if VR == 'SQ':
        return b'' if raw else []

    if config.use_none_as_empty_text_VR_value:
        return None

    is_person_name = VR == 'PN'
    plain_text_vrs = (
        'AE', 'AS', 'CS', 'DA', 'DT', 'LO', 'LT', 'SH', 'ST', 'TM',
        'UC', 'UI', 'UR', 'UT'
    )
    if not is_person_name and VR not in plain_text_vrs:
        # Not a text VR: the empty value is always None.
        return None

    if raw:
        return b''
    return PersonName('') if is_person_name else ''
95
96
97def _is_bytes(val: object) -> bool:
98    """Return True only if `val` is of type `bytes`."""
99    return isinstance(val, bytes)
100
101
# A single backslash, as text and as bytes: the separator on which the
# ``DataElement.value`` setter splits a multi-valued str/bytes value.
# It is written doubled because backslash is the escape character in
# Python string literals.
_backslash_str = "\\"
_backslash_byte = b"\\"
105
106
class DataElement:
    """Contain and manipulate a DICOM Element.

    Examples
    --------

    While it's possible to create a new :class:`DataElement` directly and add
    it to a :class:`~pydicom.dataset.Dataset`:

    >>> from pydicom import Dataset
    >>> elem = DataElement(0x00100010, 'PN', 'CITIZEN^Joan')
    >>> ds = Dataset()
    >>> ds.add(elem)

    It's far more convenient to use a :class:`~pydicom.dataset.Dataset`
    to add a new :class:`DataElement`, as the VR and tag are determined
    automatically from the DICOM dictionary:

    >>> ds = Dataset()
    >>> ds.PatientName = 'CITIZEN^Joan'

    Empty DataElement objects (e.g. with VM = 0) show an empty string as
    value for text VRs and `None` for non-text (binary) VRs:

    >>> ds = Dataset()
    >>> ds.PatientName = None
    >>> ds.PatientName
    ''

    >>> ds.BitsAllocated = None
    >>> ds.BitsAllocated

    >>> str(ds.BitsAllocated)
    'None'

    Attributes
    ----------
    descripWidth : int
        For string display, this is the maximum width of the description
        field (default ``35``).
    is_raw : bool
        Always ``False`` for a :class:`DataElement` (a
        :class:`RawDataElement` has ``True``).
    is_undefined_length : bool
        Indicates whether the length field for the element was ``0xFFFFFFFF``
        (ie undefined).
    maxBytesToDisplay : int
        For string display, elements with values containing data which is
        longer than this value will display ``"Array of # elements"``
        (default ``16``).
    showVR : bool
        For string display, include the element's VR just before its value
        (default ``True``).
    tag : pydicom.tag.BaseTag
        The element's tag.
    VR : str
        The element's Value Representation.
    """

    descripWidth = 35
    maxBytesToDisplay = 16
    showVR = True
    is_raw = False

    # VRs for which the ``value`` setter never splits a str/bytes value on
    # backslash, based on:
    # * Which str VRs can have backslashes in Part 5, Section 6.2
    # * All byte VRs
    _NO_BACKSLASH_SPLIT_VRS = (
        'LT', 'OB', 'OD', 'OF', 'OL', 'OV', 'OW', 'ST', 'UN', 'UT',
        'OB or OW',
        # Probably not needed
        'AT', 'FD', 'FL', 'SQ', 'SS', 'SL', 'UL',
    )

    def __init__(
        self,
        tag: Union[int, str, Tuple[int, int]],
        VR: str,
        value: Any,
        file_value_tell: Optional[int] = None,
        is_undefined_length: bool = False,
        already_converted: bool = False
    ) -> None:
        """Create a new :class:`DataElement`.

        Parameters
        ----------
        tag : int or str or 2-tuple of int
            The DICOM (group, element) tag in any form accepted by
            :func:`~pydicom.tag.Tag` such as ``'PatientName'``,
            ``(0x10, 0x10)``, ``0x00100010``, etc.
        VR : str
            The 2 character DICOM value representation (see DICOM Standard,
            Part 5, :dcm:`Section 6.2<part05/sect_6.2.html>`).
        value
            The value of the data element. One of the following:

            * a single string value
            * a number
            * a :class:`list` or :class:`tuple` with all strings or all numbers
            * a multi-value string with backslash separator
        file_value_tell : int, optional
            The byte offset to the start of the encoded element value.
        is_undefined_length : bool
            Used internally to store whether the length field for this element
            was ``0xFFFFFFFF``, i.e. 'undefined length'. Default is ``False``.
        already_converted : bool
            If ``True``, `value` is stored as given; otherwise it is passed
            through the :attr:`value` setter, which splits multi-valued
            strings and converts the value according to `VR`. Default is
            ``False``.
        """
        if not isinstance(tag, BaseTag):
            tag = Tag(tag)
        self.tag = tag

        # a known tag shall only have the VR 'UN' if it has a length that
        # exceeds the size that can be encoded in 16 bit - all other cases
        # can be seen as an encoding error and can be corrected
        if (
            VR == 'UN'
            and not tag.is_private
            and config.replace_un_with_known_vr
            and (is_undefined_length or value is None or len(value) < 0xffff)
        ):
            try:
                VR = dictionary_VR(tag)
            except KeyError:
                # not in the dictionary: keep 'UN'
                pass

        self.VR = VR  # Note: you must set VR before setting value
        if already_converted:
            self._value = value
        else:
            self.value = value  # calls property setter which will convert
        self.file_tell = file_value_tell
        self.is_undefined_length = is_undefined_length
        self.private_creator: Optional[str] = None
        self.parent: Optional["Dataset"] = None

    @classmethod
    def from_json(
        cls: Type["DataElement"],
        dataset_class: Type["Dataset"],
        tag: str,
        vr: str,
        value: Any,
        value_key: Optional[str],
        bulk_data_uri_handler: Optional[
            Union[
                Callable[[str, str, str], BulkDataType],
                Callable[[str], BulkDataType]
            ]
        ] = None
    ) -> "DataElement":
        """Return a :class:`DataElement` from a DICOM JSON Model attribute
        object.

        .. versionadded:: 1.3

        Parameters
        ----------
        dataset_class : dataset.Dataset derived class
            The class object to use for **SQ** element items.
        tag : str
            The data element's tag as uppercase hex.
        vr : str
            The data element's value representation (VR).
        value : str or List[Union[None, str, int, float, bytes, dict]]
            The data element's value(s).
        value_key : str or None
            The attribute name for `value`, should be one of:
            ``{"Value", "InlineBinary", "BulkDataURI"}``. If the element's VM
            is ``0`` and none of the keys are used then will be ``None``.
        bulk_data_uri_handler: callable or None
            Callable function that accepts either the `tag`, `vr` and
            "BulkDataURI" `value` or just the "BulkDataURI" `value` of the JSON
            representation of a data element and returns the actual value of
            that data element (retrieved via DICOMweb WADO-RS). If no
            `bulk_data_uri_handler` is specified (default) then the
            corresponding element will have an "empty" value such as
            ``""``, ``b""`` or ``None`` depending on the `vr` (i.e. the
            Value Multiplicity will be 0).

        Returns
        -------
        DataElement

        Raises
        ------
        ValueError
            If the element cannot be created from the converted value; the
            original exception is attached as the cause.
        """
        # TODO: test wado-rs retrieve wrapper
        converter = JsonDataElementConverter(
            dataset_class, tag, vr, value, value_key, bulk_data_uri_handler
        )
        elem_value = converter.get_element_values()
        try:
            return cls(tag=tag, value=elem_value, VR=vr)
        except Exception as exc:
            raise ValueError(
                f"Data element '{tag}' could not be loaded from JSON: "
                f"{elem_value}"
            ) from exc

    def to_json_dict(
        self,
        bulk_data_element_handler: Optional[Callable[["DataElement"], str]],
        bulk_data_threshold: int
    ) -> Dict[str, Any]:
        """Return a dictionary representation of the :class:`DataElement`
        conforming to the DICOM JSON Model as described in the DICOM
        Standard, Part 18, :dcm:`Annex F<part18/chaptr_F.html>`.

        .. versionadded:: 1.4

        Parameters
        ----------
        bulk_data_element_handler : callable or None
            Callable that accepts a bulk :class:`data element
            <pydicom.dataelem.DataElement>` and returns the
            "BulkDataURI" as a :class:`str` for retrieving the value of the
            data element via DICOMweb WADO-RS.
        bulk_data_threshold : int
            Size of base64 encoded data element above which a value will be
            provided in form of a "BulkDataURI" rather than "InlineBinary".
            Ignored if no `bulk_data_element_handler` is given.

        Returns
        -------
        dict
            Mapping representing a JSON encoded data element as ``{str: Any}``.
        """
        json_element: Dict[str, Any] = {'vr': self.VR}
        if self.VR in jsonrep.BINARY_VR_VALUES:
            if not self.is_empty:
                binary_value = self.value
                encoded_value = base64.b64encode(binary_value).decode('utf-8')
                if (
                    bulk_data_element_handler is not None
                    and len(encoded_value) > bulk_data_threshold
                ):
                    json_element['BulkDataURI'] = (
                        bulk_data_element_handler(self)
                    )
                else:
                    logger.info(
                        f"encode bulk data element '{self.name}' inline"
                    )
                    json_element['InlineBinary'] = encoded_value
        elif self.VR == 'SQ':
            # recursive call to get sequence item JSON dicts; the identity
            # dump handler keeps each item as a dict instead of a str
            json_element['Value'] = [
                ds.to_json(
                    bulk_data_element_handler=bulk_data_element_handler,
                    bulk_data_threshold=bulk_data_threshold,
                    dump_handler=lambda d: d
                )
                for ds in self.value
            ]
        elif not self.is_empty:
            # "Value" is always a list in the JSON model, so a single value
            # is wrapped
            values = self.value if self.VM > 1 else [self.value]
            if self.VR == 'PN':
                elem_value = []
                for v in values:
                    comps = {'Alphabetic': v.components[0]}
                    if len(v.components) > 1:
                        comps['Ideographic'] = v.components[1]
                    if len(v.components) > 2:
                        comps['Phonetic'] = v.components[2]
                    elem_value.append(comps)
                json_element['Value'] = elem_value
            elif self.VR == 'AT':
                # tags are written as 8 digit uppercase hex strings
                json_element['Value'] = [format(v, '08X') for v in values]
            else:
                json_element['Value'] = list(values)

        if 'Value' in json_element:
            json_element['Value'] = jsonrep.convert_to_python_number(
                json_element['Value'], self.VR
            )
        return json_element

    def to_json(
        self,
        bulk_data_threshold: int = 1024,
        bulk_data_element_handler: Optional[
            Callable[["DataElement"], str]
        ] = None,
        dump_handler: Optional[
            Callable[[Dict[str, Any]], str]
        ] = None
    ) -> str:
        """Return a JSON representation of the :class:`DataElement`.

        .. versionadded:: 1.3

        Parameters
        ----------
        bulk_data_threshold : int, optional
            Size of base64 encoded data element above which a value will be
            provided in form of a "BulkDataURI" rather than "InlineBinary".
            Ignored if no `bulk_data_element_handler` is given.
        bulk_data_element_handler : callable, optional
            Callable that accepts a bulk :class:`data element
            <pydicom.dataelem.DataElement>` and returns the
            "BulkDataURI" as a :class:`str` for retrieving the value of the
            data element via DICOMweb WADO-RS.
        dump_handler : callable, optional
            Callable function that accepts a :class:`dict` of ``{str: Any}``
            and returns the serialized (dumped) JSON :class:`str` (by default
            uses :func:`json.dumps` with sorted keys).

        Returns
        -------
        str
            The result of calling `dump_handler` on the mapping returned by
            :meth:`to_json_dict`; with the default handler, the serialized
            JSON encoded data element.

        See also
        --------
        Dataset.to_json
        """
        def json_dump(d: Dict[str, Any]) -> str:
            return json.dumps(d, sort_keys=True)

        dump_handler = json_dump if dump_handler is None else dump_handler

        return dump_handler(
            self.to_json_dict(bulk_data_element_handler, bulk_data_threshold)
        )

    @property
    def value(self) -> Any:
        """Return the element's value."""
        return self._value

    @value.setter
    def value(self, val: Any) -> None:
        """Convert (if necessary) and set the value of the element."""
        # Check if is a string with multiple values separated by '\'
        # If so, turn them into a list of separate strings
        #  Last condition covers 'US or SS' etc
        if (
            isinstance(val, (str, bytes))
            and self.VR not in self._NO_BACKSLASH_SPLIT_VRS
            and 'US' not in self.VR
        ):
            # str and bytes need a separator of their own type
            separator: Any = (
                _backslash_str if isinstance(val, str) else _backslash_byte
            )
            if separator in val:
                val = val.split(separator)
        self._value = self._convert_value(val)

    @property
    def VM(self) -> int:
        """Return the value multiplicity of the element as :class:`int`."""
        if self.value is None:
            return 0
        if isinstance(self.value, (str, bytes, PersonName)):
            return 1 if self.value else 0
        try:
            iter(self.value)
        except TypeError:
            # a single, non-iterable value
            return 1
        return len(self.value)

    @property
    def is_empty(self) -> bool:
        """Return ``True`` if the element has no value.

        .. versionadded:: 1.4
        """
        return self.VM == 0

    @property
    def empty_value(self) -> Union[bytes, List[str], None, str, PersonName]:
        """Return the value for an empty element.

        .. versionadded:: 1.4

        See :func:`empty_value_for_VR` for more information.

        Returns
        -------
        str or None or list or PersonName
            The value this data element is assigned on decoding if it is empty.
        """
        return empty_value_for_VR(self.VR)

    def clear(self) -> None:
        """Clears the value, e.g. sets it to the configured empty value.

        .. versionadded:: 1.4

        See :func:`empty_value_for_VR`.
        """
        self._value = self.empty_value

    def _convert_value(self, val: Any) -> Any:
        """Convert `val` to an appropriate type and return the result.

        Uses the element's VR in order to determine the conversion method and
        resulting type.
        """
        if self.VR == 'SQ':  # a sequence - leave it alone
            from pydicom.sequence import Sequence
            if isinstance(val, Sequence):
                return val
            return Sequence(val)

        # if the value is a list (anything with ``append``), convert each
        # element; the check is done outside of any exception handler so
        # that conversion errors are raised without unrelated context
        if hasattr(val, 'append'):
            return MultiValue(self._convert, val)
        return self._convert(val)

    def _convert(self, val: Any) -> Any:
        """Convert `val` to an appropriate type for the element's VR."""
        # If the value is a byte string and has a VR that can only be encoded
        # using the default character repertoire, we convert it to a string
        # here to allow for byte string input in these cases
        if _is_bytes(val) and self.VR in (
                'AE', 'AS', 'CS', 'DA', 'DS', 'DT', 'IS', 'TM', 'UI', 'UR'):
            val = val.decode()

        if self.VR == 'IS':
            return pydicom.valuerep.IS(val)
        if self.VR == 'DA' and config.datetime_conversion:
            return pydicom.valuerep.DA(val)
        if self.VR == 'DS':
            return pydicom.valuerep.DS(val)
        if self.VR == 'DT' and config.datetime_conversion:
            return pydicom.valuerep.DT(val)
        if self.VR == 'TM' and config.datetime_conversion:
            return pydicom.valuerep.TM(val)
        if self.VR == "UI":
            return UID(val) if val is not None else None
        if self.VR == "PN":
            return PersonName(val)
        # for AT only zero and truthy values are converted to a tag, other
        # falsy values (e.g. None or '') are kept as they are
        if self.VR == "AT" and (val == 0 or val):
            return val if isinstance(val, BaseTag) else Tag(val)

        # is either a string or a type 2 optionally blank string
        return val  # this means a "numeric" value could be empty string ""

    def __eq__(self, other: Any) -> Any:
        """Compare `self` and `other` for equality.

        Returns
        -------
        bool
            The result if `self` and `other` are the same class
        NotImplemented
            If `other` is not the same class as `self` then returning
            :class:`NotImplemented` delegates the result to
            ``superclass.__eq__(subclass)``.
        """
        # Faster result if same object
        if other is self:
            return True

        if isinstance(other, self.__class__):
            if self.tag != other.tag or self.VR != other.VR:
                return False

            # tag and VR match, now check the value
            if config.have_numpy and isinstance(self.value, numpy.ndarray):
                return (
                    len(self.value) == len(other.value)
                    and numpy.allclose(self.value, other.value)
                )

            return self.value == other.value

        return NotImplemented

    def __ne__(self, other: Any) -> Any:
        """Compare `self` and `other` for inequality."""
        return not (self == other)

    def __str__(self) -> str:
        """Return :class:`str` representation of the element."""
        repVal = self.repval or ''
        if self.showVR:
            s = "%s %-*s %s: %s" % (str(self.tag), self.descripWidth,
                                    self.description()[:self.descripWidth],
                                    self.VR, repVal)
        else:
            s = "%s %-*s %s" % (str(self.tag), self.descripWidth,
                                self.description()[:self.descripWidth], repVal)
        return s

    @property
    def repval(self) -> str:
        """Return a :class:`str` representation of the element's value."""
        long_VRs = {"OB", "OD", "OF", "OW", "UN", "UT"}
        # an ambiguous VR such as 'OB or OW' counts if any part is long
        if set(self.VR.split(" or ")) & long_VRs:
            try:
                length = len(self.value)
            except TypeError:
                # value has no length (e.g. None): fall through
                pass
            else:
                if length > self.maxBytesToDisplay:
                    return "Array of %d elements" % length
        if self.VM > self.maxBytesToDisplay:
            repVal = "Array of %d elements" % self.VM
        elif isinstance(self.value, UID):
            repVal = self.value.name
        else:
            repVal = repr(self.value)  # will tolerate unicode too
        return repVal

    def __getitem__(self, key: int) -> Any:
        """Return the item at `key` if the element's value is indexable.

        Raises
        ------
        TypeError
            If the element's value does not support indexing.
        """
        try:
            return self.value[key]
        except TypeError as exc:
            raise TypeError("DataElement value is unscriptable "
                            "(not a Sequence)") from exc

    @property
    def name(self) -> str:
        """Return the DICOM dictionary name for the element as :class:`str`.

        For officially registered DICOM Data Elements this will be the *Name*
        as given in :dcm:`Table 6-1<part06/chapter_6.html#table_6-1>`.
        For private elements known to *pydicom*
        this will be the *Name* in the format ``'[name]'``. For private
        elements without a private creator set, whose element number is in
        the range reserved for private creators, this will be
        ``'Private Creator'`` and for all other private elements
        ``'Private tag data'``. For unknown non-private elements this will
        be ``'Group Length'`` if the element number is 0, otherwise an
        empty string ``''``.
        """
        return self.description()

    def description(self) -> str:
        """Return the DICOM dictionary name for the element as :class:`str`."""
        if self.tag.is_private:
            name = "Private tag data"  # default
            if self.private_creator:
                try:
                    # If have name from private dictionary, use it, but
                    #   but put in square brackets so is differentiated,
                    #   and clear that cannot access it by name
                    name = private_dictionary_description(
                        self.tag, self.private_creator)
                    name = "[%s]" % (name)
                except KeyError:
                    # unknown private element: keep the default name
                    pass
            elif self.tag.element >> 8 == 0:
                name = "Private Creator"
        elif dictionary_has_tag(self.tag) or repeater_has_tag(self.tag):
            name = dictionary_description(self.tag)

        # implied Group Length dicom versions < 3
        elif self.tag.element == 0:
            name = "Group Length"
        else:
            name = ""
        return name

    @property
    def is_private(self) -> bool:
        """Return ``True`` if the element's tag is private.

        .. versionadded:: 2.1
        """
        return self.tag.is_private

    @property
    def is_retired(self) -> bool:
        """Return the element's retired status as :class:`bool`.

        For officially registered DICOM Data Elements this will be ``True`` if
        the retired status as given in the DICOM Standard, Part 6,
        :dcm:`Table 6-1<part06/chapter_6.html#table_6-1>` is 'RET'. For private
        or unknown elements this will always be ``False``.
        """
        if dictionary_has_tag(self.tag):
            return dictionary_is_retired(self.tag)

        return False

    @property
    def keyword(self) -> str:
        """Return the element's keyword (if known) as :class:`str`.

        For officially registered DICOM Data Elements this will be the
        *Keyword* as given in
        :dcm:`Table 6-1<part06/chapter_6.html#table_6-1>`. For private or
        unknown elements this will return an empty string ``''``.
        """
        if dictionary_has_tag(self.tag):
            return dictionary_keyword(self.tag)

        return ''

    def __repr__(self) -> str:
        """Return the representation of the element."""
        if self.VR == "SQ":
            return repr(self.value)

        return str(self)
725
726
class RawDataElement(NamedTuple):
    """Container for the data from a raw (mostly) undecoded element.

    :func:`DataElement_from_raw` converts an instance into a
    :class:`DataElement`.
    """
    # The element's tag.
    tag: BaseTag
    # The element's VR; ``None`` is allowed, in which case
    # DataElement_from_raw() looks the VR up (can happen for implicit VR).
    VR: Optional[str]
    # The element's length field; 0xFFFFFFFF is treated as undefined length
    # by DataElement_from_raw().
    length: int
    # The element's value as bytes, or None.
    value: Optional[bytes]
    # Passed on as the `file_value_tell` of the created DataElement.
    value_tell: int
    # NOTE(review): presumably describe how the element was encoded in its
    # source (implicit/explicit VR and byte order); neither is read directly
    # in this module's visible code - confirm against the reader.
    is_implicit_VR: bool
    is_little_endian: bool
    # Counterpart of ``DataElement.is_raw``, which is False.
    is_raw: bool = True
737
738
# The first and third values of the following elements are always US
#   even if the VR is SS (PS3.3 C.7.6.3.1.5, C.11.1, C.11.2).
# (0028,1101-1103) RGB Palette Color LUT Descriptor
# (0028,3002) LUT Descriptor
# DataElement_from_raw() uses this to add 65536 to a negative first value
#   of these elements.
_LUT_DESCRIPTOR_TAGS = (0x00281101, 0x00281102, 0x00281103, 0x00283002)
744
745
746def _private_vr_for_tag(ds: Optional["Dataset"], tag: BaseTag) -> str:
747    """Return the VR for a known private tag, otherwise "UN".
748
749    Parameters
750    ----------
751    ds : Dataset, optional
752        The dataset needed for the private creator lookup.
753        If not given, "UN" is returned.
754    tag : BaseTag
755        The private tag to lookup. The caller has to ensure that the
756        tag is private.
757
758    Returns
759    -------
760    str
761        "LO" if the tag is a private creator, the VR of the private tag if
762        found in the private dictionary, or "UN".
763    """
764    if tag.is_private_creator:
765        return "LO"
766    # invalid private tags are handled as UN
767    if ds is not None and (tag.element & 0xff00):
768        private_creator_tag = tag.group << 16 | (tag.element >> 8)
769        private_creator = ds.get(private_creator_tag, "")
770        if private_creator:
771            try:
772                return private_dictionary_VR(tag, private_creator.value)
773            except KeyError:
774                pass
775    return "UN"
776
777
def DataElement_from_raw(
    raw_data_element: RawDataElement,
    encoding: Optional[Union[str, MutableSequence[str]]] = None,
    dataset: Optional["Dataset"] = None
) -> DataElement:
    """Return a :class:`DataElement` created from `raw_data_element`.

    Parameters
    ----------
    raw_data_element : RawDataElement
        The raw data to convert to a :class:`DataElement`.
    encoding : str or list of str, optional
        The character encoding of the raw data.
    dataset : Dataset, optional
        If given, used to resolve the VR for known private tags.

    Returns
    -------
    DataElement

    Raises
    ------
    KeyError
        If `raw_data_element` belongs to an unknown non-private tag and
        `config.enforce_valid_values` is set.
    NotImplementedError
        If the value conversion raises it; re-raised with the tag added to
        the message.
    BytesLengthException
        If the value conversion raises it and
        `config.convert_wrong_length_to_UN` is not set; re-raised with the
        tag and VR added to the message.
    """
    # XXX buried here to avoid circular import
    # filereader->Dataset->convert_value->filereader
    # (for SQ parsing)

    from pydicom.values import convert_value
    raw = raw_data_element

    # If user has hooked into conversion of raw values, call their routine
    if config.data_element_callback:
        raw = config.data_element_callback(
            raw_data_element,
            encoding=encoding,
            **config.data_element_callback_kwargs
        )

    VR = raw.VR
    if VR is None:  # Can be if was implicit VR
        try:
            VR = dictionary_VR(raw.tag)
        except KeyError:
            # just read the bytes, no way to know what they mean
            if raw.tag.is_private:
                # for VR for private tags see PS3.5, 6.2.2
                VR = _private_vr_for_tag(dataset, raw.tag)

            # group length tag implied in versions < 3.0
            elif raw.tag.element == 0:
                VR = 'UL'
            else:
                msg = f"Unknown DICOM tag {raw.tag!s}"
                if config.enforce_valid_values:
                    msg += " can't look up VR"
                    raise KeyError(msg)

                VR = 'UN'
                msg += " - setting VR to 'UN'"
                warnings.warn(msg)
    elif VR == 'UN' and config.replace_un_with_known_vr:
        # handle rare case of incorrectly set 'UN' in explicit encoding
        # see also DataElement.__init__()
        if raw.tag.is_private:
            VR = _private_vr_for_tag(dataset, raw.tag)
        elif raw.value is None or len(raw.value) < 0xffff:
            try:
                VR = dictionary_VR(raw.tag)
            except KeyError:
                # not in the dictionary: keep 'UN'
                pass

    try:
        value = convert_value(VR, raw, encoding)
    except NotImplementedError as e:
        # same exception type with the tag added; chained explicitly so the
        # traceback shows the original error as the direct cause
        raise NotImplementedError(f"{e!s} in tag {raw.tag!r}") from e
    except BytesLengthException as e:
        message = (f"{e} This occurred while trying to parse "
                   f"{raw.tag} according to VR '{VR}'.")
        if config.convert_wrong_length_to_UN:
            # keep the undecoded bytes and flag the element as 'UN'
            warnings.warn(f"{message} Setting VR to 'UN'.")
            VR = "UN"
            value = raw.value
        else:
            raise BytesLengthException(
                f"{message} To replace this error with a warning set "
                "pydicom.config.convert_wrong_length_to_UN = True."
            ) from e

    if raw.tag in _LUT_DESCRIPTOR_TAGS and value:
        # We only fix the first value as the third value is 8 or 16
        try:
            if value[0] < 0:
                value[0] += 65536
        except TypeError:
            # value cannot be indexed/compared this way: leave it unchanged
            pass

    # the value has been converted above, so DataElement must not convert
    # it again
    return DataElement(raw.tag, VR, value, raw.value_tell,
                       raw.length == 0xFFFFFFFF, already_converted=True)
878