1import re
2import sys
3import copy
4import types
5import inspect
6import keyword
7import builtins
8import functools
9import _thread
10from types import GenericAlias
11
12
# The names exported by a star-import of this module.
__all__ = ['dataclass',
           'field',
           'Field',
           'FrozenInstanceError',
           'InitVar',
           'MISSING',

           # Helper functions.
           'fields',
           'asdict',
           'astuple',
           'make_dataclass',
           'replace',
           'is_dataclass',
           ]
28
29# Conditions for adding methods.  The boxes indicate what action the
30# dataclass decorator takes.  For all of these tables, when I talk
31# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm
32# referring to the arguments to the @dataclass decorator.  When
33# checking if a dunder method already exists, I mean check for an
34# entry in the class's __dict__.  I never check to see if an attribute
35# is defined in a base class.
36
37# Key:
38# +=========+=========================================+
39# + Value   | Meaning                                 |
40# +=========+=========================================+
41# | <blank> | No action: no method is added.          |
42# +---------+-----------------------------------------+
43# | add     | Generated method is added.              |
44# +---------+-----------------------------------------+
45# | raise   | TypeError is raised.                    |
46# +---------+-----------------------------------------+
47# | None    | Attribute is set to None.               |
48# +=========+=========================================+
49
50# __init__
51#
52#   +--- init= parameter
53#   |
54#   v     |       |       |
55#         |  no   |  yes  |  <--- class has __init__ in __dict__?
56# +=======+=======+=======+
57# | False |       |       |
58# +-------+-------+-------+
59# | True  | add   |       |  <- the default
60# +=======+=======+=======+
61
62# __repr__
63#
64#    +--- repr= parameter
65#    |
66#    v    |       |       |
67#         |  no   |  yes  |  <--- class has __repr__ in __dict__?
68# +=======+=======+=======+
69# | False |       |       |
70# +-------+-------+-------+
71# | True  | add   |       |  <- the default
72# +=======+=======+=======+
73
74
75# __setattr__
76# __delattr__
77#
78#    +--- frozen= parameter
79#    |
80#    v    |       |       |
81#         |  no   |  yes  |  <--- class has __setattr__ or __delattr__ in __dict__?
82# +=======+=======+=======+
83# | False |       |       |  <- the default
84# +-------+-------+-------+
85# | True  | add   | raise |
86# +=======+=======+=======+
87# Raise because not adding these methods would break the "frozen-ness"
88# of the class.
89
90# __eq__
91#
92#    +--- eq= parameter
93#    |
94#    v    |       |       |
95#         |  no   |  yes  |  <--- class has __eq__ in __dict__?
96# +=======+=======+=======+
97# | False |       |       |
98# +-------+-------+-------+
99# | True  | add   |       |  <- the default
100# +=======+=======+=======+
101
102# __lt__
103# __le__
104# __gt__
105# __ge__
106#
107#    +--- order= parameter
108#    |
109#    v    |       |       |
110#         |  no   |  yes  |  <--- class has any comparison method in __dict__?
111# +=======+=======+=======+
112# | False |       |       |  <- the default
113# +-------+-------+-------+
114# | True  | add   | raise |
115# +=======+=======+=======+
116# Raise because to allow this case would interfere with using
117# functools.total_ordering.
118
119# __hash__
120
121#    +------------------- unsafe_hash= parameter
122#    |       +----------- eq= parameter
123#    |       |       +--- frozen= parameter
124#    |       |       |
125#    v       v       v    |        |        |
126#                         |   no   |  yes   |  <--- class has explicitly defined __hash__
127# +=======+=======+=======+========+========+
128# | False | False | False |        |        | No __eq__, use the base class __hash__
129# +-------+-------+-------+--------+--------+
130# | False | False | True  |        |        | No __eq__, use the base class __hash__
131# +-------+-------+-------+--------+--------+
132# | False | True  | False | None   |        | <-- the default, not hashable
133# +-------+-------+-------+--------+--------+
134# | False | True  | True  | add    |        | Frozen, so hashable, allows override
135# +-------+-------+-------+--------+--------+
136# | True  | False | False | add    | raise  | Has no __eq__, but hashable
137# +-------+-------+-------+--------+--------+
138# | True  | False | True  | add    | raise  | Has no __eq__, but hashable
139# +-------+-------+-------+--------+--------+
140# | True  | True  | False | add    | raise  | Not frozen, but hashable
141# +-------+-------+-------+--------+--------+
142# | True  | True  | True  | add    | raise  | Frozen, so hashable
143# +=======+=======+=======+========+========+
144# For boxes that are blank, __hash__ is untouched and therefore
145# inherited from the base class.  If the base is object, then
146# id-based hashing is used.
147#
148# Note that a class may already have __hash__=None if it specified an
149# __eq__ method in the class body (not one that was created by
150# @dataclass).
151#
152# See _hash_action (below) for a coded version of this table.
153
154
class FrozenInstanceError(AttributeError):
    """Raised when an attempt is made to modify a frozen class."""
157
158# A sentinel object for default values to signal that a default
159# factory will be used.  This is given a nice repr() which will appear
160# in the function signature of dataclasses' constructors.
class _HAS_DEFAULT_FACTORY_CLASS:
    # Only the repr is customised: the marker is displayed as
    # '<factory>'.
    def __repr__(self):
        return '<factory>'
# The single shared instance of the marker class.  _init_param() uses
# the name _HAS_DEFAULT_FACTORY as the default of an __init__ parameter
# whose field has a default factory.
_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()
165
166# A sentinel object to detect if a parameter is supplied or not.  Use
167# a class to give it a better repr.
class _MISSING_TYPE:
    # Intentionally empty: no __repr__ is defined here, so the default
    # object repr (which shows this class's name) is what gets used.
    pass
# The single instance; code in this module compares against it by
# identity ("is MISSING" / "is not MISSING").
MISSING = _MISSING_TYPE()
171
# Since most per-field metadata will be unused, create an empty
# read-only proxy that can be shared among all fields.  Field.__init__
# stores this object whenever it is passed metadata=None.
_EMPTY_METADATA = types.MappingProxyType({})
175
176# Markers for the various kinds of fields and pseudo-fields.
class _FIELD_BASE:
    # A named marker object.  The repr of a marker is simply the name
    # it was created with.
    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return self.name


# One marker per kind of field: a regular field, a ClassVar
# pseudo-field and an InitVar pseudo-field.
_FIELD, _FIELD_CLASSVAR, _FIELD_INITVAR = (
    _FIELD_BASE(label)
    for label in ('_FIELD', '_FIELD_CLASSVAR', '_FIELD_INITVAR')
)
185
# The name of an attribute on the class where we store the Field
# objects.  Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'

# The name of an attribute on the class that stores the parameters to
# @dataclass.  _process_class() sets it to a _DataclassParams instance.
_PARAMS = '__dataclass_params__'

# The name of the function, that if it exists, is called at the end of
# __init__.  The generated call passes the InitVar pseudo-fields as
# positional arguments (see _init_fn).
_POST_INIT_NAME = '__post_init__'
197
# String regex that string annotations for ClassVar or InitVar must match.
# Allows "identifier.identifier[" or "identifier[".
# https://bugs.python.org/issue33453 for details.
#
# Group 1 is the optional leading "identifier." part without the dot
# (None when the annotation has no such prefix); group 2 is the
# identifier that follows.  The pattern is only anchored at the start,
# so whatever comes after the identifier (for example "[int]") is not
# examined.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
202
class InitVar:
    """Annotation wrapper around a type.

    InitVar[some_type] (or InitVar(some_type)) creates an instance
    whose only state is the wrapped type, stored in the "type"
    attribute.
    """

    __slots__ = ('type', )

    def __init__(self, type):
        self.type = type

    def __repr__(self):
        wrapped = self.type
        # Real classes are shown by their bare name; anything else
        # (typing objects, e.g. List[int]) is shown using its own repr.
        type_name = (wrapped.__name__ if isinstance(wrapped, type)
                     else repr(wrapped))
        return f'dataclasses.InitVar[{type_name}]'

    def __class_getitem__(cls, type):
        # Subscription always yields a plain InitVar instance.
        return InitVar(type)
219
220
221# Instances of Field are only ever created from within this module,
222# and only from the field() function, although Field instances are
223# exposed externally as (conceptually) read-only objects.
224#
225# name and type are filled in after the fact, not in __init__.
226# They're not known at the time this class is instantiated, but it's
227# convenient if they're available later.
228#
229# When cls._FIELDS is filled in with a list of Field objects, the name
230# and type fields will have been populated.
class Field:
    """Record describing one field.

    The constructor stores the settings it is given.  name, type and
    _field_type start out as None and are assigned afterwards (see
    _get_field), once the field's name and annotation are known.
    """

    __slots__ = ('name',
                 'type',
                 'default',
                 'default_factory',
                 'repr',
                 'hash',
                 'init',
                 'compare',
                 'metadata',
                 '_field_type',  # Private: not to be used by user code.
                 )

    def __init__(self, default, default_factory, init, repr, hash, compare,
                 metadata):
        # Not known yet at construction time.
        self.name = None
        self.type = None

        self.default = default
        self.default_factory = default_factory
        self.init = init
        self.repr = repr
        self.hash = hash
        self.compare = compare

        # Metadata is always exposed through a read-only proxy; the
        # shared empty proxy is used when none was supplied.
        if metadata is None:
            self.metadata = _EMPTY_METADATA
        else:
            self.metadata = types.MappingProxyType(metadata)

        self._field_type = None

    def __repr__(self):
        # Every attribute is shown with !r, except _field_type which
        # is formatted without a conversion.
        shown = ('name', 'type', 'default', 'default_factory', 'init',
                 'repr', 'hash', 'compare', 'metadata')
        parts = [f'{attr}={getattr(self, attr)!r}' for attr in shown]
        parts.append(f'_field_type={self._field_type}')
        return f"Field({','.join(parts)})"

    # Support for the PEP 487 __set_name__ protocol when the default
    # value of a field is itself a descriptor.  For details on
    # __set_name__, see
    # https://www.python.org/dev/peps/pep-0487/#implementation-details.
    #
    # Note that in _process_class, this Field object is overwritten
    # with the default value, so the end result is a descriptor that
    # had __set_name__ called on it at the right time.
    def __set_name__(self, owner, name):
        hook = getattr(type(self.default), '__set_name__', None)
        if not hook:
            return
        # The type of the default defines __set_name__: forward the
        # call to the default value.
        hook(self.default, owner, name)

    __class_getitem__ = classmethod(GenericAlias)
289
290
class _DataclassParams:
    # Plain record of the arguments given to the decorator;
    # _process_class() stores one on the class under the _PARAMS
    # attribute name.
    __slots__ = ('init',
                 'repr',
                 'eq',
                 'order',
                 'unsafe_hash',
                 'frozen',
                 )

    def __init__(self, init, repr, eq, order, unsafe_hash, frozen):
        self.init, self.repr, self.eq = init, repr, eq
        self.order, self.unsafe_hash, self.frozen = order, unsafe_hash, frozen

    def __repr__(self):
        shown = ('init', 'repr', 'eq', 'order', 'unsafe_hash', 'frozen')
        parts = ','.join(f'{attr}={getattr(self, attr)!r}' for attr in shown)
        return f'_DataclassParams({parts})'
317
318
319# This function is used instead of exposing Field creation directly,
320# so that a type checker can be told (via overloads) that this is a
321# function whose type depends on its parameters.
def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
          hash=None, compare=True, metadata=None):
    """Return an object to identify dataclass fields.

    All arguments are keyword-only:

    default is the field's default value, and default_factory is a
    0-argument function called to produce the field's initial value;
    specifying both is an error.  init controls whether the field is a
    parameter of the class's __init__() function, repr whether it is
    included in the object's repr(), hash whether it is included in
    the object's hash(), and compare whether it is used by the
    comparison functions.  metadata, if specified, must be a mapping;
    it is stored but not otherwise examined by dataclass.

    Raises ValueError if both default and default_factory are given.
    """

    has_default = default is not MISSING
    has_factory = default_factory is not MISSING
    if has_default and has_factory:
        raise ValueError('cannot specify both default and default_factory')

    return Field(default, default_factory, init, repr, hash, compare,
                 metadata)
342
343
def _tuple_str(obj_name, fields):
    # Build the source text of a tuple holding every field of obj_name.
    # For fields named 'x' and 'y' and obj_name "self" the result is
    # "(self.x,self.y,)".

    # No fields at all: the empty tuple.
    if not fields:
        return '()'

    members = ','.join(f'{obj_name}.{f.name}' for f in fields)
    # The trailing comma keeps a single member a tuple rather than a
    # parenthesized expression.
    return f'({members},)'
354
355
356# This function's logic is copied from "recursive_repr" function in
357# reprlib module to avoid dependency.
def _recursive_repr(user_function):
    # Decorator for a repr function: a call made for an object while a
    # call for that same object is already running in the same thread
    # returns "..." instead of recursing.

    # (object id, thread id) pairs of the calls currently in progress.
    active = set()

    @functools.wraps(user_function)
    def wrapper(self):
        marker = (id(self), _thread.get_ident())
        if marker in active:
            return '...'
        active.add(marker)
        try:
            return user_function(self)
        finally:
            # Always forget the call, even if user_function raised.
            active.discard(marker)

    return wrapper
375
376
def _create_fn(name, args, body, *, globals=None, locals=None,
               return_type=MISSING):
    # Create and return a function called `name` from source text.
    # args is an iterable of parameter strings and body an iterable of
    # source lines.
    #
    # Note that the locals mapping passed in is mutated (extra names
    # are added to it).  Caller beware!  The only callers are internal
    # to this module, so no worries about external callers.
    if locals is None:
        locals = {}
    locals.setdefault('BUILTINS', builtins)

    if return_type is MISSING:
        return_annotation = ''
    else:
        # The annotation refers to a name that is supplied through
        # locals, not to the value's source text.
        locals['_return_type'] = return_type
        return_annotation = '->_return_type'

    arg_list = ','.join(args)
    body_src = '\n'.join(f'  {b}' for b in body)

    # Source of the requested function, indented so that it nests
    # inside the wrapper below.
    inner_src = f' def {name}({arg_list}){return_annotation}:\n{body_src}'

    # The wrapper takes every name in locals as a parameter and returns
    # the inner function, which can therefore refer to those names.
    wrapper_params = ', '.join(locals)
    wrapper_src = (f"def __create_fn__({wrapper_params}):\n"
                   f"{inner_src}\n return {name}")

    ns = {}
    exec(wrapper_src, globals, ns)
    return ns['__create_fn__'](**locals)
402
403
def _field_assign(frozen, name, value, self_name):
    # Return the source text that stores `value` in attribute `name`.
    #
    # self_name is what "self" is called in the generated function:
    # "self" is not hard-coded, since that might be a field name.
    if not frozen:
        # A simple assignment is enough.
        return f'{self_name}.{name}={value}'
    # For a frozen class, assign via object.__setattr__ instead.
    return f'BUILTINS.object.__setattr__({self_name},{name!r},{value})'
414
415
def _field_init(f, frozen, globals, self_name):
    # Return the text of the line in the body of __init__ that will
    # initialize field f, or None when no such line is needed.  A
    # default value or default factory is stored in the `globals`
    # mapping under the name _dflt_<fieldname>.

    default_name = f'_dflt_{f.name}'
    if f.default_factory is not MISSING:
        # The generated code calls the factory through this name,
        # whether or not the field is an __init__ parameter.
        globals[default_name] = f.default_factory
        if f.init:
            # If a parameter is given, use it.  If not, the parameter
            # still holds the _HAS_DEFAULT_FACTORY marker, and the
            # factory is called.
            value = (f'{default_name}() '
                     f'if {f.name} is _HAS_DEFAULT_FACTORY '
                     f'else {f.name}')
        else:
            # Not an __init__ parameter, so the factory has to be
            # called here: there's no other way to initialize the
            # field.
            #
            # A plain default=defaultvalue can live in the class dict
            # (cls.fieldname=defaultvalue), but that won't work for a
            # default factory: it must be called in __init__ and its
            # result assigned to self.fieldname.  Falling back to the
            # class dict is not possible, both because nothing is set
            # there, and because the value might be different per-class
            # (which, after all, is why we have a factory function!).
            value = f'{default_name}()'
    elif not f.init:
        # No default factory and not an __init__ parameter: this field
        # does not need initialization.  Signify that to the caller by
        # returning None.
        return None
    else:
        # An __init__ parameter without a factory: assign the parameter,
        # recording the default (if there is one) for the signature.
        if f.default is not MISSING:
            globals[default_name] = f.default
        value = f.name

    # Only test this now, so that the variables for the default have
    # been created.  InitVars get no assignment statement, which is
    # signalled by returning None.
    if f._field_type is _FIELD_INITVAR:
        return None

    # Now, actually generate the field assignment.
    return _field_assign(frozen, f.name, value, self_name)
468
469
def _init_param(f):
    # Return the __init__ parameter string for this field.  For
    # example, the equivalent of 'x:int=3' (except instead of 'int',
    # reference a variable set to int, and instead of '3', reference a
    # variable set to 3).
    if f.default is not MISSING:
        # There's a default: this is the name used to look it up.
        default = f'=_dflt_{f.name}'
    elif f.default_factory is not MISSING:
        # There's a factory function.  Set a marker.
        default = '=_HAS_DEFAULT_FACTORY'
    else:
        # Neither a default nor a default_factory: just the variable
        # name and type.
        default = ''
    return f'{f.name}:_type_{f.name}{default}'
487
488
def _init_fn(fields, frozen, has_post_init, self_name, globals):
    # Build and return the __init__ function.  fields contains both
    # real fields and InitVar pseudo-fields.

    # Make sure no field without a default follows a field with a
    # default.  This would also be caught when exec-ing the function
    # source code, but catching it here gives a better error message,
    # and future-proofs us in case the function is built up using ast.
    seen_default = False
    for f in fields:
        if not f.init:
            # Only fields in the __init__ call are considered.
            continue
        if f.default is MISSING and f.default_factory is MISSING:
            if seen_default:
                raise TypeError(f'non-default argument {f.name!r} '
                                'follows default argument')
        else:
            seen_default = True

    # Names the generated code refers to: one _type_<name> entry per
    # field, plus the two marker objects.
    locals = {f'_type_{f.name}': f.type for f in fields}
    locals['MISSING'] = MISSING
    locals['_HAS_DEFAULT_FACTORY'] = _HAS_DEFAULT_FACTORY

    # One statement per field that requires initialization.  A None
    # line means the field doesn't need one (it's a pseudo-field), so
    # it is skipped.
    body_lines = [line for f in fields
                  if (line := _field_init(f, frozen, locals, self_name))]

    # Does this class have a post-init function?  If so, call it with
    # the InitVar pseudo-fields as arguments.
    if has_post_init:
        params_str = ','.join(f.name for f in fields
                              if f._field_type is _FIELD_INITVAR)
        body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')

    init_params = [self_name]
    init_params.extend(_init_param(f) for f in fields if f.init)

    # If there are no body lines, use 'pass'.
    return _create_fn('__init__',
                      init_params,
                      body_lines or ['pass'],
                      locals=locals,
                      globals=globals,
                      return_type=None)
537
538
def _repr_fn(fields, globals):
    # Build the __repr__ function.  The generated body returns the
    # class's __qualname__ followed by "(name=value, ...)", where each
    # value is formatted with !r.  The result is wrapped so that a
    # recursive call returns "...".
    shown = ', '.join(f'{f.name}={{self.{f.name}!r}}' for f in fields)
    body = [f'return self.__class__.__qualname__ + f"({shown})"']
    fn = _create_fn('__repr__', ('self',), body, globals=globals)
    return _recursive_repr(fn)
548
549
def _frozen_get_del_attr(cls, fields, globals):
    # Build the __setattr__ and __delattr__ functions and return them
    # as a (setattr, delattr) pair.  Each generated function raises
    # FrozenInstanceError when the instance's type is exactly cls or
    # when the attribute name is one of the given fields; otherwise it
    # defers to super(cls, self).
    locals = {'cls': cls,
              'FrozenInstanceError': FrozenInstanceError}

    # Source text of a tuple literal holding the field names.
    if fields:
        fields_str = f"({','.join(repr(f.name) for f in fields)},)"
    else:
        # Special case for the zero-length tuple.
        fields_str = '()'

    # Both generated functions start with the same test.
    guard = f'if type(self) is cls or name in {fields_str}:'

    setattr_fn = _create_fn(
        '__setattr__',
        ('self', 'name', 'value'),
        (guard,
         ' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
         'super(cls, self).__setattr__(name, value)'),
        locals=locals,
        globals=globals)
    delattr_fn = _create_fn(
        '__delattr__',
        ('self', 'name'),
        (guard,
         ' raise FrozenInstanceError(f"cannot delete field {name!r}")',
         'super(cls, self).__delattr__(name)'),
        locals=locals,
        globals=globals)
    return (setattr_fn, delattr_fn)
573
574
def _cmp_fn(name, op, self_tuple, other_tuple, globals):
    # Create a comparison function called `name` that applies `op` to
    # the two tuples.  If the fields in the object are named 'x' and
    # 'y', then self_tuple is the string '(self.x,self.y)' and
    # other_tuple is the string '(other.x,other.y)'.
    #
    # The generated function only compares objects whose __class__ is
    # the very same class; otherwise it returns NotImplemented.
    body = ['if other.__class__ is self.__class__:',
            f' return {self_tuple}{op}{other_tuple}',
            'return NotImplemented']
    return _create_fn(name, ('self', 'other'), body, globals=globals)
587
588
def _hash_fn(fields, globals):
    # Build a __hash__ function that hashes the tuple of the given
    # fields' values.
    hashed = _tuple_str('self', fields)
    body = [f'return hash({hashed})']
    return _create_fn('__hash__', ('self',), body, globals=globals)
595
596
def _is_classvar(a_type, typing):
    # True for ClassVar itself, and for a subscripted ClassVar.  The
    # second test uses a typing internal class, but it's the best way
    # to test if this is a ClassVar.  `typing` is the typing module.
    if a_type is typing.ClassVar:
        return True
    return (type(a_type) is typing._GenericAlias
            and a_type.__origin__ is typing.ClassVar)
603
604
def _is_initvar(a_type, dataclasses):
    # True for the InitVar class itself, and for an InitVar instance
    # (which is what subscripting InitVar produces).  `dataclasses` is
    # the module we're currently in (dataclasses.py).
    initvar = dataclasses.InitVar
    return a_type is initvar or type(a_type) is initvar
610
611
def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
    # Given a type annotation string, does it refer to a_type in
    # a_module?  For example, when checking that annotation denotes a
    # ClassVar, then a_module is typing, and a_type is
    # typing.ClassVar.
    #
    # Returns True or False.  If the module named by cls.__module__ is
    # not present in sys.modules, the annotation cannot be resolved
    # and the answer is False.

    # It's possible to look up a_module given a_type, but it involves
    # looking in sys.modules (again!), and seems like a waste since
    # the caller already knows a_module.

    # - annotation is a string type annotation
    # - cls is the class that this annotation was found in
    # - a_module is the module we want to match
    # - a_type is the type in that module we want to match
    # - is_type_predicate is a function called with (obj, a_module)
    #   that determines if obj is of the desired type.

    # Since this test does not do a local namespace lookup (and
    # instead only a module (global) lookup), there are some things it
    # gets wrong.

    # With string annotations, cv0 will be detected as a ClassVar:
    #   CV = ClassVar
    #   @dataclass
    #   class C0:
    #     cv0: CV

    # But in this example cv1 will not be detected as a ClassVar:
    #   @dataclass
    #   class C1:
    #     CV = ClassVar
    #     cv1: CV

    # In C1, the code in this function (_is_type) will look up "CV" in
    # the module and not find it, so it will not consider cv1 as a
    # ClassVar.  This is a fairly obscure corner case, and the best
    # way to fix it would be to eval() the string "CV" with the
    # correct global and local namespaces.  However that would involve
    # an eval() penalty for every single field of every dataclass
    # that's defined.  It was judged not worth it.

    match = _MODULE_IDENTIFIER_RE.match(annotation)
    if match:
        ns = None
        # The class's module may be absent from sys.modules (for
        # example when a custom string was written to cls.__module__),
        # in which case this is None and no namespace can be found.
        module = sys.modules.get(cls.__module__)
        module_name = match.group(1)
        if not module_name:
            # No module name, assume the class's module did
            # "from dataclasses import InitVar".
            if module is not None:
                ns = module.__dict__
        else:
            # Look up module_name in the class's module.
            if module and module.__dict__.get(module_name) is a_module:
                ns = sys.modules.get(a_type.__module__).__dict__
        if ns and is_type_predicate(ns.get(match.group(2)), a_module):
            return True
    return False
669
670
def _get_field(cls, a_name, a_type):
    # Return a Field object for this field name and type.  ClassVars
    # and InitVars are also returned, but marked as such (see
    # f._field_type).

    # Whatever the class currently holds under this name is the
    # default.  If it is already a Field, use it as is; otherwise it's
    # only a normal default value and is converted to a Field().
    existing = getattr(cls, a_name, MISSING)
    if isinstance(existing, Field):
        f = existing
    elif isinstance(existing, types.MemberDescriptorType):
        # This is a field in __slots__, so it has no default value.
        f = field(default=MISSING)
    else:
        f = field(default=existing)

    # Only at this point do we know the name and the type.  Set them,
    # and assume a normal field until proven otherwise: next we decide
    # whether it's a ClassVar or an InitVar, everything else is just a
    # normal field.
    f.name = a_name
    f.type = a_type
    f._field_type = _FIELD

    # In addition to checking for actual types here, also check for
    # string annotations.  get_type_hints() won't always work for us
    # (see https://github.com/python/typing/issues/508 for example),
    # plus it's expensive and would require an eval for every string
    # annotation.  So, make a best effort to see if this is a ClassVar
    # or InitVar using regexes and checking that the thing referenced
    # is actually of the correct type.

    # For the complete discussion, see https://bugs.python.org/issue33453
    annotation_is_str = isinstance(a_type, str)

    # If typing has not been imported, then it's impossible for any
    # annotation to be a ClassVar.  So, only look for ClassVar if
    # typing has been imported by any module (not necessarily cls's
    # module).
    typing = sys.modules.get('typing')
    if typing and (_is_classvar(a_type, typing)
                   or (annotation_is_str
                       and _is_type(a_type, cls, typing, typing.ClassVar,
                                    _is_classvar))):
        f._field_type = _FIELD_CLASSVAR
    else:
        # Not a ClassVar.  If the type is InitVar, or if it's a
        # matching string annotation, then it's an InitVar.  The module
        # we're checking against is the module we're currently in
        # (dataclasses.py).
        dataclasses = sys.modules[__name__]
        if (_is_initvar(a_type, dataclasses)
                or (annotation_is_str
                    and _is_type(a_type, cls, dataclasses,
                                 dataclasses.InitVar, _is_initvar))):
            f._field_type = _FIELD_INITVAR

    # Validations for individual fields.  This is delayed until now,
    # instead of in the Field() constructor, since only here do we
    # know the field name, which allows for better error reporting.
    if f._field_type is _FIELD:
        # For real fields, disallow mutable defaults for known types.
        if isinstance(f.default, (list, dict, set)):
            raise ValueError(f'mutable default {type(f.default)} for field '
                             f'{f.name} is not allowed: use default_factory')
    elif f.default_factory is not MISSING:
        # Special restriction for ClassVar and InitVar.
        #
        # Should other field settings be checked too?  default_factory
        # seems the most serious to check for.  Maybe add others.  For
        # example, how about init=False (or really,
        # init=<not-the-default-init-value>)?  It makes no sense for
        # ClassVar and InitVar to specify init=<anything>.
        raise TypeError(f'field {f.name} cannot have a '
                        'default factory')

    return f
751
752
def _set_new_attribute(cls, name, value):
    # Set cls.<name> to value unless the class's own __dict__ already
    # has an entry for name: an existing attribute is never
    # overwritten.  Returns True if the attribute already existed,
    # False if it was set here.
    already_defined = name in cls.__dict__
    if not already_defined:
        setattr(cls, name, value)
    return already_defined
760
761
762# Decide if/how we're going to create a hash function.  Key is
763# (unsafe_hash, eq, frozen, does-hash-exist).  Value is the action to
764# take.  The common case is to do nothing, so instead of providing a
765# function that is a no-op, use None to signify that.
766
def _hash_set_none(cls, fields, globals):
    # Action used by _hash_action for the (unsafe_hash=False, eq=True,
    # frozen=False, no explicit __hash__) case.  Nothing is generated:
    # the result is simply None.  All three arguments are ignored.
    return None
769
def _hash_add(cls, fields, globals):
    # Build a __hash__ function over the fields that take part in
    # hashing.  A field's hash setting decides; when it is None, the
    # field's compare setting is used instead.
    hashed_fields = []
    for f in fields:
        include = f.compare if f.hash is None else f.hash
        if include:
            hashed_fields.append(f)
    return _hash_fn(hashed_fields, globals)
773
def _hash_exception(cls, fields, globals):
    # Action for the cases where a __hash__ may not be replaced:
    # always raises TypeError, naming the class.
    message = f'Cannot overwrite attribute __hash__ in class {cls.__name__}'
    raise TypeError(message)
778
# Note the difference between an entry of None (no action is taken)
# and _hash_set_none (an action whose result is None; in the __hash__
# table near the top of this file that box reads "None", i.e. the
# attribute is set to None).
#
#                +-------------------------------------- unsafe_hash?
#                |      +------------------------------- eq?
#                |      |      +------------------------ frozen?
#                |      |      |      +----------------  has-explicit-hash?
#                |      |      |      |
#                |      |      |      |        +-------  action
#                |      |      |      |        |
#                v      v      v      v        v
_hash_action = {(False, False, False, False): None,
                (False, False, False, True ): None,
                (False, False, True,  False): None,
                (False, False, True,  True ): None,
                (False, True,  False, False): _hash_set_none,
                (False, True,  False, True ): None,
                (False, True,  True,  False): _hash_add,
                (False, True,  True,  True ): None,
                (True,  False, False, False): _hash_add,
                (True,  False, False, True ): _hash_exception,
                (True,  False, True,  False): _hash_add,
                (True,  False, True,  True ): _hash_exception,
                (True,  True,  False, False): _hash_add,
                (True,  True,  False, True ): _hash_exception,
                (True,  True,  True,  False): _hash_add,
                (True,  True,  True,  True ): _hash_exception,
                }
805# See https://bugs.python.org/issue32929#msg312829 for an if-statement
806# version of this table.
807
808
def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen):
    """Turn `cls` into a dataclass in place and return it.

    Collects fields from dataclass bases (in reverse MRO order) and from
    the class's own __annotations__, stores them under _FIELDS, records
    the decorator arguments under _PARAMS, and then adds the dunder
    methods requested by init, repr, eq, order, unsafe_hash and frozen.

    Raises TypeError if a Field has no annotation, if frozen and
    non-frozen dataclasses are mixed through inheritance, or if an
    ordering, frozen or hash method would have to overwrite one the
    class already defines.  Raises ValueError if order is true but eq
    is not.
    """
    # Now that dicts retain insertion order, there's no reason to use
    # an ordered dict.  I am leveraging that ordering here, because
    # derived class fields overwrite base class fields, but the order
    # is defined by the base class, which is found first.
    fields = {}

    if cls.__module__ in sys.modules:
        globals = sys.modules[cls.__module__].__dict__
    else:
        # Theoretically this can happen if someone writes
        # a custom string to cls.__module__.  In which case
        # such dataclass won't be fully introspectable
        # (w.r.t. typing.get_type_hints) but will still function
        # correctly.
        globals = {}

    setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order,
                                           unsafe_hash, frozen))

    # Find our base classes in reverse MRO order, and exclude
    # ourselves.  In reversed order so that more derived classes
    # override earlier field definitions in base classes.  As long as
    # we're iterating over them, see if any are frozen.
    any_frozen_base = False
    has_dataclass_bases = False
    for b in cls.__mro__[-1:0:-1]:
        # Only process classes that have been processed by our
        # decorator.  That is, they have a _FIELDS attribute.
        base_fields = getattr(b, _FIELDS, None)
        if base_fields is not None:
            has_dataclass_bases = True
            for f in base_fields.values():
                fields[f.name] = f
            if getattr(b, _PARAMS).frozen:
                any_frozen_base = True

    # Annotations that are defined in this class (not in base
    # classes).  If __annotations__ isn't present, then this class
    # adds no new annotations.  We use this to compute fields that are
    # added by this class.
    #
    # Fields are found from cls_annotations, which is guaranteed to be
    # ordered.  Default values are from class attributes, if a field
    # has a default.  If the default value is a Field(), then it
    # contains additional info beyond (and possibly including) the
    # actual default value.  Pseudo-fields ClassVars and InitVars are
    # included, despite the fact that they're not real fields.  That's
    # dealt with later.
    cls_annotations = cls.__dict__.get('__annotations__', {})

    # Now find fields in our class.  While doing so, validate some
    # things, and set the default values (as class attributes) where
    # we can.
    cls_fields = [_get_field(cls, name, type)
                  for name, type in cls_annotations.items()]
    for f in cls_fields:
        fields[f.name] = f

        # If the class attribute (which is the default value for this
        # field) exists and is of type 'Field', replace it with the
        # real default.  This is so that normal class introspection
        # sees a real default value, not a Field.
        if isinstance(getattr(cls, f.name, None), Field):
            if f.default is MISSING:
                # If there's no default, delete the class attribute.
                # This happens if we specify field(repr=False), for
                # example (that is, we specified a field object, but
                # no default value).  Also if we're using a default
                # factory.  The class attribute should not be set at
                # all in the post-processed class.
                delattr(cls, f.name)
            else:
                setattr(cls, f.name, f.default)

    # Do we have any Field members that don't also have annotations?
    for name, value in cls.__dict__.items():
        if isinstance(value, Field) and name not in cls_annotations:
            raise TypeError(f'{name!r} is a field but has no type annotation')

    # Check rules that apply if we are derived from any dataclasses.
    if has_dataclass_bases:
        # Raise an exception if any of our bases are frozen, but we're not.
        if any_frozen_base and not frozen:
            raise TypeError('cannot inherit non-frozen dataclass from a '
                            'frozen one')

        # Raise an exception if we're frozen, but none of our bases are.
        if not any_frozen_base and frozen:
            raise TypeError('cannot inherit frozen dataclass from a '
                            'non-frozen one')

    # Remember all of the fields on our class (including bases).  This
    # also marks this class as being a dataclass.
    setattr(cls, _FIELDS, fields)

    # Was this class defined with an explicit __hash__?  Note that if
    # __eq__ is defined in this class, then python will automatically
    # set __hash__ to None.  This is a heuristic, as it's possible
    # that such a __hash__ == None was not auto-generated, but it's
    # close enough.
    class_hash = cls.__dict__.get('__hash__', MISSING)
    has_explicit_hash = not (class_hash is MISSING or
                             (class_hash is None and '__eq__' in cls.__dict__))

    # If we're generating ordering methods, we must be generating the
    # eq methods.
    if order and not eq:
        raise ValueError('eq must be true if order is true')

    if init:
        # Does this class have a post-init function?
        has_post_init = hasattr(cls, _POST_INIT_NAME)

        # Include InitVars and regular fields (so, not ClassVars).
        flds = [f for f in fields.values()
                if f._field_type in (_FIELD, _FIELD_INITVAR)]
        _set_new_attribute(cls, '__init__',
                           _init_fn(flds,
                                    frozen,
                                    has_post_init,
                                    # The name to use for the "self"
                                    # param in __init__.  Use "self"
                                    # if possible.
                                    '__dataclass_self__' if 'self' in fields
                                            else 'self',
                                    globals,
                          ))

    # Get the fields as a list, and include only real fields.  This is
    # used in all of the following methods.
    field_list = [f for f in fields.values() if f._field_type is _FIELD]

    if repr:
        flds = [f for f in field_list if f.repr]
        _set_new_attribute(cls, '__repr__', _repr_fn(flds, globals))

    if eq:
        # Create __eq__ method.  There's no need for a __ne__ method,
        # since python will call __eq__ and negate it.
        flds = [f for f in field_list if f.compare]
        self_tuple = _tuple_str('self', flds)
        other_tuple = _tuple_str('other', flds)
        _set_new_attribute(cls, '__eq__',
                           _cmp_fn('__eq__', '==',
                                   self_tuple, other_tuple,
                                   globals=globals))

    if order:
        # Create and set the ordering methods.
        flds = [f for f in field_list if f.compare]
        self_tuple = _tuple_str('self', flds)
        other_tuple = _tuple_str('other', flds)
        for name, op in [('__lt__', '<'),
                         ('__le__', '<='),
                         ('__gt__', '>'),
                         ('__ge__', '>='),
                         ]:
            if _set_new_attribute(cls, name,
                                  _cmp_fn(name, op, self_tuple, other_tuple,
                                          globals=globals)):
                raise TypeError(f'Cannot overwrite attribute {name} '
                                f'in class {cls.__name__}. Consider using '
                                'functools.total_ordering')

    if frozen:
        for fn in _frozen_get_del_attr(cls, field_list, globals):
            if _set_new_attribute(cls, fn.__name__, fn):
                raise TypeError(f'Cannot overwrite attribute {fn.__name__} '
                                f'in class {cls.__name__}')

    # Decide if/how we're going to create a hash function.
    hash_action = _hash_action[bool(unsafe_hash),
                               bool(eq),
                               bool(frozen),
                               has_explicit_hash]
    if hash_action is not None:
        # No need to call _set_new_attribute here, since by the time
        # we're here the overwriting is unconditional.
        cls.__hash__ = hash_action(cls, field_list, globals)

    if not getattr(cls, '__doc__'):
        # Create a class doc-string.  Some classes have no retrievable
        # signature (for example when a metaclass replaces __call__
        # with a builtin); fall back to just the class name rather
        # than failing the whole decoration over a docstring.
        try:
            text_sig = str(inspect.signature(cls)).replace(' -> None', '')
        except (TypeError, ValueError):
            text_sig = ''
        cls.__doc__ = cls.__name__ + text_sig

    return cls
996
997
def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False,
              unsafe_hash=False, frozen=False):
    """Add generated dunder methods to a class, based on its fields.

    The class that was passed in is modified and returned.  Fields are
    taken from the class's PEP 526 __annotations__.

    A true init adds an __init__() method, a true repr adds
    __repr__(), a true eq adds __eq__(), a true order adds the rich
    comparison methods, and a true unsafe_hash adds __hash__().  With
    a true frozen, fields may not be assigned to after instance
    creation.

    Usable both as a bare decorator (@dataclass) and with arguments
    (@dataclass(...)).
    """

    def wrap(cls):
        return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen)

    # With parens (@dataclass(...)) no class has been supplied yet, so
    # hand back the real decorator; without parens the class is
    # already here and can be processed right away.
    return wrap if cls is None else wrap(cls)
1022
1023
def fields(class_or_instance):
    """Return a tuple describing the fields of this dataclass.

    Accepts a dataclass or an instance of one. Tuple elements are of
    type Field.

    Raises TypeError if the argument is neither a dataclass nor an
    instance of one.
    """

    # Might it be worth caching this, per class?
    try:
        all_fields = getattr(class_or_instance, _FIELDS)
    except AttributeError:
        # The missing marker attribute is an implementation detail;
        # suppress it so the traceback shows only the TypeError.
        raise TypeError('must be called with a dataclass type or '
                        'instance') from None

    # Exclude pseudo-fields.  Note that the mapping is sorted by
    # insertion order, so the order of the tuple is as the fields were
    # defined.
    return tuple(f for f in all_fields.values() if f._field_type is _FIELD)
1040
1041
def _is_dataclass_instance(obj):
    """Return True when the type of obj carries the _FIELDS marker.

    The check is made on type(obj), never on obj itself, so passing a
    dataclass *class* checks its metaclass rather than the class.
    """
    obj_type = type(obj)
    return hasattr(obj_type, _FIELDS)
1045
1046
def is_dataclass(obj):
    """Returns True if obj is a dataclass or an instance of a
    dataclass."""
    # A types.GenericAlias forwards attribute access to the class it
    # parameterizes, and on some interpreter versions it also passes
    # isinstance(obj, type).  Such an alias is not itself a class, so
    # judge it by its real type instead of by the origin it proxies.
    if isinstance(obj, type) and not isinstance(obj, GenericAlias):
        cls = obj
    else:
        cls = type(obj)
    return hasattr(cls, _FIELDS)
1052
1053
def asdict(obj, *, dict_factory=dict):
    """Convert a dataclass instance into a new dictionary that maps
    each field name to that field's value.

    Example usage:

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert asdict(c) == {'x': 1, 'y': 2}

    'dict_factory', if supplied, replaces the built-in dict when
    building the result.  Field values that are themselves dataclass
    instances are converted recursively, and built-in containers
    (tuples, lists, and dicts) are searched for them as well.

    Raises TypeError when obj is not a dataclass instance.
    """
    if _is_dataclass_instance(obj):
        return _asdict_inner(obj, dict_factory)
    raise TypeError("asdict() should be called on dataclass instances")
1076
1077
def _asdict_inner(obj, dict_factory):
    """Recursive worker for asdict().

    Dataclass instances become dict_factory(list of (name, value)
    pairs); namedtuples, lists, tuples and dicts are rebuilt as the
    same type with their contents converted; anything else is
    deep-copied.
    """
    if _is_dataclass_instance(obj):
        return dict_factory([
            (f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
            for f in fields(obj)
        ])
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).

        # I'm not using namedtuple's _asdict()
        # method, because:
        # - it does not recurse in to the namedtuple fields and
        #   convert them to dicts (using dict_factory).
        # - I don't actually want to return a dict here.  The main
        #   use case here is json.dumps, and it handles converting
        #   namedtuples to lists.  Admittedly we're losing some
        #   information here when we produce a json list instead of a
        #   dict.  Note that if we returned dicts here instead of
        #   namedtuples, we could no longer call asdict() on a data
        #   structure where a namedtuple was used as a dict key.

        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
    elif isinstance(obj, dict):
        obj_type = type(obj)
        if hasattr(obj_type, 'default_factory'):
            # obj is a defaultdict, whose constructor takes its
            # default_factory as the first argument, so it cannot be
            # built straight from an iterable of pairs.  Create it
            # empty with the same factory and fill it in.
            result = obj_type(obj.default_factory)
            for k, v in obj.items():
                result[_asdict_inner(k, dict_factory)] = \
                    _asdict_inner(v, dict_factory)
            return result
        return obj_type((_asdict_inner(k, dict_factory),
                         _asdict_inner(v, dict_factory))
                        for k, v in obj.items())
    else:
        return copy.deepcopy(obj)
1117
1118
def astuple(obj, *, tuple_factory=tuple):
    """Convert a dataclass instance into a new tuple of its field values.

    Example usage::

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert astuple(c) == (1, 2)

    'tuple_factory', if supplied, replaces the built-in tuple when
    building the result.  Field values that are themselves dataclass
    instances are converted recursively, and built-in containers
    (tuples, lists, and dicts) are searched for them as well.

    Raises TypeError when obj is not a dataclass instance.
    """

    if _is_dataclass_instance(obj):
        return _astuple_inner(obj, tuple_factory)
    raise TypeError("astuple() should be called on dataclass instances")
1141
1142
def _astuple_inner(obj, tuple_factory):
    """Recursive worker for astuple().

    Dataclass instances become tuple_factory(list of field values);
    namedtuples, lists, tuples and dicts are rebuilt as the same type
    with their contents converted; anything else is deep-copied.
    """
    if _is_dataclass_instance(obj):
        return tuple_factory([
            _astuple_inner(getattr(obj, f.name), tuple_factory)
            for f in fields(obj)
        ])
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).
        return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
    elif isinstance(obj, dict):
        obj_type = type(obj)
        if hasattr(obj_type, 'default_factory'):
            # obj is a defaultdict, whose constructor takes its
            # default_factory as the first argument, so it cannot be
            # built straight from an iterable of pairs.  Create it
            # empty with the same factory and fill it in.
            result = obj_type(obj.default_factory)
            for k, v in obj.items():
                result[_astuple_inner(k, tuple_factory)] = \
                    _astuple_inner(v, tuple_factory)
            return result
        return obj_type((_astuple_inner(k, tuple_factory),
                         _astuple_inner(v, tuple_factory))
                        for k, v in obj.items())
    else:
        return copy.deepcopy(obj)
1168
1169
def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
                   repr=True, eq=True, order=False, unsafe_hash=False,
                   frozen=False):
    """Build and return a new dataclass at runtime.

    'cls_name' becomes the name of the class.  'fields' is an iterable
    whose items are either (name), (name, type) or (name, type, Field)
    objects.  When the type is left out, the string 'typing.Any' is
    used.  Field objects are created by the equivalent of calling
    'field(name, type [, Field-info])'.

      C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,))

    is equivalent to:

      @dataclass
      class C(Base):
          x: 'typing.Any'
          y: int
          z: int = field(init=False)

    See the builtin type() function for the meaning of the bases and
    namespace parameters.

    init, repr, eq, order, unsafe_hash, and frozen are forwarded to
    dataclass().

    Raises TypeError for a malformed item, or for a field name that is
    not a valid identifier, is a keyword, or is repeated.
    """

    # Work on a private copy of the namespace since it gets mutated.
    ns_contents = {} if namespace is None else namespace.copy()

    # The annotations built so far double as the record of names
    # already seen, which is what duplicate detection needs.
    annotations = {}
    for item in fields:
        if isinstance(item, str):
            name, tp = item, 'typing.Any'
        else:
            arity = len(item)
            if arity == 2:
                name, tp = item
            elif arity == 3:
                name, tp, spec = item
                ns_contents[name] = spec
            else:
                raise TypeError(f'Invalid field: {item!r}')

        # Validate that the name is an identifier, not a keyword, and
        # not a duplicate.
        if not (isinstance(name, str) and name.isidentifier()):
            raise TypeError(f'Field names must be valid identifiers: {name!r}')
        if keyword.iskeyword(name):
            raise TypeError(f'Field names must not be keywords: {name!r}')
        if name in annotations:
            raise TypeError(f'Field name duplicated: {name!r}')

        annotations[name] = tp

    ns_contents['__annotations__'] = annotations

    def populate(ns):
        ns.update(ns_contents)

    # We use `types.new_class()` instead of simply `type()` to allow
    # dynamic creation of generic dataclasses.
    cls = types.new_class(cls_name, bases, {}, populate)
    return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
                     unsafe_hash=unsafe_hash, frozen=frozen)
1234
1235
def replace(obj, /, **changes):
    """Create a new object of the same type as obj, with the given
    fields replaced by new values.

    Particularly handy for frozen classes.  Example usage:

      @dataclass(frozen=True)
      class C:
          x: int
          y: int

      c = C(1, 2)
      c1 = replace(c, x=3)
      assert c1.x == 3 and c1.y == 2

    Raises TypeError when obj is not a dataclass instance, and
    ValueError when an init=False field is given in changes or when an
    InitVar without a default is not given.
    """

    # 'changes' is always a fresh dict, even when the caller used
    # 'replace(obj, **my_changes)', so filling it in below is safe.

    if not _is_dataclass_instance(obj):
        raise TypeError("replace() should be called on dataclass instances")

    for f in getattr(obj, _FIELDS).values():
        # ClassVars are never passed to __init__; skip them.
        if f._field_type is _FIELD_CLASSVAR:
            continue

        name = f.name

        if not f.init:
            # init=False fields cannot be supplied by the caller.
            if name in changes:
                raise ValueError(f'field {name} is declared with '
                                 'init=False, it cannot be specified with '
                                 'replace()')
            continue

        if name in changes:
            # The caller supplied a new value; nothing to fill in.
            continue

        # Not supplied: reuse the value stored on obj.  Only InitVars
        # that have no default are required to be given explicitly.
        if f._field_type is _FIELD_INITVAR and f.default is MISSING:
            raise ValueError(f"InitVar {name!r} "
                             'must be specified with replace()')
        changes[name] = getattr(obj, name)

    # Building the new object runs __init__() and __post_init__() (if
    # defined) with every init field now present in 'changes'.  Any
    # entry in 'changes' that is not a field makes this call raise
    # TypeError, which is the desired outcome.
    return obj.__class__(**changes)
1285