1import re 2import sys 3import copy 4import types 5import inspect 6import keyword 7import builtins 8import functools 9import _thread 10from types import GenericAlias 11 12 13__all__ = ['dataclass', 14 'field', 15 'Field', 16 'FrozenInstanceError', 17 'InitVar', 18 'MISSING', 19 20 # Helper functions. 21 'fields', 22 'asdict', 23 'astuple', 24 'make_dataclass', 25 'replace', 26 'is_dataclass', 27 ] 28 29# Conditions for adding methods. The boxes indicate what action the 30# dataclass decorator takes. For all of these tables, when I talk 31# about init=, repr=, eq=, order=, unsafe_hash=, or frozen=, I'm 32# referring to the arguments to the @dataclass decorator. When 33# checking if a dunder method already exists, I mean check for an 34# entry in the class's __dict__. I never check to see if an attribute 35# is defined in a base class. 36 37# Key: 38# +=========+=========================================+ 39# + Value | Meaning | 40# +=========+=========================================+ 41# | <blank> | No action: no method is added. | 42# +---------+-----------------------------------------+ 43# | add | Generated method is added. | 44# +---------+-----------------------------------------+ 45# | raise | TypeError is raised. | 46# +---------+-----------------------------------------+ 47# | None | Attribute is set to None. | 48# +=========+=========================================+ 49 50# __init__ 51# 52# +--- init= parameter 53# | 54# v | | | 55# | no | yes | <--- class has __init__ in __dict__? 56# +=======+=======+=======+ 57# | False | | | 58# +-------+-------+-------+ 59# | True | add | | <- the default 60# +=======+=======+=======+ 61 62# __repr__ 63# 64# +--- repr= parameter 65# | 66# v | | | 67# | no | yes | <--- class has __repr__ in __dict__? 
68# +=======+=======+=======+ 69# | False | | | 70# +-------+-------+-------+ 71# | True | add | | <- the default 72# +=======+=======+=======+ 73 74 75# __setattr__ 76# __delattr__ 77# 78# +--- frozen= parameter 79# | 80# v | | | 81# | no | yes | <--- class has __setattr__ or __delattr__ in __dict__? 82# +=======+=======+=======+ 83# | False | | | <- the default 84# +-------+-------+-------+ 85# | True | add | raise | 86# +=======+=======+=======+ 87# Raise because not adding these methods would break the "frozen-ness" 88# of the class. 89 90# __eq__ 91# 92# +--- eq= parameter 93# | 94# v | | | 95# | no | yes | <--- class has __eq__ in __dict__? 96# +=======+=======+=======+ 97# | False | | | 98# +-------+-------+-------+ 99# | True | add | | <- the default 100# +=======+=======+=======+ 101 102# __lt__ 103# __le__ 104# __gt__ 105# __ge__ 106# 107# +--- order= parameter 108# | 109# v | | | 110# | no | yes | <--- class has any comparison method in __dict__? 111# +=======+=======+=======+ 112# | False | | | <- the default 113# +-------+-------+-------+ 114# | True | add | raise | 115# +=======+=======+=======+ 116# Raise because to allow this case would interfere with using 117# functools.total_ordering. 

# __hash__

#    +------------------- unsafe_hash= parameter
#    |       +----------- eq= parameter
#    |       |       +--- frozen= parameter
#    |       |       |
#    v       v       v    |        |        |
#                         |   no   |  yes   |  <--- class has explicitly defined __hash__
# +=======+=======+=======+========+========+
# | False | False | False |        |        | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | False | True  |        |        | No __eq__, use the base class __hash__
# +-------+-------+-------+--------+--------+
# | False | True  | False | None   |        | <-- the default, not hashable
# +-------+-------+-------+--------+--------+
# | False | True  | True  | add    |        | Frozen, so hashable, allows override
# +-------+-------+-------+--------+--------+
# | True  | False | False | add    | raise  | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True  | False | True  | add    | raise  | Has no __eq__, but hashable
# +-------+-------+-------+--------+--------+
# | True  | True  | False | add    | raise  | Not frozen, but hashable
# +-------+-------+-------+--------+--------+
# | True  | True  | True  | add    | raise  | Frozen, so hashable
# +=======+=======+=======+========+========+
# For boxes that are blank, __hash__ is untouched and therefore
# inherited from the base class.  If the base is object, then
# id-based hashing is used.
#
# Note that a class may already have __hash__=None if it specified an
# __eq__ method in the class body (not one that was created by
# @dataclass).
#
# See _hash_action (below) for a coded version of this table.


class FrozenInstanceError(AttributeError):
    """Raised when an attempt is made to modify a frozen class."""

# A sentinel object for default values to signal that a default
# factory will be used.  This is given a nice repr() which will appear
# in the function signature of dataclasses' constructors.
class _HAS_DEFAULT_FACTORY_CLASS:
    def __repr__(self):
        return '<factory>'
_HAS_DEFAULT_FACTORY = _HAS_DEFAULT_FACTORY_CLASS()

# A sentinel object to detect if a parameter is supplied or not.  A
# dedicated class is used so the sentinel has its own, recognizable
# type.
class _MISSING_TYPE:
    pass
MISSING = _MISSING_TYPE()

# Since most per-field metadata will be unused, create an empty
# read-only proxy that can be shared among all fields.
_EMPTY_METADATA = types.MappingProxyType({})

# Markers for the various kinds of fields and pseudo-fields.
class _FIELD_BASE:
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return self.name
_FIELD = _FIELD_BASE('_FIELD')
_FIELD_CLASSVAR = _FIELD_BASE('_FIELD_CLASSVAR')
_FIELD_INITVAR = _FIELD_BASE('_FIELD_INITVAR')

# The name of an attribute on the class where we store the Field
# objects.  Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'

# The name of an attribute on the class that stores the parameters to
# @dataclass.
_PARAMS = '__dataclass_params__'

# The name of the function, that if it exists, is called at the end of
# __init__.
_POST_INIT_NAME = '__post_init__'

# String regex that string annotations for ClassVar or InitVar must match.
# Allows "identifier.identifier[" or "identifier[".
# https://bugs.python.org/issue33453 for details.
_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')

# Marker type for init-only pseudo-fields.  InitVar[T] creates an
# InitVar instance that remembers T in its 'type' slot.
class InitVar:
    __slots__ = ('type', )

    def __init__(self, type):
        self.type = type

    def __repr__(self):
        if isinstance(self.type, type):
            type_name = self.type.__name__
        else:
            # typing objects, e.g. List[int]
            type_name = repr(self.type)
        return f'dataclasses.InitVar[{type_name}]'

    def __class_getitem__(cls, type):
        return InitVar(type)


# Instances of Field are only ever created from within this module,
# and only from the field() function, although Field instances are
# exposed externally as (conceptually) read-only objects.
#
# name and type are filled in after the fact, not in __init__.
# They're not known at the time this class is instantiated, but it's
# convenient if they're available later.
#
# When cls._FIELDS is filled in with a list of Field objects, the name
# and type fields will have been populated.
class Field:
    __slots__ = ('name',
                 'type',
                 'default',
                 'default_factory',
                 'repr',
                 'hash',
                 'init',
                 'compare',
                 'metadata',
                 '_field_type',  # Private: not to be used by user code.
                 )

    def __init__(self, default, default_factory, init, repr, hash, compare,
                 metadata):
        self.name = None
        self.type = None
        self.default = default
        self.default_factory = default_factory
        self.init = init
        self.repr = repr
        self.hash = hash
        self.compare = compare
        # Wrap user metadata in a read-only proxy; share one empty proxy
        # when no metadata was given.
        self.metadata = (_EMPTY_METADATA
                         if metadata is None else
                         types.MappingProxyType(metadata))
        self._field_type = None

    def __repr__(self):
        return ('Field('
                f'name={self.name!r},'
                f'type={self.type!r},'
                f'default={self.default!r},'
                f'default_factory={self.default_factory!r},'
                f'init={self.init!r},'
                f'repr={self.repr!r},'
                f'hash={self.hash!r},'
                f'compare={self.compare!r},'
                f'metadata={self.metadata!r},'
                f'_field_type={self._field_type}'
                ')')

    # This is used to support the PEP 487 __set_name__ protocol in the
    # case where we're using a field that contains a descriptor as a
    # default value.  For details on __set_name__, see
    # https://www.python.org/dev/peps/pep-0487/#implementation-details.
    #
    # Note that in _process_class, this Field object is overwritten
    # with the default value, so the end result is a descriptor that
    # had __set_name__ called on it at the right time.
    def __set_name__(self, owner, name):
        func = getattr(type(self.default), '__set_name__', None)
        if func:
            # There is a __set_name__ method on the descriptor, call
            # it.
            func(self.default, owner, name)

    __class_getitem__ = classmethod(GenericAlias)


# Plain record of the arguments that were passed to @dataclass; stored
# on the decorated class under the _PARAMS attribute.
class _DataclassParams:
    __slots__ = ('init',
                 'repr',
                 'eq',
                 'order',
                 'unsafe_hash',
                 'frozen',
                 )

    def __init__(self, init, repr, eq, order, unsafe_hash, frozen):
        self.init = init
        self.repr = repr
        self.eq = eq
        self.order = order
        self.unsafe_hash = unsafe_hash
        self.frozen = frozen

    def __repr__(self):
        return ('_DataclassParams('
                f'init={self.init!r},'
                f'repr={self.repr!r},'
                f'eq={self.eq!r},'
                f'order={self.order!r},'
                f'unsafe_hash={self.unsafe_hash!r},'
                f'frozen={self.frozen!r}'
                ')')


# This function is used instead of exposing Field creation directly,
# so that a type checker can be told (via overloads) that this is a
# function whose type depends on its parameters.
def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True,
          hash=None, compare=True, metadata=None):
    """Return an object to identify dataclass fields.

    default is the default value of the field.  default_factory is a
    0-argument function called to initialize a field's value.  If init
    is True, the field will be a parameter to the class's __init__()
    function.  If repr is True, the field will be included in the
    object's repr().  If hash is True, the field will be included in
    the object's hash().  If compare is True, the field will be used
    in comparison functions.  metadata, if specified, must be a
    mapping which is stored but not otherwise examined by dataclass.

    It is an error to specify both default and default_factory.
    """

    if default is not MISSING and default_factory is not MISSING:
        raise ValueError('cannot specify both default and default_factory')
    return Field(default, default_factory, init, repr, hash, compare,
                 metadata)


def _tuple_str(obj_name, fields):
    # Return a string representing each field of obj_name as a tuple
    # member.  So, if fields is ['x', 'y'] and obj_name is "self",
    # return "(self.x,self.y)".

    # Special case for the 0-tuple.
    if not fields:
        return '()'
    # Note the trailing comma, needed if this turns out to be a 1-tuple.
    return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)'


# This function's logic is copied from "recursive_repr" function in
# reprlib module to avoid dependency.
def _recursive_repr(user_function):
    # Decorator to make a repr function return "..." for a recursive
    # call.
    repr_running = set()

    @functools.wraps(user_function)
    def wrapper(self):
        # Key on both the object and the thread, so the same object
        # being repr()'d in two threads is not mistaken for recursion.
        key = id(self), _thread.get_ident()
        if key in repr_running:
            return '...'
        repr_running.add(key)
        try:
            result = user_function(self)
        finally:
            repr_running.discard(key)
        return result
    return wrapper


def _create_fn(name, args, body, *, globals=None, locals=None,
               return_type=MISSING):
    # Build and return a function called `name` from source text.
    # args is an iterable of parameter strings and body an iterable of
    # source lines.  Every key of locals becomes a parameter of an
    # outer wrapper function, so the generated function sees those
    # values as closure variables.
    #
    # Note that we mutate locals when exec() is called.  Caller
    # beware!  The only callers are internal to this module, so no
    # worries about external callers.
    if locals is None:
        locals = {}
    if 'BUILTINS' not in locals:
        locals['BUILTINS'] = builtins
    return_annotation = ''
    if return_type is not MISSING:
        locals['_return_type'] = return_type
        return_annotation = '->_return_type'
    args = ','.join(args)
    body = '\n'.join(f'  {b}' for b in body)

    # Compute the text of the entire function.
    txt = f' def {name}({args}){return_annotation}:\n{body}'

    local_vars = ', '.join(locals.keys())
    txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}"

    ns = {}
    exec(txt, globals, ns)
    return ns['__create_fn__'](**locals)


def _field_assign(frozen, name, value, self_name):
    # If we're a frozen class, then assign to our fields in __init__
    # via object.__setattr__.  Otherwise, just use a simple
    # assignment.
    #
    # self_name is what "self" is called in this function: don't
    # hard-code "self", since that might be a field name.
    if frozen:
        return f'BUILTINS.object.__setattr__({self_name},{name!r},{value})'
    return f'{self_name}.{name}={value}'


def _field_init(f, frozen, globals, self_name):
    # Return the text of the line in the body of __init__ that will
    # initialize this field.  Returns None if no assignment is needed.
    # As a side effect, the field's default (or default factory) is
    # stored in the `globals` mapping under the name '_dflt_<field>'.

    default_name = f'_dflt_{f.name}'
    if f.default_factory is not MISSING:
        if f.init:
            # This field has a default factory.  If a parameter is
            # given, use it.  If not, call the factory.
            globals[default_name] = f.default_factory
            value = (f'{default_name}() '
                     f'if {f.name} is _HAS_DEFAULT_FACTORY '
                     f'else {f.name}')
        else:
            # This is a field that's not in the __init__ params, but
            # has a default factory function.  It needs to be
            # initialized here by calling the factory function,
            # because there's no other way to initialize it.

            # For a field initialized with a default=defaultvalue, the
            # class dict just has the default value
            # (cls.fieldname=defaultvalue).  But that won't work for a
            # default factory, the factory must be called in __init__
            # and we must assign that to self.fieldname.  We can't
            # fall back to the class dict's value, both because it's
            # not set, and because it might be different per-class
            # (which, after all, is why we have a factory function!).

            globals[default_name] = f.default_factory
            value = f'{default_name}()'
    else:
        # No default factory.
        if f.init:
            if f.default is MISSING:
                # There's no default, just do an assignment.
                value = f.name
            else:
                # There's a default: make it available under
                # default_name for the parameter default, then assign
                # from the parameter.
                globals[default_name] = f.default
                value = f.name
        else:
            # This field does not need initialization.  Signify that
            # to the caller by returning None.
            return None

    # Only test this now, so that we can create variables for the
    # default.  However, return None to signify that we're not going
    # to actually do the assignment statement for InitVars.
    if f._field_type is _FIELD_INITVAR:
        return None

    # Now, actually generate the field assignment.
    return _field_assign(frozen, f.name, value, self_name)


def _init_param(f):
    # Return the __init__ parameter string for this field.  For
    # example, the equivalent of 'x:int=3' (except instead of 'int',
    # reference a variable set to int, and instead of '3', reference a
    # variable set to 3).
    if f.default is MISSING and f.default_factory is MISSING:
        # There's no default, and no default_factory, just output the
        # variable name and type.
        default = ''
    elif f.default is not MISSING:
        # There's a default, this will be the name that's used to look
        # it up.
        default = f'=_dflt_{f.name}'
    else:
        # Only remaining case: there's a factory function.  Set a
        # marker.
        default = '=_HAS_DEFAULT_FACTORY'
    return f'{f.name}:_type_{f.name}{default}'


def _init_fn(fields, frozen, has_post_init, self_name, globals):
    # Generate and return the __init__ function.
    #
    # fields contains both real fields and InitVar pseudo-fields.

    # Make sure we don't have fields without defaults following fields
    # with defaults.  This actually would be caught when exec-ing the
    # function source code, but catching it here gives a better error
    # message, and future-proofs us in case we build up the function
    # using ast.
    seen_default = False
    for f in fields:
        # Only consider fields in the __init__ call.
        if f.init:
            if not (f.default is MISSING and f.default_factory is MISSING):
                seen_default = True
            elif seen_default:
                raise TypeError(f'non-default argument {f.name!r} '
                                'follows default argument')

    locals = {f'_type_{f.name}': f.type for f in fields}
    locals.update({
        'MISSING': MISSING,
        '_HAS_DEFAULT_FACTORY': _HAS_DEFAULT_FACTORY,
    })

    body_lines = []
    for f in fields:
        line = _field_init(f, frozen, locals, self_name)
        # line is None means that this field doesn't require
        # initialization (it's a pseudo-field).  Just skip it.
        if line:
            body_lines.append(line)

    # Does this class have a post-init function?
    if has_post_init:
        params_str = ','.join(f.name for f in fields
                              if f._field_type is _FIELD_INITVAR)
        body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})')

    # If no body lines, use 'pass'.
    if not body_lines:
        body_lines = ['pass']

    return _create_fn('__init__',
                      [self_name] + [_init_param(f) for f in fields if f.init],
                      body_lines,
                      locals=locals,
                      globals=globals,
                      return_type=None)


def _repr_fn(fields, globals):
    # Generate __repr__: the class's __qualname__ followed by
    # "(name=value, ...)" for the given fields, guarded against
    # recursive calls by _recursive_repr.
    fn = _create_fn('__repr__',
                    ('self',),
                    ['return self.__class__.__qualname__ + f"(' +
                     ', '.join([f"{f.name}={{self.{f.name}!r}}"
                                for f in fields]) +
                     ')"'],
                    globals=globals)
    return _recursive_repr(fn)


def _frozen_get_del_attr(cls, fields, globals):
    # Return a (__setattr__, __delattr__) pair for a frozen class.
    # Both raise FrozenInstanceError when the instance is exactly of
    # type cls or the attribute name is one of the fields; otherwise
    # they defer to the next class in the MRO.
    locals = {'cls': cls,
              'FrozenInstanceError': FrozenInstanceError}
    if fields:
        fields_str = '(' + ','.join(repr(f.name) for f in fields) + ',)'
    else:
        # Special case for the zero-length tuple.
        fields_str = '()'
    return (_create_fn('__setattr__',
                       ('self', 'name', 'value'),
                       (f'if type(self) is cls or name in {fields_str}:',
                         ' raise FrozenInstanceError(f"cannot assign to field {name!r}")',
                         'super(cls, self).__setattr__(name, value)'),
                       locals=locals,
                       globals=globals),
            _create_fn('__delattr__',
                       ('self', 'name'),
                       (f'if type(self) is cls or name in {fields_str}:',
                         ' raise FrozenInstanceError(f"cannot delete field {name!r}")',
                         'super(cls, self).__delattr__(name)'),
                       locals=locals,
                       globals=globals),
            )


def _cmp_fn(name, op, self_tuple, other_tuple, globals):
    # Create a comparison function.  If the fields in the object are
    # named 'x' and 'y', then self_tuple is the string
    # '(self.x,self.y)' and other_tuple is the string
    # '(other.x,other.y)'.

    return _create_fn(name,
                      ('self', 'other'),
                      [ 'if other.__class__ is self.__class__:',
                       f' return {self_tuple}{op}{other_tuple}',
                        'return NotImplemented'],
                      globals=globals)


def _hash_fn(fields, globals):
    # Generate __hash__ as the hash of the tuple of the given fields.
    self_tuple = _tuple_str('self', fields)
    return _create_fn('__hash__',
                      ('self',),
                      [f'return hash({self_tuple})'],
                      globals=globals)


def _is_classvar(a_type, typing):
    # This test uses a typing internal class, but it's the best way to
    # test if this is a ClassVar.
    return (a_type is typing.ClassVar
            or (type(a_type) is typing._GenericAlias
                and a_type.__origin__ is typing.ClassVar))


def _is_initvar(a_type, dataclasses):
    # The module we're checking against is the module we're
    # currently in (dataclasses.py).
    return (a_type is dataclasses.InitVar
            or type(a_type) is dataclasses.InitVar)


def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
    # Given a type annotation string, does it refer to a_type in
    # a_module?  For example, when checking that annotation denotes a
    # ClassVar, then a_module is typing, and a_type is
    # typing.ClassVar.

    # It's possible to look up a_module given a_type, but it involves
    # looking in sys.modules (again!), and seems like a waste since
    # the caller already knows a_module.

    # - annotation is a string type annotation
    # - cls is the class that this annotation was found in
    # - a_module is the module we want to match
    # - a_type is the type in that module we want to match
    # - is_type_predicate is a function called with (obj, a_module)
    #   that determines if obj is of the desired type.

    # Since this test does not do a local namespace lookup (and
    # instead only a module (global) lookup), there are some things it
    # gets wrong.

    # With string annotations, cv0 will be detected as a ClassVar:
    #   CV = ClassVar
    #   @dataclass
    #   class C0:
    #     cv0: CV

    # But in this example cv1 will not be detected as a ClassVar:
    #   @dataclass
    #   class C1:
    #     CV = ClassVar
    #     cv1: CV

    # In C1, the code in this function (_is_type) will look up "CV" in
    # the module and not find it, so it will not consider cv1 as a
    # ClassVar.  This is a fairly obscure corner case, and the best
    # way to fix it would be to eval() the string "CV" with the
    # correct global and local namespaces.  However that would involve
    # a eval() penalty for every single field of every dataclass
    # that's defined.  It was judged not worth it.

    match = _MODULE_IDENTIFIER_RE.match(annotation)
    if match:
        ns = None
        # cls.__module__ can name a module that is not in sys.modules
        # (for example when a custom string was written to it).  In
        # that case there is no namespace to search, so the annotation
        # cannot be matched and we fall through to returning False.
        module = sys.modules.get(cls.__module__)
        module_name = match.group(1)
        if not module_name:
            # No module name, assume the class's module did
            # "from dataclasses import InitVar".
            if module is not None:
                ns = module.__dict__
        else:
            # Look up module_name in the class's module.
            if module and module.__dict__.get(module_name) is a_module:
                ns = sys.modules.get(a_type.__module__).__dict__
        if ns and is_type_predicate(ns.get(match.group(2)), a_module):
            return True
    return False


def _get_field(cls, a_name, a_type):
    # Return a Field object for this field name and type.  ClassVars
    # and InitVars are also returned, but marked as such (see
    # f._field_type).

    # If the default value isn't derived from Field, then it's only a
    # normal default value.  Convert it to a Field().
    default = getattr(cls, a_name, MISSING)
    if isinstance(default, Field):
        f = default
    else:
        if isinstance(default, types.MemberDescriptorType):
            # This is a field in __slots__, so it has no default value.
            default = MISSING
        f = field(default=default)

    # Only at this point do we know the name and the type.  Set them.
    f.name = a_name
    f.type = a_type

    # Assume it's a normal field until proven otherwise.  We're next
    # going to decide if it's a ClassVar or InitVar, everything else
    # is just a normal field.
    f._field_type = _FIELD

    # In addition to checking for actual types here, also check for
    # string annotations.  get_type_hints() won't always work for us
    # (see https://github.com/python/typing/issues/508 for example),
    # plus it's expensive and would require an eval for every string
    # annotation.  So, make a best effort to see if this is a ClassVar
    # or InitVar using regex's and checking that the thing referenced
    # is actually of the correct type.

    # For the complete discussion, see https://bugs.python.org/issue33453

    # If typing has not been imported, then it's impossible for any
    # annotation to be a ClassVar.  So, only look for ClassVar if
    # typing has been imported by any module (not necessarily cls's
    # module).
    typing = sys.modules.get('typing')
    if typing:
        if (_is_classvar(a_type, typing)
            or (isinstance(f.type, str)
                and _is_type(f.type, cls, typing, typing.ClassVar,
                             _is_classvar))):
            f._field_type = _FIELD_CLASSVAR

    # If the type is InitVar, or if it's a matching string annotation,
    # then it's an InitVar.
    if f._field_type is _FIELD:
        # The module we're checking against is the module we're
        # currently in (dataclasses.py).
        dataclasses = sys.modules[__name__]
        if (_is_initvar(a_type, dataclasses)
            or (isinstance(f.type, str)
                and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
                             _is_initvar))):
            f._field_type = _FIELD_INITVAR

    # Validations for individual fields.  This is delayed until now,
    # instead of in the Field() constructor, since only here do we
    # know the field name, which allows for better error reporting.

    # Special restrictions for ClassVar and InitVar.
    if f._field_type in (_FIELD_CLASSVAR, _FIELD_INITVAR):
        if f.default_factory is not MISSING:
            raise TypeError(f'field {f.name} cannot have a '
                            'default factory')
        # Should I check for other field settings? default_factory
        # seems the most serious to check for.  Maybe add others.  For
        # example, how about init=False (or really,
        # init=<not-the-default-init-value>)?  It makes no sense for
        # ClassVar and InitVar to specify init=<anything>.

    # For real fields, disallow mutable defaults for known types.
    if f._field_type is _FIELD and isinstance(f.default, (list, dict, set)):
        raise ValueError(f'mutable default {type(f.default)} for field '
                         f'{f.name} is not allowed: use default_factory')

    return f


def _set_new_attribute(cls, name, value):
    # Never overwrites an existing attribute.  Returns True if the
    # attribute already exists.
    if name in cls.__dict__:
        return True
    setattr(cls, name, value)
    return False


# Decide if/how we're going to create a hash function.  Key is
# (unsafe_hash, eq, frozen, does-hash-exist).  Value is the action to
# take.  The common case is to do nothing, so instead of providing a
# function that is a no-op, use None to signify that.

def _hash_set_none(cls, fields, globals):
    # The caller assigns the return value to cls.__hash__, so returning
    # None sets __hash__ to None.
    return None

def _hash_add(cls, fields, globals):
    # A field takes part in the hash if its hash flag says so; when the
    # flag is None, fall back to its compare flag.
    flds = [f for f in fields if (f.compare if f.hash is None else f.hash)]
    return _hash_fn(flds, globals)

def _hash_exception(cls, fields, globals):
    # Raise an exception.
    raise TypeError('Cannot overwrite attribute __hash__ '
                    f'in class {cls.__name__}')

#
#                +-------------------------------------- unsafe_hash?
#                |      +------------------------------- eq?
#                |      |      +------------------------ frozen?
#                |      |      |      +----------------  has-explicit-hash?
#                |      |      |      |
#                |      |      |      |        +-------  action
#                |      |      |      |        |
#                v      v      v      v        v
_hash_action = {(False, False, False, False): None,
                (False, False, False, True ): None,
                (False, False, True,  False): None,
                (False, False, True,  True ): None,
                (False, True,  False, False): _hash_set_none,
                (False, True,  False, True ): None,
                (False, True,  True,  False): _hash_add,
                (False, True,  True,  True ): None,
                (True,  False, False, False): _hash_add,
                (True,  False, False, True ): _hash_exception,
                (True,  False, True,  False): _hash_add,
                (True,  False, True,  True ): _hash_exception,
                (True,  True,  False, False): _hash_add,
                (True,  True,  False, True ): _hash_exception,
                (True,  True,  True,  False): _hash_add,
                (True,  True,  True,  True ): _hash_exception,
                }
# See https://bugs.python.org/issue32929#msg312829 for an if-statement
# version of this table.
807 808 809def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen): 810 # Now that dicts retain insertion order, there's no reason to use 811 # an ordered dict. I am leveraging that ordering here, because 812 # derived class fields overwrite base class fields, but the order 813 # is defined by the base class, which is found first. 814 fields = {} 815 816 if cls.__module__ in sys.modules: 817 globals = sys.modules[cls.__module__].__dict__ 818 else: 819 # Theoretically this can happen if someone writes 820 # a custom string to cls.__module__. In which case 821 # such dataclass won't be fully introspectable 822 # (w.r.t. typing.get_type_hints) but will still function 823 # correctly. 824 globals = {} 825 826 setattr(cls, _PARAMS, _DataclassParams(init, repr, eq, order, 827 unsafe_hash, frozen)) 828 829 # Find our base classes in reverse MRO order, and exclude 830 # ourselves. In reversed order so that more derived classes 831 # override earlier field definitions in base classes. As long as 832 # we're iterating over them, see if any are frozen. 833 any_frozen_base = False 834 has_dataclass_bases = False 835 for b in cls.__mro__[-1:0:-1]: 836 # Only process classes that have been processed by our 837 # decorator. That is, they have a _FIELDS attribute. 838 base_fields = getattr(b, _FIELDS, None) 839 if base_fields is not None: 840 has_dataclass_bases = True 841 for f in base_fields.values(): 842 fields[f.name] = f 843 if getattr(b, _PARAMS).frozen: 844 any_frozen_base = True 845 846 # Annotations that are defined in this class (not in base 847 # classes). If __annotations__ isn't present, then this class 848 # adds no new annotations. We use this to compute fields that are 849 # added by this class. 850 # 851 # Fields are found from cls_annotations, which is guaranteed to be 852 # ordered. Default values are from class attributes, if a field 853 # has a default. 
If the default value is a Field(), then it 854 # contains additional info beyond (and possibly including) the 855 # actual default value. Pseudo-fields ClassVars and InitVars are 856 # included, despite the fact that they're not real fields. That's 857 # dealt with later. 858 cls_annotations = cls.__dict__.get('__annotations__', {}) 859 860 # Now find fields in our class. While doing so, validate some 861 # things, and set the default values (as class attributes) where 862 # we can. 863 cls_fields = [_get_field(cls, name, type) 864 for name, type in cls_annotations.items()] 865 for f in cls_fields: 866 fields[f.name] = f 867 868 # If the class attribute (which is the default value for this 869 # field) exists and is of type 'Field', replace it with the 870 # real default. This is so that normal class introspection 871 # sees a real default value, not a Field. 872 if isinstance(getattr(cls, f.name, None), Field): 873 if f.default is MISSING: 874 # If there's no default, delete the class attribute. 875 # This happens if we specify field(repr=False), for 876 # example (that is, we specified a field object, but 877 # no default value). Also if we're using a default 878 # factory. The class attribute should not be set at 879 # all in the post-processed class. 880 delattr(cls, f.name) 881 else: 882 setattr(cls, f.name, f.default) 883 884 # Do we have any Field members that don't also have annotations? 885 for name, value in cls.__dict__.items(): 886 if isinstance(value, Field) and not name in cls_annotations: 887 raise TypeError(f'{name!r} is a field but has no type annotation') 888 889 # Check rules that apply if we are derived from any dataclasses. 890 if has_dataclass_bases: 891 # Raise an exception if any of our bases are frozen, but we're not. 892 if any_frozen_base and not frozen: 893 raise TypeError('cannot inherit non-frozen dataclass from a ' 894 'frozen one') 895 896 # Raise an exception if we're frozen, but none of our bases are. 
897 if not any_frozen_base and frozen: 898 raise TypeError('cannot inherit frozen dataclass from a ' 899 'non-frozen one') 900 901 # Remember all of the fields on our class (including bases). This 902 # also marks this class as being a dataclass. 903 setattr(cls, _FIELDS, fields) 904 905 # Was this class defined with an explicit __hash__? Note that if 906 # __eq__ is defined in this class, then python will automatically 907 # set __hash__ to None. This is a heuristic, as it's possible 908 # that such a __hash__ == None was not auto-generated, but it 909 # close enough. 910 class_hash = cls.__dict__.get('__hash__', MISSING) 911 has_explicit_hash = not (class_hash is MISSING or 912 (class_hash is None and '__eq__' in cls.__dict__)) 913 914 # If we're generating ordering methods, we must be generating the 915 # eq methods. 916 if order and not eq: 917 raise ValueError('eq must be true if order is true') 918 919 if init: 920 # Does this class have a post-init function? 921 has_post_init = hasattr(cls, _POST_INIT_NAME) 922 923 # Include InitVars and regular fields (so, not ClassVars). 924 flds = [f for f in fields.values() 925 if f._field_type in (_FIELD, _FIELD_INITVAR)] 926 _set_new_attribute(cls, '__init__', 927 _init_fn(flds, 928 frozen, 929 has_post_init, 930 # The name to use for the "self" 931 # param in __init__. Use "self" 932 # if possible. 933 '__dataclass_self__' if 'self' in fields 934 else 'self', 935 globals, 936 )) 937 938 # Get the fields as a list, and include only real fields. This is 939 # used in all of the following methods. 940 field_list = [f for f in fields.values() if f._field_type is _FIELD] 941 942 if repr: 943 flds = [f for f in field_list if f.repr] 944 _set_new_attribute(cls, '__repr__', _repr_fn(flds, globals)) 945 946 if eq: 947 # Create __eq__ method. There's no need for a __ne__ method, 948 # since python will call __eq__ and negate it. 
949 flds = [f for f in field_list if f.compare] 950 self_tuple = _tuple_str('self', flds) 951 other_tuple = _tuple_str('other', flds) 952 _set_new_attribute(cls, '__eq__', 953 _cmp_fn('__eq__', '==', 954 self_tuple, other_tuple, 955 globals=globals)) 956 957 if order: 958 # Create and set the ordering methods. 959 flds = [f for f in field_list if f.compare] 960 self_tuple = _tuple_str('self', flds) 961 other_tuple = _tuple_str('other', flds) 962 for name, op in [('__lt__', '<'), 963 ('__le__', '<='), 964 ('__gt__', '>'), 965 ('__ge__', '>='), 966 ]: 967 if _set_new_attribute(cls, name, 968 _cmp_fn(name, op, self_tuple, other_tuple, 969 globals=globals)): 970 raise TypeError(f'Cannot overwrite attribute {name} ' 971 f'in class {cls.__name__}. Consider using ' 972 'functools.total_ordering') 973 974 if frozen: 975 for fn in _frozen_get_del_attr(cls, field_list, globals): 976 if _set_new_attribute(cls, fn.__name__, fn): 977 raise TypeError(f'Cannot overwrite attribute {fn.__name__} ' 978 f'in class {cls.__name__}') 979 980 # Decide if/how we're going to create a hash function. 981 hash_action = _hash_action[bool(unsafe_hash), 982 bool(eq), 983 bool(frozen), 984 has_explicit_hash] 985 if hash_action: 986 # No need to call _set_new_attribute here, since by the time 987 # we're here the overwriting is unconditional. 988 cls.__hash__ = hash_action(cls, field_list, globals) 989 990 if not getattr(cls, '__doc__'): 991 # Create a class doc-string. 992 cls.__doc__ = (cls.__name__ + 993 str(inspect.signature(cls)).replace(' -> None', '')) 994 995 return cls 996 997 998def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False, 999 unsafe_hash=False, frozen=False): 1000 """Returns the same class as was passed in, with dunder methods 1001 added based on the fields defined in the class. 1002 1003 Examines PEP 526 __annotations__ to determine fields. 1004 1005 If init is true, an __init__() method is added to the class. 
If 1006 repr is true, a __repr__() method is added. If order is true, rich 1007 comparison dunder methods are added. If unsafe_hash is true, a 1008 __hash__() method function is added. If frozen is true, fields may 1009 not be assigned to after instance creation. 1010 """ 1011 1012 def wrap(cls): 1013 return _process_class(cls, init, repr, eq, order, unsafe_hash, frozen) 1014 1015 # See if we're being called as @dataclass or @dataclass(). 1016 if cls is None: 1017 # We're called with parens. 1018 return wrap 1019 1020 # We're called as @dataclass without parens. 1021 return wrap(cls) 1022 1023 1024def fields(class_or_instance): 1025 """Return a tuple describing the fields of this dataclass. 1026 1027 Accepts a dataclass or an instance of one. Tuple elements are of 1028 type Field. 1029 """ 1030 1031 # Might it be worth caching this, per class? 1032 try: 1033 fields = getattr(class_or_instance, _FIELDS) 1034 except AttributeError: 1035 raise TypeError('must be called with a dataclass type or instance') 1036 1037 # Exclude pseudo-fields. Note that fields is sorted by insertion 1038 # order, so the order of the tuple is as the fields were defined. 1039 return tuple(f for f in fields.values() if f._field_type is _FIELD) 1040 1041 1042def _is_dataclass_instance(obj): 1043 """Returns True if obj is an instance of a dataclass.""" 1044 return hasattr(type(obj), _FIELDS) 1045 1046 1047def is_dataclass(obj): 1048 """Returns True if obj is a dataclass or an instance of a 1049 dataclass.""" 1050 cls = obj if isinstance(obj, type) else type(obj) 1051 return hasattr(cls, _FIELDS) 1052 1053 1054def asdict(obj, *, dict_factory=dict): 1055 """Return the fields of a dataclass instance as a new dictionary mapping 1056 field names to field values. 1057 1058 Example usage: 1059 1060 @dataclass 1061 class C: 1062 x: int 1063 y: int 1064 1065 c = C(1, 2) 1066 assert asdict(c) == {'x': 1, 'y': 2} 1067 1068 If given, 'dict_factory' will be used instead of built-in dict. 
def asdict(obj, *, dict_factory=dict):
    """Return the fields of a dataclass instance as a new dictionary mapping
    field names to field values.

    Example usage::

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert asdict(c) == {'x': 1, 'y': 2}

    If given, 'dict_factory' will be used instead of built-in dict.
    The function applies recursively to field values that are
    dataclass instances. This will also look into built-in containers:
    tuples, lists, and dicts. Any other value is deep-copied.

    Raises TypeError if 'obj' is not a dataclass instance.
    """
    if not _is_dataclass_instance(obj):
        raise TypeError("asdict() should be called on dataclass instances")
    return _asdict_inner(obj, dict_factory)


def _asdict_inner(obj, dict_factory):
    """Recursive worker for asdict(); see asdict() for the contract."""
    if _is_dataclass_instance(obj):
        result = []
        for f in fields(obj):
            value = _asdict_inner(getattr(obj, f.name), dict_factory)
            result.append((f.name, value))
        return dict_factory(result)
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).

        # I'm not using namedtuple's _asdict()
        # method, because:
        # - it does not recurse in to the namedtuple fields and
        #   convert them to dicts (using dict_factory).
        # - I don't actually want to return a dict here.  The main
        #   use case here is json.dumps, and it handles converting
        #   namedtuples to lists.  Admittedly we're losing some
        #   information here when we produce a json list instead of a
        #   dict.  Note that if we returned dicts here instead of
        #   namedtuples, we could no longer call asdict() on a data
        #   structure where a namedtuple was used as a dict key.

        return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_asdict_inner(v, dict_factory) for v in obj)
    elif isinstance(obj, dict):
        obj_type = type(obj)
        if hasattr(obj_type, 'default_factory'):
            # obj is a defaultdict, which has a different constructor
            # from dict: its first argument must be the default
            # factory, so it cannot be rebuilt from an iterable of
            # pairs alone.
            result = obj_type(getattr(obj, 'default_factory'))
            for k, v in obj.items():
                result[_asdict_inner(k, dict_factory)] = \
                    _asdict_inner(v, dict_factory)
            return result
        return obj_type((_asdict_inner(k, dict_factory),
                         _asdict_inner(v, dict_factory))
                        for k, v in obj.items())
    else:
        return copy.deepcopy(obj)


def astuple(obj, *, tuple_factory=tuple):
    """Return the fields of a dataclass instance as a new tuple of field values.

    Example usage::

      @dataclass
      class C:
          x: int
          y: int

      c = C(1, 2)
      assert astuple(c) == (1, 2)

    If given, 'tuple_factory' will be used instead of built-in tuple.
    The function applies recursively to field values that are
    dataclass instances. This will also look into built-in containers:
    tuples, lists, and dicts. Any other value is deep-copied.

    Raises TypeError if 'obj' is not a dataclass instance.
    """

    if not _is_dataclass_instance(obj):
        raise TypeError("astuple() should be called on dataclass instances")
    return _astuple_inner(obj, tuple_factory)


def _astuple_inner(obj, tuple_factory):
    """Recursive worker for astuple(); see astuple() for the contract."""
    if _is_dataclass_instance(obj):
        result = []
        for f in fields(obj):
            value = _astuple_inner(getattr(obj, f.name), tuple_factory)
            result.append(value)
        return tuple_factory(result)
    elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
        # obj is a namedtuple.  Recurse into it, but the returned
        # object is another namedtuple of the same type.  This is
        # similar to how other list- or tuple-derived classes are
        # treated (see below), but we just need to create them
        # differently because a namedtuple's __init__ needs to be
        # called differently (see bpo-34363).
        return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
    elif isinstance(obj, (list, tuple)):
        # Assume we can create an object of this type by passing in a
        # generator (which is not true for namedtuples, handled
        # above).
        return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
    elif isinstance(obj, dict):
        obj_type = type(obj)
        if hasattr(obj_type, 'default_factory'):
            # obj is a defaultdict, which has a different constructor
            # from dict: its first argument must be the default
            # factory, so it cannot be rebuilt from an iterable of
            # pairs alone.
            result = obj_type(getattr(obj, 'default_factory'))
            for k, v in obj.items():
                result[_astuple_inner(k, tuple_factory)] = \
                    _astuple_inner(v, tuple_factory)
            return result
        return obj_type((_astuple_inner(k, tuple_factory),
                         _astuple_inner(v, tuple_factory))
                        for k, v in obj.items())
    else:
        return copy.deepcopy(obj)
def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True,
                   repr=True, eq=True, order=False, unsafe_hash=False,
                   frozen=False, module=None):
    """Return a new dynamically created dataclass.

    The dataclass name will be 'cls_name'.  'fields' is an iterable
    of either (name), (name, type) or (name, type, Field) objects. If type is
    omitted, use the string 'typing.Any'.  Field objects are created by
    the equivalent of calling 'field(name, type [, Field-info])'.::

      C = make_dataclass('C', ['x', ('y', int), ('z', int, field(init=False))], bases=(Base,))

    is equivalent to::

      @dataclass
      class C(Base):
          x: 'typing.Any'
          y: int
          z: int = field(init=False)

    For the bases and namespace parameters, see the builtin type() function.

    The parameters init, repr, eq, order, unsafe_hash, and frozen are passed to
    dataclass().

    If 'module' is given, it is stored as the new class's __module__.
    Otherwise, unless 'namespace' already supplies '__module__', the
    caller's module name is used when it can be determined.

    Raises TypeError for a malformed field entry, or for a field name
    that is not a valid identifier, is a keyword, or is duplicated.
    """

    if namespace is None:
        namespace = {}
    else:
        # Copy namespace since we're going to mutate it.
        namespace = namespace.copy()

    # While we're looking through the field names, validate that they
    # are identifiers, are not keywords, and not duplicates.
    seen = set()
    anns = {}
    no_spec = object()
    for item in fields:
        spec = no_spec
        if isinstance(item, str):
            name = item
            tp = 'typing.Any'
        elif len(item) == 2:
            name, tp = item
        elif len(item) == 3:
            name, tp, spec = item
        else:
            raise TypeError(f'Invalid field: {item!r}')

        if not isinstance(name, str) or not name.isidentifier():
            raise TypeError(f'Field names must be valid identifiers: {name!r}')
        if keyword.iskeyword(name):
            raise TypeError(f'Field names must not be keywords: {name!r}')
        if name in seen:
            raise TypeError(f'Field name duplicated: {name!r}')

        seen.add(name)
        anns[name] = tp
        # Only store the default/Field once the name is known to be valid.
        if spec is not no_spec:
            namespace[name] = spec

    namespace['__annotations__'] = anns
    # We use `types.new_class()` instead of simply `type()` to allow dynamic creation
    # of generic dataclasses.
    cls = types.new_class(cls_name, bases, {}, lambda ns: ns.update(namespace))

    # types.new_class() creates the class from inside the 'types'
    # module, so without intervention __module__ would be 'types',
    # which breaks pickling and module-based introspection.  Point it
    # at the caller instead, unless the caller chose a value already.
    if module is None and '__module__' not in namespace:
        try:
            module = sys._getframe(1).f_globals.get('__name__', '__main__')
        except (AttributeError, ValueError):
            # No frame introspection available; leave __module__ alone.
            pass
    if module is not None:
        cls.__module__ = module

    return dataclass(cls, init=init, repr=repr, eq=eq, order=order,
                     unsafe_hash=unsafe_hash, frozen=frozen)


def replace(obj, /, **changes):
    """Return a new object replacing specified fields with new values.

    This is especially useful for frozen classes.  Example usage::

      @dataclass(frozen=True)
      class C:
          x: int
          y: int

      c = C(1, 2)
      c1 = replace(c, x=3)
      assert c1.x == 3 and c1.y == 2

    Raises TypeError if 'obj' is not a dataclass instance, and
    ValueError if 'changes' names an init=False field or omits an
    InitVar that has no default.
    """

    # We're going to mutate 'changes', but that's okay because it's a
    # new dict, even if called with 'replace(obj, **my_changes)'.

    if not _is_dataclass_instance(obj):
        raise TypeError("replace() should be called on dataclass instances")

    # It's an error to have init=False fields in 'changes'.
    # If a field is not in 'changes', read its value from the provided obj.

    for f in getattr(obj, _FIELDS).values():
        # Only consider normal fields or InitVars.
        if f._field_type is _FIELD_CLASSVAR:
            continue

        if not f.init:
            # Error if this field is specified in changes.
            if f.name in changes:
                raise ValueError(f'field {f.name} is declared with '
                                 'init=False, it cannot be specified with '
                                 'replace()')
            continue

        if f.name not in changes:
            # An InitVar is not stored on the instance, so without a
            # default there is nothing to copy the value from.
            if f._field_type is _FIELD_INITVAR and f.default is MISSING:
                raise ValueError(f"InitVar {f.name!r} "
                                 'must be specified with replace()')
            changes[f.name] = getattr(obj, f.name)

    # Create the new object, which calls __init__() and
    # __post_init__() (if defined), using all of the init fields we've
    # added and/or left in 'changes'.  If there are values supplied in
    # changes that aren't fields, this will correctly raise a
    # TypeError.
    return obj.__class__(**changes)