# -*- coding: utf-8 -*-
"""
      Title: Dumpscript management command
    Project: Hardytools (queryset-refactor version)
     Author: Will Hardy (http://willhardy.com.au)
       Date: June 2008
      Usage: python manage.py dumpscript appname > scripts/scriptname.py
  $Revision: 217 $

Description:
    Generates a Python script that will repopulate the database using objects.
    The advantage of this approach is that it is easy to understand, and more
    flexible than directly populating the database, or using XML.

    * It also allows for new defaults to take effect and only transfers what is
      needed.
    * If a new database schema has a NEW ATTRIBUTE, it is simply not
      populated (using a default value will make the transition smooth :)
    * If a new database schema REMOVES AN ATTRIBUTE, it is simply ignored
      and the data moves across safely (I'm assuming we don't want this
      attribute anymore).
    * Problems may only occur if there is a new model and is now a required
      ForeignKey for an existing model. But this is easy to fix by editing the
      populate script. Half of the job is already done as all ForeignKey
      lookups occur through the locate_object() function in the generated script.

Improvements:
    See TODOs and FIXMEs scattered throughout :-)

"""
import datetime
import sys

from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from django.core.management.base import BaseCommand
from django.db import router
from django.db.models import (
    AutoField, BooleanField, DateField, DateTimeField, FileField, ForeignKey,
)
from django.db.models.deletion import Collector
from django.utils import timezone
# NOTE: smart_text() was removed in Django 4.0; smart_str() is its replacement.
from django.utils.encoding import force_str, smart_str

from django_extensions.management.utils import signalcommand


def orm_item_locator(orm_obj):
    """
    Is called every time an object that will not be exported is required.
    Where orm_obj is the referred object.
    We postpone the lookup to locate_object() which will be run on the generated script

    Returns the source text of an ``importer.locate_object(...)`` call.
    """
    original_class = the_class = orm_obj._meta.object_name
    original_pk_name = pk_name = orm_obj._meta.pk.name
    pk_value = getattr(orm_obj, pk_name)

    # When the primary key is itself a model instance (e.g. a parent link),
    # follow the chain until a plain value is reached.
    while hasattr(pk_value, "_meta") and hasattr(pk_value._meta, "pk") and hasattr(pk_value._meta.pk, "name"):
        the_class = pk_value._meta.object_name
        pk_name = pk_value._meta.pk.name
        pk_value = getattr(pk_value, pk_name)

    clean_dict = make_clean_dict(orm_obj.__dict__)

    # Only values of existing keys are replaced, so iterating items() is safe.
    for key, v in clean_dict.items():
        if v is None:
            continue
        if isinstance(v, datetime.datetime):
            # make_aware() raises ValueError on a datetime that already
            # carries tzinfo, so only convert naive values.
            if timezone.is_naive(v):
                v = timezone.make_aware(v)
            clean_dict[key] = StrToCodeChanger('dateutil.parser.parse("%s")' % v.isoformat())
        elif not isinstance(v, (str, int, float)):
            clean_dict[key] = str("%s" % v)

    output = """ importer.locate_object(%s, "%s", %s, "%s", %s, %s ) """ % (
        original_class, original_pk_name,
        the_class, pk_name, pk_value, clean_dict
    )

    return output


class Command(BaseCommand):
    """ Management command that writes the generated population script to stdout. """

    help = 'Dumps the data as a customised python script.'

    def add_arguments(self, parser):
        super().add_arguments(parser)
        parser.add_argument('appname', nargs='+')
        # store_false: passing --autofield turns skip_autofield off.
        parser.add_argument(
            '--autofield', action='store_false', dest='skip_autofield',
            default=True, help='Include Autofields (like pk fields)'
        )

    @signalcommand
    def handle(self, *args, **options):
        app_labels = options['appname']

        # Get the models we want to export
        models = get_models(app_labels)

        # A dictionary is created to keep track of all the processed objects,
        # so that foreign key references can be made using python variable names.
        # This variable "context" is shared by every code-generating object.
        context = {}

        # Create a dumpscript object and let it format itself as a string
        script = Script(
            models=models,
            context=context,
            stdout=self.stdout,
            stderr=self.stderr,
            options=options,
        )
        self.stdout.write(str(script))
        self.stdout.write("\n")


def get_models(app_labels):
    """
    Get a list of models for the given app labels, with some exceptions.

    Each entry of app_labels is either "app_label" (all models of that app,
    minus the excluded ones) or "app_label.ModelName" (exactly that model).
    An empty app_labels returns the models of every installed app.

    TODO: If a required model is referenced, it should also be included.
    Or at least discovered with a get_or_create() call.
    """
    # These models are not to be output, e.g. because they can be generated automatically
    # TODO: This should be "appname.modelname" string
    EXCLUDED_MODELS = (ContentType, )

    def exportable_models(app_config):
        # All models of one app, minus the excluded ones.
        return [m for m in app_config.get_models() if m not in EXCLUDED_MODELS]

    models = []

    # If no app labels are given, return all
    if not app_labels:
        for app_config in apps.get_app_configs():
            models += exportable_models(app_config)
        return models

    # Get all relevant apps
    for app_label in app_labels:
        if "." in app_label:
            # If a specific model is mentioned, get only that model
            app_label, model_name = app_label.split(".", 1)
            models.append(apps.get_model(app_label, model_name))
        else:
            # Get all models for a given app
            models += exportable_models(apps.get_app_config(app_label))

    return models


class Code:
    """
    A snippet of python script.
    This keeps track of import statements and can be output to a string.
    In the future, other features such as custom indentation might be included
    in this class.

    Subclasses are expected to provide ``imports`` (a dict mapping an imported
    name to its module) and ``lines`` (a nested list of statements).
    """

    def __init__(self, indent=-1, stdout=None, stderr=None):
        self.indent = indent
        # Fall back to the process streams when none (or a falsy one) is given.
        self.stdout = stdout if stdout else sys.stdout
        self.stderr = stderr if stderr else sys.stderr

    def __str__(self):
        """ Return a string representation of this script. """
        if self.imports:
            self.stderr.write(repr(self.import_lines))
            return flatten_blocks([""] + self.import_lines + [""] + self.lines, num_indents=self.indent)
        else:
            return flatten_blocks(self.lines, num_indents=self.indent)

    def get_import_lines(self):
        """ Take the stored imports and converts them to lines """
        if self.imports:
            return ["from %s import %s" % (value, key) for key, value in self.imports.items()]
        else:
            return []
    import_lines = property(get_import_lines)


class ModelCode(Code):
    """ Produces a python script that can recreate data for a given model class. """

    def __init__(self, model, context=None, stdout=None, stderr=None, options=None):
        super().__init__(indent=0, stdout=stdout, stderr=stderr)
        self.model = model
        if context is None:
            context = {}
        self.context = context
        self.options = options
        self.instances = []

    def get_imports(self):
        """
        Return a dictionary of import statements, with the variable being
        defined as the key.
        """
        return {self.model.__name__: smart_str(self.model.__module__)}
    imports = property(get_imports)

    def get_lines(self):
        """
        Return a list of lists or strings, representing the code body.
        Each list is a block, each string is a statement.
        """
        code = []

        for counter, item in enumerate(self.model._default_manager.all()):
            instance = InstanceCode(
                instance=item, id=counter + 1, context=self.context,
                stdout=self.stdout, stderr=self.stderr, options=self.options,
            )
            self.instances.append(instance)
            if instance.waiting_list:
                code += instance.lines

        # After each instance has been processed, try again.
        # This allows self referencing fields to work.
        for instance in self.instances:
            if instance.waiting_list:
                code += instance.lines

        return code

    lines = property(get_lines)


class InstanceCode(Code):
    """ Produces a python script that can recreate data for a given model instance. """

    def __init__(self, instance, id, context=None, stdout=None, stderr=None, options=None):
        """ We need the instance in question and an id """

        super().__init__(indent=0, stdout=stdout, stderr=stderr)
        self.imports = {}

        self.options = options
        self.instance = instance
        self.model = self.instance.__class__
        if context is None:
            context = {}
        self.context = context
        self.variable_name = "%s_%s" % (self.instance._meta.db_table, id)
        # Cached result of skip(); None means "not computed yet".
        self.skip_me = None
        self.instantiated = False

        # Fields that still have to be written out for this instance.
        self.waiting_list = list(self.model._meta.fields)

        # Related objects per many-to-many field that still have to be added.
        self.many_to_many_waiting_list = {}
        for field in self.model._meta.many_to_many:
            try:
                # Relations with an explicit (not auto-created) through model are left out.
                if not field.remote_field.through._meta.auto_created:
                    continue
            except AttributeError:
                pass
            self.many_to_many_waiting_list[field] = list(getattr(self.instance, field.name).all())

    def get_lines(self, force=False):
        """
        Return a list of lists or strings, representing the code body.
        Each list is a block, each string is a statement.

        force (True or False): if an attribute object cannot be included,
        it is usually skipped to be processed later. With 'force' set, there
        will be no waiting: a get_or_create() call is written instead.
        """
        code_lines = []

        # Don't return anything if this is an instance that should be skipped
        if self.skip():
            return []

        # Initialise our new object
        # e.g. model_name_35 = Model()
        code_lines += self.instantiate()

        # Add each field
        # e.g. model_name_35.field_one = 1034.91
        #      model_name_35.field_two = "text"
        code_lines += self.get_waiting_list()

        if force:
            # TODO: Check that M2M are not affected
            code_lines += self.get_waiting_list(force=force)

        # Print the save command for our new object
        # e.g. model_name_35.save()
        if code_lines:
            code_lines.append("%s = importer.save_or_locate(%s)\n" % (self.variable_name, self.variable_name))

        code_lines += self.get_many_to_many_lines(force=force)

        return code_lines
    lines = property(get_lines)

    def skip(self):
        """
        Determine whether or not this object should be skipped.
        If this model instance is a parent of a single subclassed
        instance, skip it. The subclassed instance will create this
        parent instance for us.

        TODO: Allow the user to force its creation?
        """
        if self.skip_me is not None:
            return self.skip_me

        cls = self.instance.__class__
        using = router.db_for_write(cls, instance=self.instance)
        collector = Collector(using=using)
        collector.collect([self.instance], collect_related=False)
        sub_objects = [obj for objs in collector.data.values() for obj in objs]
        children = sum(1 for so in sub_objects if self.model in so._meta.parents)
        if children == 1:
            # since this instance isn't explicitly created, its variable name
            # can't be referenced in the script, so record None in context dict
            pk_name = self.instance._meta.pk.name
            key = '%s_%s' % (self.model.__name__, getattr(self.instance, pk_name))
            self.context[key] = None
            self.skip_me = True
        else:
            self.skip_me = False

        return self.skip_me

    def instantiate(self):
        """ Write lines for instantiation """
        # e.g. model_name_35 = Model()
        code_lines = []

        if not self.instantiated:
            code_lines.append("%s = %s()" % (self.variable_name, self.model.__name__))
            self.instantiated = True

            # Store our variable name for future foreign key references
            pk_name = self.instance._meta.pk.name
            key = '%s_%s' % (self.model.__name__, getattr(self.instance, pk_name))
            self.context[key] = self.variable_name

        return code_lines

    def get_waiting_list(self, force=False):
        """ Add lines for any waiting fields that can be completed now. """

        code_lines = []
        skip_autofield = self.options['skip_autofield']

        # Process normal fields (iterate over a copy: the list is edited below)
        for field in list(self.waiting_list):
            try:
                value = get_attribute_value(
                    self.instance, field, self.context,
                    force=force, skip_autofield=skip_autofield,
                )
            except SkipValue:
                # Remove from the waiting list and move on
                self.waiting_list.remove(field)
            except DoLater:
                # Move on, maybe next time
                continue
            else:
                # Found the value: add the line and remove from waiting list
                code_lines.append('%s.%s = %s' % (self.variable_name, field.name, value))
                self.waiting_list.remove(field)

        return code_lines

    def get_many_to_many_lines(self, force=False):
        """ Generate lines that define many to many relations for this instance. """

        lines = []

        for field, rel_items in self.many_to_many_waiting_list.items():
            # Iterate over a copy: resolved items are removed from rel_items.
            for rel_item in list(rel_items):
                pk_name = rel_item._meta.pk.name
                key = '%s_%s' % (rel_item.__class__.__name__, getattr(rel_item, pk_name))
                try:
                    value = "%s" % self.context[key]
                except KeyError:
                    # The related object has no variable in the script (yet).
                    if force:
                        item_locator = orm_item_locator(rel_item)
                        self.context["__extra_imports"][rel_item._meta.object_name] = rel_item.__module__
                        lines.append('%s.%s.add( %s )' % (self.variable_name, field.name, item_locator))
                        self.many_to_many_waiting_list[field].remove(rel_item)
                else:
                    lines.append('%s.%s.add(%s)' % (self.variable_name, field.name, value))
                    self.many_to_many_waiting_list[field].remove(rel_item)

        if lines:
            lines.append("")

        return lines


class Script(Code):
    """ Produces a complete python script that can recreate data for the given apps. """

    def __init__(self, models, context=None, stdout=None, stderr=None, options=None):
        super().__init__(stdout=stdout, stderr=stderr)
        self.imports = {}

        self.models = models
        if context is None:
            context = {}
        self.context = context

        # (sic) the misspelt key is read back elsewhere in this module.
        self.context["__avaliable_models"] = set(models)
        self.context["__extra_imports"] = {}

        self.options = options

    def _queue_models(self, models, context):
        """
        Work out an appropriate ordering for the models.
        This isn't essential, but makes the script look nicer because
        more instances can be defined on their first try.

        NOTE: 'models' is modified in place. It ends up empty when every
        model could be ordered, otherwise it holds the ModelCode objects of
        the models left over from a dependency cycle.
        """
        model_queue = []
        number_remaining_models = len(models)
        # Max number of cycles allowed before we call it an infinite loop.
        MAX_CYCLES = number_remaining_models
        allowed_cycles = MAX_CYCLES

        while number_remaining_models > 0:
            previous_number_remaining_models = number_remaining_models

            model = models.pop(0)

            # If the model is ready to be processed, add it to the list
            if check_dependencies(model, model_queue, context["__avaliable_models"]):
                model_class = ModelCode(
                    model=model, context=context,
                    stdout=self.stdout, stderr=self.stderr, options=self.options,
                )
                model_queue.append(model_class)

            # Otherwise put the model back at the end of the list
            else:
                models.append(model)

            # Check for infinite loops.
            # This means there is a cyclic foreign key structure
            # That cannot be resolved by re-ordering
            number_remaining_models = len(models)
            if number_remaining_models == previous_number_remaining_models:
                allowed_cycles -= 1
                if allowed_cycles <= 0:
                    # Add the remaining models, but do not remove them from the model list
                    missing_models = [
                        ModelCode(
                            model=m, context=context,
                            stdout=self.stdout, stderr=self.stderr, options=self.options,
                        )
                        for m in models
                    ]
                    model_queue += missing_models
                    # Replace the models with the model class objects
                    # (sure, this is a little bit of hackery)
                    models[:] = missing_models
                    break
            else:
                allowed_cycles = MAX_CYCLES

        return model_queue

    def get_lines(self):
        """
        Return a list of lists or strings, representing the code body.
        Each list is a block, each string is a statement.
        """
        code = [self.FILE_HEADER.strip()]

        # Queue and process the required models
        for model_class in self._queue_models(self.models, context=self.context):
            msg = 'Processing model: %s.%s\n' % (model_class.model.__module__, model_class.model.__name__)
            self.stderr.write(msg)
            # Top-level strings are not indented by flatten_blocks(), hence
            # the explicit indent that keeps them inside import_data().
            code.append("    # " + msg)
            code.append(model_class.import_lines)
            code.append("")
            code.append(model_class.lines)

        # Process left over foreign keys from cyclic models
        # (self.models now only holds what _queue_models() left behind)
        for model in self.models:
            msg = 'Re-processing model: %s.%s\n' % (model.model.__module__, model.model.__name__)
            self.stderr.write(msg)
            code.append("    # " + msg)
            for instance in model.instances:
                if instance.waiting_list or instance.many_to_many_waiting_list:
                    code.append(instance.get_lines(force=True))

        # The extra imports go right after the header (index 0).
        code.insert(1, "    # Initial Imports")
        code.insert(2, "")
        for key, value in self.context["__extra_imports"].items():
            code.insert(2, "    from %s import %s" % (value, key))

        return code

    lines = property(get_lines)

    # A user-friendly file header
    FILE_HEADER = """

#!/usr/bin/env python


# This file has been automatically generated.
# Instead of changing it, create a file called import_helper.py
# and put there a class called ImportHelper(object) in it.
#
# This class will be specially casted so that instead of extending object,
# it will actually extend the class BasicImportHelper()
#
# That means you just have to overload the methods you want to
# change, leaving the other ones intact.
#
# Something that you might want to do is use transactions, for example.
#
# Also, don't forget to add the necessary Django imports.
#
# This file was generated with the following command:
# %s
#
# to restore it, run
# manage.py runscript module_name.this_script_name
#
# example: if manage.py is at ./manage.py
# and the script is at ./some_folder/some_script.py
# you must make sure ./some_folder/__init__.py exists
# and run ./manage.py runscript some_folder.some_script
import os, sys
from django.db import transaction

class BasicImportHelper:

    def pre_import(self):
        pass

    @transaction.atomic
    def run_import(self, import_data):
        import_data()

    def post_import(self):
        pass

    def locate_similar(self, current_object, search_data):
        # You will probably want to call this method from save_or_locate()
        # Example:
        #   new_obj = self.locate_similar(the_obj, {"national_id": the_obj.national_id } )

        the_obj = current_object.__class__.objects.get(**search_data)
        return the_obj

    def locate_object(self, original_class, original_pk_name, the_class, pk_name, pk_value, obj_content):
        # You may change this function to do specific lookup for specific objects
        #
        # original_class class of the django orm's object that needs to be located
        # original_pk_name the primary key of original_class
        # the_class      parent class of original_class which contains obj_content
        # pk_name        the primary key of original_class
        # pk_value       value of the primary_key
        # obj_content    content of the object which was not exported.
        #
        # You should use obj_content to locate the object on the target db
        #
        # An example where original_class and the_class are different is
        # when original_class is Farmer and the_class is Person. The table
        # may refer to a Farmer but you will actually need to locate Person
        # in order to instantiate that Farmer
        #
        # Example:
        #   if the_class == SurveyResultFormat or the_class == SurveyType or the_class == SurveyState:
        #       pk_name="name"
        #       pk_value=obj_content[pk_name]
        #   if the_class == StaffGroup:
        #       pk_value=8

        search_data = { pk_name: pk_value }
        the_obj = the_class.objects.get(**search_data)
        #print(the_obj)
        return the_obj


    def save_or_locate(self, the_obj):
        # Change this if you want to locate the object in the database
        try:
            the_obj.save()
        except:
            print("---------------")
            print("Error saving the following object:")
            print(the_obj.__class__)
            print(" ")
            print(the_obj.__dict__)
            print(" ")
            print(the_obj)
            print(" ")
            print("---------------")

            raise
        return the_obj


importer = None
try:
    import import_helper
    # We need this so ImportHelper can extend BasicImportHelper, although import_helper.py
    # has no knowlodge of this class
    importer = type("DynamicImportHelper", (import_helper.ImportHelper, BasicImportHelper ) , {} )()
except ImportError as e:
    # From Python 3.3 we can check e.name - string match is for backward compatibility.
    if 'import_helper' in str(e):
        importer = BasicImportHelper()
    else:
        raise

import datetime
from decimal import Decimal
from django.contrib.contenttypes.models import ContentType

try:
    import dateutil.parser
    from dateutil.tz import tzoffset
except ImportError:
    print("Please install python-dateutil")
    sys.exit(os.EX_USAGE)

def run():
    importer.pre_import()
    importer.run_import(import_data)
    importer.post_import()

def import_data():

""" % " ".join(sys.argv)


# HELPER FUNCTIONS
# -------------------------------------------------------------------------------

def flatten_blocks(lines, num_indents=-1):
    """
    Take a list (block) or string (statement) and flattens it into a string
    with indentation.

    Each level of list nesting adds one indent; with the default of -1 the
    strings directly inside the outermost list are not indented.
    """
    # The standard indent is four spaces
    INDENTATION = " " * 4

    if not lines:
        return ""

    # If this is a string, add the indentation and finish here
    if isinstance(lines, str):
        return INDENTATION * num_indents + lines

    # If this is not a string, join the lines and recurse
    return "\n".join([flatten_blocks(line, num_indents + 1) for line in lines])


def get_attribute_value(item, field, context, force=False, skip_autofield=True):
    """
    Get a string version of the given attribute's value, like repr() might.

    Raises SkipValue when the field must not be written at all, and DoLater
    when a foreign key target has no variable in 'context' yet but belongs to
    a model that is part of this export.
    """
    # Find the value of the field, catching any database issues
    try:
        value = getattr(item, field.name)
    except ObjectDoesNotExist:
        raise SkipValue('Could not find object for %s.%s, ignoring.\n' % (item.__class__.__name__, field.name))

    # AutoField: We don't include the auto fields, they'll be automatically recreated
    if skip_autofield and isinstance(field, AutoField):
        raise SkipValue()

    # Some databases (eg MySQL) might store boolean values as 0/1, this needs to be cast as a bool
    elif isinstance(field, BooleanField) and value is not None:
        return repr(bool(value))

    # Post file-storage-refactor, repr() on File/ImageFields no longer returns the path
    elif isinstance(field, FileField):
        return repr(force_str(value))

    # ForeignKey fields, link directly using our stored python variable name
    elif isinstance(field, ForeignKey) and value is not None:

        # Special case for contenttype foreign keys: no need to output any
        # content types in this script, as they can be generated again
        # automatically.
        # NB: Not sure if "is" will always work
        if field.remote_field.model is ContentType:
            return 'ContentType.objects.get(app_label="%s", model="%s")' % (value.app_label, value.model)

        # Generate an identifier (key) for this foreign object
        pk_name = value._meta.pk.name
        key = '%s_%s' % (value.__class__.__name__, getattr(value, pk_name))

        if key in context:
            variable_name = context[key]
            # If the context value is set to None, this should be skipped.
            # This identifies models that have been skipped (inheritance)
            if variable_name is None:
                raise SkipValue()
            # Return the variable name listed in the context
            return "%s" % variable_name
        elif value.__class__ not in context["__avaliable_models"] or force:
            # The target will not get a variable: look it up at import time.
            context["__extra_imports"][value._meta.object_name] = value.__module__
            item_locator = orm_item_locator(value)
            return item_locator
        else:
            raise DoLater('(FK) %s.%s\n' % (item.__class__.__name__, field.name))

    elif isinstance(field, (DateField, DateTimeField)) and value is not None:
        return "dateutil.parser.parse(\"%s\")" % value.isoformat()

    # A normal field (e.g. a python built-in)
    else:
        return repr(value)


def make_clean_dict(the_dict):
    """
    Return a shallow copy of the_dict without its "_state" entry.

    A copy is always returned, so callers may modify the result without
    touching the dictionary that was passed in.
    """
    return {key: value for key, value in the_dict.items() if key != "_state"}


def check_dependencies(model, model_queue, avaliable_models):
    """ Check that all the dependencies for this model are already in the queue. """
    # A list of allowed links: existing fields, itself and the special case ContentType
    allowed_links = [m.model.__name__ for m in model_queue] + [model.__name__, 'ContentType']

    # For each ForeignKey or ManyToMany field, check that a link is possible

    for field in model._meta.fields:
        if not field.remote_field:
            continue
        if field.remote_field.model.__name__ not in allowed_links:
            # Targets that are not part of the export never block the model.
            if field.remote_field.model not in avaliable_models:
                continue
            return False

    for field in model._meta.many_to_many:
        if not field.remote_field:
            continue
        if field.remote_field.model.__name__ not in allowed_links:
            return False

    return True


# EXCEPTIONS
# -------------------------------------------------------------------------------

class SkipValue(Exception):
    """ Value could not be parsed or should simply be skipped. """


class DoLater(Exception):
    """ Value cannot be written yet; the field stays queued and is retried later. """


class StrToCodeChanger:
    """ Wraps a string so that repr() yields it verbatim, i.e. as code rather than a quoted literal. """

    def __init__(self, string):
        self.repr = string

    def __repr__(self):
        return self.repr