1# -*- coding: utf-8 -*-
2"""
3      Title: Dumpscript management command
4    Project: Hardytools (queryset-refactor version)
5     Author: Will Hardy (http://willhardy.com.au)
6       Date: June 2008
7      Usage: python manage.py dumpscript appname > scripts/scriptname.py
8  $Revision: 217 $
9
10Description:
11    Generates a Python script that will repopulate the database using objects.
12    The advantage of this approach is that it is easy to understand, and more
13    flexible than directly populating the database, or using XML.
14
15    * It also allows for new defaults to take effect and only transfers what is
16      needed.
17    * If a new database schema has a NEW ATTRIBUTE, it is simply not
18      populated (using a default value will make the transition smooth :)
19    * If a new database schema REMOVES AN ATTRIBUTE, it is simply ignored
      and the data moves across safely (I'm assuming we don't want this
      attribute anymore).
    * Problems may only occur if there is a new model that is now a required
      ForeignKey for an existing model. But this is easy to fix by editing the
      populate script. Half of the job is already done, as all ForeignKey
      lookups occur through the locate_object() function in the generated script.
26
27Improvements:
28    See TODOs and FIXMEs scattered throughout :-)
29
30"""
31import datetime
32import sys
33
34from django.apps import apps
35from django.contrib.contenttypes.models import ContentType
36from django.core.exceptions import ObjectDoesNotExist
37from django.core.management.base import BaseCommand
38from django.db import router
39from django.db.models import (
40    AutoField, BooleanField, DateField, DateTimeField, FileField, ForeignKey,
41)
42from django.db.models.deletion import Collector
43from django.utils import timezone
44from django.utils.encoding import force_str, smart_text
45
46from django_extensions.management.utils import signalcommand
47
48
def orm_item_locator(orm_obj):
    """
    Build the source text of an ``importer.locate_object(...)`` call.

    Is called every time an object that will not be exported is required.
    The lookup itself is postponed to locate_object(), which runs inside the
    generated script.

    orm_obj: the referred model instance.

    Returns the call expression as a string, padded with one space on each
    side.
    """

    the_class = orm_obj._meta.object_name
    original_class = the_class
    pk_name = orm_obj._meta.pk.name
    original_pk_name = pk_name
    pk_value = getattr(orm_obj, pk_name)

    # The primary key value may itself be an object exposing ``_meta.pk.name``;
    # follow that chain until a plain value is reached, remembering the class
    # and pk name of the last object visited.
    while hasattr(pk_value, "_meta") and hasattr(pk_value._meta, "pk") and hasattr(pk_value._meta.pk, "name"):
        the_class = pk_value._meta.object_name
        pk_name = pk_value._meta.pk.name
        pk_value = getattr(pk_value, pk_name)

    clean_dict = make_clean_dict(orm_obj.__dict__)

    # Rewrite values so that the dict's repr is usable inside the generated
    # script. Only values are replaced, so iterating while assigning is safe.
    for key, v in clean_dict.items():
        if v is None:
            continue
        if isinstance(v, datetime.datetime):
            # make_aware() raises ValueError on a datetime that already has a
            # timezone, so only naive values are converted.
            if not timezone.is_aware(v):
                v = timezone.make_aware(v)
            clean_dict[key] = StrToCodeChanger('dateutil.parser.parse("%s")' % v.isoformat())
        elif not isinstance(v, (str, int, float)):
            clean_dict[key] = "%s" % v

    output = """ importer.locate_object(%s, "%s", %s, "%s", %s, %s ) """ % (
        original_class, original_pk_name,
        the_class, pk_name, pk_value, clean_dict
    )

    return output
84
85
class Command(BaseCommand):
    """ Management command that writes a data-repopulation script to stdout. """

    help = 'Dumps the data as a customised python script.'

    def add_arguments(self, parser):
        """ Register the positional app labels and the --autofield switch. """
        super().add_arguments(parser)
        parser.add_argument('appname', nargs='+')
        parser.add_argument(
            '--autofield',
            dest='skip_autofield',
            action='store_false',
            default=True,
            help='Include Autofields (like pk fields)',
        )

    @signalcommand
    def handle(self, *args, **options):
        """ Collect the requested models and print the generated script. """
        # The models we want to export
        selected_models = get_models(options['appname'])

        # A fresh dictionary is handed to the script as its "context": it
        # keeps track of every processed object so that foreign key
        # references can be written using python variable names.
        script = Script(
            models=selected_models,
            context={},
            stdout=self.stdout,
            stderr=self.stderr,
            options=options,
        )

        # The script object formats itself as a string
        rendered = str(script)
        self.stdout.write(rendered)
        self.stdout.write("\n")
119
120
def get_models(app_labels):
    """
    Return the list of models selected by the given labels.

    Each label is either an app label (all of that app's models except the
    excluded ones) or "app_label.ModelName" (exactly that model, without
    applying the exclusion list). With no labels, every installed app is used.

    TODO: If a required model is referenced, it should also be included.
    Or at least discovered with a get_or_create() call.
    """

    # These models are not to be output, e.g. because they can be generated automatically
    # TODO: This should be "appname.modelname" string
    EXCLUDED_MODELS = (ContentType, )

    def exportable_models(label):
        # All models of one app, minus the excluded ones
        return [m for m in apps.get_app_config(label).get_models()
                if m not in EXCLUDED_MODELS]

    collected = []

    # No labels at all: take every installed app
    if not app_labels:
        for config in apps.get_app_configs():
            collected.extend(exportable_models(config.label))
        return collected

    for spec in app_labels:
        if "." in spec:
            # A specific model is mentioned, take only that model
            label, model_name = spec.split(".", 1)
            collected.append(apps.get_model(label, model_name))
        else:
            collected.extend(exportable_models(spec))

    return collected
153
154
class Code:
    """
    Base class for a snippet of python script.

    It renders itself to a string, placing the import statements before the
    body. The ``imports`` mapping and the ``lines`` list are read here but not
    defined here; they are expected to be supplied by the concrete object.
    """

    def __init__(self, indent=-1, stdout=None, stderr=None):
        """ Store the indent level and the streams (defaulting to sys.stdout/sys.stderr). """
        self.indent = indent
        self.stdout = stdout or sys.stdout
        self.stderr = stderr or sys.stderr

    def __str__(self):
        """ Return a string representation of this script. """
        if not self.imports:
            return flatten_blocks(self.lines, num_indents=self.indent)

        # The import lines are also echoed to stderr
        self.stderr.write(repr(self.import_lines))
        block = [""] + self.import_lines + [""] + self.lines
        return flatten_blocks(block, num_indents=self.indent)

    def get_import_lines(self):
        """ Turn the stored imports ({name: module}) into "from module import name" lines. """
        if not self.imports:
            return []

        statements = []
        for name, module in self.imports.items():
            statements.append("from %s import %s" % (module, name))
        return statements
    import_lines = property(get_import_lines)
189
190
class ModelCode(Code):
    """ Produces a python script that can recreate data for a given model class. """

    def __init__(self, model, context=None, stdout=None, stderr=None, options=None):
        """ Remember the model class and the shared context; no instances are loaded yet. """
        super().__init__(indent=0, stdout=stdout, stderr=stderr)
        self.model = model
        self.context = {} if context is None else context
        self.options = options
        self.instances = []

    def get_imports(self):
        """
        Return a dictionary of import statements, keyed by the name being
        imported (the model's class name) with its module as the value.
        """
        module_path = smart_text(self.model.__module__)
        return {self.model.__name__: module_path}
    imports = property(get_imports)

    def get_lines(self):
        """
        Return a list of lists or strings, representing the code body.
        Each list is a block, each string is a statement.
        """
        body = []

        # First pass: wrap every row returned by the default manager
        rows = self.model._default_manager.all()
        for position, row in enumerate(rows, start=1):
            wrapper = InstanceCode(
                instance=row,
                id=position,
                context=self.context,
                stdout=self.stdout,
                stderr=self.stderr,
                options=self.options,
            )
            self.instances.append(wrapper)
            if wrapper.waiting_list:
                body.extend(wrapper.lines)

        # Second pass, once every instance has been processed.
        # This allows self referencing fields to work.
        for wrapper in self.instances:
            if wrapper.waiting_list:
                body.extend(wrapper.lines)

        return body

    lines = property(get_lines)
233
234
class InstanceCode(Code):
    """ Produces a python script that can recreate data for a given model instance. """

    def __init__(self, instance, id, context=None, stdout=None, stderr=None, options=None):
        """ We need the instance in question and an id (used in the variable name). """

        super().__init__(indent=0, stdout=stdout, stderr=stderr)
        self.imports = {}

        self.options = options
        self.instance = instance
        self.model = instance.__class__
        self.context = {} if context is None else context
        self.variable_name = "%s_%s" % (instance._meta.db_table, id)
        self.skip_me = None
        self.instantiated = False

        # Every concrete field starts out as "still to be written"
        self.waiting_list = list(self.model._meta.fields)

        # Related objects still to be added, per many-to-many field.
        # Fields whose through model is not auto-created are left out.
        self.many_to_many_waiting_list = {}
        for m2m_field in self.model._meta.many_to_many:
            try:
                custom_through = not m2m_field.remote_field.through._meta.auto_created
            except AttributeError:
                custom_through = False
            if custom_through:
                continue
            manager = getattr(self.instance, m2m_field.name)
            self.many_to_many_waiting_list[m2m_field] = list(manager.all())

    def _context_key(self):
        """ Key under which this instance is recorded in the shared context. """
        pk_name = self.instance._meta.pk.name
        return '%s_%s' % (self.model.__name__, getattr(self.instance, pk_name))

    def get_lines(self, force=False):
        """
        Return a list of lists or strings, representing the code body.
        Each list is a block, each string is a statement.

        force (True or False): if an attribute object cannot be included,
        it is usually skipped to be processed later. With 'force' set, there
        will be no waiting: a get_or_create() call is written instead.
        """
        # Nothing is emitted for an instance that should be skipped
        if self.skip():
            return []

        # Initialise our new object
        # e.g. model_name_35 = Model()
        statements = list(self.instantiate())

        # Add each field
        # e.g. model_name_35.field_one = 1034.91
        #      model_name_35.field_two = "text"
        statements.extend(self.get_waiting_list())

        if force:
            # TODO: Check that M2M are not affected
            statements.extend(self.get_waiting_list(force=force))

        # Write the save command for our new object, but only when at least
        # one statement was produced above
        if statements:
            statements.append("%s = importer.save_or_locate(%s)\n" % (self.variable_name, self.variable_name))

        statements.extend(self.get_many_to_many_lines(force=force))

        return statements
    lines = property(get_lines)

    def skip(self):
        """
        Determine whether or not this object should be skipped.
        If this model instance is a parent of a single subclassed
        instance, skip it. The subclassed instance will create this
        parent instance for us.

        The answer is computed once and cached in ``skip_me``.

        TODO: Allow the user to force its creation?
        """
        if self.skip_me is not None:
            return self.skip_me

        model_cls = self.instance.__class__
        db_alias = router.db_for_write(model_cls, instance=self.instance)
        collector = Collector(using=db_alias)
        collector.collect([self.instance], collect_related=False)

        # Flatten everything the collector gathered and count the objects
        # that list our model among their parents
        gathered = [obj for group in collector.data.values() for obj in group]
        child_count = sum(1 for obj in gathered if self.model in obj._meta.parents)

        if child_count == 1:
            # since this instance isn't explicitly created, its variable name
            # can't be referenced in the script, so record None in context dict
            self.context[self._context_key()] = None
            self.skip_me = True
        else:
            self.skip_me = False

        return self.skip_me

    def instantiate(self):
        """ Write lines for instantiation (only on the first call). """
        if self.instantiated:
            return []

        # e.g. model_name_35 = Model()
        creation = "%s = %s()" % (self.variable_name, self.model.__name__)
        self.instantiated = True

        # Store our variable name for future foreign key references
        self.context[self._context_key()] = self.variable_name

        return [creation]

    def get_waiting_list(self, force=False):
        """ Add lines for any waiting fields that can be completed now. """

        skip_autofield = self.options['skip_autofield']
        assignments = []

        # Iterate over a snapshot, because resolved fields are removed as we go
        for field in tuple(self.waiting_list):
            try:
                rendered = get_attribute_value(
                    self.instance, field, self.context,
                    force=force, skip_autofield=skip_autofield,
                )
            except DoLater:
                # Leave it on the waiting list, maybe next time
                continue
            except SkipValue:
                # Nothing to write for this field, but it is done with
                pass
            else:
                assignments.append('%s.%s = %s' % (self.variable_name, field.name, rendered))
            self.waiting_list.remove(field)

        return assignments

    def get_many_to_many_lines(self, force=False):
        """ Generate lines that define many to many relations for this instance. """

        statements = []

        for m2m_field, pending in self.many_to_many_waiting_list.items():
            # Snapshot again: handled items are removed from `pending`
            for related in list(pending):
                try:
                    related_pk = related._meta.pk.name
                    context_key = '%s_%s' % (related.__class__.__name__, getattr(related, related_pk))
                    target = "%s" % self.context[context_key]
                    statements.append('%s.%s.add(%s)' % (self.variable_name, m2m_field.name, target))
                    pending.remove(related)
                except KeyError:
                    # The related object has no variable in the context.
                    # Without force it simply stays on the waiting list.
                    if not force:
                        continue
                    locator = orm_item_locator(related)
                    self.context["__extra_imports"][related._meta.object_name] = related.__module__
                    statements.append('%s.%s.add( %s )' % (self.variable_name, m2m_field.name, locator))
                    pending.remove(related)

        if statements:
            statements.append("")

        return statements
396
397
class Script(Code):
    """ Produces a complete python script that can recreate data for the given apps. """

    def __init__(self, models, context=None, stdout=None, stderr=None, options=None):
        """ Keep the model list and seed the shared context with its bookkeeping entries. """
        super().__init__(stdout=stdout, stderr=stderr)
        self.imports = {}

        self.models = models
        self.options = options

        self.context = {} if context is None else context
        # The set of models being exported, and the imports needed by
        # locate_object() calls (filled in while lines are generated)
        self.context["__avaliable_models"] = set(models)
        self.context["__extra_imports"] = {}

    def _queue_models(self, models, context):
        """
        Work out an appropriate ordering for the models.
        This isn't essential, but makes the script look nicer because
        more instances can be defined on their first try.

        ``models`` is consumed in place: on return it is either empty, or
        (when a cyclic dependency was detected) it holds the ModelCode
        objects of the models that could not be ordered.
        """
        def wrap(model):
            return ModelCode(model=model, context=context, stdout=self.stdout, stderr=self.stderr, options=self.options)

        ordered = []
        # Max number of unproductive rounds allowed before we call it an infinite loop.
        max_stalls = len(models)
        stalls_left = max_stalls

        while len(models) > 0:
            size_before = len(models)
            candidate = models.pop(0)

            if check_dependencies(candidate, ordered, context["__avaliable_models"]):
                # Ready to be processed
                ordered.append(wrap(candidate))
            else:
                # Not ready: back to the end of the list
                models.append(candidate)

            if len(models) != size_before:
                # Progress was made, reset the stall counter
                stalls_left = max_stalls
                continue

            # No progress. Too many such rounds in a row means there is a
            # cyclic foreign key structure that cannot be resolved by re-ordering
            stalls_left -= 1
            if stalls_left <= 0:
                # Add the remaining models to the queue and replace the
                # contents of the model list with their ModelCode objects
                # (sure, this is a little bit of hackery)
                leftovers = [wrap(m) for m in models]
                ordered += leftovers
                models[:] = leftovers
                break

        return ordered

    def get_lines(self):
        """
        Return a list of lists or strings, representing the code body.
        Each list is a block, each string is a statement.
        """
        code = [self.FILE_HEADER.strip()]

        # Queue and process the required models
        for model_code in self._queue_models(self.models, context=self.context):
            model = model_code.model
            msg = 'Processing model: %s.%s\n' % (model.__module__, model.__name__)
            self.stderr.write(msg)
            code.append("    # " + msg)
            code.append(model_code.import_lines)
            code.append("")
            code.append(model_code.lines)

        # Process left over foreign keys from cyclic models
        for leftover in self.models:
            msg = 'Re-processing model: %s.%s\n' % (leftover.model.__module__, leftover.model.__name__)
            self.stderr.write(msg)
            code.append("    # " + msg)
            for pending in leftover.instances:
                if not (pending.waiting_list or pending.many_to_many_waiting_list):
                    continue
                code.append(pending.get_lines(force=True))

        # Splice the initial imports in right after the header; the import
        # lines end up in reverse order of the dictionary's iteration order.
        extra_imports = [
            "    from %s import %s" % (module, name)
            for name, module in self.context["__extra_imports"].items()
        ]
        code[1:1] = ["    # Initial Imports"] + extra_imports[::-1] + [""]

        return code

    lines = property(get_lines)

    # A user-friendly file header
    FILE_HEADER = """

#!/usr/bin/env python


# This file has been automatically generated.
# Instead of changing it, create a file called import_helper.py
# and put there a class called ImportHelper(object) in it.
#
# This class will be specially casted so that instead of extending object,
# it will actually extend the class BasicImportHelper()
#
# That means you just have to overload the methods you want to
# change, leaving the other ones intact.
#
# Something that you might want to do is use transactions, for example.
#
# Also, don't forget to add the necessary Django imports.
#
# This file was generated with the following command:
# %s
#
# to restore it, run
# manage.py runscript module_name.this_script_name
#
# example: if manage.py is at ./manage.py
# and the script is at ./some_folder/some_script.py
# you must make sure ./some_folder/__init__.py exists
# and run  ./manage.py runscript some_folder.some_script
import os, sys
from django.db import transaction

class BasicImportHelper:

    def pre_import(self):
        pass

    @transaction.atomic
    def run_import(self, import_data):
        import_data()

    def post_import(self):
        pass

    def locate_similar(self, current_object, search_data):
        # You will probably want to call this method from save_or_locate()
        # Example:
        #   new_obj = self.locate_similar(the_obj, {"national_id": the_obj.national_id } )

        the_obj = current_object.__class__.objects.get(**search_data)
        return the_obj

    def locate_object(self, original_class, original_pk_name, the_class, pk_name, pk_value, obj_content):
        # You may change this function to do specific lookup for specific objects
        #
        # original_class class of the django orm's object that needs to be located
        # original_pk_name the primary key of original_class
        # the_class      parent class of original_class which contains obj_content
        # pk_name        the primary key of original_class
        # pk_value       value of the primary_key
        # obj_content    content of the object which was not exported.
        #
        # You should use obj_content to locate the object on the target db
        #
        # An example where original_class and the_class are different is
        # when original_class is Farmer and the_class is Person. The table
        # may refer to a Farmer but you will actually need to locate Person
        # in order to instantiate that Farmer
        #
        # Example:
        #   if the_class == SurveyResultFormat or the_class == SurveyType or the_class == SurveyState:
        #       pk_name="name"
        #       pk_value=obj_content[pk_name]
        #   if the_class == StaffGroup:
        #       pk_value=8

        search_data = { pk_name: pk_value }
        the_obj = the_class.objects.get(**search_data)
        #print(the_obj)
        return the_obj


    def save_or_locate(self, the_obj):
        # Change this if you want to locate the object in the database
        try:
            the_obj.save()
        except:
            print("---------------")
            print("Error saving the following object:")
            print(the_obj.__class__)
            print(" ")
            print(the_obj.__dict__)
            print(" ")
            print(the_obj)
            print(" ")
            print("---------------")

            raise
        return the_obj


importer = None
try:
    import import_helper
    # We need this so ImportHelper can extend BasicImportHelper, although import_helper.py
    # has no knowlodge of this class
    importer = type("DynamicImportHelper", (import_helper.ImportHelper, BasicImportHelper ) , {} )()
except ImportError as e:
    # From Python 3.3 we can check e.name - string match is for backward compatibility.
    if 'import_helper' in str(e):
        importer = BasicImportHelper()
    else:
        raise

import datetime
from decimal import Decimal
from django.contrib.contenttypes.models import ContentType

try:
    import dateutil.parser
    from dateutil.tz import tzoffset
except ImportError:
    print("Please install python-dateutil")
    sys.exit(os.EX_USAGE)

def run():
    importer.pre_import()
    importer.run_import(import_data)
    importer.post_import()

def import_data():

""" % " ".join(sys.argv)
628
629
630# HELPER FUNCTIONS
631# -------------------------------------------------------------------------------
632
def flatten_blocks(lines, num_indents=-1):
    """
    Flatten a block (list) or a statement (string) into one indented string.

    A string is prefixed with ``num_indents`` indents of four spaces. A list
    is rendered one level deeper, with its items joined by newlines. Anything
    falsy (empty string, empty list, None) becomes the empty string.
    """
    if not lines:
        return ""

    if isinstance(lines, str):
        # A statement: the standard indent is four spaces
        indent_unit = " " * 4
        return indent_unit * num_indents + lines

    # A block: every child is rendered one level deeper
    child_level = num_indents + 1
    rendered = (flatten_blocks(child, child_level) for child in lines)
    return "\n".join(rendered)
650
651
def get_attribute_value(item, field, context, force=False, skip_autofield=True):
    """
    Return the source-code text for the value of ``field`` on ``item``,
    like repr() might.

    Raises SkipValue when nothing should be written for this field, and
    DoLater when a foreign key target has not been given a variable yet.
    """
    # Read the value, catching a related object that no longer exists
    try:
        value = getattr(item, field.name)
    except ObjectDoesNotExist:
        raise SkipValue('Could not find object for %s.%s, ignoring.\n' % (item.__class__.__name__, field.name))

    # AutoField: We don't include the auto fields, they'll be automatically recreated
    if skip_autofield and isinstance(field, AutoField):
        raise SkipValue()

    has_value = value is not None

    # Some databases (eg MySQL) might store boolean values as 0/1, this needs to be cast as a bool
    if has_value and isinstance(field, BooleanField):
        return repr(bool(value))

    # Post file-storage-refactor, repr() on File/ImageFields no longer returns the path
    if isinstance(field, FileField):
        return repr(force_str(value))

    # ForeignKey fields, link directly using our stored python variable name
    if has_value and isinstance(field, ForeignKey):

        # Special case for contenttype foreign keys: no need to output any
        # content types in this script, as they can be generated again
        # automatically.
        # NB: Not sure if "is" will always work
        if field.remote_field.model is ContentType:
            return 'ContentType.objects.get(app_label="%s", model="%s")' % (value.app_label, value.model)

        # The identifier (key) of the foreign object in the context
        target_pk = value._meta.pk.name
        key = '%s_%s' % (value.__class__.__name__, getattr(value, target_pk))

        if key not in context:
            # Not processed (yet). If it will never be exported, or we are
            # forced, look it up at import time; otherwise try again later.
            if value.__class__ not in context["__avaliable_models"] or force:
                context["__extra_imports"][value._meta.object_name] = value.__module__
                return orm_item_locator(value)
            raise DoLater('(FK) %s.%s\n' % (item.__class__.__name__, field.name))

        variable_name = context[key]
        # A context value of None identifies models that have been
        # skipped (inheritance), so this field is skipped as well
        if variable_name is None:
            raise SkipValue()
        return "%s" % variable_name

    if has_value and isinstance(field, (DateField, DateTimeField)):
        return 'dateutil.parser.parse("%s")' % value.isoformat()

    # A normal field (e.g. a python built-in)
    return repr(value)
707
708
def make_clean_dict(the_dict):
    """
    Return a shallow copy of ``the_dict`` without its "_state" entry.

    A copy is returned even when there is no "_state" key, so that callers
    may rewrite values in the result without touching the original mapping.
    """
    clean_dict = the_dict.copy()
    clean_dict.pop("_state", None)
    return clean_dict
715
716
def check_dependencies(model, model_queue, avaliable_models):
    """
    Check that all the dependencies for this model are already in the queue.

    Returns True when the model can be queued now, False otherwise.
    """
    # Names a relation may point at: models already queued, the model
    # itself and the special case ContentType
    linkable = {queued.model.__name__ for queued in model_queue}
    linkable.update((model.__name__, 'ContentType'))

    # Relations among the concrete fields only block the model when their
    # target is one of the models being exported
    for field in model._meta.fields:
        relation = field.remote_field
        if not relation:
            continue
        target = relation.model
        if target.__name__ not in linkable and target in avaliable_models:
            return False

    # Many-to-many targets must be linkable, whether exported or not
    for field in model._meta.many_to_many:
        relation = field.remote_field
        if relation and relation.model.__name__ not in linkable:
            return False

    return True
739
740
741# EXCEPTIONS
742# -------------------------------------------------------------------------------
743
class SkipValue(Exception):
    """ Raised when nothing should be written for a value; it is simply left out. """
746
747
class DoLater(Exception):
    """ Value cannot be written yet; it should be retried on a later pass. """
750
751
class StrToCodeChanger:
    """
    Wrap a string so that repr() yields it verbatim, without quotes.

    This lets a code expression sit inside a container whose repr is written
    into generated source.
    """

    def __init__(self, string):
        # The exact text that repr() will return
        self.repr = string

    def __repr__(self):
        return self.repr
759