# frozen_string_literal: true

# ActiveModel::Serialization (https://github.com/rails/rails/blob/v5.0.7/activemodel/lib/active_model/serialization.rb#L184)
# is simple in that it recursively calls `as_json` on each object to
# serialize everything. However, for a model like a Project, this can
# generate a query for every single association, which can add up to tens
# of thousands of queries and lead to memory bloat.
#
# To improve this, we can do several things:
#
# 1. Use the option tree in http://api.rubyonrails.org/classes/ActiveModel/Serializers/JSON.html
#    to generate the necessary preload clauses.
#
# 2. We observe that a single project has many issues, merge requests,
#    etc. Instead of serializing everything at once, which could lead to
#    database timeouts and high memory usage, we take each top-level
#    association and serialize the data in batches.
#
#    For example, we serialize the first 100 issues and preload all of
#    their associated events, notes, etc. before moving onto the next
#    batch. When we're done, we serialize merge requests in the same way.
#    We repeat this pattern for the remaining associations specified in
#    import_export.yml.
module Gitlab
  module ImportExport
    class FastHashSerializer
      attr_reader :subject, :tree

      # Usage of this class results in delayed
      # serialization of relation. The serialization
      # will be triggered when the `JSON.generate`
      # is executed.
      #
      # This class uses memory-optimised, lazily
      # initialised, fast to recycle relation
      # serialization.
      #
      # The `JSON.generate` does use `#to_json`,
      # that returns raw JSON content that is written
      # directly to file.
      class JSONBatchRelation
        include Gitlab::Utils::StrongMemoize

        # relation - an ActiveRecord relation (one batch of records)
        # options  - serialization options forwarded to each record's `#to_json`
        # preloads - association tree passed to `#preload` before iterating,
        #            or nil to skip preloading
        def initialize(relation, options, preloads)
          @relation = relation
          @options = options
          @preloads = preloads
        end

        # Serializes every record of the batch into a single comma-joined
        # JSON fragment (no surrounding brackets — `JSON.generate` supplies
        # those when this object is an element of an Array).
        #
        # Memoized so the batch can be materialized eagerly (see
        # `serialize_record`) and the cached string reused at generate time.
        def raw_json
          strong_memoize(:raw_json) do
            # `+''` yields an unfrozen string despite frozen_string_literal,
            # so we can append to it in place.
            result = +''

            batch = @relation
            batch = batch.preload(@preloads) if @preloads
            batch.each do |item|
              # Separate items with commas; the enclosing JSON array
              # brackets are emitted by the caller.
              result.concat(",") unless result.empty?
              result.concat(item.to_json(@options))
            end

            result
          end
        end

        # Called by `JSON.generate`; the passed options are intentionally
        # ignored — serialization options were fixed at construction time.
        def to_json(options = {})
          raw_json
        end

        # This object only supports raw JSON emission; hash-style
        # serialization is deliberately unsupported (see class docs).
        def as_json(*)
          raise NotImplementedError
        end
      end

      # Number of records serialized (and preloaded) per batch for
      # has-many relations.
      BATCH_SIZE = 100

      # subject    - the root model to serialize (e.g. a Project)
      # tree       - serialization option tree, including `:include` and
      #              `:preload` keys (from import_export.yml)
      # batch_size - records per batch for has-many relations
      def initialize(subject, tree, batch_size: BATCH_SIZE)
        @subject = subject
        @batch_size = batch_size
        @tree = tree
      end

      # With the usage of `JSONBatchRelation`, it returns partially
      # serialized hash which is not easily accessible.
      # It means you can only manipulate and replace top-level objects.
      # All future mutations of the hash (such as `fix_project_tree`)
      # should be aware of that.
      def execute
        simple_serialize.merge(serialize_includes)
      end

      private

      # Serializes the subject's own attributes only — associations are
      # stripped here and handled separately in `serialize_includes`.
      def simple_serialize
        subject.as_json(
          tree.merge(include: nil, preloads: nil))
      end

      # Serializes every association listed under `tree[:include]`,
      # returning a hash of { "association_name" => serialized_value }.
      # Associations that resolve to nil are skipped (hence `compact!`).
      def serialize_includes
        return {} unless includes

        includes
          .map(&method(:serialize_include_definition))
          .tap { |entries| entries.compact! }
          .to_h
      end

      # definition:
      # { labels: { includes: ... } }
      #
      # Returns a ["key", serialized] pair, or nil when the association
      # is absent.
      def serialize_include_definition(definition)
        raise ArgumentError, 'definition needs to be Hash' unless definition.is_a?(Hash)
        raise ArgumentError, 'definition needs to have exactly one Hash element' unless definition.one?

        key = definition.first.first
        options = definition.first.second

        record = subject.public_send(key) # rubocop: disable GitlabSecurity/PublicSend
        return unless record

        serialized_record = serialize_record(key, record, options)
        return unless serialized_record

        # `#as_json` always returns keys as `strings`
        [key.to_s, serialized_record]
      end

      # Serializes a single association. Singular associations are
      # serialized directly; has-many relations are chunked into
      # `JSONBatchRelation` wrappers of `@batch_size` records each.
      def serialize_record(key, record, options)
        unless record.respond_to?(:as_json)
          raise "Invalid type of #{key} is #{record.class}"
        end

        # no has-many relation
        unless record.is_a?(ActiveRecord::Relation)
          return record.as_json(options)
        end

        data = []

        record.in_batches(of: @batch_size) do |batch| # rubocop:disable Cop/InBatches
          # order each batch by its primary key to ensure
          # consistent and predictable ordering of each exported relation
          # as additional `WHERE` clauses can impact the order in which data is being
          # returned by database when no `ORDER` is specified
          batch = batch.reorder(batch.klass.primary_key)

          # `tap(&:raw_json)` eagerly materializes and serializes this
          # batch NOW (while its records are loaded/preloaded), so only
          # the compact JSON string — not the AR objects — is retained
          # until `JSON.generate` runs.
          data.append(JSONBatchRelation.new(batch, options, preloads[key]).tap(&:raw_json))
        end

        data
      end

      # Association tree to serialize, from import_export.yml.
      def includes
        tree[:include]
      end

      # Per-association preload clauses, keyed by association name.
      def preloads
        tree[:preload]
      end
    end
  end
end