1# frozen_string_literal: true
2
3class MergeRequestDiff < ApplicationRecord
4  include Sortable
5  include Importable
6  include ManualInverseAssociation
7  include EachBatch
8  include Gitlab::Utils::StrongMemoize
9  include ObjectStorage::BackgroundMove
10  include BulkInsertableAssociations
11
12  # Don't display more than 100 commits at once
13  COMMITS_SAFE_SIZE = 100
14  BATCH_SIZE = 1000
15
16  # Applies to closed or merged MRs when determining whether to migrate their
17  # diffs to external storage
18  EXTERNAL_DIFF_CUTOFF = 7.days.freeze
19
20  # The files_count column is a 2-byte signed integer. Look up the true value
21  # from the database if this sentinel is seen
22  FILES_COUNT_SENTINEL = 2**15 - 1
23
24  belongs_to :merge_request
25
26  manual_inverse_association :merge_request, :merge_request_diff
27
28  has_many :merge_request_diff_files,
29    -> { order(:merge_request_diff_id, :relative_order) },
30    inverse_of: :merge_request_diff
31
32  has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
33
34  validates :base_commit_sha, :head_commit_sha, :start_commit_sha, sha: true
35  validates :merge_request_id, uniqueness: { scope: :diff_type }, if: :merge_head?
36
  # State machine backed by the `state` attribute; new records start as :empty
  state_machine :state, initial: :empty do
    # `clean` moves a diff from any state to :without_files
    event :clean do
      transition any => :without_files
    end

    state :collected
    state :overflow
    # Diff files have been deleted by the system
    state :without_files
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end
53
  # Integer-backed kind of diff. `merge_head?` (generated from this enum) is
  # consulted by the uniqueness validation and by the commit SHA setup.
  enum diff_type: {
    regular: 1,
    merge_head: 2
  }
58
  # Excludes diffs in the :without_files and :empty states
  scope :with_files, -> { without_states(:without_files, :empty) }
  # Excludes diffs in the :empty state
  scope :viewable, -> { without_state(:empty) }
61  scope :by_commit_sha, ->(sha) do
62    joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
63  end
64
65  scope :by_project_id, -> (project_id) do
66    joins(:merge_request).where(merge_requests: { target_project_id: project_id })
67  end
68
69  scope :recent, -> (limit = 100) { order(id: :desc).limit(limit) }
70
71  scope :files_in_database, -> do
72    where(stored_externally: [false, nil]).where(arel_table[:files_count].gt(0))
73  end
74
75  scope :not_latest_diffs, -> do
76    merge_requests = MergeRequest.arel_table
77    mr_diffs = arel_table
78
79    join_condition = merge_requests[:id].eq(mr_diffs[:merge_request_id])
80      .and(mr_diffs[:id].not_eq(merge_requests[:latest_merge_request_diff_id]))
81      .and(mr_diffs[:diff_type].eq(diff_types[:regular]))
82
83    arel_join = mr_diffs.join(merge_requests).on(join_condition)
84    joins(arel_join.join_sources)
85  end
86
87  scope :old_merged_diffs, -> (before) do
88    merge_requests = MergeRequest.arel_table
89    mr_metrics = MergeRequest::Metrics.arel_table
90    mr_diffs = arel_table
91
92    mr_join = mr_diffs
93      .join(merge_requests)
94      .on(mr_diffs[:merge_request_id].eq(merge_requests[:id]))
95
96    metrics_join_condition = mr_diffs[:merge_request_id]
97      .eq(mr_metrics[:merge_request_id])
98      .and(mr_metrics[:merged_at].not_eq(nil))
99
100    metrics_join = mr_diffs.join(mr_metrics).on(metrics_join_condition)
101
102    condition = MergeRequest.arel_table[:state_id].eq(MergeRequest.available_states[:merged])
103      .and(MergeRequest::Metrics.arel_table[:merged_at].lteq(before))
104      .and(MergeRequest::Metrics.arel_table[:merged_at].not_eq(nil))
105
106    joins(metrics_join.join_sources, mr_join.join_sources).where(condition)
107  end
108
109  scope :old_closed_diffs, -> (before) do
110    condition = MergeRequest.arel_table[:state_id].eq(MergeRequest.available_states[:closed])
111      .and(MergeRequest::Metrics.arel_table[:latest_closed_at].lteq(before))
112
113    joins(merge_request: :metrics).where(condition)
114  end
115
116  # This scope uses LATERAL JOIN to find the most recent MR diff association for the given merge requests.
117  # To avoid joining the merge_requests table, we build an in memory table using the merge request ids.
118  # Example:
119  # SELECT ...
120  # FROM (VALUES (MR_ID_1),(MR_ID_2)) merge_requests (id)
121  # INNER JOIN LATERAL (...)
122  scope :latest_diff_for_merge_requests, -> (merge_requests) do
123    mrs = Array(merge_requests)
124    return MergeRequestDiff.none if mrs.empty?
125
126    merge_request_table = MergeRequest.arel_table
127    merge_request_diff_table = MergeRequestDiff.arel_table
128
129    join_query = MergeRequestDiff
130      .where(merge_request_table[:id].eq(merge_request_diff_table[:merge_request_id]))
131      .order(created_at: :desc)
132      .limit(1)
133
134    mr_id_list = mrs.map { |mr| "(#{Integer(mr.id)})" }.join(",")
135
136    MergeRequestDiff
137      .from("(VALUES #{mr_id_list}) merge_requests (id)")
138      .joins("INNER JOIN LATERAL (#{join_query.to_sql}) #{MergeRequestDiff.table_name} ON TRUE")
139      .includes(:merge_request_diff_commits)
140  end
141
142  class << self
143    def ids_for_external_storage_migration(limit:)
144      return [] unless Gitlab.config.external_diffs.enabled
145
146      case Gitlab.config.external_diffs.when
147      when 'always'
148        ids_for_external_storage_migration_strategy_always(limit: limit)
149      when 'outdated'
150        ids_for_external_storage_migration_strategy_outdated(limit: limit)
151      else
152        []
153      end
154    end
155
156    def ids_for_external_storage_migration_strategy_always(limit:)
157      files_in_database.limit(limit).pluck(:id)
158    end
159
160    def ids_for_external_storage_migration_strategy_outdated(limit:)
161      # Outdated is too complex to be a single SQL query, so split into three
162      before = EXTERNAL_DIFF_CUTOFF.ago
163
164      ids = files_in_database
165        .old_merged_diffs(before)
166        .limit(limit)
167        .pluck(:id)
168
169      return ids if ids.size >= limit
170
171      ids += files_in_database
172        .old_closed_diffs(before)
173        .limit(limit - ids.size)
174        .pluck(:id)
175
176      return ids if ids.size >= limit
177
178      ids + files_in_database
179        .not_latest_diffs
180        .limit(limit - ids.size)
181        .pluck(:id)
182    end
183  end
184
  # Uploader behind the `external_diff` attribute
  mount_uploader :external_diff, ExternalDiffUploader

  # All diff information is collected from repository after object is created.
  # It allows you to override variables like head_commit_sha before getting diff.
  # Both creation callbacks are skipped while the record is being imported.
  after_create :save_git_content, unless: :importing?
  after_create_commit :set_as_latest_diff, unless: :importing?

  # Run on every save (including the nested `save` inside save_git_content)
  after_save :update_external_diff_store
  after_save :set_count_columns
194
195  def self.find_by_diff_refs(diff_refs)
196    find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
197  end
198
199  def viewable?
200    collected? || without_files? || overflow?
201  end
202
203  # Collect information about commits and diff from repository
204  # and save it to the database as serialized data
  def save_git_content
    # Fill in any missing start/head/base SHAs before the comparison is built
    ensure_commit_shas
    save_commits
    save_diffs

    # Another set of `after_save` hooks will be called here when we update the record
    save
    # We need to reset so that dirty tracking is reset when running the original set
    # of `after_save` hooks that come after this `after_create` hook. Otherwise, the
    # hooks that run when an attribute was changed are run twice.
    reset

    # The callback registration already skips imports; this guard also covers
    # direct calls to this method
    keep_around_commits unless importing?
  end
219
220  def set_as_latest_diff
221    # Don't set merge_head diff as latest so it won't get considered as the
222    # MergeRequest#merge_request_diff.
223    return if merge_head?
224
225    MergeRequest
226      .where('id = ? AND COALESCE(latest_merge_request_diff_id, 0) < ?', self.merge_request_id, self.id)
227      .update_all(latest_merge_request_diff_id: self.id)
228  end
229
230  def ensure_commit_shas
231    self.start_commit_sha ||= merge_request.target_branch_sha
232
233    if merge_head? && merge_request.merge_ref_head.present?
234      diff_refs = merge_request.merge_ref_head.diff_refs
235
236      self.head_commit_sha  ||= diff_refs.head_sha
237      self.base_commit_sha  ||= diff_refs.base_sha
238    else
239      self.head_commit_sha  ||= merge_request.source_branch_sha
240      self.base_commit_sha  ||= find_base_sha
241    end
242  end
243
244  # Override head_commit_sha to keep compatibility with merge request diff
245  # created before version 8.4 that does not store head_commit_sha in separate db field.
246  def head_commit_sha
247    if persisted? && super.nil?
248      last_commit_sha
249    else
250      super
251    end
252  end
253
254  def files_count
255    db_value = read_attribute(:files_count)
256
257    case db_value
258    when nil, FILES_COUNT_SENTINEL
259      merge_request_diff_files.count
260    else
261      db_value
262    end
263  end
264
  # This method will rely on the repository branch SHA
  # in case start_commit_sha is nil. It's necessary for old merge request diffs
  # created before version 8.4 to work
268  def safe_start_commit_sha
269    start_commit_sha || merge_request.target_branch_sha
270  end
271
272  def size
273    real_size.presence || raw_diffs.size
274  end
275
276  def lines_count
277    strong_memoize(:lines_count) do
278      raw_diffs(limits: false).line_count
279    end
280  end
281
282  def raw_diffs(options = {})
283    if options[:ignore_whitespace_change]
284      @diffs_no_whitespace ||= compare.diffs(options)
285    else
286      @raw_diffs ||= {}
287      @raw_diffs[options] ||= load_diffs(options)
288    end
289  end
290
291  def commits(limit: nil, load_from_gitaly: false)
292    strong_memoize(:"commits_#{limit || 'all'}_#{load_from_gitaly}") do
293      load_commits(limit: limit, load_from_gitaly: load_from_gitaly)
294    end
295  end
296
297  def last_commit_sha
298    commit_shas(limit: 1).first
299  end
300
301  def first_commit
302    commits.last
303  end
304
305  def last_commit
306    commits.first
307  end
308
309  def base_commit
310    return unless base_commit_sha
311
312    project.commit_by(oid: base_commit_sha)
313  end
314
315  def start_commit
316    return unless start_commit_sha
317
318    project.commit_by(oid: start_commit_sha)
319  end
320
321  def head_commit
322    return unless head_commit_sha
323
324    project.commit_by(oid: head_commit_sha)
325  end
326
327  def commit_shas(limit: nil)
328    if association(:merge_request_diff_commits).loaded?
329      sorted_diff_commits = merge_request_diff_commits.sort_by { |diff_commit| [diff_commit.id, diff_commit.relative_order] }
330      sorted_diff_commits = sorted_diff_commits.take(limit) if limit
331      sorted_diff_commits.map(&:sha)
332    else
333      merge_request_diff_commits.limit(limit).pluck(:sha)
334    end
335  end
336
337  def includes_any_commits?(shas)
338    return false if shas.blank?
339
340    # when the number of shas is huge (1000+) we don't want
341    # to pass them all as an SQL param, let's pass them in batches
342    shas.each_slice(BATCH_SIZE).any? do |batched_shas|
343      merge_request_diff_commits.where(sha: batched_shas).exists?
344    end
345  end
346
347  def diff_refs=(new_diff_refs)
348    self.base_commit_sha = new_diff_refs&.base_sha
349    self.start_commit_sha = new_diff_refs&.start_sha
350    self.head_commit_sha = new_diff_refs&.head_sha
351  end
352
353  def diff_refs
354    return unless start_commit_sha || base_commit_sha
355
356    Gitlab::Diff::DiffRefs.new(
357      base_sha:  base_commit_sha,
358      start_sha: start_commit_sha,
359      head_sha:  head_commit_sha
360    )
361  end
362
363  # MRs created before 8.4 don't store their true diff refs (start and base),
364  # but we need to get a commit SHA for the "View file @ ..." link by a file,
365  # so we use an approximation of the diff refs if we can't get the actual one.
366  #
367  # These will not be the actual diff refs if the target branch was merged into
368  # the source branch after the merge request was created, but it is good enough
369  # for the specific purpose of linking to a commit.
370  #
371  # It is not good enough for highlighting diffs, so we can't simply pass
372  # these as `diff_refs.`
373  def fallback_diff_refs
374    real_refs = diff_refs
375    return real_refs if real_refs
376
377    likely_base_commit_sha = (first_commit&.parent || first_commit)&.sha
378
379    Gitlab::Diff::DiffRefs.new(
380      base_sha:  likely_base_commit_sha,
381      start_sha: safe_start_commit_sha,
382      head_sha:  head_commit_sha
383    )
384  end
385
386  def diff_refs_by_sha?
387    base_commit_sha? && head_commit_sha? && start_commit_sha?
388  end
389
390  def diffs_in_batch(batch_page, batch_size, diff_options:)
391    fetching_repository_diffs(diff_options) do |comparison|
392      reorder_diff_files!
393      diffs_batch = diffs_in_batch_collection(batch_page, batch_size, diff_options: diff_options)
394
395      if comparison
396        if diff_options[:paths].blank? && !without_files?
397          # Return the empty MergeRequestDiffBatch for an out of bound batch request
398          break diffs_batch if diffs_batch.diff_paths.blank?
399
400          diff_options.merge!(
401            paths: diffs_batch.diff_paths,
402            pagination_data: diffs_batch.pagination_data
403          )
404        end
405
406        comparison.diffs(diff_options)
407      else
408        diffs_batch
409      end
410    end
411  end
412
413  def diffs(diff_options = nil)
414    fetching_repository_diffs(diff_options) do |comparison|
415      # It should fetch the repository when diffs are cleaned by the system.
416      # We don't keep these for storage overload purposes.
417      # See https://gitlab.com/gitlab-org/gitlab-foss/issues/37639
418      if comparison
419        comparison.diffs(diff_options)
420      else
421        reorder_diff_files!
422        diffs_collection(diff_options)
423      end
424    end
425  end
426
427  # Should always return the DB persisted diffs collection
  # (e.g. Gitlab::Diff::FileCollection::MergeRequestDiff).
429  # It's useful when trying to invalidate old caches through
430  # FileCollection::MergeRequestDiff#clear_cache!
431  def diffs_collection(diff_options = nil)
432    Gitlab::Diff::FileCollection::MergeRequestDiff.new(self, diff_options: diff_options)
433  end
434
435  def project
436    merge_request.target_project
437  end
438
439  def compare
440    @compare ||=
441      Gitlab::Git::Compare.new(
442        repository.raw_repository,
443        safe_start_commit_sha,
444        head_commit_sha
445      )
446  end
447
448  def latest?
449    self.id == merge_request.latest_merge_request_diff_id
450  end
451
452  # rubocop: disable CodeReuse/ServiceClass
453  def compare_with(sha)
454    # When compare merge request versions we want diff A..B instead of A...B
455    # so we handle cases when user does squash and rebase of the commits between versions.
456    # For this reason we set straight to true by default.
457    CompareService.new(project, head_commit_sha).execute(project, sha, straight: true)
458  end
459  # rubocop: enable CodeReuse/ServiceClass
460
461  def modified_paths(fallback_on_overflow: false)
462    if fallback_on_overflow && overflow?
463      # This is an extremely slow means to find the modified paths for a given
464      #   MergeRequestDiff. This should be avoided, except where the limit of
465      #   1_000 (as of %12.10) entries returned by the default behavior is an
466      #   issue.
467      strong_memoize(:overflowed_modified_paths) do
468        project.repository.diff_stats(
469          base_commit_sha,
470          head_commit_sha
471        ).paths
472      end
473    else
474      strong_memoize(:modified_paths) do
475        merge_request_diff_files.pluck(:new_path, :old_path).flatten.uniq
476      end
477    end
478  end
479
480  def update_external_diff_store
481    return unless saved_change_to_external_diff? || saved_change_to_stored_externally?
482
483    update_column(:external_diff_store, external_diff.object_store)
484  end
485
486  # If enabled, yields the external file containing the diff. Otherwise, yields
487  # nil. This method is not thread-safe, but it *is* re-entrant, which allows
488  # multiple merge_request_diff_files to load their data efficiently
489  def opening_external_diff
490    return yield(nil) unless stored_externally?
491    return yield(@external_diff_file) if @external_diff_file
492
493    external_diff.open do |file|
494      @external_diff_file = file
495
496      yield(@external_diff_file)
497    ensure
498      @external_diff_file = nil
499    end
500  end
501
502  # Transactionally migrate the current merge_request_diff_files entries to
503  # external storage. If external storage isn't an option for this diff, the
504  # method is a no-op.
505  def migrate_files_to_external_storage!
506    return if stored_externally? || !use_external_diff? || files_count == 0
507
508    rows = build_merge_request_diff_files(merge_request_diff_files)
509    rows = build_external_merge_request_diff_files(rows)
510
511    # Perform carrierwave activity before entering the database transaction.
512    # This is safe as until the `external_diff_store` column is changed, we will
513    # continue to consult the in-database content.
514    self.external_diff.store!
515
516    transaction do
517      MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
518      ApplicationRecord.legacy_bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
519      save!
520    end
521
522    merge_request_diff_files.reset
523  end
524
525  # Transactionally migrate the current merge_request_diff_files entries from
526  # external storage, back to the database. This is the rollback operation for
527  # +migrate_files_to_external_storage!+
528  #
529  # If this diff isn't in external storage, the method is a no-op.
530  def migrate_files_to_database!
531    return unless stored_externally?
532    return if files_count == 0
533
534    rows = convert_external_diffs_to_database
535
536    transaction do
537      MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
538      ApplicationRecord.legacy_bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
539      update!(stored_externally: false)
540    end
541
542    # Only delete the external diff file after the contents have been saved to
543    # the database
544    remove_external_diff!
545    merge_request_diff_files.reset
546  end
547
548  private
549
550  def convert_external_diffs_to_database
551    opening_external_diff do |external_file|
552      merge_request_diff_files.map do |diff_file|
553        row = diff_file.attributes.except('diff')
554
555        raise "Diff file lacks external diff offset or size: #{row.inspect}" unless
556          row['external_diff_offset'] && row['external_diff_size']
557
558        # The diff in the external file is already base64-encoded if necessary,
559        # matching the 'binary' attribute of the row. Reading it directly allows
560        # a cycle of decode-encode to be skipped
561        external_file.seek(row.delete('external_diff_offset'))
562        row['diff'] = external_file.read(row.delete('external_diff_size'))
563
564        row
565      end
566    end
567  end
568
569  def diffs_in_batch_collection(batch_page, batch_size, diff_options:)
570    Gitlab::Diff::FileCollection::MergeRequestDiffBatch.new(self,
571                                                            batch_page,
572                                                            batch_size,
573                                                            diff_options: diff_options)
574  end
575
576  def encode_in_base64?(diff_text)
577    return false if diff_text.nil?
578
579    (diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?) ||
580      diff_text.include?("\0")
581  end
582
583  def build_external_merge_request_diff_files(rows)
584    tempfile = build_external_diff_tempfile(rows)
585
586    self.external_diff = tempfile
587    self.stored_externally = true
588
589    rows
590  ensure
591    tempfile&.unlink
592  end
593
594  def create_merge_request_diff_files(rows)
595    rows = build_external_merge_request_diff_files(rows) if use_external_diff?
596
597    # Faster inserts
598    ApplicationRecord.legacy_bulk_insert('merge_request_diff_files', rows) # rubocop:disable Gitlab/BulkInsert
599  end
600
601  def build_external_diff_tempfile(rows)
602    Tempfile.open(external_diff.filename) do |file|
603      rows.each do |row|
604        data = row.delete(:diff)
605        row[:external_diff_offset] = file.pos
606        row[:external_diff_size] = data&.bytesize || 0
607
608        file.write(data)
609      end
610
611      file
612    end
613  end
614
615  def build_merge_request_diff_files(diffs)
616    sort_diffs(diffs).map.with_index do |diff, index|
617      diff_hash = diff.to_hash.merge(
618        binary: false,
619        merge_request_diff_id: self.id,
620        relative_order: index
621      )
622
623      # Compatibility with old diffs created with Psych.
624      diff_hash.tap do |hash|
625        diff_text = hash[:diff]
626
627        if encode_in_base64?(diff_text)
628          hash[:binary] = true
629          hash[:diff] = [diff_text].pack('m0')
630        end
631      end
632    end
633  end
634
635  # Yields the block with the repository Compare object if it should
  # fetch diffs from the repository instead of the DB.
637  def fetching_repository_diffs(diff_options)
638    return unless block_given?
639
640    diff_options ||= {}
641
642    # Can be read as: fetch the persisted diffs if yielded without the
643    # Compare object.
644    return yield unless without_files? || diff_options[:ignore_whitespace_change]
645    return yield unless diff_refs&.complete?
646
647    comparison = diff_refs.compare_in(repository.project)
648
649    return yield unless comparison
650
651    yield(comparison)
652  end
653
654  def use_external_diff?
655    return false unless Gitlab.config.external_diffs.enabled
656
657    case Gitlab.config.external_diffs.when
658    when 'always'
659      true
660    when 'outdated'
661      outdated_by_merge? || outdated_by_closure? || old_version?
662    else
663      false # Disable external diffs if misconfigured
664    end
665  end
666
667  def outdated_by_merge?
668    return false unless merge_request&.metrics&.merged_at
669
670    merge_request.merged? && merge_request.metrics.merged_at < EXTERNAL_DIFF_CUTOFF.ago
671  end
672
673  def outdated_by_closure?
674    return false unless merge_request&.metrics&.latest_closed_at
675
676    merge_request.closed? && merge_request.metrics.latest_closed_at < EXTERNAL_DIFF_CUTOFF.ago
677  end
678
679  def old_version?
680    latest_id = MergeRequest
681      .where(id: merge_request_id)
682      .limit(1)
683      .pluck(:latest_merge_request_diff_id)
684      .first
685
686    latest_id && self.id < latest_id
687  end
688
689  def load_diffs(options)
690    # Ensure all diff files operate on the same external diff file instance if
691    # present. This reduces file open/close overhead.
692    opening_external_diff do
693      collection = merge_request_diff_files
694
695      if paths = options[:paths]
696        collection = collection.where('old_path IN (?) OR new_path IN (?)', paths, paths)
697      end
698
699      Gitlab::Git::DiffCollection.new(collection.map(&:to_hash), options)
700    end
701  end
702
703  def load_commits(limit: nil, load_from_gitaly: false)
704    if load_from_gitaly
705      commits = Gitlab::Git::Commit.batch_by_oid(repository, merge_request_diff_commits.limit(limit).map(&:sha))
706      commits = Commit.decorate(commits, project)
707    else
708      commits = merge_request_diff_commits.with_users.limit(limit)
709        .map { |commit| Commit.from_hash(commit.to_hash, project) }
710    end
711
712    CommitCollection
713      .new(merge_request.target_project, commits, merge_request.target_branch)
714  end
715
716  def save_diffs
717    new_attributes = {}
718
719    if compare.commits.empty?
720      new_attributes[:state] = :empty
721    else
722      diff_collection = compare.diffs(Commit.max_diff_options)
723      new_attributes[:real_size] = diff_collection.real_size
724
725      if diff_collection.any?
726        new_attributes[:state] = :collected
727
728        rows = build_merge_request_diff_files(diff_collection)
729        create_merge_request_diff_files(rows)
730        new_attributes[:sorted] = true
731        self.class.uncached { merge_request_diff_files.reset }
732      end
733
734      # Set our state to 'overflow' to make the #empty? and #collected?
735      # methods (generated by StateMachine) return false.
736      #
737      # This attribution has to come at the end of the method so 'overflow'
738      # state does not get overridden by 'collected'.
739      new_attributes[:state] = :overflow if diff_collection.overflow?
740    end
741
742    assign_attributes(new_attributes)
743  end
744
745  def save_commits
746    MergeRequestDiffCommit.create_bulk(self.id, compare.commits.reverse)
747    self.class.uncached { merge_request_diff_commits.reset }
748  end
749
750  def set_count_columns
751    update_columns(
752      commits_count: merge_request_diff_commits.size,
753      files_count: [FILES_COUNT_SENTINEL, merge_request_diff_files.size].min
754    )
755  end
756
757  def repository
758    project.repository
759  end
760
761  def find_base_sha
762    return unless head_commit_sha && start_commit_sha
763
764    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
765  end
766
767  def keep_around_commits
768    [repository, merge_request.source_project.repository].uniq.each do |repo|
769      repo.keep_around(start_commit_sha, head_commit_sha, base_commit_sha)
770    end
771  end
772
773  def reorder_diff_files!
774    return if sorted? || merge_request_diff_files.empty?
775
776    diff_files = sort_diffs(merge_request_diff_files)
777
778    diff_files.each_with_index do |diff_file, index|
779      diff_file.relative_order = index
780    end
781
782    transaction do
783      # The `merge_request_diff_files` table doesn't have an `id` column so
784      # we cannot use `Gitlab::Database::BulkUpdate`.
785      MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
786      MergeRequestDiffFile.bulk_insert!(diff_files)
787      update_column(:sorted, true)
788    end
789  end
790
791  def sort_diffs(diffs)
792    Gitlab::Diff::FileCollectionSorter.new(diffs).sort
793  end
794end
795
796MergeRequestDiff.prepend_mod_with('MergeRequestDiff')
797