1# frozen_string_literal: true
2
3module Gitlab
4  module Diff
5    class File
6      include Gitlab::Utils::StrongMemoize
7
      # Values handed to #initialize, exposed read-only.
      attr_reader :diff, :repository, :diff_refs, :fallback_diff_refs, :unique_identifier

      # These predicates and attributes are forwarded (without a prefix) to the
      # wrapped `diff` object.
      delegate :new_file?, :deleted_file?, :renamed_file?,
        :old_path, :new_path, :a_mode, :b_mode, :mode_changed?,
        :submodule?, :expanded?, :too_large?, :collapsed?, :line_count, :has_binary_notice?, to: :diff, prefix: false
13
      # Finding a viewer for a diff file happens based only on extension and whether the
      # diff file blobs are binary or text, which means 1 diff file should only be matched by 1 viewer,
      # and the order of these viewers doesn't really matter.
      #
      # However, when the diff file blobs are LFS pointers, we cannot know for sure whether the
      # file being pointed to is binary or text. In this case, we match only on
      # extension, preferring binary viewers over text ones if both exist, since the
      # large files referred to in "Large File Storage" are much more likely to be
      # binary than text.
      #
      # The list is therefore sorted so that binary viewers (sort key 0) come
      # before non-binary ones (sort key 1), and frozen.
      RICH_VIEWERS = [
        DiffViewer::Image
      ].sort_by { |v| v.binary? ? 0 : 1 }.freeze
26
27      def initialize(
28        diff,
29        repository:,
30        diff_refs: nil,
31        fallback_diff_refs: nil,
32        stats: nil,
33        unique_identifier: nil)
34
35        @diff = diff
36        @stats = stats
37        @repository = repository
38        @diff_refs = diff_refs
39        @fallback_diff_refs = fallback_diff_refs
40        @unique_identifier = unique_identifier
41        @unfolded = false
42
43        # Ensure items are collected in the the batch
44        new_blob_lazy
45        old_blob_lazy
46
47        diff.diff = Gitlab::Diff::CustomDiff.preprocess_before_diff(diff.new_path, old_blob_lazy, new_blob_lazy) || diff.diff if use_custom_diff?
48      end
49
50      def use_custom_diff?
51        strong_memoize(:_custom_diff_enabled) { Feature.enabled?(:jupyter_clean_diffs, repository.project, default_enabled: true) }
52      end
53
54      def position(position_marker, position_type: :text)
55        return unless diff_refs
56
57        data = {
58          diff_refs: diff_refs,
59          position_type: position_type.to_s,
60          old_path: old_path,
61          new_path: new_path
62        }
63
64        if position_type == :text
65          data.merge!(text_position_properties(position_marker))
66        else
67          data.merge!(image_position_properties(position_marker))
68        end
69
70        Position.new(data)
71      end
72
73      def line_code(line)
74        return if line.meta?
75
76        Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
77      end
78
79      def line_for_line_code(code)
80        diff_lines.find { |line| line_code(line) == code }
81      end
82
83      def line_for_position(pos)
84        return unless pos.position_type == 'text'
85
86        # This method is normally used to find which line the diff was
87        # commented on, and in this context, it's normally the raw diff persisted
88        # at `note_diff_files`, which is a fraction of the entire diff
89        # (it goes from the first line, to the commented line, or
90        # one line below). Therefore it's more performant to fetch
91        # from bottom to top instead of the other way around.
92        diff_lines
93          .reverse_each
94          .find { |line| line.old_line == pos.old_line && line.new_line == pos.new_line }
95      end
96
97      def position_for_line_code(code)
98        line = line_for_line_code(code)
99        position(line) if line
100      end
101
102      def line_code_for_position(pos)
103        line = line_for_position(pos)
104        line_code(line) if line
105      end
106
107      # Returns the raw diff content up to the given line index
108      def diff_hunk(diff_line)
109        diff_line_index = diff_line.index
110        # @@ (match) header is not kept if it's found in the top of the file,
111        # therefore we should keep an extra line on this scenario.
112        diff_line_index += 1 unless diff_lines.first.match?
113
114        diff_lines.select { |line| line.index <= diff_line_index }.map(&:text).join("\n")
115      end
116
117      def old_sha
118        diff_refs&.base_sha
119      end
120
121      def new_sha
122        diff_refs&.head_sha
123      end
124
125      def new_content_sha
126        return if deleted_file?
127        return @new_content_sha if defined?(@new_content_sha)
128
129        refs = diff_refs || fallback_diff_refs
130        @new_content_sha = refs&.head_sha
131      end
132
133      def old_content_sha
134        return if new_file?
135        return @old_content_sha if defined?(@old_content_sha)
136
137        refs = diff_refs || fallback_diff_refs
138        @old_content_sha = refs&.base_sha
139      end
140
141      def new_blob
142        strong_memoize(:new_blob) do
143          new_blob_lazy&.itself
144        end
145      end
146
147      def old_blob
148        strong_memoize(:old_blob) do
149          old_blob_lazy&.itself
150        end
151      end
152
153      def new_blob_lines_between(from_line, to_line)
154        return [] unless new_blob
155
156        from_index = from_line - 1
157        to_index = to_line - 1
158
159        new_blob.load_all_data!
160        new_blob.data.lines[from_index..to_index]
161      end
162
163      def content_sha
164        new_content_sha || old_content_sha
165      end
166
167      def blob
168        new_blob || old_blob
169      end
170
171      def highlighted_diff_lines=(value)
172        clear_memoization(:diff_lines_for_serializer)
173        @highlighted_diff_lines = value
174      end
175
176      # Array of Gitlab::Diff::Line objects
177      def diff_lines
178        @diff_lines ||=
179          Gitlab::Diff::Parser.new.parse(raw_diff.each_line, diff_file: self).to_a
180      end
181
182      # Changes diff_lines according to the given position. That is,
183      # it checks whether the position requires blob lines into the diff
184      # in order to be presented.
185      def unfold_diff_lines(position)
186        return unless position
187
188        unfolder = Gitlab::Diff::LinesUnfolder.new(self, position)
189
190        if unfolder.unfold_required?
191          @diff_lines = unfolder.unfolded_diff_lines
192          @unfolded = true
193        end
194      end
195
      # Whether #unfold_diff_lines has replaced the diff lines with unfolded
      # ones. Starts out false.
      def unfolded?
        @unfolded
      end
199
200      def highlight_loaded?
201        @highlighted_diff_lines.present?
202      end
203
204      def highlighted_diff_lines
205        @highlighted_diff_lines ||=
206          Gitlab::Diff::Highlight.new(self, repository: self.repository).highlight
207      end
208
209      # Array[<Hash>] with right/left keys that contains Gitlab::Diff::Line objects which text is highlighted
210      def parallel_diff_lines
211        @parallel_diff_lines ||= Gitlab::Diff::ParallelDiff.new(self).parallelize
212      end
213
214      def raw_diff
215        diff.diff.to_s
216      end
217
218      def next_line(index)
219        diff_lines[index + 1]
220      end
221
222      def prev_line(index)
223        diff_lines[index - 1] if index > 0
224      end
225
226      def paths
227        [old_path, new_path].compact
228      end
229
230      def file_path
231        new_path.presence || old_path
232      end
233
234      def file_hash
235        Digest::SHA1.hexdigest(file_path)
236      end
237
238      def added_lines
239        strong_memoize(:added_lines) do
240          @stats&.additions || diff_lines.count(&:added?)
241        end
242      end
243
244      def removed_lines
245        strong_memoize(:removed_lines) do
246          @stats&.deletions || diff_lines.count(&:removed?)
247        end
248      end
249
250      def file_identifier
251        "#{file_path}-#{new_file?}-#{deleted_file?}-#{renamed_file?}"
252      end
253
254      def file_identifier_hash
255        Digest::SHA1.hexdigest(file_identifier)
256      end
257
258      def diffable?
259        diffable_by_attribute? && !text_with_binary_notice?
260      end
261
262      def binary_in_repo?
263        has_binary_notice? || try_blobs(:binary_in_repo?)
264      end
265
266      def text_in_repo?
267        !binary_in_repo?
268      end
269
      # Asks the old blob, then the new blob, whether it has an external
      # storage error; returns the first truthy answer (see #try_blobs).
      def external_storage_error?
        try_blobs(:external_storage_error?)
      end
273
      # Asks the old blob, then the new blob, whether it is stored externally;
      # returns the first truthy answer (see #try_blobs).
      def stored_externally?
        try_blobs(:stored_externally?)
      end
277
      # External storage reported by the old blob, or else by the new blob
      # (see #try_blobs).
      def external_storage
        try_blobs(:external_storage)
      end
281
282      def content_changed?
283        return blobs_changed? if diff_refs
284        return false if new_file? || deleted_file? || renamed_file?
285
286        text? && diff_lines.any?
287      end
288
289      def different_type?
290        old_blob && new_blob && old_blob.binary? != new_blob.binary?
291      end
292
293      # rubocop: disable CodeReuse/ActiveRecord
294      def size
295        valid_blobs.map(&:size).sum
296      end
297      # rubocop: enable CodeReuse/ActiveRecord
298
299      # rubocop: disable CodeReuse/ActiveRecord
300      def raw_size
301        valid_blobs.map(&:raw_size).sum
302      end
303      # rubocop: enable CodeReuse/ActiveRecord
304
305      def empty?
306        valid_blobs.map(&:empty?).all?
307      end
308
309      def binary?
310        strong_memoize(:is_binary) do
311          try_blobs(:binary?)
312        end
313      end
314
315      def text?
316        strong_memoize(:is_text) do
317          !binary? && !different_type?
318        end
319      end
320
321      def viewer
322        rich_viewer || simple_viewer
323      end
324
325      def simple_viewer
326        @simple_viewer ||= simple_viewer_class.new(self)
327      end
328
329      def rich_viewer
330        return @rich_viewer if defined?(@rich_viewer)
331
332        @rich_viewer = rich_viewer_class&.new(self)
333      end
334
335      def alternate_viewer
336        alternate_viewer_class&.new(self)
337      end
338
339      def rendered_as_text?(ignore_errors: true)
340        simple_viewer.is_a?(DiffViewer::Text) && (ignore_errors || simple_viewer.render_error.nil?)
341      end
342
343      # This adds the bottom match line to the array if needed. It contains
344      # the data to load more context lines.
345      def diff_lines_for_serializer
346        strong_memoize(:diff_lines_for_serializer) do
347          lines = highlighted_diff_lines
348
349          next if lines.empty?
350          next if blob.nil?
351
352          last_line = lines.last
353
354          if last_line.new_pos < total_blob_lines(blob) && !deleted_file?
355            match_line = Gitlab::Diff::Line.new("", 'match', nil, last_line.old_pos, last_line.new_pos)
356            lines.push(match_line)
357          end
358
359          lines
360        end
361      end
362
363      def fully_expanded?
364        return true if binary?
365
366        lines = diff_lines_for_serializer
367
368        return true if lines.nil?
369
370        lines.none? { |line| line.type.to_s == 'match' }
371      end
372
373      private
374
375      def diffable_by_attribute?
376        repository.attributes(file_path).fetch('diff') { true }
377      end
378
      # NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as
      # binary by git, but they are recognized as text files during encoding
      # detection. These files have
      # `Binary files a/filename and b/filename differ` as their raw diff
      # content, which cannot be used. We need to handle this special case and
      # avoid displaying an incorrect diff.
      def text_with_binary_notice?
        text? && has_binary_notice?
      end
383
384      def fetch_blob(sha, path)
385        return unless sha
386
387        Blob.lazy(repository, sha, path)
388      end
389
390      def total_blob_lines(blob)
391        @total_lines ||= begin
392          line_count = blob.lines.size
393          line_count -= 1 if line_count > 0 && blob.lines.last.blank?
394          line_count
395        end
396      end
397
398      def modified_file?
399        new_file? || deleted_file? || content_changed?
400      end
401
402      # We can't use Object#try because Blob doesn't inherit from Object, but
403      # from BasicObject (via SimpleDelegator).
404      def try_blobs(meth)
405        old_blob&.public_send(meth) || new_blob&.public_send(meth)
406      end
407
      # The old and new blob, in that order, leaving out whichever is nil.
      def valid_blobs
        [old_blob, new_blob].compact
      end
411
412      def text_position_properties(line)
413        { old_line: line.old_line, new_line: line.new_line }
414      end
415
      # Position attributes for a non-text marker: whatever its `to_h`
      # returns.
      def image_position_properties(image_point)
        image_point.to_h
      end
419
420      def blobs_changed?
421        old_blob && new_blob && old_blob.id != new_blob.id
422      end
423
424      def new_blob_lazy
425        fetch_blob(new_content_sha, file_path)
426      end
427
428      def old_blob_lazy
429        fetch_blob(old_content_sha, old_path)
430      end
431
432      def simple_viewer_class
433        return DiffViewer::Collapsed if collapsed?
434        return DiffViewer::NotDiffable unless diffable?
435        return DiffViewer::Text if modified_file? && text?
436        return DiffViewer::NoPreview if content_changed?
437        return DiffViewer::Added if new_file?
438        return DiffViewer::Deleted if deleted_file?
439        return DiffViewer::Renamed if renamed_file?
440        return DiffViewer::ModeChanged if mode_changed?
441
442        DiffViewer::NoPreview
443      end
444
      # Rich viewer class for this file, chosen from RICH_VIEWERS; nil when
      # none applies.
      def rich_viewer_class
        viewer_class_from(RICH_VIEWERS)
      end
448
449      def viewer_class_from(classes)
450        return if collapsed?
451        return unless diffable?
452        return unless modified_file?
453
454        find_renderable_viewer_class(classes)
455      end
456
457      def alternate_viewer_class
458        return unless viewer.instance_of?(DiffViewer::Renamed)
459
460        find_renderable_viewer_class(RICH_VIEWERS) || (DiffViewer::Text if text?)
461      end
462
463      def find_renderable_viewer_class(classes)
464        return if different_type? || external_storage_error?
465
466        verify_binary = !stored_externally?
467
468        classes.find { |viewer_class| viewer_class.can_render?(self, verify_binary: verify_binary) }
469      end
470    end
471  end
472end
473