# frozen_string_literal: true

module Gitlab
  module Diff
    # Wraps a single file's diff together with the repository and refs needed
    # to look up the old/new blobs, parse the diff into lines, and choose a
    # viewer for rendering it.
    class File
      include Gitlab::Utils::StrongMemoize

      attr_reader :diff, :repository, :diff_refs, :fallback_diff_refs, :unique_identifier

      delegate :new_file?, :deleted_file?, :renamed_file?,
        :old_path, :new_path, :a_mode, :b_mode, :mode_changed?,
        :submodule?, :expanded?, :too_large?, :collapsed?, :line_count, :has_binary_notice?, to: :diff, prefix: false

      # Finding a viewer for a diff file happens based only on extension and whether the
      # diff file blobs are binary or text, which means 1 diff file should only be matched by 1 viewer,
      # and the order of these viewers doesn't really matter.
      #
      # However, when the diff file blobs are LFS pointers, we cannot know for sure whether the
      # file being pointed to is binary or text. In this case, we match only on
      # extension, preferring binary viewers over text ones if both exist, since the
      # large files referred to in "Large File Storage" are much more likely to be
      # binary than text.
      RICH_VIEWERS = [
        DiffViewer::Image
      ].sort_by { |v| v.binary? ? 0 : 1 }.freeze

      # diff               - the underlying diff object; most predicates and paths are delegated to it.
      # repository         - repository used for blob lookups, attributes and highlighting.
      # diff_refs          - refs providing base_sha/head_sha (may be nil).
      # fallback_diff_refs - refs used for content SHAs when diff_refs is nil.
      # stats              - optional object answering `additions`/`deletions`.
      # unique_identifier  - opaque value exposed through the reader of the same name.
      def initialize(
        diff,
        repository:,
        diff_refs: nil,
        fallback_diff_refs: nil,
        stats: nil,
        unique_identifier: nil)

        @diff = diff
        @stats = stats
        @repository = repository
        @diff_refs = diff_refs
        @fallback_diff_refs = fallback_diff_refs
        @unique_identifier = unique_identifier
        @unfolded = false

        # Ensure items are collected in the batch
        new_blob_lazy
        old_blob_lazy

        if use_custom_diff?
          # Keep the original raw diff when preprocessing yields nothing.
          custom_diff = Gitlab::Diff::CustomDiff.preprocess_before_diff(diff.new_path, old_blob_lazy, new_blob_lazy)
          diff.diff = custom_diff || diff.diff
        end
      end

      # Memoized state of the :jupyter_clean_diffs feature flag for this repository's project.
      def use_custom_diff?
        strong_memoize(:_custom_diff_enabled) { Feature.enabled?(:jupyter_clean_diffs, repository.project, default_enabled: true) }
      end

      # Builds a Position for the given marker. `position_marker` is treated as
      # a diff line when position_type is :text and as an image point otherwise.
      # Returns nil when there are no diff_refs.
      def position(position_marker, position_type: :text)
        return unless diff_refs

        data = {
          diff_refs: diff_refs,
          position_type: position_type.to_s,
          old_path: old_path,
          new_path: new_path
        }

        if position_type == :text
          data.merge!(text_position_properties(position_marker))
        else
          data.merge!(image_position_properties(position_marker))
        end

        Position.new(data)
      end

      # Line code for the given diff line; nil for meta lines.
      def line_code(line)
        return if line.meta?

        Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
      end

      # First diff line whose line code equals `code`, or nil.
      def line_for_line_code(code)
        diff_lines.find { |line| line_code(line) == code }
      end

      # Diff line matching both old_line and new_line of a text position; nil
      # for non-text positions or when nothing matches.
      def line_for_position(pos)
        return unless pos.position_type == 'text'

        # This method is normally used to find which line the diff was
        # commented on, and in this context, it's normally the raw diff persisted
        # at `note_diff_files`, which is a fraction of the entire diff
        # (it goes from the first line, to the commented line, or
        # one line below). Therefore it's more performant to fetch
        # from bottom to top instead of the other way around.
        diff_lines
          .reverse_each
          .find { |line| line.old_line == pos.old_line && line.new_line == pos.new_line }
      end

      def position_for_line_code(code)
        line = line_for_line_code(code)
        position(line) if line
      end

      def line_code_for_position(pos)
        line = line_for_position(pos)
        line_code(line) if line
      end

      # Returns the raw diff content up to the given line index
      def diff_hunk(diff_line)
        diff_line_index = diff_line.index
        # @@ (match) header is not kept if it's found in the top of the file,
        # therefore we should keep an extra line on this scenario.
        diff_line_index += 1 unless diff_lines.first.match?

        diff_lines.select { |line| line.index <= diff_line_index }.map(&:text).join("\n")
      end

      def old_sha
        diff_refs&.base_sha
      end

      def new_sha
        diff_refs&.head_sha
      end

      # head_sha of diff_refs (or fallback_diff_refs); always nil for deleted files.
      # Memoized with `defined?` so that a nil result is cached too.
      def new_content_sha
        return if deleted_file?
        return @new_content_sha if defined?(@new_content_sha)

        refs = diff_refs || fallback_diff_refs
        @new_content_sha = refs&.head_sha
      end

      # base_sha of diff_refs (or fallback_diff_refs); always nil for new files.
      # Memoized with `defined?` so that a nil result is cached too.
      def old_content_sha
        return if new_file?
        return @old_content_sha if defined?(@old_content_sha)

        refs = diff_refs || fallback_diff_refs
        @old_content_sha = refs&.base_sha
      end

      # NOTE(review): `itself` presumably forces the lazily-fetched blob to
      # resolve - confirm against Blob.lazy.
      def new_blob
        strong_memoize(:new_blob) do
          new_blob_lazy&.itself
        end
      end

      def old_blob
        strong_memoize(:old_blob) do
          old_blob_lazy&.itself
        end
      end

      # Lines of the new blob between the 1-based, inclusive line numbers
      # `from_line` and `to_line`. Returns [] when there is no new blob.
      def new_blob_lines_between(from_line, to_line)
        return [] unless new_blob

        from_index = from_line - 1
        to_index = to_line - 1

        new_blob.load_all_data!
        new_blob.data.lines[from_index..to_index]
      end

      def content_sha
        new_content_sha || old_content_sha
      end

      def blob
        new_blob || old_blob
      end

      # Replaces the highlighted lines and drops the memoized serializer lines,
      # which are derived from them.
      def highlighted_diff_lines=(value)
        clear_memoization(:diff_lines_for_serializer)
        @highlighted_diff_lines = value
      end

      # Array of Gitlab::Diff::Line objects
      def diff_lines
        @diff_lines ||=
          Gitlab::Diff::Parser.new.parse(raw_diff.each_line, diff_file: self).to_a
      end

      # Changes diff_lines according to the given position. That is,
      # it checks whether the position requires blob lines into the diff
      # in order to be presented.
      def unfold_diff_lines(position)
        return unless position

        unfolder = Gitlab::Diff::LinesUnfolder.new(self, position)

        if unfolder.unfold_required?
          @diff_lines = unfolder.unfolded_diff_lines
          @unfolded = true
        end
      end

      def unfolded?
        @unfolded
      end

      def highlight_loaded?
        @highlighted_diff_lines.present?
      end

      def highlighted_diff_lines
        @highlighted_diff_lines ||=
          Gitlab::Diff::Highlight.new(self, repository: self.repository).highlight
      end

      # Array[<Hash>] with right/left keys that contains Gitlab::Diff::Line objects which text is highlighted
      def parallel_diff_lines
        @parallel_diff_lines ||= Gitlab::Diff::ParallelDiff.new(self).parallelize
      end

      def raw_diff
        diff.diff.to_s
      end

      def next_line(index)
        diff_lines[index + 1]
      end

      # Guarded so that index 0 yields nil instead of wrapping to the last line.
      def prev_line(index)
        diff_lines[index - 1] if index > 0
      end

      def paths
        [old_path, new_path].compact
      end

      def file_path
        new_path.presence || old_path
      end

      def file_hash
        Digest::SHA1.hexdigest(file_path)
      end

      # Uses the provided stats when available, otherwise counts parsed diff lines.
      def added_lines
        strong_memoize(:added_lines) do
          @stats&.additions || diff_lines.count(&:added?)
        end
      end

      # Uses the provided stats when available, otherwise counts parsed diff lines.
      def removed_lines
        strong_memoize(:removed_lines) do
          @stats&.deletions || diff_lines.count(&:removed?)
        end
      end

      def file_identifier
        "#{file_path}-#{new_file?}-#{deleted_file?}-#{renamed_file?}"
      end

      def file_identifier_hash
        Digest::SHA1.hexdigest(file_identifier)
      end

      def diffable?
        diffable_by_attribute? && !text_with_binary_notice?
      end

      def binary_in_repo?
        has_binary_notice? || try_blobs(:binary_in_repo?)
      end

      def text_in_repo?
        !binary_in_repo?
      end

      def external_storage_error?
        try_blobs(:external_storage_error?)
      end

      def stored_externally?
        try_blobs(:stored_externally?)
      end

      def external_storage
        try_blobs(:external_storage)
      end

      # With diff_refs, compares the old and new blob ids. Without them, falls
      # back to checking for diff lines on a text file that is neither new,
      # deleted nor renamed.
      def content_changed?
        return blobs_changed? if diff_refs
        return false if new_file? || deleted_file? || renamed_file?

        text? && diff_lines.any?
      end

      # True when both blobs exist and exactly one of them is binary.
      def different_type?
        old_blob && new_blob && old_blob.binary? != new_blob.binary?
      end

      # rubocop: disable CodeReuse/ActiveRecord
      def size
        valid_blobs.sum(&:size)
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # rubocop: disable CodeReuse/ActiveRecord
      def raw_size
        valid_blobs.sum(&:raw_size)
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # True when every existing blob is empty (and therefore also when there are no blobs).
      def empty?
        valid_blobs.all?(&:empty?)
      end

      def binary?
        strong_memoize(:is_binary) do
          try_blobs(:binary?)
        end
      end

      def text?
        strong_memoize(:is_text) do
          !binary? && !different_type?
        end
      end

      def viewer
        rich_viewer || simple_viewer
      end

      def simple_viewer
        @simple_viewer ||= simple_viewer_class.new(self)
      end

      # Memoized with `defined?` because the result may legitimately be nil.
      def rich_viewer
        return @rich_viewer if defined?(@rich_viewer)

        @rich_viewer = rich_viewer_class&.new(self)
      end

      def alternate_viewer
        alternate_viewer_class&.new(self)
      end

      def rendered_as_text?(ignore_errors: true)
        simple_viewer.is_a?(DiffViewer::Text) && (ignore_errors || simple_viewer.render_error.nil?)
      end

      # This adds the bottom match line to the array if needed. It contains
      # the data to load more context lines.
      #
      # Returns nil when there are no highlighted lines or no blob.
      def diff_lines_for_serializer
        strong_memoize(:diff_lines_for_serializer) do
          lines = highlighted_diff_lines

          next if lines.empty?
          next if blob.nil?

          last_line = lines.last

          if last_line.new_pos < total_blob_lines(blob) && !deleted_file?
            match_line = Gitlab::Diff::Line.new("", 'match', nil, last_line.old_pos, last_line.new_pos)
            lines.push(match_line)
          end

          lines
        end
      end

      # True for binary files, when there are no serializer lines, or when none
      # of the serializer lines is a 'match' line.
      def fully_expanded?
        return true if binary?

        lines = diff_lines_for_serializer

        return true if lines.nil?

        lines.none? { |line| line.type.to_s == 'match' }
      end

      private

      # Value of the 'diff' repository attribute for this path, defaulting to
      # true when the attribute is not set.
      def diffable_by_attribute?
        repository.attributes(file_path).fetch('diff') { true }
      end

      # NOTE: Files with unsupported encodings (e.g. UTF-16) are treated as binary by git,
      # but they are recognized as text files during encoding detection. These files have
      # `Binary files a/filename and b/filename differ' as their raw diff content which
      # cannot be used. We need to handle this special case and avoid displaying incorrect diff.
      def text_with_binary_notice?
        text? && has_binary_notice?
      end

      def fetch_blob(sha, path)
        return unless sha

        Blob.lazy(repository, sha, path)
      end

      # Number of lines in the blob, not counting a blank last line.
      # The result is memoized in @total_lines, so the value computed on the
      # first call is returned for every later call regardless of the argument.
      def total_blob_lines(blob)
        @total_lines ||= begin
          line_count = blob.lines.size
          line_count -= 1 if line_count > 0 && blob.lines.last.blank?
          line_count
        end
      end

      def modified_file?
        new_file? || deleted_file? || content_changed?
      end

      # We can't use Object#try because Blob doesn't inherit from Object, but
      # from BasicObject (via SimpleDelegator).
      def try_blobs(meth)
        old_blob&.public_send(meth) || new_blob&.public_send(meth)
      end

      def valid_blobs
        [old_blob, new_blob].compact
      end

      def text_position_properties(line)
        { old_line: line.old_line, new_line: line.new_line }
      end

      def image_position_properties(image_point)
        image_point.to_h
      end

      def blobs_changed?
        old_blob && new_blob && old_blob.id != new_blob.id
      end

      def new_blob_lazy
        fetch_blob(new_content_sha, file_path)
      end

      def old_blob_lazy
        fetch_blob(old_content_sha, old_path)
      end

      # Picks the simple viewer class; the first matching condition wins.
      def simple_viewer_class
        return DiffViewer::Collapsed if collapsed?
        return DiffViewer::NotDiffable unless diffable?
        return DiffViewer::Text if modified_file? && text?
        return DiffViewer::NoPreview if content_changed?
        return DiffViewer::Added if new_file?
        return DiffViewer::Deleted if deleted_file?
        return DiffViewer::Renamed if renamed_file?
        return DiffViewer::ModeChanged if mode_changed?

        DiffViewer::NoPreview
      end

      def rich_viewer_class
        viewer_class_from(RICH_VIEWERS)
      end

      # Viewer class from `classes` that can render this file, or nil when the
      # file is collapsed, not diffable, or not modified.
      def viewer_class_from(classes)
        return if collapsed?
        return unless diffable?
        return unless modified_file?

        find_renderable_viewer_class(classes)
      end

      # Only offered when the current viewer is exactly DiffViewer::Renamed.
      def alternate_viewer_class
        return unless viewer.instance_of?(DiffViewer::Renamed)

        find_renderable_viewer_class(RICH_VIEWERS) || (DiffViewer::Text if text?)
      end

      # First class in `classes` whose `can_render?` accepts this file. The
      # binary check is skipped for externally stored blobs.
      def find_renderable_viewer_class(classes)
        return if different_type? || external_storage_error?

        verify_binary = !stored_externally?

        classes.find { |viewer_class| viewer_class.can_render?(self, verify_binary: verify_binary) }
      end
    end
  end
end