# frozen_string_literal: true

module Gitlab
  module BitbucketServerImport
    # Imports a Bitbucket Server (Stash) repository into a GitLab project:
    # the git repository itself, pull requests (as merge requests), their
    # inline/standalone comments, merge events, and LFS objects.
    #
    # Non-fatal failures are accumulated in +errors+ and persisted on the
    # project's import state; repository-level failures re-raise.
    class Importer
      attr_reader :recover_missing_commits
      attr_reader :project, :project_key, :repository_slug, :client, :errors, :users, :already_imported_cache_key
      attr_accessor :logger

      # Page size used when fetching pull requests from the Bitbucket API.
      BATCH_SIZE = 100
      # The base cache key to use for tracking already imported objects.
      ALREADY_IMPORTED_CACHE_KEY =
        'bitbucket_server-importer/already-imported/%{project}/%{collection}'

      # A branch temporarily re-created on the remote Bitbucket server so that
      # otherwise-unreachable pull request SHAs become fetchable
      # (see #restore_branches / #delete_temp_branches).
      TempBranch = Struct.new(:name, :sha)

      def self.imports_repository?
        true
      end

      def self.refmap
        # We omit :heads and :tags since these are fetched in the import_repository
        ['+refs/pull-requests/*/to:refs/merge-requests/*/head']
      end

      # Unlike GitHub, you can't grab the commit SHAs for pull requests that
      # have been closed but not merged even though Bitbucket has these
      # commits internally. We can recover these pull requests by creating a
      # branch with the Bitbucket REST API, but by default we turn this
      # behavior off.
      #
      # project - the Project being imported into; its import_data must carry
      #           'project_key', 'repo_slug' and the Bitbucket credentials.
      # recover_missing_commits - when true, re-create branches on the remote
      #           server to recover SHAs of closed-but-unmerged pull requests.
      def initialize(project, recover_missing_commits: false)
        @project = project
        @recover_missing_commits = recover_missing_commits
        @project_key = project.import_data.data['project_key']
        @repository_slug = project.import_data.data['repo_slug']
        @client = BitbucketServer::Client.new(project.import_data.credentials)
        @formatter = Gitlab::ImportFormatter.new
        @errors = []
        @users = {}
        @temp_branches = []
        @logger = Gitlab::Import::Logger.build
        @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
          { project: project.id, collection: collection_method }
      end

      # Collection name used in the already-imported cache key.
      def collection_method
        :pull_requests
      end

      # Runs the full import pipeline. Returns true on success; raises if the
      # repository import itself fails. Per-object failures are collected in
      # +errors+ and persisted by #handle_errors instead of aborting.
      def execute
        import_repository
        import_pull_requests
        download_lfs_objects
        delete_temp_branches
        handle_errors
        metrics.track_finished_import

        log_info(stage: "complete")

        # Shorten the TTL of the dedup cache now that the import is done.
        Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT)
        true
      end

      private

      # Persists any accumulated non-fatal errors on the import state so the
      # user can see which objects could not be imported.
      def handle_errors
        return unless errors.any?

        project.import_state.update_column(:last_error, {
          message: 'The remote data could not be fully imported.',
          errors: errors
        }.to_json)
      end

      # Resolves a GitLab user id by confirmed email or by username.
      # Results (including nil lookups) are memoized in +users+ so each
      # distinct value is only queried once per import run.
      def find_user_id(by:, value:)
        return unless value

        return users[value] if users.key?(value)

        user = if by == :email
                 User.find_by_any_email(value, confirmed: true)
               else
                 User.find_by_username(value)
               end

        users[value] = user&.id

        user&.id
      end

      def repo
        @repo ||= client.repo(project_key, repository_slug)
      end

      # Truthy when the SHA is already present in the local repository.
      # NOTE: returns the Commit object (or nil), not a strict boolean.
      def sha_exists?(sha)
        project.repository.commit(sha)
      end

      def temp_branch_name(pull_request, suffix)
        "gitlab/import/pull-request/#{pull_request.iid}/#{suffix}"
      end

      # This method restores required SHAs that GitLab needs to create diffs
      # into branch names as the following:
      #
      # gitlab/import/pull-request/N/{to,from}
      def restore_branches(pull_requests)
        shas_to_restore = []

        pull_requests.each do |pull_request|
          shas_to_restore << TempBranch.new(temp_branch_name(pull_request, :from),
                                            pull_request.source_branch_sha)
          shas_to_restore << TempBranch.new(temp_branch_name(pull_request, :to),
                                            pull_request.target_branch_sha)
        end

        # Create the branches on the Bitbucket Server first
        created_branches = restore_branch_shas(shas_to_restore)

        @temp_branches += created_branches
        # Now sync the repository so we get the new branches
        import_repository unless created_branches.empty?
      end

      # Creates the given temp branches on the remote server, skipping SHAs we
      # already have locally. Returns the list of branches actually created;
      # connection errors are logged and the branch is skipped.
      def restore_branch_shas(shas_to_restore)
        shas_to_restore.each_with_object([]) do |temp_branch, branches_created|
          branch_name = temp_branch.name
          sha = temp_branch.sha

          next if sha_exists?(sha)

          begin
            client.create_branch(project_key, repository_slug, branch_name, sha)
            branches_created << temp_branch
          rescue BitbucketServer::Connection::ConnectionError => e
            log_warn(message: "Unable to recreate branch", sha: sha, error: e.message)
          end
        end
      end

      # Imports the git repository and fetches pull-request refs per
      # +refmap+. Re-raises git command failures after expiring the content
      # cache so a retry does not see stale +exists?+ state.
      def import_repository
        log_info(stage: 'import_repository', message: 'starting import')

        project.repository.import_repository(project.import_url)
        project.repository.fetch_as_mirror(project.import_url, refmap: self.class.refmap)

        log_info(stage: 'import_repository', message: 'finished import')
      rescue ::Gitlab::Git::CommandError => e
        Gitlab::ErrorTracking.log_exception(
          e,
          stage: 'import_repository', message: 'failed import', error: e.message
        )

        # Expire cache to prevent scenarios such as:
        # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
        # 2. Retried import, repo is broken or not imported but +exists?+ still returns true
        project.repository.expire_content_cache if project.repository_exists?

        raise
      end

      # Downloads LFS objects; a failed service result is recorded as a
      # non-fatal error rather than raised.
      def download_lfs_objects
        result = Projects::LfsPointers::LfsImportService.new(project).execute

        if result[:status] == :error
          errors << { type: :lfs_objects, errors: "The Lfs import process failed. #{result[:message]}" }
        end
      end

      # Bitbucket Server keeps track of references for open pull requests in
      # refs/heads/pull-requests, but closed and merged requests get moved
      # into hidden internal refs under stash-refs/pull-requests. Unless the
      # SHAs involved are at the tip of a branch or tag, there is no way to
      # retrieve those commits from the server.
      #
      # To avoid losing history, we use the Bitbucket API to re-create the branch
      # on the remote server. Then we have to issue a `git fetch` to download these
      # branches.
      def import_pull_requests
        page = 0

        log_info(stage: 'import_pull_requests', message: "starting")

        loop do
          log_debug(stage: 'import_pull_requests', message: "importing page #{page} and batch-size #{BATCH_SIZE} from #{page * BATCH_SIZE} to #{(page + 1) * BATCH_SIZE}")

          pull_requests = client.pull_requests(project_key, repository_slug, page_offset: page, limit: BATCH_SIZE).to_a

          break if pull_requests.empty?

          # Creating branches on the server and fetching the newly-created branches
          # may take a number of network round-trips. This used to be done in batches to
          # avoid doing a git fetch for every new branch, as the whole process is now
          # batched, we do not need to separately do this in batches.
          restore_branches(pull_requests) if recover_missing_commits

          pull_requests.each do |pull_request|
            if already_imported?(pull_request)
              log_info(stage: 'import_pull_requests', message: 'already imported', iid: pull_request.iid)
            else
              import_bitbucket_pull_request(pull_request)
            end
          rescue StandardError => e
            # A failure on one pull request is recorded but does not stop the
            # import of the remaining ones.
            Gitlab::ErrorTracking.log_exception(
              e,
              stage: 'import_pull_requests', iid: pull_request.iid, error: e.message
            )

            backtrace = Gitlab::BacktraceCleaner.clean_backtrace(e.backtrace)
            errors << { type: :pull_request, iid: pull_request.iid, errors: e.message, backtrace: backtrace.join("\n"), raw_response: pull_request.raw }
          end

          log_debug(stage: 'import_pull_requests', message: "finished page #{page} and batch-size #{BATCH_SIZE}")
          page += 1
        end
      end

      # Returns true if the given object has already been imported, false
      # otherwise.
      #
      # pull_request - The pull request to check.
      def already_imported?(pull_request)
        Gitlab::Cache::Import::Caching.set_includes?(already_imported_cache_key, pull_request.iid)
      end

      # Marks the given object as "already imported".
      def mark_as_imported(pull_request)
        Gitlab::Cache::Import::Caching.set_add(already_imported_cache_key, pull_request.iid)
      end

      # Removes the temporary branches created by #restore_branches, both on
      # the remote server and locally. Connection errors are recorded as
      # non-fatal import errors.
      def delete_temp_branches
        @temp_branches.each do |branch|
          client.delete_branch(project_key, repository_slug, branch.name, branch.sha)
          project.repository.delete_branch(branch.name)
        rescue BitbucketServer::Connection::ConnectionError => e
          Gitlab::ErrorTracking.log_exception(
            e,
            stage: 'delete_temp_branches', branch: branch.name, error: e.message
          )

          @errors << { type: :delete_temp_branches, branch_name: branch.name, errors: e.message }
        end
      end

      # Creates a merge request mirroring the given Bitbucket pull request,
      # then imports its comments and marks it as imported. When the author
      # cannot be mapped to a GitLab user, an author line is prepended to the
      # description instead (see #author_line / #author_id).
      def import_bitbucket_pull_request(pull_request)
        log_info(stage: 'import_bitbucket_pull_requests', message: 'starting', iid: pull_request.iid)

        description = ''
        description += author_line(pull_request)
        description += pull_request.description if pull_request.description

        attributes = {
          iid: pull_request.iid,
          title: pull_request.title,
          description: description,
          source_project_id: project.id,
          source_branch: Gitlab::Git.ref_name(pull_request.source_branch_name),
          source_branch_sha: pull_request.source_branch_sha,
          target_project_id: project.id,
          target_branch: Gitlab::Git.ref_name(pull_request.target_branch_name),
          target_branch_sha: pull_request.target_branch_sha,
          state_id: MergeRequest.available_states[pull_request.state],
          author_id: author_id(pull_request),
          created_at: pull_request.created_at,
          updated_at: pull_request.updated_at
        }

        creator = Gitlab::Import::MergeRequestCreator.new(project)
        merge_request = creator.execute(attributes)

        # Comments are only imported when the merge request itself was saved.
        if merge_request.persisted?
          import_pull_request_comments(pull_request, merge_request)

          metrics.merge_requests_counter.increment
        end

        log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
        mark_as_imported(pull_request)
      end

      # Imports the pull request's activity stream: the merge event (if any),
      # inline (diff) comments with their replies, and standalone comments.
      def import_pull_request_comments(pull_request, merge_request)
        log_info(stage: 'import_pull_request_comments', message: 'starting', iid: merge_request.iid)

        comments, other_activities = client.activities(project_key, repository_slug, pull_request.iid).partition(&:comment?)

        merge_event = other_activities.find(&:merge_event?)
        import_merge_event(merge_request, merge_event) if merge_event

        inline_comments, pr_comments = comments.partition(&:inline_comment?)

        import_inline_comments(inline_comments.map(&:comment), merge_request)
        import_standalone_pr_comments(pr_comments.map(&:comment), merge_request)

        log_info(stage: 'import_pull_request_comments', message: 'finished', iid: merge_request.iid,
                 merge_event_found: merge_event.present?,
                 inline_comments_count: inline_comments.count,
                 standalone_pr_comments: pr_comments.count)
      end

      # rubocop: disable CodeReuse/ActiveRecord
      # Records the merge commit SHA on the merge request and the merger /
      # merge time on its metrics row. Falls back to the project creator when
      # the committer email cannot be mapped to a GitLab user.
      def import_merge_event(merge_request, merge_event)
        log_info(stage: 'import_merge_event', message: 'starting', iid: merge_request.iid)

        committer = merge_event.committer_email

        user_id = find_user_id(by: :email, value: committer) || project.creator_id
        timestamp = merge_event.merge_timestamp
        merge_request.update({ merge_commit_sha: merge_event.merge_commit })
        metric = MergeRequest::Metrics.find_or_initialize_by(merge_request: merge_request)
        metric.update(merged_by_id: user_id, merged_at: timestamp)

        log_info(stage: 'import_merge_event', message: 'finished', iid: merge_request.iid)
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Creates a DiffNote for each inline comment and threads its replies
      # under the same discussion. Replies are skipped when the parent note
      # could not be persisted.
      def import_inline_comments(inline_comments, merge_request)
        log_info(stage: 'import_inline_comments', message: 'starting', iid: merge_request.iid)

        inline_comments.each do |comment|
          position = build_position(merge_request, comment)
          parent = create_diff_note(merge_request, comment, position)

          next unless parent&.persisted?

          discussion_id = parent.discussion_id

          comment.comments.each do |reply|
            create_diff_note(merge_request, reply, position, discussion_id)
          end
        end

        log_info(stage: 'import_inline_comments', message: 'finished', iid: merge_request.iid)
      end

      # Builds and saves a DiffNote at the given position; returns the note,
      # a fallback regular note when the DiffNote is invalid, or nil on error.
      def create_diff_note(merge_request, comment, position, discussion_id = nil)
        attributes = pull_request_comment_attributes(comment)
        attributes.merge!(position: position, type: 'DiffNote')
        attributes[:discussion_id] = discussion_id if discussion_id

        note = merge_request.notes.build(attributes)

        if note.valid?
          note.save
          return note
        end

        log_info(stage: 'create_diff_note', message: 'creating fallback DiffNote', iid: merge_request.iid)

        # Bitbucket Server supports the ability to comment on any line, not just the
        # line in the diff. If we can't add the note as a DiffNote, fallback to creating
        # a regular note.
        create_fallback_diff_note(merge_request, comment, position)
      rescue StandardError => e
        Gitlab::ErrorTracking.log_exception(
          e,
          stage: 'create_diff_note', comment_id: comment.id, error: e.message
        )

        errors << { type: :pull_request, id: comment.id, errors: e.message }
        nil
      end

      # Creates a regular note that embeds the file/line context a DiffNote
      # would have carried, for comments that cannot be placed on the diff.
      def create_fallback_diff_note(merge_request, comment, position)
        attributes = pull_request_comment_attributes(comment)
        note = "*Comment on"

        note += " #{position.old_path}:#{position.old_line} -->" if position.old_line
        note += " #{position.new_path}:#{position.new_line}" if position.new_line
        note += "*\n\n#{comment.note}"

        attributes[:note] = note
        merge_request.notes.create!(attributes)
      end

      # Builds the diff position for an inline comment from the merge
      # request's diff refs and the comment's file/line information.
      def build_position(merge_request, pr_comment)
        params = {
          diff_refs: merge_request.diff_refs,
          old_path: pr_comment.file_path,
          new_path: pr_comment.file_path,
          old_line: pr_comment.old_pos,
          new_line: pr_comment.new_pos
        }

        Gitlab::Diff::Position.new(params)
      end

      # Creates regular notes for standalone (non-inline) comments and their
      # replies. Failures are recorded per comment and do not abort the rest.
      def import_standalone_pr_comments(pr_comments, merge_request)
        pr_comments.each do |comment|
          merge_request.notes.create!(pull_request_comment_attributes(comment))

          comment.comments.each do |replies|
            merge_request.notes.create!(pull_request_comment_attributes(replies))
          end
        rescue StandardError => e
          Gitlab::ErrorTracking.log_exception(
            e,
            stage: 'import_standalone_pr_comments', merge_request_id: merge_request.id, comment_id: comment.id, error: e.message
          )

          errors << { type: :pull_request, comment_id: comment.id, errors: e.message }
        end
      end

      # Builds Note attributes for a Bitbucket comment. When the author can't
      # be mapped to a GitLab user, the note is attributed to the project
      # creator and prefixed with the original author's identity. Replies are
      # prefixed with a quoted excerpt of the parent comment for context.
      def pull_request_comment_attributes(comment)
        author = uid(comment)
        note = ''

        unless author
          author = project.creator_id
          note = "*By #{comment.author_username} (#{comment.author_email})*\n\n"
        end

        note +=
          # Provide some context for replying
          if comment.parent_comment
            "> #{comment.parent_comment.note.truncate(80)}\n\n#{comment.note}"
          else
            comment.note
          end

        {
          project: project,
          note: note,
          author_id: author,
          created_at: comment.created_at,
          updated_at: comment.updated_at
        }
      end

      def log_debug(details)
        logger.debug(log_base_data.merge(details))
      end

      def log_info(details)
        logger.info(log_base_data.merge(details))
      end

      def log_warn(details)
        logger.warn(log_base_data.merge(details))
      end

      # Fields included in every structured log line emitted by this class.
      def log_base_data
        {
          class: self.class.name,
          project_id: project.id,
          project_path: project.full_path
        }
      end

      def metrics
        @metrics ||= Gitlab::Import::Metrics.new(:bitbucket_server_importer, @project)
      end

      # Returns an "authored by" prefix for objects whose author has no
      # GitLab account; empty string when the author was mapped.
      def author_line(rep_object)
        return '' if uid(rep_object)

        @formatter.author_line(rep_object.author)
      end

      # GitLab user id for the object's author, or the project creator.
      def author_id(rep_object)
        uid(rep_object) || project.creator_id
      end

      # Maps the object's author to a GitLab user id, or nil if unmapped.
      def uid(rep_object)
        # We want this explicit to only be username on the FF
        # Otherwise, match email.
        # There should be no default fall-through on username. Fall-through to import user
        if Feature.enabled?(:bitbucket_server_user_mapping_by_username)
          find_user_id(by: :username, value: rep_object.author_username)
        else
          find_user_id(by: :email, value: rep_object.author_email)
        end
      end
    end
  end
end