1# frozen_string_literal: true
2
3module Gitlab
4  module BitbucketServerImport
5    class Importer
# Read-only import state; every one of these is assigned in #initialize.
attr_reader :project, :project_key, :repository_slug, :client,
            :errors, :users, :already_imported_cache_key,
            :recover_missing_commits
# Logger used by the log_* helpers; writable so it can be replaced.
attr_accessor :logger
9
# Page size passed as +limit+ when listing pull requests.
BATCH_SIZE = 100
# Template for the cache key tracking already imported objects. The
# %{project} and %{collection} placeholders are filled in by #initialize.
ALREADY_IMPORTED_CACHE_KEY = 'bitbucket_server-importer/already-imported/%{project}/%{collection}'

# A temporary branch: its name and the SHA it points at.
TempBranch = Struct.new(:name, :sha)
16
# Always true. Presumably tells the import framework that this importer
# fetches the Git repository itself (see #import_repository) -- confirm
# against the callers.
def self.imports_repository?
  true
end
20
# Extra refspecs passed to fetch_as_mirror: pull request refs are mapped
# onto merge request head refs. :heads and :tags are deliberately absent
# because #import_repository already fetches them.
def self.refmap
  pull_request_refspec = '+refs/pull-requests/*/to:refs/merge-requests/*/head'

  [pull_request_refspec]
end
25
# Builds an importer for +project+.
#
# Unlike GitHub, Bitbucket Server does not let you grab the commit SHAs of
# pull requests that were closed without being merged, even though it keeps
# those commits internally. They can be recovered by creating branches via
# the Bitbucket REST API; that behavior is opt-in and off by default.
#
# project                 - The project being imported. Its import_data
#                           supplies the 'project_key' and 'repo_slug'
#                           entries and the client credentials.
# recover_missing_commits - When true, #import_pull_requests restores
#                           missing SHAs through temporary branches.
def initialize(project, recover_missing_commits: false)
  import_data = project.import_data

  @project = project
  @recover_missing_commits = recover_missing_commits
  @project_key = import_data.data['project_key']
  @repository_slug = import_data.data['repo_slug']
  @client = BitbucketServer::Client.new(import_data.credentials)
  @formatter = Gitlab::ImportFormatter.new
  @logger = Gitlab::Import::Logger.build

  # Mutable bookkeeping filled in while the import runs.
  @errors = []
  @users = {}
  @temp_branches = []

  @already_imported_cache_key =
    format(ALREADY_IMPORTED_CACHE_KEY, project: project.id, collection: collection_method)
end
45
# Name of the collection being imported. #initialize uses it for the
# %{collection} part of the already-imported cache key.
def collection_method
  :pull_requests
end
49
# Runs the import steps in order: repository, pull requests, LFS objects,
# temporary branch cleanup, error persistence and metrics tracking. It then
# logs completion and sets an expiry (SHORTER_TIMEOUT) on the
# already-imported cache key.
#
# Returns true.
def execute
  import_repository
  import_pull_requests
  download_lfs_objects
  delete_temp_branches
  handle_errors
  metrics.track_finished_import

  log_info(stage: "complete")

  caching = Gitlab::Cache::Import::Caching
  caching.expire(already_imported_cache_key, caching::SHORTER_TIMEOUT)

  true
end
63
64      private
65
# Stores the collected errors, serialized as JSON together with a generic
# message, in the import state's last_error column. Does nothing when no
# errors were recorded.
def handle_errors
  return if errors.none?

  payload = {
    message: 'The remote data could not be fully imported.',
    errors: errors
  }

  project.import_state.update_column(:last_error, payload.to_json)
end
74
# Resolves +value+ to a user ID, caching the result (including misses) in
# +users+ under +value+.
#
# by    - :email looks the user up by any confirmed email; anything else
#         looks the user up by username.
# value - The email or username to search for; nil short-circuits.
#
# Returns the user ID, or nil when no user matches.
def find_user_id(by:, value:)
  return unless value

  users.fetch(value) do
    user =
      case by
      when :email then User.find_by_any_email(value, confirmed: true)
      else User.find_by_username(value)
      end

    users[value] = user&.id
  end
end
90
# Memoized result of client.repo for this project key / repository slug.
def repo
  return @repo if @repo

  @repo = client.repo(project_key, repository_slug)
end
94
# Looks +sha+ up in the project's repository. The return value is whatever
# repository.commit returns, so callers should rely on truthiness only.
def sha_exists?(sha)
  repository = project.repository

  repository.commit(sha)
end
98
# Name of the temporary branch for +pull_request+:
# gitlab/import/pull-request/<iid>/<suffix>
def temp_branch_name(pull_request, suffix)
  format('gitlab/import/pull-request/%s/%s', pull_request.iid, suffix)
end
102
# Restores the SHAs GitLab needs to create diffs by exposing them as
# branches named:
#
# gitlab/import/pull-request/N/{to,from}
#
# Two candidate branches (source and target SHA) are built per pull
# request. Those actually created on the Bitbucket Server are remembered in
# @temp_branches, and the repository is re-imported when at least one was
# created so the new branches are fetched.
def restore_branches(pull_requests)
  candidates = pull_requests.flat_map do |pull_request|
    [
      TempBranch.new(temp_branch_name(pull_request, :from), pull_request.source_branch_sha),
      TempBranch.new(temp_branch_name(pull_request, :to), pull_request.target_branch_sha)
    ]
  end

  # Create the branches on the Bitbucket Server first
  created_branches = restore_branch_shas(candidates)
  @temp_branches += created_branches

  return if created_branches.empty?

  # Now sync the repository so we get the new branches
  import_repository
end
124
# Creates a branch on the Bitbucket Server for every entry whose SHA is not
# already present locally. A connection error while creating a branch is
# logged as a warning and that entry is skipped.
#
# shas_to_restore - Objects responding to #name and #sha.
#
# Returns the entries whose branch was created.
def restore_branch_shas(shas_to_restore)
  shas_to_restore.select do |temp_branch|
    sha = temp_branch.sha
    next false if sha_exists?(sha)

    begin
      client.create_branch(project_key, repository_slug, temp_branch.name, sha)
      true
    rescue BitbucketServer::Connection::ConnectionError => e
      log_warn(message: "Unable to recreate branch", sha: sha, error: e.message)
      false
    end
  end
end
140
# Imports the repository from the project's import URL, then fetches the
# refs listed in .refmap as a mirror.
#
# A Gitlab::Git::CommandError is logged to error tracking and re-raised
# after the repository content cache has been expired (when the repository
# exists).
def import_repository
  stage = 'import_repository'
  log_info(stage: stage, message: 'starting import')

  repository = project.repository
  repository.import_repository(project.import_url)
  repository.fetch_as_mirror(project.import_url, refmap: self.class.refmap)

  log_info(stage: stage, message: 'finished import')
rescue ::Gitlab::Git::CommandError => e
  Gitlab::ErrorTracking.log_exception(
    e,
    stage: 'import_repository', message: 'failed import', error: e.message
  )

  # Expire cache to prevent scenarios such as:
  # 1. First import failed, but the repo was imported successfully, so +exists?+ returns true
  # 2. Retried import, repo is broken or not imported but +exists?+ still returns true
  project.repository.expire_content_cache if project.repository_exists?

  raise
end
161
# Runs the LFS import service for the project and records an error entry
# when the service reports an :error status.
def download_lfs_objects
  result = Projects::LfsPointers::LfsImportService.new(project).execute
  return unless result[:status] == :error

  errors << { type: :lfs_objects, errors: "The Lfs import process failed. #{result[:message]}" }
end
169
# Imports all pull requests, one page of BATCH_SIZE at a time, until the
# client returns an empty page.
#
# Bitbucket Server keeps track of references for open pull requests in
# refs/heads/pull-requests, but closed and merged requests get moved into
# hidden internal refs under stash-refs/pull-requests. Unless the SHAs
# involved are at the tip of a branch or tag, there is no way to retrieve
# those commits from the server.
#
# To avoid losing history, we use the Bitbucket API to re-create the branch
# on the remote server (only when recover_missing_commits is set). Then we
# have to issue a `git fetch` to download these branches.
#
# A failure on a single pull request is tracked and added to +errors+; the
# remaining pull requests are still processed.
def import_pull_requests
  stage = 'import_pull_requests'
  page = 0

  log_info(stage: stage, message: "starting")

  loop do
    range_start = page * BATCH_SIZE
    range_end = (page + 1) * BATCH_SIZE
    log_debug(stage: stage, message: "importing page #{page} and batch-size #{BATCH_SIZE} from #{range_start} to #{range_end}")

    pull_requests = client.pull_requests(project_key, repository_slug, page_offset: page, limit: BATCH_SIZE).to_a
    break if pull_requests.empty?

    # Creating branches on the server and fetching the newly-created branches
    # may take a number of network round-trips. This used to be done in batches to
    # avoid doing a git fetch for every new branch, as the whole process is now
    # batched, we do not need to separately do this in batches.
    restore_branches(pull_requests) if recover_missing_commits

    pull_requests.each do |pull_request|
      if already_imported?(pull_request)
        log_info(stage: stage, message: 'already imported', iid: pull_request.iid)
        next
      end

      import_bitbucket_pull_request(pull_request)
    rescue StandardError => e
      Gitlab::ErrorTracking.log_exception(
        e,
        stage: 'import_pull_requests', iid: pull_request.iid, error: e.message
      )

      cleaned = Gitlab::BacktraceCleaner.clean_backtrace(e.backtrace)
      errors << {
        type: :pull_request,
        iid: pull_request.iid,
        errors: e.message,
        backtrace: cleaned.join("\n"),
        raw_response: pull_request.raw
      }
    end

    log_debug(stage: stage, message: "finished page #{page} and batch-size #{BATCH_SIZE}")
    page += 1
  end
end
217
# Asks the import cache whether the pull request's iid is in the set stored
# under already_imported_cache_key.
#
# pull_request - The pull request to check.
def already_imported?(pull_request)
  caching = Gitlab::Cache::Import::Caching

  caching.set_includes?(already_imported_cache_key, pull_request.iid)
end
225
# Marks the pull request as "already imported" by adding its iid to the set
# stored under already_imported_cache_key.
#
# pull_request - The pull request to mark.
def mark_as_imported(pull_request)
  caching = Gitlab::Cache::Import::Caching

  caching.set_add(already_imported_cache_key, pull_request.iid)
end
230
# Removes every temporary branch recorded in @temp_branches, first from the
# Bitbucket Server and then from the local repository. A connection error
# for one branch is tracked and recorded in +errors+ without stopping the
# remaining deletions.
def delete_temp_branches
  @temp_branches.each do |branch|
    branch_name = branch.name

    client.delete_branch(project_key, repository_slug, branch_name, branch.sha)
    project.repository.delete_branch(branch_name)
  rescue BitbucketServer::Connection::ConnectionError => e
    Gitlab::ErrorTracking.log_exception(
      e,
      stage: 'delete_temp_branches', branch: branch.name, error: e.message
    )

    errors << { type: :delete_temp_branches, branch_name: branch.name, errors: e.message }
  end
end
244
# Creates a merge request from a Bitbucket pull request. When the merge
# request is persisted, its comments are imported and the merge request
# counter is incremented. The pull request is then marked as imported.
#
# NOTE(review): the pull request is marked as imported even when the merge
# request was not persisted -- confirm that this is intended.
def import_bitbucket_pull_request(pull_request)
  iid = pull_request.iid
  log_info(stage: 'import_bitbucket_pull_requests', message: 'starting', iid: iid)

  # The author line is empty when the author maps to a GitLab user.
  description = String.new(author_line(pull_request))
  description += pull_request.description if pull_request.description

  project_id = project.id
  attributes = {
    iid: iid,
    title: pull_request.title,
    description: description,
    source_project_id: project_id,
    source_branch: Gitlab::Git.ref_name(pull_request.source_branch_name),
    source_branch_sha: pull_request.source_branch_sha,
    target_project_id: project_id,
    target_branch: Gitlab::Git.ref_name(pull_request.target_branch_name),
    target_branch_sha: pull_request.target_branch_sha,
    state_id: MergeRequest.available_states[pull_request.state],
    author_id: author_id(pull_request),
    created_at: pull_request.created_at,
    updated_at: pull_request.updated_at
  }

  merge_request = Gitlab::Import::MergeRequestCreator.new(project).execute(attributes)

  if merge_request.persisted?
    import_pull_request_comments(pull_request, merge_request)

    metrics.merge_requests_counter.increment
  end

  log_info(stage: 'import_bitbucket_pull_requests', message: 'finished', iid: pull_request.iid)
  mark_as_imported(pull_request)
end
280
# Imports the activities of +pull_request+ into +merge_request+: the merge
# event (if one is found), inline comments as diff notes, and the remaining
# comments as regular notes.
def import_pull_request_comments(pull_request, merge_request)
  log_info(stage: 'import_pull_request_comments', message: 'starting', iid: merge_request.iid)

  activities = client.activities(project_key, repository_slug, pull_request.iid)
  comments, other_activities = activities.partition(&:comment?)

  merge_event = other_activities.find(&:merge_event?)
  import_merge_event(merge_request, merge_event) if merge_event

  inline_comments, pr_comments = comments.partition(&:inline_comment?)

  import_inline_comments(inline_comments.map(&:comment), merge_request)
  import_standalone_pr_comments(pr_comments.map(&:comment), merge_request)

  log_info(
    stage: 'import_pull_request_comments',
    message: 'finished',
    iid: merge_request.iid,
    merge_event_found: merge_event.present?,
    inline_comments_count: inline_comments.count,
    standalone_pr_comments: pr_comments.count
  )
end
299
300      # rubocop: disable CodeReuse/ActiveRecord
# Records merge information on +merge_request+: the merge commit SHA on the
# merge request itself, and who merged it and when on its metrics record.
# The merging user is looked up by committer email, falling back to the
# project creator.
def import_merge_event(merge_request, merge_event)
  log_info(stage: 'import_merge_event', message: 'starting', iid: merge_request.iid)

  merged_by_id = find_user_id(by: :email, value: merge_event.committer_email) || project.creator_id
  merged_at = merge_event.merge_timestamp

  merge_request.update({ merge_commit_sha: merge_event.merge_commit })

  metric = MergeRequest::Metrics.find_or_initialize_by(merge_request: merge_request)
  metric.update(merged_by_id: merged_by_id, merged_at: merged_at)

  log_info(stage: 'import_merge_event', message: 'finished', iid: merge_request.iid)
end
314      # rubocop: enable CodeReuse/ActiveRecord
315
# Creates a diff note for each inline comment. When that note is persisted,
# each reply is created as a diff note at the same position, in the parent
# note's discussion.
def import_inline_comments(inline_comments, merge_request)
  log_info(stage: 'import_inline_comments', message: 'starting', iid: merge_request.iid)

  inline_comments.each do |comment|
    position = build_position(merge_request, comment)
    parent_note = create_diff_note(merge_request, comment, position)

    # Replies need a saved parent to attach to.
    next unless parent_note&.persisted?

    discussion_id = parent_note.discussion_id
    comment.comments.each { |reply| create_diff_note(merge_request, reply, position, discussion_id) }
  end

  log_info(stage: 'import_inline_comments', message: 'finished', iid: merge_request.iid)
end
334
# Builds a DiffNote for +comment+ at +position+, optionally inside an
# existing discussion. A valid note is saved and returned; otherwise a
# fallback note is created instead.
#
# Returns the note, or nil when an error was raised (the error is tracked
# and recorded in +errors+).
def create_diff_note(merge_request, comment, position, discussion_id = nil)
  attributes = pull_request_comment_attributes(comment).merge(position: position, type: 'DiffNote')
  attributes[:discussion_id] = discussion_id if discussion_id

  note = merge_request.notes.build(attributes)

  if note.valid?
    note.save
    note
  else
    log_info(stage: 'create_diff_note', message: 'creating fallback DiffNote', iid: merge_request.iid)

    # Bitbucket Server supports the ability to comment on any line, not just the
    # line in the diff. If we can't add the note as a DiffNote, fallback to creating
    # a regular note.
    create_fallback_diff_note(merge_request, comment, position)
  end
rescue StandardError => e
  Gitlab::ErrorTracking.log_exception(
    e,
    stage: 'create_diff_note', comment_id: comment.id, error: e.message
  )

  errors << { type: :pull_request, id: comment.id, errors: e.message }
  nil
end
362
# Creates a note (without position attributes) whose text starts with a
# "*Comment on old_path:old_line --> new_path:new_line*" header, followed
# by the original comment. Each side of the header only appears when the
# corresponding line number is set. Raises if the note cannot be created.
def create_fallback_diff_note(merge_request, comment, position)
  attributes = pull_request_comment_attributes(comment)

  old_side = " #{position.old_path}:#{position.old_line} -->" if position.old_line
  new_side = " #{position.new_path}:#{position.new_line}" if position.new_line

  attributes[:note] = "*Comment on#{old_side}#{new_side}*\n\n#{comment.note}"
  merge_request.notes.create!(attributes)
end
374
# Builds a Gitlab::Diff::Position for +pr_comment+ against the merge
# request's diff refs. The comment's file path is used as both the old and
# the new path.
def build_position(merge_request, pr_comment)
  file_path = pr_comment.file_path

  Gitlab::Diff::Position.new(
    {
      diff_refs: merge_request.diff_refs,
      old_path: file_path,
      new_path: file_path,
      old_line: pr_comment.old_pos,
      new_line: pr_comment.new_pos
    }
  )
end
386
# Creates a regular note for every comment, followed by one note per reply.
# An error raised for a comment (or one of its replies) is tracked and
# recorded in +errors+; the remaining comments are still processed.
def import_standalone_pr_comments(pr_comments, merge_request)
  pr_comments.each do |comment|
    merge_request.notes.create!(pull_request_comment_attributes(comment))

    comment.comments.each { |reply| merge_request.notes.create!(pull_request_comment_attributes(reply)) }
  rescue StandardError => e
    Gitlab::ErrorTracking.log_exception(
      e,
      stage: 'import_standalone_pr_comments',
      merge_request_id: merge_request.id,
      comment_id: comment.id,
      error: e.message
    )

    errors << { type: :pull_request, comment_id: comment.id, errors: e.message }
  end
end
403
# Builds the note attributes for a Bitbucket comment.
#
# When the comment author cannot be mapped to a user, the note is attributed
# to the project creator and prefixed with a "*By username (email)*" line.
# A reply additionally quotes the first 80 characters of its parent.
#
# Returns a Hash with :project, :note, :author_id, :created_at, :updated_at.
def pull_request_comment_attributes(comment)
  author = uid(comment)
  attribution = ''

  unless author
    author = project.creator_id
    attribution = "*By #{comment.author_username} (#{comment.author_email})*\n\n"
  end

  body =
    if comment.parent_comment
      # Provide some context for replying
      "> #{comment.parent_comment.note.truncate(80)}\n\n#{comment.note}"
    else
      comment.note
    end

  {
    project: project,
    note: attribution + body,
    author_id: author,
    created_at: comment.created_at,
    updated_at: comment.updated_at
  }
end
429
# Logs +details+ merged over the base log data at debug level.
def log_debug(details)
  payload = log_base_data.merge(details)

  logger.debug(payload)
end
433
# Logs +details+ merged over the base log data at info level.
def log_info(details)
  payload = log_base_data.merge(details)

  logger.info(payload)
end
437
# Logs +details+ merged over the base log data at warn level.
def log_warn(details)
  payload = log_base_data.merge(details)

  logger.warn(payload)
end
441
# Fields included in every log entry: the importer class name plus the
# project's id and full path.
def log_base_data
  importer_class = self.class.name

  { class: importer_class, project_id: project.id, project_path: project.full_path }
end
449
# Memoized Gitlab::Import::Metrics instance for this importer and project.
def metrics
  return @metrics if @metrics

  @metrics = Gitlab::Import::Metrics.new(:bitbucket_server_importer, @project)
end
453
# Returns an empty string when the object's author maps to a user;
# otherwise the formatter's author line for the original author.
def author_line(rep_object)
  return @formatter.author_line(rep_object.author) unless uid(rep_object)

  ''
end
459
# ID of the user mapped to the object's author, falling back to the project
# creator when no user matches.
def author_id(rep_object)
  mapped_id = uid(rep_object)
  return mapped_id if mapped_id

  project.creator_id
end
463
# Maps the object's author to a user ID.
#
# Matching is by username only while the
# :bitbucket_server_user_mapping_by_username feature flag is enabled;
# otherwise it is by email. There is deliberately no fall-through from one
# strategy to the other: callers fall back to the import user on nil.
def uid(rep_object)
  by_username = Feature.enabled?(:bitbucket_server_user_mapping_by_username)

  return find_user_id(by: :username, value: rep_object.author_username) if by_username

  find_user_id(by: :email, value: rep_object.author_email)
end
474    end
475  end
476end
477