# frozen_string_literal: true

module Gitlab
  module LegacyGithubImport
    # Imports labels, milestones, pull requests, issues, comments, the wiki and
    # releases of a remote GitHub-compatible repository (GitHub or Gitea) into
    # a project.
    #
    # Progress for paginated resources is tracked in Rails.cache (current page
    # and an "imported" flag per resource type), and failures on individual
    # records are accumulated in #errors instead of aborting the whole import.
    class Importer
      # @return the refmap defined by Gitlab::GithubImport
      def self.refmap
        Gitlab::GithubImport.refmap
      end

      attr_reader :errors, :project, :repo, :repo_url

      # @param project the project being imported into; its `import_source`
      #   and `import_url` identify the remote repository
      def initialize(project)
        @project = project
        @repo = project.import_source
        @repo_url = project.import_url
        @errors = []
        # Label title => label id, filled by #cache_labels! once labels are imported.
        @labels = {}
      end

      # Builds (and memoizes) the API client used to talk to the remote host.
      #
      # @raise [Projects::ImportService::Error] when the project has no import credentials
      def client
        return @client if defined?(@client)

        unless credentials
          raise Projects::ImportService::Error,
            "Unable to find project import data credentials for project ID: #{@project.id}"
        end

        opts = {}
        # Gitea plan to be GitHub compliant
        if project.gitea_import?
          uri = URI.parse(project.import_url)
          # Strip the trailing "<owner>/<repo>.git" so only the instance base URL remains.
          host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w-]+/[\w-]+\.git\z}, '')
          opts = {
            host: host,
            api_version: 'v1'
          }
        end

        @client = Client.new(credentials[:user], **opts)
      end

      # Runs the full import and persists any collected errors on the import state.
      #
      # @return [true]
      def execute
        # The ordering of importing is important here due to the way GitHub structures their data
        #   1. Labels are required by other items while not having a dependency on anything else
        #      so need to be first
        #   2. Pull requests must come before issues. Every pull request is also an issue but not
        #      all issues are pull requests. Only the issue entity has labels defined in GitHub. GitLab
        #      doesn't structure data like this so we need to make sure that we've created the MRs
        #      before we attempt to add the labels defined in the GitHub issue for the related, already
        #      imported, pull request
        import_labels
        import_milestones
        import_pull_requests
        import_issues
        import_comments(:issues)

        # Gitea doesn't have an API endpoint for pull requests comments
        import_comments(:pull_requests) unless project.gitea_import?

        import_wiki

        # Gitea doesn't have a Release API yet
        # See https://github.com/go-gitea/gitea/issues/330
        import_releases unless project.gitea_import?

        handle_errors

        true
      end

      private

      # Memoized import credentials; nil when the project has no import data.
      def credentials
        return @credentials if defined?(@credentials)

        @credentials = project.import_data&.credentials
      end

      # Stores the collected errors (as JSON) in the import state's `last_error`
      # column. Does nothing when no error was recorded.
      def handle_errors
        return unless errors.any?

        project.import_state.update_column(:last_error, {
          message: 'The remote data could not be fully imported.',
          errors: errors
        }.to_json)
      end

      # Creates every remote label, then caches the project's label ids by title.
      def import_labels
        fetch_resources(:labels, repo, per_page: 100) do |labels|
          labels.each do |raw|
            gh_label = LabelFormatter.new(project, raw)
            gh_label.create!
          rescue StandardError => e
            errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(gh_label.url), errors: e.message }
          end
        end

        cache_labels!
      end

      # Creates every remote milestone (open and closed).
      def import_milestones
        fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
          milestones.each do |raw|
            gh_milestone = MilestoneFormatter.new(project, raw)
            gh_milestone.create!
          rescue StandardError => e
            errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(gh_milestone.url), errors: e.message }
          end
        end
      end

      # Creates issues and applies labels. Entries that are really pull requests
      # are not re-created: the already imported merge request is looked up so
      # that the labels carried by the issue payload can be applied to it.
      # rubocop: disable CodeReuse/ActiveRecord
      def import_issues
        fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
          issues.each do |raw|
            gh_issue = IssueFormatter.new(project, raw, client)

            begin
              issuable =
                if gh_issue.pull_request?
                  # May be nil when the pull request was skipped or failed to import.
                  MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number)
                else
                  gh_issue.create!
                end

              apply_labels(issuable, raw)
            rescue StandardError => e
              errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message }
            end
          end
        end
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Creates a merge request for every valid pull request. Missing source or
      # target branches are temporarily restored so the merge request can be
      # created, and removed again afterwards (see #clean_up_restored_branches).
      def import_pull_requests
        fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
          pull_requests.each do |raw|
            gh_pull_request = PullRequestFormatter.new(project, raw, client)

            next unless gh_pull_request.valid?

            begin
              restore_source_branch(gh_pull_request) unless gh_pull_request.source_branch_exists?
              restore_target_branch(gh_pull_request) unless gh_pull_request.target_branch_exists?

              merge_request = gh_pull_request.create!

              # Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage
              apply_labels(merge_request, raw) if project.gitea_import?
            rescue StandardError => e
              errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(gh_pull_request.url), errors: e.message }
            ensure
              clean_up_restored_branches(gh_pull_request)
            end
          end
        end

        project.repository.after_remove_branch
      end

      # Re-creates the pull request's source branch at its recorded SHA.
      def restore_source_branch(pull_request)
        project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
      end

      # Re-creates the pull request's target branch at its recorded SHA.
      def restore_target_branch(pull_request)
        project.repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
      end

      # Deletes a branch, recording (rather than raising) a deletion failure.
      def remove_branch(name)
        project.repository.delete_branch(name)
      rescue Gitlab::Git::Repository::DeleteBranchFailed
        errors << { type: :remove_branch, name: name }
      end

      # Removes the branches that were restored for a pull request that is no
      # longer open. Branches of open pull requests are left in place.
      def clean_up_restored_branches(pull_request)
        return if pull_request.opened?

        remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
        remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
      end

      # Assigns to `issuable` the ids of the cached labels named in `raw.labels`.
      # Label names that are not in the cache are ignored.
      #
      # @param issuable the record to label; may be nil when the matching merge
      #   request was never imported (e.g. the pull request was skipped as invalid)
      # @param raw the remote payload, responding to `labels`
      def apply_labels(issuable, raw)
        # Without this guard a missing merge request surfaced as a misleading
        # NoMethodError import error, and only when the issue carried labels.
        return unless issuable
        return unless raw.labels.count > 0

        label_ids = raw.labels
          .map { |attrs| @labels[attrs.name] }
          .compact

        issuable.update_attribute(:label_ids, label_ids)
      end

      # Imports the comments of the given issuable type.
      #
      # @param issuable_type [Symbol] :issues or :pull_requests
      # rubocop: disable CodeReuse/ActiveRecord
      def import_comments(issuable_type)
        resource_type = "#{issuable_type}_comments".to_sym

        # Two notes here:
        # 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
        #    compare it against every comment in the current imported page until we find match, and that's where start importing
        # 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
        #    only comments on diffs, so select last note not based on noteable_type but on line_code
        line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
        last_note = project.notes.where("line_code IS #{line_code_is}").last

        fetch_resources(resource_type, repo, per_page: 100) do |comments|
          # Only the first fetched page can contain already inserted comments.
          if last_note
            discard_inserted_comments(comments, last_note)
            last_note = nil
          end

          create_comments(comments)
        end
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Creates a note for every comment whose parent issue or merge request
      # exists in the project; comments without a known parent are skipped.
      # rubocop: disable CodeReuse/ActiveRecord
      def create_comments(comments)
        ActiveRecord::Base.no_touching do
          comments.each do |raw|
            comment = CommentFormatter.new(project, raw, client)

            # GH does not return info about comment's parent, so we guess it by checking its URL!
            *_, parent, iid = URI(raw.html_url).path.split('/')

            issuable = if parent == 'issues'
                         Issue.find_by(project_id: project.id, iid: iid)
                       else
                         MergeRequest.find_by(target_project_id: project.id, iid: iid)
                       end

            next unless issuable

            issuable.notes.create!(comment.attributes)
          rescue StandardError => e
            errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
          end
        end
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Removes from `comments` (in place) everything up to and including the
      # comment whose attributes match `last_note`, i.e. the comments that were
      # already inserted by a previous run.
      def discard_inserted_comments(comments, last_note)
        last_note_attrs = nil

        cut_off_index = comments.find_index do |raw|
          comment = CommentFormatter.new(project, raw)
          comment_attrs = comment.attributes
          # Slice the note once, using the same keys the formatter produces.
          last_note_attrs ||= last_note.slice(*comment_attrs.keys)

          comment_attrs.with_indifferent_access == last_note_attrs
        end

        # No matching resource in the collection, which means we got halted right on the end of the last page, so all good
        return unless cut_off_index

        # Otherwise, remove the resources we've already inserted
        comments.shift(cut_off_index + 1)
      end

      # Imports the remote wiki repository unless the project wiki already exists.
      def import_wiki
        return if project.wiki.repository_exists?

        wiki = WikiFormatter.new(project)
        project.wiki.repository.import_repository(wiki.import_url)
      rescue ::Gitlab::Git::CommandError => e
        # GitHub error message when the wiki repo has not been created,
        # this means that repo has wiki enabled, but have no pages. So,
        # we can skip the import.
        unless e.message.match?(/repository not exported/)
          errors << { type: :wiki, errors: e.message }
        end
      end

      # Creates every valid remote release.
      def import_releases
        fetch_resources(:releases, repo, per_page: 100) do |releases|
          releases.each do |raw|
            gh_release = ReleaseFormatter.new(project, raw)
            gh_release.create! if gh_release.valid?
          rescue StandardError => e
            errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(gh_release.url), errors: e.message }
          end
        end
      end

      # Fills @labels with a title => id map of the project's labels.
      def cache_labels!
        project.labels.select(:id, :title).find_each do |label|
          @labels[label.title] = label.id
        end
      end

      # Fetches a paginated resource through the client, yielding each page and
      # recording progress in the cache so a later run resumes from the stored
      # page. Resource types already flagged as imported are skipped.
      #
      # @param resource_type [Symbol] name of the client method to call
      # @param opts arguments forwarded to the client; the last one must be an
      #   options Hash, which receives the `:page` to start from
      # @yield [resources] one page of resources
      def fetch_resources(resource_type, *opts)
        return if imported?(resource_type)

        opts.last[:page] = current_page(resource_type)

        client.public_send(resource_type, *opts) do |resources| # rubocop:disable GitlabSecurity/PublicSend
          yield resources
          increment_page(resource_type)
        end

        imported!(resource_type)
      rescue ::Octokit::NotFound => e
        errors << { type: resource_type, errors: e.message }
      end

      # Whether the resource type was flagged as fully imported.
      def imported?(resource_type)
        Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
      end

      # Flags the resource type as fully imported.
      def imported!(resource_type)
        Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
      end

      # Bumps the cached page counter of the resource type and returns the new value.
      def increment_page(resource_type)
        key = "#{cache_key_prefix}:#{resource_type}:current-page"

        # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
        # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
        #
        # NOTE(review): relies on the key having been seeded by #current_page
        # (called at the start of #fetch_resources); a missing entry would make
        # `page += 1` fail on nil - confirm this is the intended failure mode.
        page = Rails.cache.read(key)
        page += 1
        Rails.cache.write(key, page)

        page
      end

      # Page to (re)start fetching from; defaults to 1 when nothing is cached.
      def current_page(resource_type)
        Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
      end

      # Cache key namespace for this project's import.
      def cache_key_prefix
        @cache_key_prefix ||= "github-import:#{project.id}"
      end
    end
  end
end