# frozen_string_literal: true

# Detected SSH host keys are transiently stored in Redis
class SshHostKey
  # One public key parsed from a single line of known_hosts data, tagged with
  # the position of that line in the data it was read from.
  class Fingerprint < Gitlab::SSHPublicKey
    attr_reader :index

    # key   - the line of known_hosts data, passed through to the superclass
    # index - position of the line within the scanned data (nil if unknown)
    def initialize(key, index: nil)
      super(key)

      @index = index
    end

    # JSON representation: key size, fingerprint, key type and line index.
    def as_json(*)
      { bits: bits, fingerprint: fingerprint, type: type, index: index }
    end
  end

  include ReactiveCaching

  self.reactive_cache_key = ->(key) { [key.class.to_s, key.id] }

  # Do not refresh the data in the background - it is not expected to change.
  # This is achieved by making the lifetime shorter than the refresh interval.
  self.reactive_cache_refresh_interval = 15.minutes
  self.reactive_cache_lifetime = 10.minutes
  self.reactive_cache_work_type = :external_dependency

  # Rebuilds an instance from the composite id produced by #id, i.e.
  # "<project id>:<url>".
  #
  # opts - a Hash (String or Symbol keys) expected to contain :id
  #
  # Returns nil when the id is missing/blank or the project does not exist.
  # Raises ArgumentError (from #initialize) when the URL part is not a valid
  # ssh:// URL.
  def self.find_by(opts = {})
    # `to_s` guards against a nil or non-String id, which previously blew up
    # with NoMethodError on `split`.
    id = HashWithIndifferentAccess.new(opts)[:id].to_s
    return if id.empty?

    # Only split on the first colon: the URL part contains colons of its own.
    project_id, url = id.split(':', 2)
    project = Project.find_by(id: project_id)

    new(project: project, url: url) if project
  end

  # Parses known_hosts data into Fingerprint objects, one per valid line.
  #
  # data - known_hosts content as a String; anything else yields []
  #
  # Each Fingerprint's index is the position of its line in `data`, counted
  # before invalid lines are dropped.
  def self.fingerprint_host_keys(data)
    return [] unless data.is_a?(String)

    data
      .each_line
      .map.with_index { |line, index| Fingerprint.new(line, index: index) }
      .select(&:valid?)
  end

  attr_reader :project, :url, :compare_host_keys

  # project           - the project the lookup is performed for
  # url               - an ssh:// URL; only its host and port are retained
  # compare_host_keys - optional known_hosts data to compare the detected
  #                     keys against (see #host_keys_changed?)
  #
  # Raises ArgumentError if `url` is not a usable ssh:// URL.
  def initialize(project:, url:, compare_host_keys: nil)
    @project = project
    @url = normalize_url(url)
    @compare_host_keys = compare_host_keys
  end

  # Needed for reactive caching
  def self.primary_key
    :id
  end

  # Composite identifier "<project id>:<normalized url>"; the inverse of
  # .find_by.
  def id
    [project.id, url].join(':')
  end

  def as_json(*)
    {
      host_keys_changed: host_keys_changed?,
      fingerprints: fingerprints,
      known_hosts: known_hosts
    }
  end

  # The :known_hosts entry of the reactive cache payload built by
  # #calculate_reactive_cache.
  # NOTE(review): presumably nil until the cache has been populated - confirm
  # against ReactiveCaching#with_reactive_cache.
  def known_hosts
    with_reactive_cache { |data| data[:known_hosts] }
  end

  # Fingerprints of the detected host keys, memoized per instance.
  def fingerprints
    @fingerprints ||= self.class.fingerprint_host_keys(known_hosts)
  end

  # Returns true if the known_hosts data differs from the version passed in at
  # initialization as `compare_host_keys`. Comments, ordering, etc, is ignored
  def host_keys_changed?
    cleanup(known_hosts) != cleanup(compare_host_keys)
  end

  # The :error entry of the reactive cache payload, set when detection failed.
  def error
    with_reactive_cache { |data| data[:error] }
  end

  # Runs ssh-keyscan against the host and returns the payload to be cached:
  # `{ known_hosts: String }` on success, `{ error: String }` on failure.
  def calculate_reactive_cache
    known_hosts, errors, status = run_ssh_keyscan

    # ssh-keyscan returns an exit code 0 in several error conditions, such as an
    # unknown hostname, so check both STDERR and the exit code
    if status.success? && errors.blank?
      { known_hosts: known_hosts }
    else
      Gitlab::AppLogger.debug("Failed to detect SSH host keys for #{id}: #{errors}")

      { error: 'Failed to detect SSH host keys' }
    end
  end

  private

  # Invokes `ssh-keyscan`, feeding it the host on STDIN (`-f-`).
  #
  # Returns [cleaned STDOUT, cleaned STDERR, Process::Status].
  def run_ssh_keyscan
    Open3.popen3({}, *%W[ssh-keyscan -T 5 -p #{url.port} -f-]) do |stdin, stdout, stderr, wait_thr|
      begin
        stdin.puts(url.host)
      rescue Errno::EPIPE
        # ssh-keyscan can exit before reading its input (e.g. when the port is
        # rejected). The failure is still reported through STDERR and the exit
        # status below, so a broken pipe here is not an error in itself.
      end
      stdin.close

      [
        cleanup(stdout.read),
        cleanup(stderr.read),
        wait_thr.value
      ]
    end
  end

  # Remove comments, blank lines and duplicate entries, and sort what is left.
  #
  # Line terminators are normalized to a single "\n" per entry so that a
  # missing trailing newline or CRLF endings neither glue two entries together
  # after sorting nor make otherwise identical data compare as different.
  def cleanup(data)
    data
      .to_s
      .each_line
      .map(&:chomp)
      .reject { |line| line.empty? || line.start_with?('#') }
      .uniq
      .sort
      .map { |line| "#{line}\n" }
      .join
  end

  # Reduces an ssh:// URL to just "ssh://host:port", filling in the default
  # port when none is given.
  #
  # Raises ArgumentError for anything that is not an ssh:// URL with a host.
  def normalize_url(url)
    full_url = ::Addressable::URI.parse(url)
    valid = full_url&.scheme == 'ssh' && full_url.host.present?
    raise ArgumentError, "Invalid URL" unless valid

    Addressable::URI.parse("ssh://#{full_url.host}:#{full_url.inferred_port}")
  rescue Addressable::URI::InvalidURIError
    raise ArgumentError, "Invalid URL"
  end
end
138