# frozen_string_literal: true

module Gitlab
  module Git
    # In-memory representation of a Git blob (or of a :COMMIT tree entry, which
    # is built with empty data) as returned by the Gitaly clients used below.
    class Blob
      include Gitlab::BlobHelper
      include Gitlab::EncodingHelper
      extend Gitlab::Git::WrapsGitalyErrors

      # This number is the maximum amount of data that we want to display to
      # the user. We load as much as we can for encoding detection and LFS
      # pointer parsing. All other cases where we need full blob data should
      # use load_all_data!.
      MAX_DATA_DISPLAY_SIZE = 10.megabytes

      # The number of blobs loaded in a single Gitaly call.
      # When a large number of blobs is requested, we want to fetch them in
      # multiple Gitaly calls.
      BATCH_SIZE = 250

      # These limits are used as a heuristic to ignore files which can't be LFS
      # pointers. The format of these is described in
      # https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md#the-pointer
      LFS_POINTER_MIN_SIZE = 120.bytes
      LFS_POINTER_MAX_SIZE = 200.bytes

      attr_accessor :size, :mode, :id, :commit_id, :loaded_size, :binary
      attr_writer :name, :path, :data

      # Memoized counter incremented for every blob built with truncated data.
      def self.gitlab_blob_truncated_true
        @gitlab_blob_truncated_true ||= ::Gitlab::Metrics.counter(:gitlab_blob_truncated_true, 'blob.truncated? == true')
      end

      # Memoized counter incremented for every blob built without truncation.
      def self.gitlab_blob_truncated_false
        @gitlab_blob_truncated_false ||= ::Gitlab::Metrics.counter(:gitlab_blob_truncated_false, 'blob.truncated? == false')
      end

      # Memoized histogram of the `size` attribute of instantiated blobs.
      def self.gitlab_blob_size
        @gitlab_blob_size ||= ::Gitlab::Metrics.histogram(
          :gitlab_blob_size,
          'Gitlab::Git::Blob size',
          {},
          [1_000, 5_000, 10_000, 50_000, 100_000, 500_000, 1_000_000]
        )
      end

      class << self
        # Finds the entry at `path` as of `sha`, loading at most `limit` bytes
        # of data. Returns a Blob, or nil (see .tree_entry).
        def find(repository, sha, path, limit: MAX_DATA_DISPLAY_SIZE)
          tree_entry(repository, sha, path, limit)
        end

        # Fetches the tree entry for `path` at `sha` and wraps it in a Blob.
        #
        # Returns nil when `path` is nil, when no entry is returned, or when
        # the entry type is neither :COMMIT nor :BLOB.
        def tree_entry(repository, sha, path, limit)
          return unless path

          # Strip leading slashes; a path made only of slashes refers to '/'.
          path = path.sub(%r{\A/*}, '')
          path = '/' if path.empty?
          name = File.basename(path)

          # Gitaly will think that setting the limit to 0 means unlimited, while
          # the client might only need the metadata and thus set the limit to 0.
          # In this method we'll then set the limit to 1, but clear the byte of data
          # that we got back so for the outside world it looks like the limit was
          # actually 0.
          req_limit = limit == 0 ? 1 : limit

          entry = Gitlab::GitalyClient::CommitService.new(repository).tree_entry(sha, path, req_limit)
          return unless entry

          entry.data = "" if limit == 0

          case entry.type
          when :COMMIT
            new(id: entry.oid, name: name, size: 0, data: '', path: path, commit_id: sha)
          when :BLOB
            new(id: entry.oid, name: name, size: entry.size, data: entry.data.dup, mode: entry.mode.to_s(8),
              path: path, commit_id: sha, binary: binary?(entry.data))
          end
        end

        # Fetches the blob with object id `sha`, loading at most `limit` bytes.
        def raw(repository, sha, limit: MAX_DATA_DISPLAY_SIZE)
          repository.gitaly_blob_client.get_blob(oid: sha, limit: limit)
        end

        # Returns an array of Blob instances, specified in blob_references as
        # [[commit_sha, path], [commit_sha, path], ...]. If blob_size_limit < 0 then the
        # full blob contents are returned. If blob_size_limit >= 0 then each blob will
        # contain no more than limit bytes in its data attribute.
        #
        # Keep in mind that this method may allocate a lot of memory. It is up
        # to the caller to limit the number of blobs and blob_size_limit.
        #
        def batch(repository, blob_references, blob_size_limit: MAX_DATA_DISPLAY_SIZE)
          # Issue one Gitaly call per BATCH_SIZE references and concatenate the results.
          blob_references.each_slice(BATCH_SIZE).flat_map do |refs|
            repository.gitaly_blob_client.get_blobs(refs, blob_size_limit).to_a
          end
        end

        # Returns an array of Blob instances just with the metadata, that means
        # the data attribute has no content.
        def batch_metadata(repository, blob_references)
          batch(repository, blob_references, blob_size_limit: 0)
        end

        # Find LFS blobs given an array of sha ids
        # Returns array of Gitlab::Git::Blob
        # Does not guarantee blob data will be set
        def batch_lfs_pointers(repository, blob_ids)
          wrapped_gitaly_errors do
            repository.gitaly_blob_client.batch_lfs_pointers(blob_ids.to_a)
          end
        end

        # Delegates binary detection of `data` to EncodingHelper.
        def binary?(data, cache_key: nil)
          EncodingHelper.detect_libgit2_binary?(data, cache_key: cache_key)
        end

        # Heuristic: true when `size` lies within the LFS pointer size bounds.
        #
        # Returns false for a nil size. Blobs may be built without a size
        # (#truncated? and the size metric already tolerate that), so callers
        # such as #lfs_pointer? must not raise in that case.
        def size_could_be_lfs?(size)
          return false unless size

          size.between?(LFS_POINTER_MIN_SIZE, LFS_POINTER_MAX_SIZE)
        end
      end

      # Builds a blob from an options hash. Recognized keys: :id, :name, :path,
      # :size, :data, :mode, :commit_id and :binary; missing keys become nil.
      def initialize(options)
        %w(id name path size data mode commit_id binary).each do |key|
          self.__send__("#{key}=", options[key.to_sym]) # rubocop:disable GitlabSecurity/PublicSend
        end

        # Retain the actual size before it is encoded
        @loaded_size = @data.bytesize if @data
        @loaded_all_data = @loaded_size == size

        record_metric_blob_size
        record_metric_truncated(truncated?)
      end

      # Uses the explicitly supplied binary flag when there is one; otherwise
      # defers to the included helper's implementation.
      def binary_in_repo?
        @binary.nil? ? super : @binary == true
      end

      # The loaded blob data, passed through encode!.
      def data
        encode! @data
      end

      # Load all blob data (not just the first MAX_DATA_DISPLAY_SIZE bytes) into
      # memory as a Ruby string.
      def load_all_data!(repository)
        return if @data == '' # don't mess with submodule blobs

        # Even if we return early, recalculate whether this blob is binary in
        # case a blob was initialized as text but the full data isn't
        @binary = nil

        return if @loaded_all_data

        @data = repository.gitaly_blob_client.get_blob(oid: id, limit: -1).data
        @loaded_all_data = true
        @loaded_size = @data.bytesize
      end

      # The blob name, passed through encode!.
      def name
        encode! @name
      end

      # The blob path, passed through encode!.
      def path
        encode! @path
      end

      # True when fewer bytes were loaded than the blob's reported size.
      # Returns false when either value is unknown.
      def truncated?
        return false unless size && loaded_size

        size > loaded_size
      end

      # Valid LFS object pointer is a text file consisting of
      # version
      # oid
      # size
      # see https://github.com/github/git-lfs/blob/v1.1.0/docs/spec.md#the-pointer
      def lfs_pointer?
        self.class.size_could_be_lfs?(size) && has_lfs_version_key? && lfs_oid.present? && lfs_size.present?
      end

      # Returns the 64-hex-digit sha256 oid found in the pointer text, or nil
      # when the data is not an LFS pointer or carries no such oid.
      def lfs_oid
        return unless has_lfs_version_key?

        oid_match = data.match(/(?<=sha256:)([0-9a-f]{64})/)
        oid_match[1] if oid_match
      end

      # Returns the integer following "size " in the pointer text, or nil when
      # the data is not an LFS pointer or carries no size line.
      def lfs_size
        return unless has_lfs_version_key?

        # Named so that it does not shadow the `size` attribute reader.
        size_match = data.match(/(?<=size )([0-9]+)/)
        size_match[1].to_i if size_match
      end

      # Returns :lfs for LFS pointers, nil otherwise.
      def external_storage
        return unless lfs_pointer?

        :lfs
      end

      alias_method :external_size, :lfs_size

      private

      # Observes the blob size in the histogram; skipped when size is unknown.
      def record_metric_blob_size
        return unless size

        self.class.gitlab_blob_size.observe({}, size)
      end

      # Increments the counter that matches the given truncation state.
      def record_metric_truncated(bool)
        if bool
          self.class.gitlab_blob_truncated_true.increment
        else
          self.class.gitlab_blob_truncated_false.increment
        end
      end

      # True for a non-empty text blob whose data starts with the LFS version line.
      def has_lfs_version_key?
        !empty? && text_in_repo? && data.start_with?("version https://git-lfs.github.com/spec")
      end
    end
  end
end

# Prepended to the singleton class so these class methods are looked up
# before the ones defined in `class << self` above.
Gitlab::Git::Blob.singleton_class.prepend Gitlab::Git::RuggedImpl::Blob::ClassMethods