1# frozen_string_literal: true 2 3# For large tables, PostgreSQL can take a long time to count rows due to MVCC. 4# Implements a distinct and ordinary batch counter 5# Needs indexes on the column below to calculate max, min and range queries 6# For larger tables just set use higher batch_size with index optimization 7# 8# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count 9# the start and finish can be sent specifically 10# 11# Grouped relations can be used as well. However, the preferred batch count should be around 10K because group by count is more expensive. 12# 13# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 14# 15# Examples: 16# extend ::Gitlab::Database::BatchCount 17# batch_count(User.active) 18# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id) 19# batch_count(Namespace.group(:type)) 20# batch_distinct_count(::Project, :creator_id) 21# batch_distinct_count(::Project.aimed_for_deletion.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id)) 22# batch_distinct_count(Project.group(:visibility_level), :creator_id) 23# batch_sum(User, :sign_in_count) 24# batch_sum(Issue.group(:state_id), :weight)) 25module Gitlab 26 module Database 27 module BatchCount 28 def batch_count(relation, column = nil, batch_size: nil, start: nil, finish: nil) 29 BatchCounter.new(relation, column: column).count(batch_size: batch_size, start: start, finish: finish) 30 end 31 32 def batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil) 33 BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish) 34 end 35 36 def batch_sum(relation, column, batch_size: nil, start: nil, finish: nil) 37 BatchCounter.new(relation, column: nil, operation: :sum, operation_args: [column]).count(batch_size: batch_size, start: start, finish: finish) 38 end 39 40 class << self 41 include BatchCount 42 end 43 end 44 end 45end 46