# frozen_string_literal: true

module Gitlab
  module Database
    module Reindexing
      # This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY`.
      #
      # Usage: build an instance for a single index and call #perform. The
      # index object is expected to respond to the methods used below
      # (name, schema, tablename, identifier, exclusion?, expression?,
      # bloat_size, ondisk_size_bytes, relative_bloat_level, reset, connection).
      class ReindexConcurrently
        ReindexError = Class.new(StandardError)

        # Regexp source for the suffix carried by temporary indexes created
        # during a concurrent reindex (e.g. `_ccnew`, `_ccnew1`). It is kept as
        # a string because it is used both as a Ruby regexp source and as the
        # pattern handed to `PostgresIndex.match`.
        TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'

        # Same pattern, anchored to the end of the name. Used to split a
        # lingering index name into "<prefix><suffix>" without being confused
        # by a `_ccnew` that merely occurs somewhere inside the name.
        TEMPORARY_INDEX_SUFFIX_REGEX = /#{TEMPORARY_INDEX_PATTERN}\z/.freeze

        # Statement timeout applied to the session while REINDEX is running.
        STATEMENT_TIMEOUT = 24.hours

        # Maximum identifier length used when reasoning about names that
        # PostgreSQL may have truncated before appending the suffix.
        PG_MAX_INDEX_NAME_LENGTH = 63

        attr_reader :index, :logger

        # index  - the index to rebuild
        # logger - receives structured start/finish entries and cleanup notices
        def initialize(index, logger: Gitlab::AppLogger)
          @index = index
          @logger = logger
        end

        # Rebuilds the index with `REINDEX INDEX CONCURRENTLY`.
        #
        # Raises ReindexError (before touching the database) when the index
        # serves an exclusion constraint, when its name looks like a temporary
        # index from an earlier run, or when it is an expression index.
        #
        # Dangling temporary indexes belonging to this index are cleaned up
        # afterwards, whether or not the REINDEX statement succeeded.
        def perform
          raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?

          # The pattern is matched anywhere in the name (not only at the end).
          if index.name.match?(/#{TEMPORARY_INDEX_PATTERN}/)
            raise ReindexError, 'index is a left-over temporary index from a previous reindexing run'
          end

          # Expression indexes require additional statistics in `pg_statistic`:
          # select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
          #
          # In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
          # Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
          # following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
          #
          # While this has been backpatched, we continue to disable expression indexes until further review.
          raise ReindexError, 'expression indexes are currently not supported' if index.expression?

          begin
            with_logging do
              set_statement_timeout do
                execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
              end
            end
          ensure
            cleanup_dangling_indexes
          end
        end

        private

        # Logs a structured entry before and after the given block and records
        # how long the block took. The "finished" entry is only written when
        # the block returns normally.
        def with_logging
          bloat_size = index.bloat_size
          ondisk_size_before = index.ondisk_size_bytes

          logger.info(
            message: "Starting reindex of #{index}",
            index: index.identifier,
            table: index.tablename,
            estimated_bloat_bytes: bloat_size,
            index_size_before_bytes: ondisk_size_before,
            relative_bloat_level: index.relative_bloat_level
          )

          duration = Benchmark.realtime do
            yield
          end

          # Reset the index object before reading the post-reindex figures
          # (presumably this discards cached attributes - confirm against the
          # index class).
          index.reset

          logger.info(
            message: "Finished reindex of #{index}",
            index: index.identifier,
            table: index.tablename,
            estimated_bloat_bytes: bloat_size,
            index_size_before_bytes: ondisk_size_before,
            index_size_after_bytes: index.ondisk_size_bytes,
            relative_bloat_level: index.relative_bloat_level,
            duration_s: duration.round(2)
          )
        end

        # Drops temporary indexes whose names could have been generated while
        # reindexing `index`.
        def cleanup_dangling_indexes
          Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index|
            # Example lingering index name: some_index_ccnew1
            #
            # Split on the suffix at the very end of the name only. An
            # unanchored substitution would also strip a `_ccnew` occurring in
            # the middle of the name and could attribute the lingering index to
            # the wrong original index.
            suffix_match = lingering_index.name.match(TEMPORARY_INDEX_SUFFIX_REGEX)
            next unless suffix_match

            # Example suffix: '_ccnew1'
            suffix = suffix_match[0]

            # Example prefix: 'some_index'
            prefix = suffix_match.pre_match

            # Only remove if the lingering index name could have been chosen
            # as a result of a REINDEX operation (considering that PostgreSQL
            # truncates index names to 63 chars and adds a suffix).
            next unless index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix

            remove_index(lingering_index)
          end
        end

        # Drops the given index with `DROP INDEX CONCURRENTLY IF EXISTS`,
        # wrapped in lock retries.
        #
        # Note: the `index` parameter shadows the `index` reader inside this
        # method; it refers to the index being dropped, not the one being
        # reindexed.
        def remove_index(index)
          logger.info("Removing dangling index #{index.identifier}")

          # NOTE(review): REMOVE_INDEX_RETRY_CONFIG is not defined in this
          # class; it is expected to resolve from an enclosing namespace -
          # confirm.
          retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
            connection: connection,
            timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
            klass: self.class,
            logger: logger
          )

          # Presumably an exhausted retry budget is tolerated rather than
          # raised - confirm against WithLockRetriesOutsideTransaction.
          retries.run(raise_on_exhaustion: false) do
            execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
          end
        end

        # Sets the session statement timeout to STATEMENT_TIMEOUT for the
        # duration of the block and resets it afterwards, even if the block
        # raises.
        def set_statement_timeout
          execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
          yield
        ensure
          execute('RESET statement_timeout')
        end

        delegate :execute, :quote_table_name, to: :connection

        # Connection of the index being reindexed, memoized per instance.
        def connection
          @connection ||= index.connection
        end
      end
    end
  end
end