1# frozen_string_literal: true
2
3module Gitlab
4  module Database
5    module Reindexing
6      # This is a >= PG12 reindexing strategy based on `REINDEX CONCURRENTLY`
7      class ReindexConcurrently
8        ReindexError = Class.new(StandardError)
9
10        TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'
11        STATEMENT_TIMEOUT = 24.hours
12        PG_MAX_INDEX_NAME_LENGTH = 63
13
14        attr_reader :index, :logger
15
16        def initialize(index, logger: Gitlab::AppLogger)
17          @index = index
18          @logger = logger
19        end
20
21        def perform
22          raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
23          raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name =~ /#{TEMPORARY_INDEX_PATTERN}/
24
25          # Expression indexes require additional statistics in `pg_statistic`:
26          # select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
27          #
28          # In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
29          # Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
30          # following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
31          #
32          # While this has been backpatched, we continue to disable expression indexes until further review.
33          raise ReindexError, 'expression indexes are currently not supported' if index.expression?
34
35          begin
36            with_logging do
37              set_statement_timeout do
38                execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
39              end
40            end
41          ensure
42            cleanup_dangling_indexes
43          end
44        end
45
46        private
47
48        def with_logging
49          bloat_size = index.bloat_size
50          ondisk_size_before = index.ondisk_size_bytes
51
52          logger.info(
53            message: "Starting reindex of #{index}",
54            index: index.identifier,
55            table: index.tablename,
56            estimated_bloat_bytes: bloat_size,
57            index_size_before_bytes: ondisk_size_before,
58            relative_bloat_level: index.relative_bloat_level
59          )
60
61          duration = Benchmark.realtime do
62            yield
63          end
64
65          index.reset
66
67          logger.info(
68            message: "Finished reindex of #{index}",
69            index: index.identifier,
70            table: index.tablename,
71            estimated_bloat_bytes: bloat_size,
72            index_size_before_bytes: ondisk_size_before,
73            index_size_after_bytes: index.ondisk_size_bytes,
74            relative_bloat_level: index.relative_bloat_level,
75            duration_s: duration.round(2)
76          )
77        end
78
79        def cleanup_dangling_indexes
80          Gitlab::Database::PostgresIndex.match("#{TEMPORARY_INDEX_PATTERN}$").each do |lingering_index|
81            # Example lingering index name: some_index_ccnew1
82
83            # Example prefix: 'some_index'
84            prefix = lingering_index.name.gsub(/#{TEMPORARY_INDEX_PATTERN}/, '')
85
86            # Example suffix: '_ccnew1'
87            suffix = lingering_index.name.match(/#{TEMPORARY_INDEX_PATTERN}/)[0]
88
89            # Only remove if the lingering index name could have been chosen
90            # as a result of a REINDEX operation (considering that PostgreSQL
91            # truncates index names to 63 chars and adds a suffix).
92            if index.name[0...PG_MAX_INDEX_NAME_LENGTH - suffix.length] == prefix
93              remove_index(lingering_index)
94            end
95          end
96        end
97
98        def remove_index(index)
99          logger.info("Removing dangling index #{index.identifier}")
100
101          retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
102            connection: connection,
103            timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
104            klass: self.class,
105            logger: logger
106          )
107
108          retries.run(raise_on_exhaustion: false) do
109            execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
110          end
111        end
112
113        def set_statement_timeout
114          execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
115          yield
116        ensure
117          execute('RESET statement_timeout')
118        end
119
120        delegate :execute, :quote_table_name, to: :connection
121        def connection
122          @connection ||= index.connection
123        end
124      end
125    end
126  end
127end
128