1# frozen_string_literal: true
2
3module Gitlab
4  # Retrieving of parent or child objects based on a base ActiveRecord relation.
5  #
6  # This class uses recursive CTEs and as a result will only work on PostgreSQL.
7  class ObjectHierarchy
8    DEPTH_COLUMN = :depth
9
10    attr_reader :ancestors_base, :descendants_base, :model, :options, :unscoped_model
11
12    # ancestors_base - An instance of ActiveRecord::Relation for which to
13    #                  get parent objects.
14    # descendants_base - An instance of ActiveRecord::Relation for which to
15    #                    get child objects. If omitted, ancestors_base is used.
16    def initialize(ancestors_base, descendants_base = ancestors_base, options: {})
17      raise ArgumentError, "Model of ancestors_base does not match model of descendants_base" if ancestors_base.model != descendants_base.model
18
19      @ancestors_base = ancestors_base
20      @descendants_base = descendants_base
21      @model = ancestors_base.model
22      @unscoped_model = @model.unscoped
23      @options = options
24    end
25
26    # Returns the set of descendants of a given relation, but excluding the given
27    # relation
28    # rubocop: disable CodeReuse/ActiveRecord
29    def descendants
30      base_and_descendants.where.not(id: descendants_base.select(:id))
31    end
32    # rubocop: enable CodeReuse/ActiveRecord
33
34    # Returns the maximum depth starting from the base
35    # A base object with no children has a maximum depth of `1`
36    def max_descendants_depth
37      base_and_descendants(with_depth: true).maximum(DEPTH_COLUMN)
38    end
39
40    # Returns the set of ancestors of a given relation, but excluding the given
41    # relation
42    #
43    # Passing an `upto` will stop the recursion once the specified parent_id is
44    # reached. So all ancestors *lower* than the specified ancestor will be
45    # included.
46    # rubocop: disable CodeReuse/ActiveRecord
47    def ancestors(upto: nil, hierarchy_order: nil)
48      base_and_ancestors(upto: upto, hierarchy_order: hierarchy_order).where.not(id: ancestors_base.select(:id))
49    end
50    # rubocop: enable CodeReuse/ActiveRecord
51
52    # Returns a relation that includes the ancestors_base set of objects
53    # and all their ancestors (recursively).
54    #
55    # Passing an `upto` will stop the recursion once the specified parent_id is
56    # reached. So all ancestors *lower* than the specified ancestor will be
57    # included.
58    #
59    # Passing a `hierarchy_order` with either `:asc` or `:desc` will cause the
60    # recursive query order from most nested object to root or from the root
61    # ancestor to most nested object respectively. This uses a `depth` column
62    # where `1` is defined as the depth for the base and increment as we go up
63    # each parent.
64    #
65    # Note: By default the order is breadth-first
66    # rubocop: disable CodeReuse/ActiveRecord
67    def base_and_ancestors(upto: nil, hierarchy_order: nil)
68      cte = base_and_ancestors_cte(upto, hierarchy_order)
69
70      recursive_query = if hierarchy_order
71                          # othewise depth won't be available for outer query
72                          cte.apply_to(unscoped_model.all.select(objects_table[Arel.star])).order(depth: hierarchy_order)
73                        else
74                          cte.apply_to(unscoped_model.all)
75                        end
76
77      read_only(recursive_query)
78    end
79    # rubocop: enable CodeReuse/ActiveRecord
80
81    # Returns a relation that includes the descendants_base set of objects
82    # and all their descendants (recursively).
83    #
84    # When `with_depth` is `true`, a `depth` column is included where it starts with `1` for the base objects
85    # and incremented as we go down the descendant tree
86    # rubocop: disable CodeReuse/ActiveRecord
87    def base_and_descendants(with_depth: false)
88      outer_select_relation = unscoped_model.all
89      outer_select_relation = outer_select_relation.select(objects_table[Arel.star]) if with_depth # Otherwise Active Record will not select `depth` as it's not a table column
90
91      read_only(base_and_descendants_cte(with_depth: with_depth).apply_to(outer_select_relation))
92    end
93    # rubocop: enable CodeReuse/ActiveRecord
94
95    # Returns a relation that includes the base objects, their ancestors,
96    # and the descendants of the base objects.
97    #
98    # The resulting query will roughly look like the following:
99    #
100    #     WITH RECURSIVE ancestors AS ( ... ),
101    #       descendants AS ( ... )
102    #     SELECT *
103    #     FROM (
104    #       SELECT *
105    #       FROM ancestors namespaces
106    #
107    #       UNION
108    #
109    #       SELECT *
110    #       FROM descendants namespaces
111    #     ) groups;
112    #
113    # Using this approach allows us to further add criteria to the relation with
114    # Rails thinking it's selecting data the usual way.
115    #
116    # If nested objects are not supported, ancestors_base is returned.
117    # rubocop: disable CodeReuse/ActiveRecord
118    def all_objects
119      ancestors = base_and_ancestors_cte
120      descendants = base_and_descendants_cte
121
122      ancestors_table = ancestors.alias_to(objects_table)
123      descendants_table = descendants.alias_to(objects_table)
124
125      ancestors_scope = unscoped_model.from(ancestors_table)
126      descendants_scope = unscoped_model.from(descendants_table)
127
128      relation = unscoped_model
129        .with
130        .recursive(ancestors.to_arel, descendants.to_arel)
131        .from_union([
132          ancestors_scope,
133          descendants_scope
134        ])
135
136      read_only(relation)
137    end
138    # rubocop: enable CodeReuse/ActiveRecord
139
140    private
141
142    # Remove the extra `depth` field using an INNER JOIN to avoid breaking UNION queries
143    # and ordering the rows based on the `depth` column to maintain the row order.
144    #
145    # rubocop: disable CodeReuse/ActiveRecord
146    def remove_depth_and_maintain_order(relation, hierarchy_order: :asc)
147      joined_relation = model.joins("INNER JOIN (#{relation.select(:id, :depth).to_sql}) namespaces_join_table on namespaces_join_table.id = #{model.table_name}.id").order("namespaces_join_table.depth" => hierarchy_order)
148
149      model.from(Arel::Nodes::As.new(joined_relation.arel, objects_table))
150    end
151    # rubocop: enable CodeReuse/ActiveRecord
152
153    # rubocop: disable CodeReuse/ActiveRecord
154    def base_and_ancestors_cte(stop_id = nil, hierarchy_order = nil)
155      cte = SQL::RecursiveCTE.new(:base_and_ancestors)
156
157      base_query = ancestors_base.except(:order)
158      base_query = base_query.select("1 as #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", base_query.default_select_columns) if hierarchy_order
159
160      cte << base_query
161
162      # Recursively get all the ancestors of the base set.
163      parent_query = unscoped_model
164        .from(from_tables(cte))
165        .where(ancestor_conditions(cte))
166        .except(:order)
167
168      if hierarchy_order
169        quoted_objects_table_name = model.connection.quote_table_name(objects_table.name)
170
171        parent_query = parent_query.select(
172          cte.table[DEPTH_COLUMN] + 1,
173          "tree_path || #{quoted_objects_table_name}.id",
174          "#{quoted_objects_table_name}.id = ANY(tree_path)",
175          parent_query.default_select_columns
176        ).where(cte.table[:tree_cycle].eq(false))
177      end
178
179      parent_query = parent_query.where(parent_id_column(cte).not_eq(stop_id)) if stop_id
180
181      cte << parent_query
182      cte
183    end
184    # rubocop: enable CodeReuse/ActiveRecord
185
186    # rubocop: disable CodeReuse/ActiveRecord
187    def base_and_descendants_cte(with_depth: false)
188      cte = SQL::RecursiveCTE.new(:base_and_descendants)
189
190      base_query = descendants_base.except(:order)
191      base_query = base_query.select("1 AS #{DEPTH_COLUMN}", "ARRAY[#{objects_table.name}.id] AS tree_path", "false AS tree_cycle", base_query.default_select_columns) if with_depth
192
193      cte << base_query
194
195      # Recursively get all the descendants of the base set.
196      descendants_query = unscoped_model
197        .from(from_tables(cte))
198        .where(descendant_conditions(cte))
199        .except(:order)
200
201      if with_depth
202        quoted_objects_table_name = model.connection.quote_table_name(objects_table.name)
203
204        descendants_query = descendants_query.select(
205          cte.table[DEPTH_COLUMN] + 1,
206          "tree_path || #{quoted_objects_table_name}.id",
207          "#{quoted_objects_table_name}.id = ANY(tree_path)",
208          descendants_query.default_select_columns
209        ).where(cte.table[:tree_cycle].eq(false))
210      end
211
212      cte << descendants_query
213      cte
214    end
215    # rubocop: enable CodeReuse/ActiveRecord
216
217    def objects_table
218      model.arel_table
219    end
220
221    def parent_id_column(cte)
222      cte.table[:parent_id]
223    end
224
225    def from_tables(cte)
226      [objects_table, cte.table]
227    end
228
229    def ancestor_conditions(cte)
230      objects_table[:id].eq(cte.table[:parent_id])
231    end
232
233    def descendant_conditions(cte)
234      objects_table[:parent_id].eq(cte.table[:id])
235    end
236
237    def read_only(relation)
238      # relations using a CTE are not safe to use with update_all as it will
239      # throw away the CTE, hence we mark them as read-only.
240      relation.extend(Gitlab::Database::ReadOnlyRelation)
241      relation
242    end
243  end
244end
245
246Gitlab::ObjectHierarchy.prepend_mod_with('Gitlab::ObjectHierarchy')
247