1 /**
2  *
3  *   Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU)
4  *   info_at_agrum_dot_org
5  *
6  *  This library is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU Lesser General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public License
17  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /** @file
23  * @brief Class for fast retrieval of simplicial and quasi/almost simplicial
24  * nodes
25  *
26  * @author Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6)
27  */
28 #ifndef GUM_SIMPLICIAL_SET_H
29 #define GUM_SIMPLICIAL_SET_H
30 
31 #include <iostream>
32 #include <utility>
33 
34 #include <agrum/agrum.h>
35 
36 #include <agrum/tools/core/priorityQueue.h>
37 #include <agrum/tools/graphs/cliqueGraph.h>
38 
39 #define GUM_QUASI_RATIO      0.99
40 #define GUM_WEIGHT_THRESHOLD 0.0
41 
42 namespace gum {
43 
44   /* ===========================================================================
45    */
46   /* ===  CLASS FOR RETRIEVING SIMPLICIAL, ALMOST AND QUASI SIMPLICIAL NODES ===
47    */
48   /* ===========================================================================
49    */
50   /** @class SimplicialSet
51    * @brief Class enabling fast retrieval of simplicial, quasi and almost
52    * simplicial nodes
53    *
54    * \ingroup graph_group
55    *
56    */
57   /* ===========================================================================
58    */
59   class SimplicialSet {
60     public:
61     // ############################################################################
62     /// @name Constructors / Destructors
63     // ############################################################################
64     /// @{
65 
66     /// constructor. initializes the simplicial set w.r.t. a given graph
67     /** creates a class for managing the simplicial sets of a given undirected
68      * graph. When we add or remove nodes or edges within this undirected graph,
69      * the simplicial set updates its list of simplicial, almost simplicial
70      * and quasi simplicial sets. Recall that a node is simplicial if, along
71      * with
72      * its neighbors, it forms a clique. A node X is almost simplicial if it
73      * has a neighbor, say Y, such that, after removing Y, X and its neighbors
74      * form a clique.
75      *
76      * @param graph The undirected graph the simplicial sets of which we are
77      * interested in.
78      * @param log_domain_sizes The logarithm of the domain sizes of the
79      * nodes/variables. This is used for two different reasons: i) it enables
80      * to retrieve the simplicial nodes that have the smallest domain size
81      * (useful for triangulations); and ii) it enables to compute and update
82      * the log-weight of the cliques containing the nodes (the log-weight of a
83      * clique is the sum of the log_domain_sizes of its nodes).
84      * @param log_weights The logarithm of the weights of cliques.
85      * @param theRatio Let L be the number of edges between neighbors of a
86      * given node and let L' be the number of all the possible edges between
87      * these neighbors (n * (n+1) / 2). If L/L' >= theRatio, then we consider
88      * that the node and its neighbors quasi form a clique and, hence is a
89      * quasi-simplicial node.
90      * @param theThreshold for a safe triangulation (see Bodlaender), almost and
91      * quasi-simplicial nodes should not be eliminated, unless their weight is
92      * lower than the highest weight of the cliques created so far. Here, we
93      * consider it safe if the weight of a new clique is lower than
94      * (1+theThreshold) * this highest weight. This enables flexibility.
95      * @warning  Note that, by the aGrUM's constructor parameter's rule, the
96      * fact that an argument is passed as a pointer means that it is not copied
97      * within the SimplicialSet, but rather it is only referenced within it.
98      * @throws OperationNotAllowed exception is thrown if the graph, the
99      * log_modalities or the log_weights are null pointers.
100      * @warning Note that we allow log_domain_sizes to be defined over
101      * nodes/variables that do not belong to graph. These sizes will simply be
102      * ignored. However, it is compulsory that all the nodes of graph belong to
103      * log_domain_sizes. */
104     explicit SimplicialSet(UndiGraph*                    graph,
105                            const NodeProperty< double >* log_domain_sizes,
106                            NodeProperty< double >*       log_weights,
107                            double                        theRatio     = GUM_QUASI_RATIO,
108                            double                        theThreshold = GUM_WEIGHT_THRESHOLD);
109 
110     /// copy constructor
111     /** The constructor tries to make a copy of simplicial_from. In addition, it
112      * requires a graph that is a perfect copy of that of simplicial_from, as
113      * well as perfect copies of the log domain sizes and weights of
114      * simplicial_from. This requirement is necessary to avoid a mess: as the
115      * graph, the log domain sizes and the log weights are the only data that
116      * are not copied into the SimplicialSet, creating a copy of simplicial_from
117      * without using, say, a new graph would result in the new SimplicialSet
118      * asserting that some nodes are simplicial while they are not because the
119      * graph has been changed by simplicial_from. With these new copies, this
120      * kind of case cannot occur.
121      * @param simplicial_from the simplicial set we wish to copy
122      * @param graph The undirected graph the simplicial sets of which we are
123      * interested in. It should be identical to that used by simplicial_from.
124      * @param log_domain_sizes The logarithm of the domain sizes of the
125      * nodes/variabless. This is used for two different reasons: i) it enables
126      * to retrieve the simplicial nodes that have the smallest domain sizes
127      * (useful for triangulations); and ii) it enables to compute and update
128      * the log-weight of the cliques containing the nodes (the log-weight of a
129      * clique is the sum of the log_domain_sizes of its nodes).
130      * log_domain_sizes should be identical to that used by simplicial_from.
131      * @param log_weights The logarithm of the weights of the cliques.
132      * @param avoid_check if this Boolean is set to true, the SimplicialSet will
133      * not check whether the graph, the log_domain_sizes and the log_weights
134      * are OK. It will simply assume that everything is OK. Never use this
135      * unless
136      * you know what you do: setting avoid_check to true results in a faster
137      * constructor but can also lead to a mess that is quite complicated to fix.
138      * @warning Note that, by the aGrUM's constructor parameter's rule, the
139      * fact that an argument is passed as a pointer means that it is not copied
140      * within the SimplicialSet, but rather it is only referenced within it.
141      * @warning Note that we allow log_domain_sizes to be defined over
142      * nodes/variables that do not belong to graph. These sizes will simply be
143      * ignored. However, it is compulsory that all the nodes of graph belong to
144      * log_domain_sizes.
145      * @throws OperationNotAllowed exception is thrown if the graph, the
146      * log_domain_sizes or the log_weights are null pointers, or if these data
147      * are
148      * different from those stored into simplicial_from */
149     SimplicialSet(const SimplicialSet&          simplicial_from,
150                   UndiGraph*                    graph,
151                   const NodeProperty< double >* log_domain_sizes,
152                   NodeProperty< double >*       log_weights,
153                   bool                          avoid_check = false);
154 
155     /// move constructor
156     SimplicialSet(SimplicialSet&& from);
157 
158     /// destructor
159     ~SimplicialSet();
160 
161     /// @}
162 
163     // ############################################################################
164     /// @name Accessors / Modifiers
165     // ############################################################################
166     /// @{
167 
168     /// adds the necessary edges so that node 'id' and its neighbors form a
169     /// clique
170     /** @param id the node which will form, with its neighbors, a clique */
171     void makeClique(const NodeId id);
172 
173     /// removes a node and its adjacent edges from the underlying graph
174     /** The node should form a clique with its neighbors.
175      * @param id the id of the node which, along with its neighbors, forms the
176      * clique that will be removed
177      * @throw NotFound exception is thrown if the node cannot be found
178      * in the graph or if it is not a clique. */
179     void eraseClique(const NodeId id);
180 
181     /// removes a node and its adjacent edges from the underlying graph
182     /** @param id the id of the node which, along with its adjacent edges, will
183      * be removed
184      * @throw NotFound exception is thrown if the node cannot be found
185      * in the graph. */
186     void eraseNode(const NodeId id);
187 
188     /// removes an edge from the graph and recomputes the simplicial set
189     /** @param edge the edge to be removed
190      * @warning if the edge does not exist, nothing is done. In particular, no
191      * exception is thrown. */
192     void eraseEdge(const Edge& edge);
193 
194     /// adds a new edge to the graph and recomputes the simplicial set
195     /** @param first the id of one extremal node of the new inserted edge
196      * @param second the id of the other extremal node of the new inserted edge
197      * @warning if the edge already exists, nothing is done. In particular, no
198      * exception is raised.
199      * @throw InvalidNode if first and/or second do not belong to the
200      * graph nodes */
201     void addEdge(NodeId first, NodeId second);
202 
203     /// indicates whether a given node is a simplicial node
204     /** A simplicial node is a node such that the latter and its neighbors form
205      * a clique.
206      * @param id the ID of the node the simpliciality of which we test */
207     bool isSimplicial(const NodeId id);
208 
209     /// indicates whether there exists a simplicial node
210     /** A simplicial node is a node such that the latter and its neighbors form
211      * a clique. */
212     bool hasSimplicialNode();
213 
214     /// indicates whether there exists an almost simplicial node
215     bool hasAlmostSimplicialNode();
216 
217     /// indicates whether there exists a quasi simplicial node
218     bool hasQuasiSimplicialNode();
219 
220     /// returns the simplicial node with the lowest clique weight
221     /** A simplicial node is a node such that the latter and its neighbors form
222      * a clique. */
223     NodeId bestSimplicialNode();
224 
225     /// returns all the simplicial nodes
226     /** In the priority queue returned, the doubles correspond to the
227      * log-weight of the cliques the nodes belong to. */
228     const PriorityQueue< NodeId, double >& allSimplicialNodes();
229 
230     /// gets the almost simplicial node with the lowest clique weight
231     NodeId bestAlmostSimplicialNode();
232 
233     /// returns all the almost simplicial nodes
234     /** In the priority queue returned, the doubles correspond to the
235      * log-weight of the cliques formed by the nodes and their neighbors. */
236     const PriorityQueue< NodeId, double >& allAlmostSimplicialNodes();
237 
238     /// gets a quasi simplicial node with the lowest clique weight
239     NodeId bestQuasiSimplicialNode();
240 
241     /// returns all the quasi simplicial nodes
242     /** In the priority queue returned, the doubles correspond to the weight of
243      * cliques formed by the nodes and their neighbors. */
244     const PriorityQueue< NodeId, double >& allQuasiSimplicialNodes();
245 
246     /// sets/unset the fill-ins storage in the standard triangulation procedure
247     /** @param on_off when true means that the SimplicialSet will compute the
248      * fill-ins added to the graph. When on_off is false, the fill-ins are not
249      * computed. Note that, to produce a correct result, you should call
250      * setFillIns before any modification to the graph. */
251     void setFillIns(bool on_off);
252 
253     /// returns the set of all the fill-ins added to the graph so far
254     const EdgeSet& fillIns() const;
255 
256     /// initialize the simplicial set w.r.t. a new graph
257     /** @param graph The undirected graph the simplicial sets of which we are
258      * interested in.
259      * @param log_domain_sizes The logarithm of the domain sizes of the
260      * nodes/variables. This is used for two different reasons: i) it enables
261      * to retrieve the simplicial nodes that have the smallest domain sizes
262      * (useful for triangulations); and ii) it enables to compute and update
263      * the log-weight of the cliques containing the nodes (the log-weight of a
264      * clique is the sum of the log_domain_sizes of its nodes).
265      * @param log_weights The logarithm of the weights of the cliques.
266      * @param theRatio Let L be the number of edges between neighbors of a
267      * given node and let L' be the number of all the possible edges between
268      * these neighbors (n * (n+1) / 2). If L/L' >= theRatio, then we consider
269      * that the node and its neighbors quasi form a clique and, hence is a
270      * quasi-simplicial node.
271      * @param theThreshold for a safe triangulation (see Bodlaender), almost and
272      * quasi-simplicial nodes should not be eliminated, unless their weight is
273      * lower than the highest weight of the cliques created so far. Here, we
274      * consider it safe if the weight of a new clique is lower than
275      * (1+theThreshold) * this highest weight. This enables flexibility.
276      * @warning Note that we allow log_domain_sizes to be defined over
277      * nodes/variables that do not belong to graph. These sizes will simply be
278      * ignored. However, it is compulsory that all the nodes of graph belong to
279      * log_domain_sizes.
280      * @warning  Note that, by the aGrUM's constructor parameter's rule, the
281      * fact that an argument is passed as a pointer means that it is not copied
282      * within the SimplicialSet, but rather it is only referenced within it. */
283     void setGraph(UndiGraph*                    graph,
284                   const NodeProperty< double >* log_domain_sizes,
285                   NodeProperty< double >*       log_weights,
286                   double                        theRatio     = GUM_QUASI_RATIO,
287                   double                        theThreshold = GUM_WEIGHT_THRESHOLD);
288 
289     /// reassigns a new set of cliques' log weights (with the same content)
290     /** This method is useful for move constructors in elimination sequences.
291      * @throws InvalidArgument is raised if the old_weights argument is
292      * different from the current  _log_weights_ pointer. */
293     void replaceLogWeights(NodeProperty< double >* old_weigths,
294                            NodeProperty< double >* new_weights);
295 
296     /// @}
297 
298 
299     private:
300     /// the graph on which we perform the simplicial computations
301     UndiGraph* _graph_;
302 
303     /// the weights of the nodes (i.e., weight of their clique)
304     NodeProperty< double >* _log_weights_;
305 
306     /// the log of the modalities of the nodes
307     const NodeProperty< double >* _log_domain_sizes_;
308 
309     /// a queue of the simplicial nodes ordered by increasing node weight
310     PriorityQueue< NodeId, double > _simplicial_nodes_;
311 
312     /// a queue of the almost simplicial nodes ordered by increasing node weight
313     PriorityQueue< NodeId, double > _almost_simplicial_nodes_;
314 
315     /// a queue of the quasi simplicial nodes ordered by increasing node weight
316     PriorityQueue< NodeId, double > _quasi_simplicial_nodes_;
317 
318     /** @brief indicates for each node to which list (simplicial, almost
319      * simplicial, quasi simplicial) it belongs */
320     enum class _Belong_ : char
321     {
322       SIMPLICIAL,
323       ALMOST_SIMPLICIAL,
324       QUASI_SIMPLICIAL,
325       NO_LIST
326     };
327     NodeProperty< _Belong_ > _containing_list_;
328 
329     /** @brief for each edge, keep track of the number of triangles passing
330      * through this egde */
331     EdgeProperty< Size > _nb_triangles_;
332 
333     /// for each node, the number of pairs of adjacent neighbours
334     NodeProperty< Size > _nb_adjacent_neighbours_;
335 
336     /// the current (induced) tree width
337     /** @warning Note that what we call tree width here is not the classical
338      * definition, i.e., the number of nodes in the largest clique, as this is
339      * not, to our mind, what is important for computations: what is important
340      * is the size of the tables that would be stored into the cliques, i.e.,
341      * the product of the modalities of the nodes/variables contained in
342      * the cliques
343      */
344     double _log_tree_width_;
345 
346     /** @brief for a given node, if the number of pairs of neighbors that are
347      * adjacent / the number of adjacent neighbors in a clique is greater than
348      * the quasi ratio, then the node should belong the quasi simplicial list */
349     double _quasi_ratio_;
350 
351     /** @brief quasi and almost simplicial nodes may not be eliminated unless
352      * their weight is lower than (1 + threshold) * tree_width */
353     double _log_threshold_;
354 
355     /// the set of nodes that have potentially changed of status
356     NodeSet _changed_status_;
357 
358     /** @brief a boolean indicating if we want fill-ins list with the standard
359      * triangulation method */
360     bool _we_want_fill_ins_{false};
361 
362     /// fill-ins list
363     EdgeSet _fill_ins_list_;
364 
365 
366     /** @brief put node id in the correct simplicial/almost simplicial/quasi
367      * simplicial list */
368     void _updateList_(const NodeId id);
369 
370     /// put all the nodes in their appropriate list
371     void _updateAllNodes_();
372 
373     /** @brief initialize: compute  _nb_triangles_,  _nb_adjacent_neighbors_, etc
374      * when a new graph is set
375      *
376      * This method initializes the log_weights, the number of triangles and the
377      * number of adjacent neighbors given the current graph. This is to be used
378      * in constructors and method setGraph */
379     void _initialize_();
380 
381     /// prevent a copy operator to be used
382     /** If we did not prevent this operator to be used, we would be in a mess
383      * since the graph, the modalities and the weights would be shared and
384      * updated
385      * by several Simplicial sets whereas the number of triangles and the number
386      * of joined neighbors would not be shared. */
387     SimplicialSet& operator=(const SimplicialSet&);
388 
389     /// prevent the default copy constructor
390     /** If we did not prevent this operator to be used, we would be in a mess
391      * since the graph, the domain sizes and the weights would be shared and
392      * updated by several Simplicial sets whereas the number of triangles and
393      * the number of joined neighbors would not be shared. */
394     SimplicialSet(const SimplicialSet&);
395   };
396 
397 } /* namespace gum */
398 
399 #ifndef GUM_NO_INLINE
400 #  include <agrum/tools/graphs/algorithms/simplicialSet_inl.h>
401 #endif /* GUM_NO_INLINE */
402 
403 #endif /* GUM_SIMPLICIAL_SET_H */
404