1 /**
2  *
3  *   Copyright (c) 2005-2021 by Pierre-Henri WUILLEMIN(_at_LIP6) & Christophe GONZALES(_at_AMU)
4  *   info_at_agrum_dot_org
5  *
6  *  This library is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU Lesser General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public License
17  *  along with this library.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 
22 /**
23  * @file
24  *
25  * @brief the pattern used by all the "basename" projections of multidim tables
26  *
27  * @author Christophe GONZALES(_at_AMU) and Pierre-Henri WUILLEMIN(_at_LIP6)
28  */
29 
30 // check if we allowed these patterns to be used
31 #ifndef GUM_PROJECTION_PATTERN_ALLOWED
32 
33 // #warning To use projectionPattern, you must define
34 // GUM_PROJECTION_PATTERN_ALLOWED
35 
36 #else
37 namespace gum {
38 
39   // a specialized function for projecting a multiDimImplementation over a subset
40   // of its variables
41 
42 #  ifdef GUM_MULTI_DIM_PROJECTION_NAME
43 #    define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
44   template < typename GUM_SCALAR >
45   MultiDimImplementation< GUM_SCALAR >*
GUM_MULTI_DIM_PROJECTION_NAME(const MultiDimImplementation<GUM_SCALAR> * table,const Set<const DiscreteVariable * > & del_vars)46      GUM_MULTI_DIM_PROJECTION_NAME(const MultiDimImplementation< GUM_SCALAR >* table,
47                                    const Set< const DiscreteVariable* >&       del_vars)
48 #  endif
49 
50   // clang-format off
51 
52 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME
53 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
54 #define GUM_MULTI_DIM_PROJECTION_POINTER
55   template <typename GUM_SCALAR>
56   MultiDimImplementation<GUM_SCALAR*>* GUM_MULTI_DIM_PROJECTION_POINTER_NAME(
57       const MultiDimImplementation<GUM_SCALAR*>* table,
58       const Set<const DiscreteVariable*>& del_vars )
59 #endif
60 
61 #ifdef GUM_MULTI_DIM_PROJECTION_NAME_F
62 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR
63   template <typename GUM_SCALAR>
64   MultiDimImplementation<GUM_SCALAR>* GUM_MULTI_DIM_PROJECTION_NAME_F(
65       const MultiDimImplementation<GUM_SCALAR>* table,
66       const Set<const DiscreteVariable*>& del_vars,
67       GUM_SCALAR ( *f )( const GUM_SCALAR&, const GUM_SCALAR& ) )
68 #endif
69 
70 #ifdef GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F
71 #define GUM_MULTI_DIM_PROJECTION_TYPE GUM_SCALAR *
72 #define GUM_MULTI_DIM_PROJECTION_POINTER
73   template <typename GUM_SCALAR>
74   MultiDimImplementation<GUM_SCALAR*>*
75   GUM_MULTI_DIM_PROJECTION_POINTER_NAME_F(
76       const MultiDimImplementation<GUM_SCALAR*>* table,
77       const Set<const DiscreteVariable*>& del_vars,
78       GUM_SCALAR* ( *f )( const GUM_SCALAR const*,
79                           const GUM_SCALAR const* ) )
80 #endif
81 
82   // clang-format on
83 
84   {
85 
86     // create the neutral element used to fill the result upon its
87     // creation
88     const GUM_SCALAR neutral_element = GUM_MULTI_DIM_PROJECTION_NEUTRAL;
89 
90     // first, compute whether we should loop over table or over the projected
91     // table first to get a faster algorithm.
92     const Sequence< const DiscreteVariable* >& table_vars = table->variablesSequence();
93     bool need_swapping = table_vars.size() >= 2 * del_vars.size();
94 
95     if (!need_swapping) {
96       // Compute the variables that belong to both the projection set and
97       // table. Store the domain size of the Cartesian product of these
98       // variables (result_domain_size) as well as the domain size of the
99       // Cartesian product of the variables of table that do not belong to
100       // projection set, i.e., those variables that belong to table but not to
101       // del_vars (table_alone_domain_size).  In addition, store the number of
102       // increments in the computation loops at the end of the function before
103       // which the variables of the projection set need be incremented (vector
104       // before incr).
105       std::vector< Idx >                  table_and_result_offset;
106       std::vector< Idx >                  table_and_result_domain;
107       std::vector< Idx >                  before_incr;
108       unsigned int                        nb_positive_before_incr = 0;
109       Idx                                 table_alone_domain_size = 1;
110       Idx                                 result_domain_size      = 1;
111       Idx                                 table_domain_size       = 1;
112       Sequence< const DiscreteVariable* > result_varSeq;
113       {
114         Idx  tmp_before_incr = 1;
115         bool has_before_incr = false;
116 
117         for (const auto var: table_vars) {
118           table_domain_size *= var->domainSize();
119 
120           if (!del_vars.exists(var)) {
121             if (has_before_incr) {
122               before_incr.push_back(tmp_before_incr - 1);
123               has_before_incr = false;
124               ++nb_positive_before_incr;
125             } else {
126               before_incr.push_back(0);
127             }
128 
129             table_and_result_domain.push_back(var->domainSize());
130             table_and_result_offset.push_back(result_domain_size);
131             result_domain_size *= var->domainSize();
132             tmp_before_incr = 1;
133             result_varSeq << var;
134           } else {
135             tmp_before_incr *= var->domainSize();
136             has_before_incr = true;
137             table_alone_domain_size *= var->domainSize();
138           }
139         }
140       }
141       std::vector< Idx > table_and_result_value = table_and_result_domain;
142       std::vector< Idx > current_incr           = before_incr;
143       std::vector< Idx > table_and_result_down  = table_and_result_offset;
144 
145       for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
146         table_and_result_down[i] *= (table_and_result_domain[i] - 1);
147       }
148 
149       // create a table "result" containing only the variables of the
150       // projection: the variables are stored in the order in which they appear
151       // in "table" Hence, ++ operations on an instantiation on table will more
152       // or less correspond to a ++ operation on an instantiation on result
153       MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
154          = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
155 
156       if (!result_varSeq.size()) { return result; }
157 
158       result->beginMultipleChanges();
159 
160       for (const auto var: result_varSeq)
161         *result << *var;
162 
163       result->endMultipleChanges();
164 
165 // fill the matrix with the neutral element
166 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
167 
168       for (Idx i = 0; i < result_domain_size; ++i) {
169         result->unsafeSet(i, new GUM_SCALAR(neutral_element));
170       }
171 
172 #  else
173       result->fill(neutral_element);
174 #  endif
175 
176       // compute the projection: first loop over the variables X's in table
177       // that do not belong to result and, for each value of these X's, loop
178       // over the variables in both table and result. As such, in the internal
179       // loop, the offsets of "result" need only be incremented as usually to
180       // parse appropriately this table. For result, the problem is slightly
181       // more complicated: in the outer for loop, we shall always reset
182       // resul_offset to 0.  For the inner loop, result_offset should be
183       // incremented (++) only when t1 before_incr[xxx] steps in the loop have
184       // already been made.
185 
186       // but before doing so, check whether there exist positive_before_incr.
187       // If this is not the case, optimize by not using before_incr at all
188       if (!nb_positive_before_incr) {
189         Idx           result_offset = 0;
190         Instantiation table_inst(table);
191 
192         for (Idx i = 0; i < table_alone_domain_size; ++i) {
193           for (Idx j = 0; j < result_domain_size; ++j) {
194 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
195             GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
196             *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
197 #  else
198             GUM_MULTI_DIM_PROJECTION_TYPE& res
199                = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
200             res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
201 #  endif
202 
203             // update the offset of table and result
204             ++table_inst;
205             ++result_offset;
206           }
207 
208           // update the offset of result
209           result_offset = 0;
210         }
211       } else {
212         // here there are positive before_incr and we should use them to know
213         // when result_offset needs be changed
214         Idx           result_offset = 0;
215         Instantiation table_inst(table);
216 
217         for (Idx i = 0; i < table_domain_size; ++i) {
218 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
219           GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
220           *res                              = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
221 #  else
222           GUM_MULTI_DIM_PROJECTION_TYPE& res
223              = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
224           res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
225 #  endif
226 
227           // update the offset of table
228           ++table_inst;
229 
230           // update the offset of result
231           for (unsigned int k = 0; k < current_incr.size(); ++k) {
232             // check if we need modify result_offset
233             if (current_incr[k]) {
234               --current_incr[k];
235               break;
236             }
237 
238             current_incr[k] = before_incr[k];
239 
240             // here we shall modify result_offset
241             --table_and_result_value[k];
242 
243             if (table_and_result_value[k]) {
244               result_offset += table_and_result_offset[k];
245               break;
246             }
247 
248             table_and_result_value[k] = table_and_result_domain[k];
249             result_offset -= table_and_result_down[k];
250           }
251         }
252       }
253 
254       return result;
255     } else {   // need_swapping = true
256 
257       // Compute the variables that are in t1 but not in t2. For these
258       // variables, get the increment in the offset of t1 that would result
259       // from an increment in one of these variables (vector t1_alone_offset).
260       // Also store the domain size of these variables (vector t1_alone_domain)
261       // and, for the computation loops at the end of this function, |variable|
262       // - the current values of these variables (vector t1_alone_value). All
263       // these vectors reference the variables of t1 \ t2 in the order in which
264       // they appear in seq1. Keep as well the size of the Cartesian product of
265       // these variables.
266       std::vector< Idx >                        table_alone_offset;
267       std::vector< Idx >                        table_alone_domain;
268       Idx                                       offset                  = 1;
269       Idx                                       table_alone_domain_size = 1;
270       HashTable< const DiscreteVariable*, Idx > var1offset(table_vars.size());
271 
272       for (const auto var: table_vars) {
273         if (del_vars.exists(var)) {
274           table_alone_domain.push_back(var->domainSize());
275           table_alone_offset.push_back(offset);
276           table_alone_domain_size *= var->domainSize();
277         }
278 
279         var1offset.insert(var, offset);
280         offset *= var->domainSize();
281       }
282 
283       std::vector< Idx > table_alone_value = table_alone_domain;
284       std::vector< Idx > table_alone_down  = table_alone_offset;
285 
286       for (unsigned int i = 0; i < table_alone_down.size(); ++i)
287         table_alone_down[i] *= (table_alone_domain[i] - 1);
288 
289       // Compute the same vectors for the variables that belong to both t1 and
290       // t2.  In this case, All these vectors reference the variables in the
291       // order in which they appear in seq2. In addition, store the number of
292       // increments in the computation loops at the end of the function before
293       // which the variables of t1 cap t2 need be incremented (vector
294       // t1_and_t2_before incr).  Similarly, store the number of such
295       // increments currently still needed before the next incrementation of
296       // the variables of t1 cap t2. Keep as well the size of the Cartesian
297       // product of these variables.
298       Sequence< const DiscreteVariable* > result_varSeq;
299       std::vector< Idx >                  table_and_result_offset;
300       std::vector< Idx >                  table_and_result_domain;
301       Idx                                 result_domain_size = 1;
302       bool                                has_before_incr    = false;
303       bool                                found_proj_var     = false;
304 
305       for (const auto var: table_vars) {
306         if (!del_vars.exists(var)) {
307           table_and_result_domain.push_back(var->domainSize());
308           table_and_result_offset.push_back(var1offset[var]);
309           found_proj_var = true;
310           result_domain_size *= var->domainSize();
311           result_varSeq << var;
312         } else {
313           if (found_proj_var) has_before_incr = true;
314         }
315       }
316 
317       std::vector< Idx > table_and_result_value = table_and_result_domain;
318       std::vector< Idx > table_and_result_down  = table_and_result_offset;
319 
320       for (unsigned int i = 0; i < table_and_result_down.size(); ++i) {
321         table_and_result_down[i] *= (table_and_result_domain[i] - 1);
322       }
323 
324       // create a table "result" containing only the variables of the
325       // projection: the variables are stored in the order in which they appear
326       // in "table" Hence, ++ operations on an instantiation on table will more
327       // or less correspond to a ++ operation on an instantiation on result
328       MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >* result
329          = new MultiDimArray< GUM_MULTI_DIM_PROJECTION_TYPE >;
330       result->beginMultipleChanges();
331 
332       for (const auto var: result_varSeq)
333         *result << *var;
334 
335       result->endMultipleChanges();
336 
337 // fill the matrix with the neutral element
338 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
339 
340       for (Idx i = 0; i < result_domain_size; ++i) {
341         result->unsafeSet(i, new GUM_SCALAR(neutral_element));
342       }
343 
344 #  else
345       result->fill(neutral_element);
346 #  endif
347 
348       // compute the sum: first loop over the variables X's both in table and
349       // in result and, for each value of these X's, loop over the variables
350       // that are in table but not result. As such, in the internal loop, there
351       // is no increment in the offset of "result", and in the outer loop, this
352       // offset is incremented using a simple ++ operator.  For table, the
353       // problem is slightly more complicated: in the outer for loop, we shall
354       // increment the variables of table cap result according to vectors
355       // table_and_result_xxx. Each time a variable of these vectors has been
356       // incremented up to its max, we shall put it down to 0 and increment the
357       // next one, and so on. For the inner loop, this is similar except that
358       // we shall do these operations only when before_incr[xxx] steps in the
359       // loop have already been made.
360 
361       // but before doing so, check whether there exist positive_before_incr.
362       // If this is not the case, optimize by not using before_incr at all
363       if (!has_before_incr) {
364         Idx           result_offset = 0;
365         Instantiation table_inst(table);
366 
367         for (Idx i = 0; i < result_domain_size; ++i) {
368           for (Idx j = 0; j < table_alone_domain_size; ++j) {
369 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
370             GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
371             *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
372 #  else
373             GUM_MULTI_DIM_PROJECTION_TYPE& res
374                = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
375             res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
376 #  endif
377 
378             // update the offset of table
379             ++table_inst;
380           }
381 
382           // update the offset of result
383           ++result_offset;
384         }
385       } else {
386         // here there are positive before_incr and we should use them to know
387         // when result_offset needs be changed
388         Idx           result_offset = 0;
389         Instantiation table_inst(table);
390 
391         for (Idx j = 0; j < result_domain_size; ++j) {
392           for (Idx i = 0; i < table_alone_domain_size; ++i) {
393 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
394             GUM_MULTI_DIM_PROJECTION_TYPE res = result->unsafeGet(result_offset);
395             *res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
396 #  else
397             GUM_MULTI_DIM_PROJECTION_TYPE& res
398                = const_cast< GUM_MULTI_DIM_PROJECTION_TYPE& >(result->unsafeGet(result_offset));
399             res = GUM_MULTI_DIM_PROJECTION(res, table->get(table_inst));
400 #  endif
401 
402             // update the increment of table for the inner loop
403             for (unsigned int k = 0; k < table_alone_value.size(); ++k) {
404               --table_alone_value[k];
405 
406               if (table_alone_value[k]) {
407                 table_inst += table_alone_offset[k];
408                 break;
409               }
410 
411               table_alone_value[k] = table_alone_domain[k];
412               table_inst -= table_alone_down[k];
413             }
414           }
415 
416           // update the offset of table for the outer loop
417           for (unsigned int k = 0; k < table_and_result_value.size(); ++k) {
418             --table_and_result_value[k];
419 
420             if (table_and_result_value[k]) {
421               table_inst += table_and_result_offset[k];
422               break;
423             }
424 
425             table_and_result_value[k] = table_and_result_domain[k];
426             table_inst -= table_and_result_down[k];
427           }
428 
429           // update the offset of result for the outer loop
430           ++result_offset;
431         }
432       }
433 
434       return result;
435     }
436   }
437 
438 #  undef GUM_MULTI_DIM_PROJECTION_TYPE
439 
440 #  ifdef GUM_MULTI_DIM_PROJECTION_POINTER
441 #    undef GUM_MULTI_DIM_PROJECTION_POINTER
442 #  endif
443 }   // namespace gum
444 #endif /* GUM_PROJECTION_PATTERN_ALLOWED */
445