/*
 * Copyright © 2004 Ondra Kamenik
 * Copyright © 2019 Dynare Team
 *
 * This file is part of Dynare.
 *
 * Dynare is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Dynare is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "ps_tensor.hh"
#include "fs_tensor.hh"
#include "tl_exception.hh"
#include "tl_static.hh"
#include "stack_container.hh"

#include <iostream>

/* Here we decide which method to use for filling a slice in the slicing
   constructor. A few experiments suggest that if the tensor is more than 8%
   filled, the first method (fillFromSparseOne()) is better. For fill factors
   below 1%, the second can be about 3 times quicker. */

UPSTensor::fill_method
UPSTensor::decideFillMethod(const FSSparseTensor &t)
{
  if (t.getFillFactor() > 0.08)
    return fill_method::first;
  else
    return fill_method::second;
}

/* Here we make a slice. We decide what fill method to use and set it. */

UPSTensor::UPSTensor(const FSSparseTensor &t, const IntSequence &ss,
                     const IntSequence &coor, PerTensorDimens ptd)
  : UTensor(indor::along_col, ptd.getNVX(),
            t.nrows(), ptd.calcUnfoldMaxOffset(), ptd.dimen()),
    tdims(std::move(ptd))
{
  TL_RAISE_IF(coor.size() != t.dimen(),
              "Wrong coordinates length of stacks for UPSTensor slicing constructor");
  TL_RAISE_IF(ss.sum() != t.nvar(),
              "Wrong length of stacks for UPSTensor slicing constructor");

  if (decideFillMethod(t) == fill_method::first)
    fillFromSparseOne(t, ss, coor);
  else
    fillFromSparseTwo(t, ss, coor);
}

void
UPSTensor::increment(IntSequence &v) const
{
  TL_RAISE_IF(v.size() != dimen(),
              "Wrong input/output vector size in UPSTensor::increment");

  UTensor::increment(v, tdims.getNVX());
}

void
UPSTensor::decrement(IntSequence &v) const
{
  TL_RAISE_IF(v.size() != dimen(),
              "Wrong input/output vector size in UPSTensor::decrement");

  UTensor::decrement(v, tdims.getNVX());
}

std::unique_ptr<FTensor>
UPSTensor::fold() const
{
  TL_RAISE("Never should come to this place in UPSTensor::fold");
}

int
UPSTensor::getOffset(const IntSequence &v) const
{
  TL_RAISE_IF(v.size() != dimen(),
              "Wrong input vector size in UPSTensor::getOffset");

  return UTensor::getOffset(v, tdims.getNVX());
}

void
UPSTensor::addTo(FGSTensor &out) const
{
  TL_RAISE_IF(out.getDims() != tdims,
              "Tensors have incompatible dimens in UPSTensor::addTo");
  for (index in = out.begin(); in != out.end(); ++in)
    {
      IntSequence vtmp(dimen());
      tdims.getPer().apply(in.getCoor(), vtmp);
      index tin(*this, vtmp);
      out.addColumn(*this, *tin, *in);
    }
}

/* Here we have to add this permuted-symmetry unfolded tensor to an unfolded,
   non-permuted tensor. One easy way would be to go through the target tensor,
   permute each index, and add the column.

   However, the permutation may have a non-empty identity tail. In that case
   we can add not only individual columns but much bigger data chunks, which
   is usually more efficient. The code is therefore quite dirty, because we do
   not have an iterator that iterates over the tensor at some higher level, so
   we simulate it with the following code.

   First we set ‘cols’ to the length of the data chunk and ‘off’ to its
   dimension. Then we need the front part of the ‘nvmax’ of ‘out’, which is
   ‘nvmax_part’. Our iterator here is an integer sequence ‘outrun’ of full
   length, with ‘outrun_part’ being its front part. ‘outrun’ is initialized to
   zeros. In each step we need to advance ‘outrun’ by ‘cols’ columns, which is
   done by incrementing its prefix ‘outrun_part’.

   So we loop over all ‘cols’-wide partitions of ‘out’, and permute ‘outrun’
   to obtain ‘perrun’, which gives the corresponding column of this tensor
   (note that the trailing part of ‘perrun’ is the same as that of ‘outrun’).
   Then we construct the submatrices, add them, and increment ‘outrun’. */
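
/* A minimal sketch (not part of the library API; the helper name is purely
   illustrative) of the chunk arithmetic described above, with plain ints
   instead of IntSequence: ‘cols’ is the product of the trailing-identity part
   of ‘nvmax’, so e.g. nvmax = (3,3,2,2) with an identity tail of length 2
   gives chunks of width 4, and the loop below then advances its prefix
   counter once for every 4 columns of ‘out’. */
namespace
{
  [[maybe_unused]] int
  exampleChunkWidth(const int *nvmax, int dim, int tail_identity)
  {
    // Product of the last ‘tail_identity’ entries of nvmax, as in tailIdentitySize()
    int cols = 1;
    for (int i = dim-tail_identity; i < dim; i++)
      cols *= nvmax[i];
    return cols;
  }
}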

void
UPSTensor::addTo(UGSTensor &out) const
{
  TL_RAISE_IF(out.getDims() != tdims,
              "Tensors have incompatible dimens in UPSTensor::addTo");
  int cols = tailIdentitySize();
  int off = tdims.tailIdentity();
  IntSequence outrun(out.dimen(), 0);
  IntSequence outrun_part(outrun, 0, out.dimen()-off);
  IntSequence nvmax_part(out.getDims().getNVX(), 0, out.dimen()-off);
  for (int out_col = 0; out_col < out.ncols(); out_col += cols)
    {
      // permute ‘outrun’
      IntSequence perrun(out.dimen());
      tdims.getPer().apply(outrun, perrun);
      index from(*this, perrun);
      // construct submatrices
      ConstTwoDMatrix subfrom(*this, *from, cols);
      TwoDMatrix subout(out, out_col, cols);
      // add
      subout.add(1, subfrom);
      // increment ‘outrun’ by cols
      UTensor::increment(outrun_part, nvmax_part);
    }
}

/* This returns the product of all items in ‘nvmax’ that make up the trailing
   identity part. */

int
UPSTensor::tailIdentitySize() const
{
  return tdims.getNVX().mult(dimen()-tdims.tailIdentity(), dimen());
}

/* This fill method is pretty dumb. We go through all columns of ‘this’
   tensor, translate the coordinates to those of the sparse tensor, sort them,
   and look the item up in the sparse tensor. For really sparse tensors there
   are many unsuccessful lookups, which is why the second method works better
   in that case. */
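
/* An illustrative sketch (the helper name is hypothetical and not part of the
   library) of the coordinate translation performed below, using plain ints
   instead of IntSequence: e.g. with stack sizes ss = (2,3,2) and slice
   coordinates coor = (0,1,0), the shifts come out as cum = (0,2,0); each
   slice index is then shifted by ‘cum’ and sorted before the lookup in
   t.getMap(). */
namespace
{
  [[maybe_unused]] void
  exampleStackShifts(const int *ss, const int *coor, int dim, int *cum)
  {
    for (int i = 0; i < dim; i++)
      {
        // cum[i] = ss[0]+…+ss[coor[i]-1], i.e. the first variable of stack coor[i]
        int shift = 0;
        for (int j = 0; j < coor[i]; j++)
          shift += ss[j];
        cum[i] = shift;
      }
  }
}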

void
UPSTensor::fillFromSparseOne(const FSSparseTensor &t, const IntSequence &ss,
                             const IntSequence &coor)
{
  IntSequence cumtmp(ss.size());
  cumtmp[0] = 0;
  for (int i = 1; i < ss.size(); i++)
    cumtmp[i] = cumtmp[i-1] + ss[i-1];
  IntSequence cum(coor.size());
  for (int i = 0; i < coor.size(); i++)
    cum[i] = cumtmp[coor[i]];

  zeros();
  for (Tensor::index run = begin(); run != end(); ++run)
    {
      IntSequence c(run.getCoor());
      c.add(1, cum);
      c.sort();
      auto sl = t.getMap().lower_bound(c);
      if (sl != t.getMap().end())
        {
          auto su = t.getMap().upper_bound(c);
          for (auto srun = sl; srun != su; ++srun)
            get(srun->second.first, *run) = srun->second.second;
        }
    }
}

/* This is the second way of filling the slice. For instance, let the slice
   correspond to partitions “abac”. Here we first calculate the lower and
   upper bounds of the sparse tensor index for the slice. These are ‘lb_srt’
   and ‘ub_srt’ respectively; they correspond to the ordering “aabc”. Then we
   go through that interval and select the items that really lie between the
   bounds. We take each such index and subtract the lower bound to translate
   it to the coordinates of the slice, obtaining something like
   (i_a,j_a,k_b,l_c). Then we apply the inverse of the sorting permutation
   “abac” ↦ “aabc” to get the index (i_a,k_b,j_a,l_c). Recall that the slice
   is unfolded, so we have to apply all permutations preserving the stack
   coordinates “abac”. In our case we get the list of indices
   (i_a,k_b,j_a,l_c) and (j_a,k_b,i_a,l_c). For each of them we copy the item
   of the sparse tensor to the appropriate column. */
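
/* An illustrative sketch (the helper name is hypothetical) of the bound
   computation used below, with plain ints instead of IntSequence: e.g. with
   ss = (2,3,2) the cumulative offsets are cum = (0,2,5), and the sorted stack
   coordinates coor_srt = (0,0,1) give lb_srt = (0,0,2) and ub_srt = (1,1,4);
   only sparse items whose sorted index lies between these bounds can belong
   to the slice. */
namespace
{
  [[maybe_unused]] void
  exampleSliceBounds(const int *ss, const int *cum, const int *coor_srt,
                     int dim, int *lb, int *ub)
  {
    for (int i = 0; i < dim; i++)
      {
        // stack coor_srt[i] spans variables cum[coor_srt[i]] … cum[coor_srt[i]]+ss[coor_srt[i]]-1
        lb[i] = cum[coor_srt[i]];
        ub[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
      }
  }
}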

void
UPSTensor::fillFromSparseTwo(const FSSparseTensor &t, const IntSequence &ss,
                             const IntSequence &coor)
{
  IntSequence coor_srt(coor);
  coor_srt.sort();
  IntSequence cum(ss.size());
  cum[0] = 0;
  for (int i = 1; i < ss.size(); i++)
    cum[i] = cum[i-1] + ss[i-1];
  IntSequence lb_srt(coor.size());
  IntSequence ub_srt(coor.size());
  for (int i = 0; i < coor.size(); i++)
    {
      lb_srt[i] = cum[coor_srt[i]];
      ub_srt[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
    }

  const PermutationSet &pset = TLStatic::getPerm(coor.size());
  std::vector<Permutation> pp = pset.getPreserving(coor);

  Permutation unsort(coor);
  zeros();
  auto lbi = t.getMap().lower_bound(lb_srt);
  auto ubi = t.getMap().upper_bound(ub_srt);
  for (auto run = lbi; run != ubi; ++run)
    {
      if (lb_srt.lessEq(run->first) && run->first.lessEq(ub_srt))
        {
          IntSequence c(run->first);
          c.add(-1, lb_srt);
          unsort.apply(c);
          for (auto &i : pp)
            {
              IntSequence cp(coor.size());
              i.apply(c, cp);
              Tensor::index ind(*this, cp);
              TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
                          "Internal error in slicing constructor of UPSTensor");
              get(run->second.first, *ind) = run->second.second;
            }
        }
    }
}

/* Here we calculate the maximum offsets in each folded dimension
   (dimension sizes, hence ‘ds’). */

void
PerTensorDimens2::setDimensionSizes()
{
  const IntSequence &nvs = getNVS();
  for (int i = 0; i < numSyms(); i++)
    {
      TensorDimens td(syms[i], nvs);
      ds[i] = td.calcFoldMaxOffset();
    }
}

/* If there are two folded dimensions, the overall offset is the offset within
   the second plus the offset within the first times the maximum offset of the
   second. If there are n+1 dimensions, the offset is the offset within the
   last dimension plus the offset within the first n dimensions multiplied by
   the maximum offset of the last dimension. This is exactly what the
   following code does. */
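
/* A minimal sketch of the accumulation just described, with plain ints and a
   hypothetical helper name: given the per-dimension folded offsets ‘off’ and
   the per-dimension maximum offsets ‘dmax’ (the ‘ds’ member below), the total
   offset is built Horner-style; for two dimensions it equals
   off[0]*dmax[1] + off[1]. */
namespace
{
  [[maybe_unused]] int
  exampleCombineOffsets(const int *off, const int *dmax, int nsyms)
  {
    int ret = 0;
    for (int i = 0; i < nsyms; i++)
      ret = ret*dmax[i] + off[i];
    return ret;
  }
}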

int
PerTensorDimens2::calcOffset(const IntSequence &coor) const
{
  TL_RAISE_IF(coor.size() != dimen(),
              "Wrong length of coordinates in PerTensorDimens2::calcOffset");
  IntSequence cc(coor);
  int ret = 0;
  int off = 0;
  for (int i = 0; i < numSyms(); i++)
    {
      TensorDimens td(syms[i], getNVS());
      IntSequence c(cc, off, off+syms[i].dimen());
      int a = td.calcFoldOffset(c);
      ret = ret*ds[i] + a;
      off += syms[i].dimen();
    }
  return ret;
}

void
PerTensorDimens2::print() const
{
  std::cout << "nvmax: ";
  nvmax.print();
  std::cout << "per:   ";
  per.print();
  std::cout << "syms:  ";
  syms.print();
  std::cout << "dims:  ";
  ds.print();
}

/* Here we increment the given integer sequence. It corresponds to
   UTensor::increment() of the whole sequence, followed by partial
   monotonization of the subsequences with respect to the symmetry of each
   dimension. */

void
FPSTensor::increment(IntSequence &v) const
{
  TL_RAISE_IF(v.size() != dimen(),
              "Wrong length of coordinates in FPSTensor::increment");
  UTensor::increment(v, tdims.getNVX());
  int off = 0;
  for (int i = 0; i < tdims.numSyms(); i++)
    {
      IntSequence c(v, off, off+tdims.getSym(i).dimen());
      c.pmonotone(tdims.getSym(i));
      off += tdims.getSym(i).dimen();
    }
}

void
FPSTensor::decrement(IntSequence &v) const
{
  TL_RAISE("FPSTensor::decrement not implemented");
}

std::unique_ptr<UTensor>
FPSTensor::unfold() const
{
  TL_RAISE("Unfolding of FPSTensor not implemented");
}

/* We only call calcOffset() on the PerTensorDimens2. */

int
FPSTensor::getOffset(const IntSequence &v) const
{
  return tdims.calcOffset(v);
}

/* Here we add the tensor to ‘out’. We go through all columns of ‘out’, apply
   the permutation to get the index in this tensor, and add the column. Note
   that even if the permutation is the identity, the dimensions of the tensors
   might not be the same (since this tensor is partially folded). */

void
FPSTensor::addTo(FGSTensor &out) const
{
  for (index tar = out.begin(); tar != out.end(); ++tar)
    {
      IntSequence coor(dimen());
      tdims.getPer().apply(tar.getCoor(), coor);
      index src(*this, coor);
      out.addColumn(*this, *src, *tar);
    }
}

/* Here is the constructor which multiplies the Kronecker product with the
   general symmetry sparse tensor GSSparseTensor. The main idea is to go
   through the items in the sparse tensor (each item selects rows in the
   matrices forming the Kronecker product), then to Kronecker-multiply those
   rows, multiply by the item, and add the resulting row to the appropriate
   row of the resulting FPSTensor.

   The realization of this idea is a bit more complicated, since we have to go
   through all items, and each item must be added as many times as it has
   symmetric elements. Moreover, the permutations shuffle the order of the
   rows in their Kronecker product.

   So, we go through all unfolded indices of a tensor with the same dimensions
   as the GSSparseTensor (the sparse slice). For each such index we calculate
   its folded version (which corresponds to ordering the subsequences within
   the symmetries), test whether there is an item in the sparse slice with
   these coordinates, and if there is, we construct the Kronecker product of
   the rows, go through all items with these coordinates, and add them to the
   appropriate rows of ‘this’ tensor. */

FPSTensor::FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
                     const GSSparseTensor &a, const KronProdAll &kp)
  : FTensor(indor::along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
            a.nrows(), kp.ncols(), td.dimen()),
    tdims(td, e, p)
{
  zeros();

  UGSTensor dummy(0, a.getDims());
  for (Tensor::index run = dummy.begin(); run != dummy.end(); ++run)
    {
      Tensor::index fold_ind = dummy.getFirstIndexOf(run);
      const IntSequence &c = fold_ind.getCoor();
      auto sl = a.getMap().lower_bound(c);
      if (sl != a.getMap().end())
        {
          auto row_prod = kp.multRows(run.getCoor());
          auto su = a.getMap().upper_bound(c);
          for (auto srun = sl; srun != su; ++srun)
            {
              Vector out_row{getRow(srun->second.first)};
              out_row.add(srun->second.second, *row_prod);
            }
        }
    }
}