/*
 * Copyright © 2004 Ondra Kamenik
 * Copyright © 2019 Dynare Team
 *
 * This file is part of Dynare.
 *
 * Dynare is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Dynare is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Dynare. If not, see <http://www.gnu.org/licenses/>.
 */
20
#include "ps_tensor.hh"
#include "fs_tensor.hh"
#include "tl_exception.hh"
#include "tl_static.hh"
#include "stack_container.hh"

#include <iostream>
#include <memory>
#include <utility>
28
29 /* Here we decide, what method for filling a slice in slicing constructor to
30 use. A few experiments suggest, that if the tensor is more than 8% filled,
31 the first method (fillFromSparseOne()) is better. For fill factors less than
32 1%, the second can be 3 times quicker. */
33
34 UPSTensor::fill_method
decideFillMethod(const FSSparseTensor & t)35 UPSTensor::decideFillMethod(const FSSparseTensor &t)
36 {
37 if (t.getFillFactor() > 0.08)
38 return fill_method::first;
39 else
40 return fill_method::second;
41 }
42
43 /* Here we make a slice. We decide what fill method to use and set it. */
44
UPSTensor(const FSSparseTensor & t,const IntSequence & ss,const IntSequence & coor,PerTensorDimens ptd)45 UPSTensor::UPSTensor(const FSSparseTensor &t, const IntSequence &ss,
46 const IntSequence &coor, PerTensorDimens ptd)
47 : UTensor(indor::along_col, ptd.getNVX(),
48 t.nrows(), ptd.calcUnfoldMaxOffset(), ptd.dimen()),
49 tdims(std::move(ptd))
50 {
51 TL_RAISE_IF(coor.size() != t.dimen(),
52 "Wrong coordinates length of stacks for UPSTensor slicing constructor");
53 TL_RAISE_IF(ss.sum() != t.nvar(),
54 "Wrong length of stacks for UPSTensor slicing constructor");
55
56 if (decideFillMethod(t) == fill_method::first)
57 fillFromSparseOne(t, ss, coor);
58 else
59 fillFromSparseTwo(t, ss, coor);
60 }
61
62 void
increment(IntSequence & v) const63 UPSTensor::increment(IntSequence &v) const
64 {
65 TL_RAISE_IF(v.size() != dimen(),
66 "Wrong input/output vector size in UPSTensor::increment");
67
68 UTensor::increment(v, tdims.getNVX());
69 }
70
71 void
decrement(IntSequence & v) const72 UPSTensor::decrement(IntSequence &v) const
73 {
74 TL_RAISE_IF(v.size() != dimen(),
75 "Wrong input/output vector size in UPSTensor::decrement");
76
77 UTensor::decrement(v, tdims.getNVX());
78 }
79
80 std::unique_ptr<FTensor>
fold() const81 UPSTensor::fold() const
82 {
83 TL_RAISE("Never should come to this place in UPSTensor::fold");
84 }
85
86 int
getOffset(const IntSequence & v) const87 UPSTensor::getOffset(const IntSequence &v) const
88 {
89 TL_RAISE_IF(v.size() != dimen(),
90 "Wrong input vector size in UPSTensor::getOffset");
91
92 return UTensor::getOffset(v, tdims.getNVX());
93 }
94
95 void
addTo(FGSTensor & out) const96 UPSTensor::addTo(FGSTensor &out) const
97 {
98 TL_RAISE_IF(out.getDims() != tdims,
99 "Tensors have incompatible dimens in UPSTensor::addTo");
100 for (index in = out.begin(); in != out.end(); ++in)
101 {
102 IntSequence vtmp(dimen());
103 tdims.getPer().apply(in.getCoor(), vtmp);
104 index tin(*this, vtmp);
105 out.addColumn(*this, *tin, *in);
106 }
107 }
108
109 /* In here, we have to add this permuted symmetry unfolded tensor to an
110 unfolded not permuted tensor. One easy way would be to go through the
111 target tensor, permute each index, and add the column.
112
113 However, it may happen, that the permutation has some non-empty
114 identity tail. In this case, we can add not only individual columns,
115 but much bigger data chunks, which is usually more
116 efficient. Therefore, the code is quite dirty, because we have not an
117 iterator, which iterates over tensor at some higher levels. So we
118 simulate it by the following code.
119
120 First we set ‘cols’ to the length of the data chunk and ‘off’ to its
121 dimension. Then we need a front part of ‘nvmax’ of ‘out’, which is
122 ‘nvmax_part’. Our iterator here is an integer sequence ‘outrun’ with
123 full length, and ‘outrun_part’ its front part. The ‘outrun’ is
124 initialized to zeros. In each step we need to increment ‘outrun’
125 ‘cols’-times, this is done by incrementing its prefix ‘outrun_part’.
126
127 So we loop over all ‘cols’-wide partitions of ‘out’, permute ‘outrun’
128 to obtain ‘perrun’ to obtain column of this matrix. (note that the
129 trailing part of ‘perrun’ is the same as of ‘outrun’. Then we
130 construct submatrices, add them, and increment ‘outrun’. */
131
132 void
addTo(UGSTensor & out) const133 UPSTensor::addTo(UGSTensor &out) const
134 {
135 TL_RAISE_IF(out.getDims() != tdims,
136 "Tensors have incompatible dimens in UPSTensor::addTo");
137 int cols = tailIdentitySize();
138 int off = tdims.tailIdentity();
139 IntSequence outrun(out.dimen(), 0);
140 IntSequence outrun_part(outrun, 0, out.dimen()-off);
141 IntSequence nvmax_part(out.getDims().getNVX(), 0, out.dimen()-off);
142 for (int out_col = 0; out_col < out.ncols(); out_col += cols)
143 {
144 // permute ‘outrun’
145 IntSequence perrun(out.dimen());
146 tdims.getPer().apply(outrun, perrun);
147 index from(*this, perrun);
148 // construct submatrices
149 ConstTwoDMatrix subfrom(*this, *from, cols);
150 TwoDMatrix subout(out, out_col, cols);
151 // add
152 subout.add(1, subfrom);
153 // increment ‘outrun’ by cols
154 UTensor::increment(outrun_part, nvmax_part);
155 }
156 }
157
158 /* This returns a product of all items in ‘nvmax’ which make up the
159 trailing identity part. */
160
161 int
tailIdentitySize() const162 UPSTensor::tailIdentitySize() const
163 {
164 return tdims.getNVX().mult(dimen()-tdims.tailIdentity(), dimen());
165 }
166
167 /* This fill method is pretty dumb. We go through all columns in ‘this’
168 tensor, translate coordinates to sparse tensor, sort them and find an
169 item in the sparse tensor. There are many not successful lookups for
170 really sparse tensor, that is why the second method works better for
171 really sparse tensors. */
172
173 void
fillFromSparseOne(const FSSparseTensor & t,const IntSequence & ss,const IntSequence & coor)174 UPSTensor::fillFromSparseOne(const FSSparseTensor &t, const IntSequence &ss,
175 const IntSequence &coor)
176 {
177 IntSequence cumtmp(ss.size());
178 cumtmp[0] = 0;
179 for (int i = 1; i < ss.size(); i++)
180 cumtmp[i] = cumtmp[i-1] + ss[i-1];
181 IntSequence cum(coor.size());
182 for (int i = 0; i < coor.size(); i++)
183 cum[i] = cumtmp[coor[i]];
184
185 zeros();
186 for (Tensor::index run = begin(); run != end(); ++run)
187 {
188 IntSequence c(run.getCoor());
189 c.add(1, cum);
190 c.sort();
191 auto sl = t.getMap().lower_bound(c);
192 if (sl != t.getMap().end())
193 {
194 auto su = t.getMap().upper_bound(c);
195 for (auto srun = sl; srun != su; ++srun)
196 get(srun->second.first, *run) = srun->second.second;
197 }
198 }
199 }
200
201 /* This is the second way of filling the slice. For instance, let the slice
202 correspond to partitions “abac”. In here we first calculate lower and upper
203 bounds for index of the sparse tensor for the slice. These are ‘lb_srt’ and
204 ‘ub_srt’ respectively. They corresponds to ordering “aabc”. Then we go
205 through that interval, and select items which are really between the bounds.
206 Then we take the index, subtract the lower bound to get it to coordinates of
207 the slice. We get something like (i_a,j_a,k_b,l_c). Then we apply the
208 inverse permutation as of the sorting form abac ↦ aabc to get index
209 (i_a,k_b,j_a,l_c). Recall that the slice is unfolded, so we have to apply
210 all permutations preserving the stack coordinates “abac”. In our case we get
211 list of indices (i_a,k_b,j_a,l_c) and (j_a,k_b,i_a,l_c). For all we copy the
212 item of the sparse tensor to the appropriate column. */
213
214 void
fillFromSparseTwo(const FSSparseTensor & t,const IntSequence & ss,const IntSequence & coor)215 UPSTensor::fillFromSparseTwo(const FSSparseTensor &t, const IntSequence &ss,
216 const IntSequence &coor)
217 {
218 IntSequence coor_srt(coor);
219 coor_srt.sort();
220 IntSequence cum(ss.size());
221 cum[0] = 0;
222 for (int i = 1; i < ss.size(); i++)
223 cum[i] = cum[i-1] + ss[i-1];
224 IntSequence lb_srt(coor.size());
225 IntSequence ub_srt(coor.size());
226 for (int i = 0; i < coor.size(); i++)
227 {
228 lb_srt[i] = cum[coor_srt[i]];
229 ub_srt[i] = cum[coor_srt[i]] + ss[coor_srt[i]] - 1;
230 }
231
232 const PermutationSet &pset = TLStatic::getPerm(coor.size());
233 std::vector<Permutation> pp = pset.getPreserving(coor);
234
235 Permutation unsort(coor);
236 zeros();
237 auto lbi = t.getMap().lower_bound(lb_srt);
238 auto ubi = t.getMap().upper_bound(ub_srt);
239 for (auto run = lbi; run != ubi; ++run)
240 {
241 if (lb_srt.lessEq(run->first) && run->first.lessEq(ub_srt))
242 {
243 IntSequence c(run->first);
244 c.add(-1, lb_srt);
245 unsort.apply(c);
246 for (auto &i : pp)
247 {
248 IntSequence cp(coor.size());
249 i.apply(c, cp);
250 Tensor::index ind(*this, cp);
251 TL_RAISE_IF(*ind < 0 || *ind >= ncols(),
252 "Internal error in slicing constructor of UPSTensor");
253 get(run->second.first, *ind) = run->second.second;
254 }
255 }
256 }
257 }
258
259 /* Here we calculate the maximum offsets in each folded dimension
260 (dimension sizes, hence ‘ds’). */
261
262 void
setDimensionSizes()263 PerTensorDimens2::setDimensionSizes()
264 {
265 const IntSequence &nvs = getNVS();
266 for (int i = 0; i < numSyms(); i++)
267 {
268 TensorDimens td(syms[i], nvs);
269 ds[i] = td.calcFoldMaxOffset();
270 }
271 }
272
273 /* If there are two folded dimensions, the offset in such a dimension is offset
274 of the second plus offset of the first times the maximum offset of the
275 second. If there are n+1 dimensions, the offset is a sum of offsets of the
276 last dimension plus the offset in the first n dimensions multiplied by the
277 maximum offset of the last dimension. This is exactly what the following
278 code does. */
279
280 int
calcOffset(const IntSequence & coor) const281 PerTensorDimens2::calcOffset(const IntSequence &coor) const
282 {
283 TL_RAISE_IF(coor.size() != dimen(),
284 "Wrong length of coordinates in PerTensorDimens2::calcOffset");
285 IntSequence cc(coor);
286 int ret = 0;
287 int off = 0;
288 for (int i = 0; i < numSyms(); i++)
289 {
290 TensorDimens td(syms[i], getNVS());
291 IntSequence c(cc, off, off+syms[i].dimen());
292 int a = td.calcFoldOffset(c);
293 ret = ret*ds[i] + a;
294 off += syms[i].dimen();
295 }
296 return ret;
297 }
298
299 void
print() const300 PerTensorDimens2::print() const
301 {
302 std::cout << "nvmax: ";
303 nvmax.print();
304 std::cout << "per: ";
305 per.print();
306 std::cout << "syms: ";
307 syms.print();
308 std::cout << "dims: ";
309 ds.print();
310 }
311
312 /* Here we increment the given integer sequence. It corresponds to
313 UTensor::increment() of the whole sequence, and then partial monotonizing of
314 the subsequences with respect to the symmetries of each dimension. */
315
316 void
increment(IntSequence & v) const317 FPSTensor::increment(IntSequence &v) const
318 {
319 TL_RAISE_IF(v.size() != dimen(),
320 "Wrong length of coordinates in FPSTensor::increment");
321 UTensor::increment(v, tdims.getNVX());
322 int off = 0;
323 for (int i = 0; i < tdims.numSyms(); i++)
324 {
325 IntSequence c(v, off, off+tdims.getSym(i).dimen());
326 c.pmonotone(tdims.getSym(i));
327 off += tdims.getSym(i).dimen();
328 }
329 }
330
331 void
decrement(IntSequence & v) const332 FPSTensor::decrement(IntSequence &v) const
333 {
334 TL_RAISE("FPSTensor::decrement not implemented");
335 }
336
337 std::unique_ptr<UTensor>
unfold() const338 FPSTensor::unfold() const
339 {
340 TL_RAISE("Unfolding of FPSTensor not implemented");
341 }
342
343 /* We only call calcOffset() on the PerTensorDimens2. */
344
345 int
getOffset(const IntSequence & v) const346 FPSTensor::getOffset(const IntSequence &v) const
347 {
348 return tdims.calcOffset(v);
349 }
350
351 /* Here we add the tensor to ‘out’. We go through all columns of the
352 ‘out’, apply the permutation to get index in the tensor, and add the
353 column. Note that if the permutation is identity, then the dimensions
354 of the tensors might not be the same (since this tensor is partially
355 folded). */
356
357 void
addTo(FGSTensor & out) const358 FPSTensor::addTo(FGSTensor &out) const
359 {
360 for (index tar = out.begin(); tar != out.end(); ++tar)
361 {
362 IntSequence coor(dimen());
363 tdims.getPer().apply(tar.getCoor(), coor);
364 index src(*this, coor);
365 out.addColumn(*this, *src, *tar);
366 }
367 }
368
369 /* Here is the constructor which multiplies the Kronecker product with
370 the general symmetry sparse tensor GSSparseTensor. The main idea is
371 to go through items in the sparse tensor (each item selects rows in
372 the matrices form the Kornecker product), then to Kronecker multiply
373 the rows and multiply with the item, and to add the resulting row to
374 the appropriate row of the resulting FPSTensor.
375
376 The realization of this idea is a bit more complicated since we have
377 to go through all items, and each item must be added as many times as
378 it has its symmetric elements. Moreover, the permutations shuffle
379 order of rows in their Kronecker product.
380
381 So, we through all unfolded indices in a tensor with the same
382 dimensions as the GSSparseTensor (sparse slice). For each such index
383 we calculate its folded version (corresponds to ordering of
384 subsequences within symmetries), we test if there is an item in the
385 sparse slice with such coordinates, and if there is, we construct the
386 Kronecker product of the rows, and go through all of items with the
387 coordinates, and add to appropriate rows of ‘this’ tensor. */
388
FPSTensor(const TensorDimens & td,const Equivalence & e,const Permutation & p,const GSSparseTensor & a,const KronProdAll & kp)389 FPSTensor::FPSTensor(const TensorDimens &td, const Equivalence &e, const Permutation &p,
390 const GSSparseTensor &a, const KronProdAll &kp)
391 : FTensor(indor::along_col, PerTensorDimens(td, Permutation(e, p)).getNVX(),
392 a.nrows(), kp.ncols(), td.dimen()),
393 tdims(td, e, p)
394 {
395 zeros();
396
397 UGSTensor dummy(0, a.getDims());
398 for (Tensor::index run = dummy.begin(); run != dummy.end(); ++run)
399 {
400 Tensor::index fold_ind = dummy.getFirstIndexOf(run);
401 const IntSequence &c = fold_ind.getCoor();
402 auto sl = a.getMap().lower_bound(c);
403 if (sl != a.getMap().end())
404 {
405 auto row_prod = kp.multRows(run.getCoor());
406 auto su = a.getMap().upper_bound(c);
407 for (auto srun = sl; srun != su; ++srun)
408 {
409 Vector out_row{getRow(srun->second.first)};
410 out_row.add(srun->second.second, *row_prod);
411 }
412 }
413 }
414 }
415