1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17
18
19 //! \addtogroup glue_mixed
20 //! @{
21
22
23
24 //! matrix multiplication with different element types
25 template<typename T1, typename T2>
26 inline
27 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_times> & X)28 glue_mixed_times::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_times>& X)
29 {
30 arma_extra_debug_sigprint();
31
32 typedef typename T1::elem_type in_eT1;
33 typedef typename T2::elem_type in_eT2;
34
35 typedef typename eT_promoter<T1,T2>::eT out_eT;
36
37 const partial_unwrap<T1> tmp1(X.A);
38 const partial_unwrap<T2> tmp2(X.B);
39
40 const typename partial_unwrap<T1>::stored_type& A = tmp1.M;
41 const typename partial_unwrap<T2>::stored_type& B = tmp2.M;
42
43 const bool use_alpha = partial_unwrap<T1>::do_times || partial_unwrap<T2>::do_times;
44 const out_eT alpha = use_alpha ? (upgrade_val<in_eT1,in_eT2>::apply(tmp1.get_val()) * upgrade_val<in_eT1,in_eT2>::apply(tmp2.get_val())) : out_eT(0);
45
46 const bool do_trans_A = partial_unwrap<T1>::do_trans;
47 const bool do_trans_B = partial_unwrap<T2>::do_trans;
48
49 arma_debug_assert_trans_mul_size<do_trans_A, do_trans_B>(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication");
50
51 const uword out_n_rows = (do_trans_A == false) ? A.n_rows : A.n_cols;
52 const uword out_n_cols = (do_trans_B == false) ? B.n_cols : B.n_rows;
53
54 const bool alias = tmp1.is_alias(out) || tmp2.is_alias(out);
55
56 if(alias == false)
57 {
58 out.set_size(out_n_rows, out_n_cols);
59
60 gemm_mixed<do_trans_A, do_trans_B, use_alpha, false>::apply(out, A, B, alpha);
61 }
62 else
63 {
64 Mat<out_eT> tmp(out_n_rows, out_n_cols, arma_nozeros_indicator());
65
66 gemm_mixed<do_trans_A, do_trans_B, use_alpha, false>::apply(tmp, A, B, alpha);
67
68 out.steal_mem(tmp);
69 }
70 }
71
72
73
74 //! matrix addition with different element types
75 template<typename T1, typename T2>
76 inline
77 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_plus> & X)78 glue_mixed_plus::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_plus>& X)
79 {
80 arma_extra_debug_sigprint();
81
82 typedef typename T1::elem_type eT1;
83 typedef typename T2::elem_type eT2;
84
85 typedef typename promote_type<eT1,eT2>::result out_eT;
86
87 promote_type<eT1,eT2>::check();
88
89 const Proxy<T1> A(X.A);
90 const Proxy<T2> B(X.B);
91
92 arma_debug_assert_same_size(A, B, "addition");
93
94 const uword n_rows = A.get_n_rows();
95 const uword n_cols = A.get_n_cols();
96
97 out.set_size(n_rows, n_cols);
98
99 out_eT* out_mem = out.memptr();
100 const uword n_elem = out.n_elem;
101
102 const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
103
104 if(use_at == false)
105 {
106 typename Proxy<T1>::ea_type AA = A.get_ea();
107 typename Proxy<T2>::ea_type BB = B.get_ea();
108
109 if(memory::is_aligned(out_mem))
110 {
111 memory::mark_as_aligned(out_mem);
112
113 for(uword i=0; i<n_elem; ++i)
114 {
115 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) + upgrade_val<eT1,eT2>::apply(BB[i]);
116 }
117 }
118 else
119 {
120 for(uword i=0; i<n_elem; ++i)
121 {
122 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) + upgrade_val<eT1,eT2>::apply(BB[i]);
123 }
124 }
125 }
126 else
127 {
128 for(uword col=0; col < n_cols; ++col)
129 for(uword row=0; row < n_rows; ++row)
130 {
131 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) + upgrade_val<eT1,eT2>::apply(B.at(row,col));
132 out_mem++;
133 }
134 }
135 }
136
137
138
139 //! matrix subtraction with different element types
140 template<typename T1, typename T2>
141 inline
142 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_minus> & X)143 glue_mixed_minus::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_minus>& X)
144 {
145 arma_extra_debug_sigprint();
146
147 typedef typename T1::elem_type eT1;
148 typedef typename T2::elem_type eT2;
149
150 typedef typename promote_type<eT1,eT2>::result out_eT;
151
152 promote_type<eT1,eT2>::check();
153
154 const Proxy<T1> A(X.A);
155 const Proxy<T2> B(X.B);
156
157 arma_debug_assert_same_size(A, B, "subtraction");
158
159 const uword n_rows = A.get_n_rows();
160 const uword n_cols = A.get_n_cols();
161
162 out.set_size(n_rows, n_cols);
163
164 out_eT* out_mem = out.memptr();
165 const uword n_elem = out.n_elem;
166
167 const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
168
169 if(use_at == false)
170 {
171 typename Proxy<T1>::ea_type AA = A.get_ea();
172 typename Proxy<T2>::ea_type BB = B.get_ea();
173
174 if(memory::is_aligned(out_mem))
175 {
176 memory::mark_as_aligned(out_mem);
177
178 for(uword i=0; i<n_elem; ++i)
179 {
180 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) - upgrade_val<eT1,eT2>::apply(BB[i]);
181 }
182 }
183 else
184 {
185 for(uword i=0; i<n_elem; ++i)
186 {
187 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) - upgrade_val<eT1,eT2>::apply(BB[i]);
188 }
189 }
190 }
191 else
192 {
193 for(uword col=0; col < n_cols; ++col)
194 for(uword row=0; row < n_rows; ++row)
195 {
196 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) - upgrade_val<eT1,eT2>::apply(B.at(row,col));
197 out_mem++;
198 }
199 }
200 }
201
202
203
204 //! element-wise matrix division with different element types
205 template<typename T1, typename T2>
206 inline
207 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_div> & X)208 glue_mixed_div::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_div>& X)
209 {
210 arma_extra_debug_sigprint();
211
212 typedef typename T1::elem_type eT1;
213 typedef typename T2::elem_type eT2;
214
215 typedef typename promote_type<eT1,eT2>::result out_eT;
216
217 promote_type<eT1,eT2>::check();
218
219 const Proxy<T1> A(X.A);
220 const Proxy<T2> B(X.B);
221
222 arma_debug_assert_same_size(A, B, "element-wise division");
223
224 const uword n_rows = A.get_n_rows();
225 const uword n_cols = A.get_n_cols();
226
227 out.set_size(n_rows, n_cols);
228
229 out_eT* out_mem = out.memptr();
230 const uword n_elem = out.n_elem;
231
232 const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
233
234 if(use_at == false)
235 {
236 typename Proxy<T1>::ea_type AA = A.get_ea();
237 typename Proxy<T2>::ea_type BB = B.get_ea();
238
239 if(memory::is_aligned(out_mem))
240 {
241 memory::mark_as_aligned(out_mem);
242
243 for(uword i=0; i<n_elem; ++i)
244 {
245 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) / upgrade_val<eT1,eT2>::apply(BB[i]);
246 }
247 }
248 else
249 {
250 for(uword i=0; i<n_elem; ++i)
251 {
252 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) / upgrade_val<eT1,eT2>::apply(BB[i]);
253 }
254 }
255 }
256 else
257 {
258 for(uword col=0; col < n_cols; ++col)
259 for(uword row=0; row < n_rows; ++row)
260 {
261 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) / upgrade_val<eT1,eT2>::apply(B.at(row,col));
262 out_mem++;
263 }
264 }
265 }
266
267
268
269 //! element-wise matrix multiplication with different element types
270 template<typename T1, typename T2>
271 inline
272 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_schur> & X)273 glue_mixed_schur::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_schur>& X)
274 {
275 arma_extra_debug_sigprint();
276
277 typedef typename T1::elem_type eT1;
278 typedef typename T2::elem_type eT2;
279
280 typedef typename promote_type<eT1,eT2>::result out_eT;
281
282 promote_type<eT1,eT2>::check();
283
284 const Proxy<T1> A(X.A);
285 const Proxy<T2> B(X.B);
286
287 arma_debug_assert_same_size(A, B, "element-wise multiplication");
288
289 const uword n_rows = A.get_n_rows();
290 const uword n_cols = A.get_n_cols();
291
292 out.set_size(n_rows, n_cols);
293
294 out_eT* out_mem = out.memptr();
295 const uword n_elem = out.n_elem;
296
297 const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
298
299 if(use_at == false)
300 {
301 typename Proxy<T1>::ea_type AA = A.get_ea();
302 typename Proxy<T2>::ea_type BB = B.get_ea();
303
304 if(memory::is_aligned(out_mem))
305 {
306 memory::mark_as_aligned(out_mem);
307
308 for(uword i=0; i<n_elem; ++i)
309 {
310 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) * upgrade_val<eT1,eT2>::apply(BB[i]);
311 }
312 }
313 else
314 {
315 for(uword i=0; i<n_elem; ++i)
316 {
317 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) * upgrade_val<eT1,eT2>::apply(BB[i]);
318 }
319 }
320 }
321 else
322 {
323 for(uword col=0; col < n_cols; ++col)
324 for(uword row=0; row < n_rows; ++row)
325 {
326 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) * upgrade_val<eT1,eT2>::apply(B.at(row,col));
327 out_mem++;
328 }
329 }
330 }
331
332
333
334 //
335 //
336 //
337
338
339
340 //! cube addition with different element types
341 template<typename T1, typename T2>
342 inline
343 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_plus> & X)344 glue_mixed_plus::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_plus>& X)
345 {
346 arma_extra_debug_sigprint();
347
348 typedef typename T1::elem_type eT1;
349 typedef typename T2::elem_type eT2;
350
351 typedef typename promote_type<eT1,eT2>::result out_eT;
352
353 promote_type<eT1,eT2>::check();
354
355 const ProxyCube<T1> A(X.A);
356 const ProxyCube<T2> B(X.B);
357
358 arma_debug_assert_same_size(A, B, "addition");
359
360 const uword n_rows = A.get_n_rows();
361 const uword n_cols = A.get_n_cols();
362 const uword n_slices = A.get_n_slices();
363
364 out.set_size(n_rows, n_cols, n_slices);
365
366 out_eT* out_mem = out.memptr();
367 const uword n_elem = out.n_elem;
368
369 const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
370
371 if(use_at == false)
372 {
373 typename ProxyCube<T1>::ea_type AA = A.get_ea();
374 typename ProxyCube<T2>::ea_type BB = B.get_ea();
375
376 for(uword i=0; i<n_elem; ++i)
377 {
378 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) + upgrade_val<eT1,eT2>::apply(BB[i]);
379 }
380 }
381 else
382 {
383 for(uword slice = 0; slice < n_slices; ++slice)
384 for(uword col = 0; col < n_cols; ++col )
385 for(uword row = 0; row < n_rows; ++row )
386 {
387 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) + upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
388 out_mem++;
389 }
390 }
391 }
392
393
394
395 //! cube subtraction with different element types
396 template<typename T1, typename T2>
397 inline
398 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_minus> & X)399 glue_mixed_minus::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_minus>& X)
400 {
401 arma_extra_debug_sigprint();
402
403 typedef typename T1::elem_type eT1;
404 typedef typename T2::elem_type eT2;
405
406 typedef typename promote_type<eT1,eT2>::result out_eT;
407
408 promote_type<eT1,eT2>::check();
409
410 const ProxyCube<T1> A(X.A);
411 const ProxyCube<T2> B(X.B);
412
413 arma_debug_assert_same_size(A, B, "subtraction");
414
415 const uword n_rows = A.get_n_rows();
416 const uword n_cols = A.get_n_cols();
417 const uword n_slices = A.get_n_slices();
418
419 out.set_size(n_rows, n_cols, n_slices);
420
421 out_eT* out_mem = out.memptr();
422 const uword n_elem = out.n_elem;
423
424 const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
425
426 if(use_at == false)
427 {
428 typename ProxyCube<T1>::ea_type AA = A.get_ea();
429 typename ProxyCube<T2>::ea_type BB = B.get_ea();
430
431 for(uword i=0; i<n_elem; ++i)
432 {
433 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) - upgrade_val<eT1,eT2>::apply(BB[i]);
434 }
435 }
436 else
437 {
438 for(uword slice = 0; slice < n_slices; ++slice)
439 for(uword col = 0; col < n_cols; ++col )
440 for(uword row = 0; row < n_rows; ++row )
441 {
442 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) - upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
443 out_mem++;
444 }
445 }
446 }
447
448
449
450 //! element-wise cube division with different element types
451 template<typename T1, typename T2>
452 inline
453 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_div> & X)454 glue_mixed_div::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_div>& X)
455 {
456 arma_extra_debug_sigprint();
457
458 typedef typename T1::elem_type eT1;
459 typedef typename T2::elem_type eT2;
460
461 typedef typename promote_type<eT1,eT2>::result out_eT;
462
463 promote_type<eT1,eT2>::check();
464
465 const ProxyCube<T1> A(X.A);
466 const ProxyCube<T2> B(X.B);
467
468 arma_debug_assert_same_size(A, B, "element-wise division");
469
470 const uword n_rows = A.get_n_rows();
471 const uword n_cols = A.get_n_cols();
472 const uword n_slices = A.get_n_slices();
473
474 out.set_size(n_rows, n_cols, n_slices);
475
476 out_eT* out_mem = out.memptr();
477 const uword n_elem = out.n_elem;
478
479 const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
480
481 if(use_at == false)
482 {
483 typename ProxyCube<T1>::ea_type AA = A.get_ea();
484 typename ProxyCube<T2>::ea_type BB = B.get_ea();
485
486 for(uword i=0; i<n_elem; ++i)
487 {
488 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) / upgrade_val<eT1,eT2>::apply(BB[i]);
489 }
490 }
491 else
492 {
493 for(uword slice = 0; slice < n_slices; ++slice)
494 for(uword col = 0; col < n_cols; ++col )
495 for(uword row = 0; row < n_rows; ++row )
496 {
497 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) / upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
498 out_mem++;
499 }
500 }
501 }
502
503
504
505 //! element-wise cube multiplication with different element types
506 template<typename T1, typename T2>
507 inline
508 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_schur> & X)509 glue_mixed_schur::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_schur>& X)
510 {
511 arma_extra_debug_sigprint();
512
513 typedef typename T1::elem_type eT1;
514 typedef typename T2::elem_type eT2;
515
516 typedef typename promote_type<eT1,eT2>::result out_eT;
517
518 promote_type<eT1,eT2>::check();
519
520 const ProxyCube<T1> A(X.A);
521 const ProxyCube<T2> B(X.B);
522
523 arma_debug_assert_same_size(A, B, "element-wise multiplication");
524
525 const uword n_rows = A.get_n_rows();
526 const uword n_cols = A.get_n_cols();
527 const uword n_slices = A.get_n_slices();
528
529 out.set_size(n_rows, n_cols, n_slices);
530
531 out_eT* out_mem = out.memptr();
532 const uword n_elem = out.n_elem;
533
534 const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
535
536 if(use_at == false)
537 {
538 typename ProxyCube<T1>::ea_type AA = A.get_ea();
539 typename ProxyCube<T2>::ea_type BB = B.get_ea();
540
541 for(uword i=0; i<n_elem; ++i)
542 {
543 out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) * upgrade_val<eT1,eT2>::apply(BB[i]);
544 }
545 }
546 else
547 {
548 for(uword slice = 0; slice < n_slices; ++slice)
549 for(uword col = 0; col < n_cols; ++col )
550 for(uword row = 0; row < n_rows; ++row )
551 {
552 (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) * upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
553 out_mem++;
554 }
555 }
556 }
557
558
559
560 //! @}
561