1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17 
18 
19 //! \addtogroup glue_mixed
20 //! @{
21 
22 
23 
24 //! matrix multiplication with different element types
25 template<typename T1, typename T2>
26 inline
27 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_times> & X)28 glue_mixed_times::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_times>& X)
29   {
30   arma_extra_debug_sigprint();
31 
32   typedef typename T1::elem_type in_eT1;
33   typedef typename T2::elem_type in_eT2;
34 
35   typedef typename eT_promoter<T1,T2>::eT out_eT;
36 
37   const partial_unwrap<T1> tmp1(X.A);
38   const partial_unwrap<T2> tmp2(X.B);
39 
40   const typename partial_unwrap<T1>::stored_type& A = tmp1.M;
41   const typename partial_unwrap<T2>::stored_type& B = tmp2.M;
42 
43   const bool   use_alpha = partial_unwrap<T1>::do_times || partial_unwrap<T2>::do_times;
44   const out_eT     alpha = use_alpha ? (upgrade_val<in_eT1,in_eT2>::apply(tmp1.get_val()) * upgrade_val<in_eT1,in_eT2>::apply(tmp2.get_val())) : out_eT(0);
45 
46   const bool do_trans_A = partial_unwrap<T1>::do_trans;
47   const bool do_trans_B = partial_unwrap<T2>::do_trans;
48 
49   arma_debug_assert_trans_mul_size<do_trans_A, do_trans_B>(A.n_rows, A.n_cols, B.n_rows, B.n_cols, "matrix multiplication");
50 
51   const uword out_n_rows = (do_trans_A == false) ? A.n_rows : A.n_cols;
52   const uword out_n_cols = (do_trans_B == false) ? B.n_cols : B.n_rows;
53 
54   const bool alias = tmp1.is_alias(out) || tmp2.is_alias(out);
55 
56   if(alias == false)
57     {
58     out.set_size(out_n_rows, out_n_cols);
59 
60     gemm_mixed<do_trans_A, do_trans_B, use_alpha, false>::apply(out, A, B, alpha);
61     }
62   else
63     {
64     Mat<out_eT> tmp(out_n_rows, out_n_cols, arma_nozeros_indicator());
65 
66     gemm_mixed<do_trans_A, do_trans_B, use_alpha, false>::apply(tmp, A, B, alpha);
67 
68     out.steal_mem(tmp);
69     }
70   }
71 
72 
73 
74 //! matrix addition with different element types
75 template<typename T1, typename T2>
76 inline
77 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_plus> & X)78 glue_mixed_plus::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_plus>& X)
79   {
80   arma_extra_debug_sigprint();
81 
82   typedef typename T1::elem_type eT1;
83   typedef typename T2::elem_type eT2;
84 
85   typedef typename promote_type<eT1,eT2>::result out_eT;
86 
87   promote_type<eT1,eT2>::check();
88 
89   const Proxy<T1> A(X.A);
90   const Proxy<T2> B(X.B);
91 
92   arma_debug_assert_same_size(A, B, "addition");
93 
94   const uword n_rows = A.get_n_rows();
95   const uword n_cols = A.get_n_cols();
96 
97   out.set_size(n_rows, n_cols);
98 
99         out_eT* out_mem = out.memptr();
100   const uword   n_elem  = out.n_elem;
101 
102   const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
103 
104   if(use_at == false)
105     {
106     typename Proxy<T1>::ea_type AA = A.get_ea();
107     typename Proxy<T2>::ea_type BB = B.get_ea();
108 
109     if(memory::is_aligned(out_mem))
110       {
111       memory::mark_as_aligned(out_mem);
112 
113       for(uword i=0; i<n_elem; ++i)
114         {
115         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) + upgrade_val<eT1,eT2>::apply(BB[i]);
116         }
117       }
118     else
119       {
120       for(uword i=0; i<n_elem; ++i)
121         {
122         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) + upgrade_val<eT1,eT2>::apply(BB[i]);
123         }
124       }
125     }
126   else
127     {
128     for(uword col=0; col < n_cols; ++col)
129     for(uword row=0; row < n_rows; ++row)
130       {
131       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) + upgrade_val<eT1,eT2>::apply(B.at(row,col));
132       out_mem++;
133       }
134     }
135   }
136 
137 
138 
139 //! matrix subtraction with different element types
140 template<typename T1, typename T2>
141 inline
142 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_minus> & X)143 glue_mixed_minus::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_minus>& X)
144   {
145   arma_extra_debug_sigprint();
146 
147   typedef typename T1::elem_type eT1;
148   typedef typename T2::elem_type eT2;
149 
150   typedef typename promote_type<eT1,eT2>::result out_eT;
151 
152   promote_type<eT1,eT2>::check();
153 
154   const Proxy<T1> A(X.A);
155   const Proxy<T2> B(X.B);
156 
157   arma_debug_assert_same_size(A, B, "subtraction");
158 
159   const uword n_rows = A.get_n_rows();
160   const uword n_cols = A.get_n_cols();
161 
162   out.set_size(n_rows, n_cols);
163 
164         out_eT* out_mem = out.memptr();
165   const uword   n_elem  = out.n_elem;
166 
167   const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
168 
169   if(use_at == false)
170     {
171     typename Proxy<T1>::ea_type AA = A.get_ea();
172     typename Proxy<T2>::ea_type BB = B.get_ea();
173 
174     if(memory::is_aligned(out_mem))
175       {
176       memory::mark_as_aligned(out_mem);
177 
178       for(uword i=0; i<n_elem; ++i)
179         {
180         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) - upgrade_val<eT1,eT2>::apply(BB[i]);
181         }
182       }
183     else
184       {
185       for(uword i=0; i<n_elem; ++i)
186         {
187         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) - upgrade_val<eT1,eT2>::apply(BB[i]);
188         }
189       }
190     }
191   else
192     {
193     for(uword col=0; col < n_cols; ++col)
194     for(uword row=0; row < n_rows; ++row)
195       {
196       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) - upgrade_val<eT1,eT2>::apply(B.at(row,col));
197       out_mem++;
198       }
199     }
200   }
201 
202 
203 
204 //! element-wise matrix division with different element types
205 template<typename T1, typename T2>
206 inline
207 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_div> & X)208 glue_mixed_div::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_div>& X)
209   {
210   arma_extra_debug_sigprint();
211 
212   typedef typename T1::elem_type eT1;
213   typedef typename T2::elem_type eT2;
214 
215   typedef typename promote_type<eT1,eT2>::result out_eT;
216 
217   promote_type<eT1,eT2>::check();
218 
219   const Proxy<T1> A(X.A);
220   const Proxy<T2> B(X.B);
221 
222   arma_debug_assert_same_size(A, B, "element-wise division");
223 
224   const uword n_rows = A.get_n_rows();
225   const uword n_cols = A.get_n_cols();
226 
227   out.set_size(n_rows, n_cols);
228 
229         out_eT* out_mem = out.memptr();
230   const uword   n_elem  = out.n_elem;
231 
232   const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
233 
234   if(use_at == false)
235     {
236     typename Proxy<T1>::ea_type AA = A.get_ea();
237     typename Proxy<T2>::ea_type BB = B.get_ea();
238 
239     if(memory::is_aligned(out_mem))
240       {
241       memory::mark_as_aligned(out_mem);
242 
243       for(uword i=0; i<n_elem; ++i)
244         {
245         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) / upgrade_val<eT1,eT2>::apply(BB[i]);
246         }
247       }
248     else
249       {
250       for(uword i=0; i<n_elem; ++i)
251         {
252         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) / upgrade_val<eT1,eT2>::apply(BB[i]);
253         }
254       }
255     }
256   else
257     {
258     for(uword col=0; col < n_cols; ++col)
259     for(uword row=0; row < n_rows; ++row)
260       {
261       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) / upgrade_val<eT1,eT2>::apply(B.at(row,col));
262       out_mem++;
263       }
264     }
265   }
266 
267 
268 
269 //! element-wise matrix multiplication with different element types
270 template<typename T1, typename T2>
271 inline
272 void
apply(Mat<typename eT_promoter<T1,T2>::eT> & out,const mtGlue<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_schur> & X)273 glue_mixed_schur::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_schur>& X)
274   {
275   arma_extra_debug_sigprint();
276 
277   typedef typename T1::elem_type eT1;
278   typedef typename T2::elem_type eT2;
279 
280   typedef typename promote_type<eT1,eT2>::result out_eT;
281 
282   promote_type<eT1,eT2>::check();
283 
284   const Proxy<T1> A(X.A);
285   const Proxy<T2> B(X.B);
286 
287   arma_debug_assert_same_size(A, B, "element-wise multiplication");
288 
289   const uword n_rows = A.get_n_rows();
290   const uword n_cols = A.get_n_cols();
291 
292   out.set_size(n_rows, n_cols);
293 
294         out_eT* out_mem = out.memptr();
295   const uword   n_elem  = out.n_elem;
296 
297   const bool use_at = (Proxy<T1>::use_at || Proxy<T2>::use_at);
298 
299   if(use_at == false)
300     {
301     typename Proxy<T1>::ea_type AA = A.get_ea();
302     typename Proxy<T2>::ea_type BB = B.get_ea();
303 
304     if(memory::is_aligned(out_mem))
305       {
306       memory::mark_as_aligned(out_mem);
307 
308       for(uword i=0; i<n_elem; ++i)
309         {
310         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) * upgrade_val<eT1,eT2>::apply(BB[i]);
311         }
312       }
313     else
314       {
315       for(uword i=0; i<n_elem; ++i)
316         {
317         out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) * upgrade_val<eT1,eT2>::apply(BB[i]);
318         }
319       }
320     }
321   else
322     {
323     for(uword col=0; col < n_cols; ++col)
324     for(uword row=0; row < n_rows; ++row)
325       {
326       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col)) * upgrade_val<eT1,eT2>::apply(B.at(row,col));
327       out_mem++;
328       }
329     }
330   }
331 
332 
333 
334 //
335 //
336 //
337 
338 
339 
340 //! cube addition with different element types
341 template<typename T1, typename T2>
342 inline
343 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_plus> & X)344 glue_mixed_plus::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_plus>& X)
345   {
346   arma_extra_debug_sigprint();
347 
348   typedef typename T1::elem_type eT1;
349   typedef typename T2::elem_type eT2;
350 
351   typedef typename promote_type<eT1,eT2>::result out_eT;
352 
353   promote_type<eT1,eT2>::check();
354 
355   const ProxyCube<T1> A(X.A);
356   const ProxyCube<T2> B(X.B);
357 
358   arma_debug_assert_same_size(A, B, "addition");
359 
360   const uword n_rows   = A.get_n_rows();
361   const uword n_cols   = A.get_n_cols();
362   const uword n_slices = A.get_n_slices();
363 
364   out.set_size(n_rows, n_cols, n_slices);
365 
366         out_eT* out_mem = out.memptr();
367   const uword    n_elem = out.n_elem;
368 
369   const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
370 
371   if(use_at == false)
372     {
373     typename ProxyCube<T1>::ea_type AA = A.get_ea();
374     typename ProxyCube<T2>::ea_type BB = B.get_ea();
375 
376     for(uword i=0; i<n_elem; ++i)
377       {
378       out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) + upgrade_val<eT1,eT2>::apply(BB[i]);
379       }
380     }
381   else
382     {
383     for(uword slice = 0; slice < n_slices; ++slice)
384     for(uword col   = 0; col   < n_cols;   ++col  )
385     for(uword row   = 0; row   < n_rows;   ++row  )
386       {
387       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) + upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
388       out_mem++;
389       }
390     }
391   }
392 
393 
394 
395 //! cube subtraction with different element types
396 template<typename T1, typename T2>
397 inline
398 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_minus> & X)399 glue_mixed_minus::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_minus>& X)
400   {
401   arma_extra_debug_sigprint();
402 
403   typedef typename T1::elem_type eT1;
404   typedef typename T2::elem_type eT2;
405 
406   typedef typename promote_type<eT1,eT2>::result out_eT;
407 
408   promote_type<eT1,eT2>::check();
409 
410   const ProxyCube<T1> A(X.A);
411   const ProxyCube<T2> B(X.B);
412 
413   arma_debug_assert_same_size(A, B, "subtraction");
414 
415   const uword n_rows   = A.get_n_rows();
416   const uword n_cols   = A.get_n_cols();
417   const uword n_slices = A.get_n_slices();
418 
419   out.set_size(n_rows, n_cols, n_slices);
420 
421         out_eT* out_mem = out.memptr();
422   const uword    n_elem = out.n_elem;
423 
424   const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
425 
426   if(use_at == false)
427     {
428     typename ProxyCube<T1>::ea_type AA = A.get_ea();
429     typename ProxyCube<T2>::ea_type BB = B.get_ea();
430 
431     for(uword i=0; i<n_elem; ++i)
432       {
433       out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) - upgrade_val<eT1,eT2>::apply(BB[i]);
434       }
435     }
436   else
437     {
438     for(uword slice = 0; slice < n_slices; ++slice)
439     for(uword col   = 0; col   < n_cols;   ++col  )
440     for(uword row   = 0; row   < n_rows;   ++row  )
441       {
442       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) - upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
443       out_mem++;
444       }
445     }
446   }
447 
448 
449 
450 //! element-wise cube division with different element types
451 template<typename T1, typename T2>
452 inline
453 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_div> & X)454 glue_mixed_div::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_div>& X)
455   {
456   arma_extra_debug_sigprint();
457 
458   typedef typename T1::elem_type eT1;
459   typedef typename T2::elem_type eT2;
460 
461   typedef typename promote_type<eT1,eT2>::result out_eT;
462 
463   promote_type<eT1,eT2>::check();
464 
465   const ProxyCube<T1> A(X.A);
466   const ProxyCube<T2> B(X.B);
467 
468   arma_debug_assert_same_size(A, B, "element-wise division");
469 
470   const uword n_rows   = A.get_n_rows();
471   const uword n_cols   = A.get_n_cols();
472   const uword n_slices = A.get_n_slices();
473 
474   out.set_size(n_rows, n_cols, n_slices);
475 
476         out_eT* out_mem = out.memptr();
477   const uword    n_elem = out.n_elem;
478 
479   const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
480 
481   if(use_at == false)
482     {
483     typename ProxyCube<T1>::ea_type AA = A.get_ea();
484     typename ProxyCube<T2>::ea_type BB = B.get_ea();
485 
486     for(uword i=0; i<n_elem; ++i)
487       {
488       out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) / upgrade_val<eT1,eT2>::apply(BB[i]);
489       }
490     }
491   else
492     {
493     for(uword slice = 0; slice < n_slices; ++slice)
494     for(uword col   = 0; col   < n_cols;   ++col  )
495     for(uword row   = 0; row   < n_rows;   ++row  )
496       {
497       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) / upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
498       out_mem++;
499       }
500     }
501   }
502 
503 
504 
505 //! element-wise cube multiplication with different element types
506 template<typename T1, typename T2>
507 inline
508 void
apply(Cube<typename eT_promoter<T1,T2>::eT> & out,const mtGlueCube<typename eT_promoter<T1,T2>::eT,T1,T2,glue_mixed_schur> & X)509 glue_mixed_schur::apply(Cube<typename eT_promoter<T1,T2>::eT>& out, const mtGlueCube<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_schur>& X)
510   {
511   arma_extra_debug_sigprint();
512 
513   typedef typename T1::elem_type eT1;
514   typedef typename T2::elem_type eT2;
515 
516   typedef typename promote_type<eT1,eT2>::result out_eT;
517 
518   promote_type<eT1,eT2>::check();
519 
520   const ProxyCube<T1> A(X.A);
521   const ProxyCube<T2> B(X.B);
522 
523   arma_debug_assert_same_size(A, B, "element-wise multiplication");
524 
525   const uword n_rows   = A.get_n_rows();
526   const uword n_cols   = A.get_n_cols();
527   const uword n_slices = A.get_n_slices();
528 
529   out.set_size(n_rows, n_cols, n_slices);
530 
531         out_eT* out_mem = out.memptr();
532   const uword    n_elem = out.n_elem;
533 
534   const bool use_at = (ProxyCube<T1>::use_at || ProxyCube<T2>::use_at);
535 
536   if(use_at == false)
537     {
538     typename ProxyCube<T1>::ea_type AA = A.get_ea();
539     typename ProxyCube<T2>::ea_type BB = B.get_ea();
540 
541     for(uword i=0; i<n_elem; ++i)
542       {
543       out_mem[i] = upgrade_val<eT1,eT2>::apply(AA[i]) * upgrade_val<eT1,eT2>::apply(BB[i]);
544       }
545     }
546   else
547     {
548     for(uword slice = 0; slice < n_slices; ++slice)
549     for(uword col   = 0; col   < n_cols;   ++col  )
550     for(uword row   = 0; row   < n_rows;   ++row  )
551       {
552       (*out_mem) = upgrade_val<eT1,eT2>::apply(A.at(row,col,slice)) * upgrade_val<eT1,eT2>::apply(B.at(row,col,slice));
553       out_mem++;
554       }
555     }
556   }
557 
558 
559 
560 //! @}
561