1 // SPDX-License-Identifier: Apache-2.0
2 //
3 // Copyright 2008-2016 Conrad Sanderson (http://conradsanderson.id.au)
4 // Copyright 2008-2016 National ICT Australia (NICTA)
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 // ------------------------------------------------------------------------
17
18
19 //! \addtogroup eop_core
20 //! @{
21
22
23 #undef arma_applier_1u
24 #undef arma_applier_1a
25 #undef arma_applier_2
26 #undef arma_applier_3
27 #undef operatorA
28
29 #undef arma_applier_1_mp
30 #undef arma_applier_2_mp
31 #undef arma_applier_3_mp
32
33
34 #if defined(ARMA_SIMPLE_LOOPS)
35 #define arma_applier_1u(operatorA) \
36 {\
37 for(uword i=0; i<n_elem; ++i)\
38 {\
39 out_mem[i] operatorA eop_core<eop_type>::process(P[i], k);\
40 }\
41 }
42 #else
43 #define arma_applier_1u(operatorA) \
44 {\
45 uword i,j;\
46 \
47 for(i=0, j=1; j<n_elem; i+=2, j+=2)\
48 {\
49 eT tmp_i = P[i];\
50 eT tmp_j = P[j];\
51 \
52 tmp_i = eop_core<eop_type>::process(tmp_i, k);\
53 tmp_j = eop_core<eop_type>::process(tmp_j, k);\
54 \
55 out_mem[i] operatorA tmp_i;\
56 out_mem[j] operatorA tmp_j;\
57 }\
58 \
59 if(i < n_elem)\
60 {\
61 out_mem[i] operatorA eop_core<eop_type>::process(P[i], k);\
62 }\
63 }
64 #endif
65
66
67
68 #if defined(ARMA_SIMPLE_LOOPS)
69 #define arma_applier_1a(operatorA) \
70 {\
71 for(uword i=0; i<n_elem; ++i)\
72 {\
73 out_mem[i] operatorA eop_core<eop_type>::process(P.at_alt(i), k);\
74 }\
75 }
76 #else
77 #define arma_applier_1a(operatorA) \
78 {\
79 uword i,j;\
80 \
81 for(i=0, j=1; j<n_elem; i+=2, j+=2)\
82 {\
83 eT tmp_i = P.at_alt(i);\
84 eT tmp_j = P.at_alt(j);\
85 \
86 tmp_i = eop_core<eop_type>::process(tmp_i, k);\
87 tmp_j = eop_core<eop_type>::process(tmp_j, k);\
88 \
89 out_mem[i] operatorA tmp_i;\
90 out_mem[j] operatorA tmp_j;\
91 }\
92 \
93 if(i < n_elem)\
94 {\
95 out_mem[i] operatorA eop_core<eop_type>::process(P.at_alt(i), k);\
96 }\
97 }
98 #endif
99
100
101 #define arma_applier_2(operatorA) \
102 {\
103 if(n_rows != 1)\
104 {\
105 for(uword col=0; col<n_cols; ++col)\
106 {\
107 uword i,j;\
108 \
109 for(i=0, j=1; j<n_rows; i+=2, j+=2)\
110 {\
111 eT tmp_i = P.at(i,col);\
112 eT tmp_j = P.at(j,col);\
113 \
114 tmp_i = eop_core<eop_type>::process(tmp_i, k);\
115 tmp_j = eop_core<eop_type>::process(tmp_j, k);\
116 \
117 *out_mem operatorA tmp_i; out_mem++;\
118 *out_mem operatorA tmp_j; out_mem++;\
119 }\
120 \
121 if(i < n_rows)\
122 {\
123 *out_mem operatorA eop_core<eop_type>::process(P.at(i,col), k); out_mem++;\
124 }\
125 }\
126 }\
127 else\
128 {\
129 for(uword count=0; count < n_cols; ++count)\
130 {\
131 out_mem[count] operatorA eop_core<eop_type>::process(P.at(0,count), k);\
132 }\
133 }\
134 }
135
136
137
138 #define arma_applier_3(operatorA) \
139 {\
140 for(uword slice=0; slice<n_slices; ++slice)\
141 {\
142 for(uword col=0; col<n_cols; ++col)\
143 {\
144 uword i,j;\
145 \
146 for(i=0, j=1; j<n_rows; i+=2, j+=2)\
147 {\
148 eT tmp_i = P.at(i,col,slice);\
149 eT tmp_j = P.at(j,col,slice);\
150 \
151 tmp_i = eop_core<eop_type>::process(tmp_i, k);\
152 tmp_j = eop_core<eop_type>::process(tmp_j, k);\
153 \
154 *out_mem operatorA tmp_i; out_mem++; \
155 *out_mem operatorA tmp_j; out_mem++; \
156 }\
157 \
158 if(i < n_rows)\
159 {\
160 *out_mem operatorA eop_core<eop_type>::process(P.at(i,col,slice), k); out_mem++; \
161 }\
162 }\
163 }\
164 }
165
166
167
168 #if defined(ARMA_USE_OPENMP)
169
170 #define arma_applier_1_mp(operatorA) \
171 {\
172 const int n_threads = mp_thread_limit::get();\
173 _Pragma("omp parallel for schedule(static) num_threads(n_threads)")\
174 for(uword i=0; i<n_elem; ++i)\
175 {\
176 out_mem[i] operatorA eop_core<eop_type>::process(P[i], k);\
177 }\
178 }
179
180 #define arma_applier_2_mp(operatorA) \
181 {\
182 const int n_threads = mp_thread_limit::get();\
183 if(n_cols == 1)\
184 {\
185 _Pragma("omp parallel for schedule(static) num_threads(n_threads)")\
186 for(uword count=0; count < n_rows; ++count)\
187 {\
188 out_mem[count] operatorA eop_core<eop_type>::process(P.at(count,0), k);\
189 }\
190 }\
191 else\
192 if(n_rows == 1)\
193 {\
194 _Pragma("omp parallel for schedule(static) num_threads(n_threads)")\
195 for(uword count=0; count < n_cols; ++count)\
196 {\
197 out_mem[count] operatorA eop_core<eop_type>::process(P.at(0,count), k);\
198 }\
199 }\
200 else\
201 {\
202 _Pragma("omp parallel for schedule(static) num_threads(n_threads)")\
203 for(uword col=0; col < n_cols; ++col)\
204 {\
205 for(uword row=0; row < n_rows; ++row)\
206 {\
207 out.at(row,col) operatorA eop_core<eop_type>::process(P.at(row,col), k);\
208 }\
209 }\
210 }\
211 }
212
213 #define arma_applier_3_mp(operatorA) \
214 {\
215 const int n_threads = mp_thread_limit::get();\
216 _Pragma("omp parallel for schedule(static) num_threads(n_threads)")\
217 for(uword slice=0; slice<n_slices; ++slice)\
218 {\
219 for(uword col=0; col<n_cols; ++col)\
220 for(uword row=0; row<n_rows; ++row)\
221 {\
222 out.at(row,col,slice) operatorA eop_core<eop_type>::process(P.at(row,col,slice), k);\
223 }\
224 }\
225 }
226
227 #else
228
229 #define arma_applier_1_mp(operatorA) arma_applier_1u(operatorA)
230 #define arma_applier_2_mp(operatorA) arma_applier_2(operatorA)
231 #define arma_applier_3_mp(operatorA) arma_applier_3(operatorA)
232
233 #endif
234
235
236
237 //
238 // matrices
239
240
241
242 template<typename eop_type>
243 template<typename outT, typename T1>
244 arma_hot
245 inline
246 void
apply(outT & out,const eOp<T1,eop_type> & x)247 eop_core<eop_type>::apply(outT& out, const eOp<T1, eop_type>& x)
248 {
249 arma_extra_debug_sigprint();
250
251 typedef typename T1::elem_type eT;
252
253 // NOTE: we're assuming that the matrix has already been set to the correct size and there is no aliasing;
254 // size setting and alias checking is done by either the Mat contructor or operator=()
255
256 const eT k = x.aux;
257 eT* out_mem = out.memptr();
258
259 const bool use_mp = (arma_config::openmp) && (eOp<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
260
261 if(Proxy<T1>::use_at == false)
262 {
263 const uword n_elem = x.get_n_elem();
264
265 if(use_mp && mp_gate<eT>::eval(n_elem))
266 {
267 typename Proxy<T1>::ea_type P = x.P.get_ea();
268
269 arma_applier_1_mp(=);
270 }
271 else
272 {
273 if(memory::is_aligned(out_mem))
274 {
275 memory::mark_as_aligned(out_mem);
276
277 if(x.P.is_aligned())
278 {
279 typename Proxy<T1>::aligned_ea_type P = x.P.get_aligned_ea();
280
281 arma_applier_1a(=);
282 }
283 else
284 {
285 typename Proxy<T1>::ea_type P = x.P.get_ea();
286
287 arma_applier_1u(=);
288 }
289 }
290 else
291 {
292 typename Proxy<T1>::ea_type P = x.P.get_ea();
293
294 arma_applier_1u(=);
295 }
296 }
297 }
298 else
299 {
300 const uword n_rows = x.get_n_rows();
301 const uword n_cols = x.get_n_cols();
302
303 const Proxy<T1>& P = x.P;
304
305 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
306 {
307 arma_applier_2_mp(=);
308 }
309 else
310 {
311 arma_applier_2(=);
312 }
313 }
314 }
315
316
317
318 template<typename eop_type>
319 template<typename T1>
320 arma_hot
321 inline
322 void
apply_inplace_plus(Mat<typename T1::elem_type> & out,const eOp<T1,eop_type> & x)323 eop_core<eop_type>::apply_inplace_plus(Mat<typename T1::elem_type>& out, const eOp<T1, eop_type>& x)
324 {
325 arma_extra_debug_sigprint();
326
327 typedef typename T1::elem_type eT;
328
329 const uword n_rows = x.get_n_rows();
330 const uword n_cols = x.get_n_cols();
331
332 arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "addition");
333
334 const eT k = x.aux;
335 eT* out_mem = out.memptr();
336
337 const bool use_mp = (arma_config::openmp) && (eOp<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
338
339 if(Proxy<T1>::use_at == false)
340 {
341 const uword n_elem = x.get_n_elem();
342
343 if(use_mp && mp_gate<eT>::eval(n_elem))
344 {
345 typename Proxy<T1>::ea_type P = x.P.get_ea();
346
347 arma_applier_1_mp(+=);
348 }
349 else
350 {
351 if(memory::is_aligned(out_mem))
352 {
353 memory::mark_as_aligned(out_mem);
354
355 if(x.P.is_aligned())
356 {
357 typename Proxy<T1>::aligned_ea_type P = x.P.get_aligned_ea();
358
359 arma_applier_1a(+=);
360 }
361 else
362 {
363 typename Proxy<T1>::ea_type P = x.P.get_ea();
364
365 arma_applier_1u(+=);
366 }
367 }
368 else
369 {
370 typename Proxy<T1>::ea_type P = x.P.get_ea();
371
372 arma_applier_1u(+=);
373 }
374 }
375 }
376 else
377 {
378 const Proxy<T1>& P = x.P;
379
380 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
381 {
382 arma_applier_2_mp(+=);
383 }
384 else
385 {
386 arma_applier_2(+=);
387 }
388 }
389 }
390
391
392
393 template<typename eop_type>
394 template<typename T1>
395 arma_hot
396 inline
397 void
apply_inplace_minus(Mat<typename T1::elem_type> & out,const eOp<T1,eop_type> & x)398 eop_core<eop_type>::apply_inplace_minus(Mat<typename T1::elem_type>& out, const eOp<T1, eop_type>& x)
399 {
400 arma_extra_debug_sigprint();
401
402 typedef typename T1::elem_type eT;
403
404 const uword n_rows = x.get_n_rows();
405 const uword n_cols = x.get_n_cols();
406
407 arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "subtraction");
408
409 const eT k = x.aux;
410 eT* out_mem = out.memptr();
411
412 const bool use_mp = (arma_config::openmp) && (eOp<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
413
414 if(Proxy<T1>::use_at == false)
415 {
416 const uword n_elem = x.get_n_elem();
417
418 if(use_mp && mp_gate<eT>::eval(n_elem))
419 {
420 typename Proxy<T1>::ea_type P = x.P.get_ea();
421
422 arma_applier_1_mp(-=);
423 }
424 else
425 {
426 if(memory::is_aligned(out_mem))
427 {
428 memory::mark_as_aligned(out_mem);
429
430 if(x.P.is_aligned())
431 {
432 typename Proxy<T1>::aligned_ea_type P = x.P.get_aligned_ea();
433
434 arma_applier_1a(-=);
435 }
436 else
437 {
438 typename Proxy<T1>::ea_type P = x.P.get_ea();
439
440 arma_applier_1u(-=);
441 }
442 }
443 else
444 {
445 typename Proxy<T1>::ea_type P = x.P.get_ea();
446
447 arma_applier_1u(-=);
448 }
449 }
450 }
451 else
452 {
453 const Proxy<T1>& P = x.P;
454
455 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
456 {
457 arma_applier_2_mp(-=);
458 }
459 else
460 {
461 arma_applier_2(-=);
462 }
463 }
464 }
465
466
467
468 template<typename eop_type>
469 template<typename T1>
470 arma_hot
471 inline
472 void
apply_inplace_schur(Mat<typename T1::elem_type> & out,const eOp<T1,eop_type> & x)473 eop_core<eop_type>::apply_inplace_schur(Mat<typename T1::elem_type>& out, const eOp<T1, eop_type>& x)
474 {
475 arma_extra_debug_sigprint();
476
477 typedef typename T1::elem_type eT;
478
479 const uword n_rows = x.get_n_rows();
480 const uword n_cols = x.get_n_cols();
481
482 arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "element-wise multiplication");
483
484 const eT k = x.aux;
485 eT* out_mem = out.memptr();
486
487 const bool use_mp = (arma_config::openmp) && (eOp<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
488
489 if(Proxy<T1>::use_at == false)
490 {
491 const uword n_elem = x.get_n_elem();
492
493 if(use_mp && mp_gate<eT>::eval(n_elem))
494 {
495 typename Proxy<T1>::ea_type P = x.P.get_ea();
496
497 arma_applier_1_mp(*=);
498 }
499 else
500 {
501 if(memory::is_aligned(out_mem))
502 {
503 memory::mark_as_aligned(out_mem);
504
505 if(x.P.is_aligned())
506 {
507 typename Proxy<T1>::aligned_ea_type P = x.P.get_aligned_ea();
508
509 arma_applier_1a(*=);
510 }
511 else
512 {
513 typename Proxy<T1>::ea_type P = x.P.get_ea();
514
515 arma_applier_1u(*=);
516 }
517 }
518 else
519 {
520 typename Proxy<T1>::ea_type P = x.P.get_ea();
521
522 arma_applier_1u(*=);
523 }
524 }
525 }
526 else
527 {
528 const Proxy<T1>& P = x.P;
529
530 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
531 {
532 arma_applier_2_mp(*=);
533 }
534 else
535 {
536 arma_applier_2(*=);
537 }
538 }
539 }
540
541
542
543 template<typename eop_type>
544 template<typename T1>
545 arma_hot
546 inline
547 void
apply_inplace_div(Mat<typename T1::elem_type> & out,const eOp<T1,eop_type> & x)548 eop_core<eop_type>::apply_inplace_div(Mat<typename T1::elem_type>& out, const eOp<T1, eop_type>& x)
549 {
550 arma_extra_debug_sigprint();
551
552 typedef typename T1::elem_type eT;
553
554 const uword n_rows = x.get_n_rows();
555 const uword n_cols = x.get_n_cols();
556
557 arma_debug_assert_same_size(out.n_rows, out.n_cols, n_rows, n_cols, "element-wise division");
558
559 const eT k = x.aux;
560 eT* out_mem = out.memptr();
561
562 const bool use_mp = (arma_config::openmp) && (eOp<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
563
564 if(Proxy<T1>::use_at == false)
565 {
566 const uword n_elem = x.get_n_elem();
567
568 if(use_mp && mp_gate<eT>::eval(n_elem))
569 {
570 typename Proxy<T1>::ea_type P = x.P.get_ea();
571
572 arma_applier_1_mp(/=);
573 }
574 else
575 {
576 if(memory::is_aligned(out_mem))
577 {
578 memory::mark_as_aligned(out_mem);
579
580 if(x.P.is_aligned())
581 {
582 typename Proxy<T1>::aligned_ea_type P = x.P.get_aligned_ea();
583
584 arma_applier_1a(/=);
585 }
586 else
587 {
588 typename Proxy<T1>::ea_type P = x.P.get_ea();
589
590 arma_applier_1u(/=);
591 }
592 }
593 else
594 {
595 typename Proxy<T1>::ea_type P = x.P.get_ea();
596
597 arma_applier_1u(/=);
598 }
599 }
600 }
601 else
602 {
603 const Proxy<T1>& P = x.P;
604
605 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
606 {
607 arma_applier_2_mp(/=);
608 }
609 else
610 {
611 arma_applier_2(/=);
612 }
613 }
614 }
615
616
617
618 //
619 // cubes
620
621
622
623 template<typename eop_type>
624 template<typename T1>
625 arma_hot
626 inline
627 void
apply(Cube<typename T1::elem_type> & out,const eOpCube<T1,eop_type> & x)628 eop_core<eop_type>::apply(Cube<typename T1::elem_type>& out, const eOpCube<T1, eop_type>& x)
629 {
630 arma_extra_debug_sigprint();
631
632 typedef typename T1::elem_type eT;
633
634 // NOTE: we're assuming that the matrix has already been set to the correct size and there is no aliasing;
635 // size setting and alias checking is done by either the Mat contructor or operator=()
636
637 const eT k = x.aux;
638 eT* out_mem = out.memptr();
639
640 const bool use_mp = (arma_config::openmp) && (eOpCube<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
641
642 if(ProxyCube<T1>::use_at == false)
643 {
644 const uword n_elem = out.n_elem;
645
646 if(use_mp && mp_gate<eT>::eval(n_elem))
647 {
648 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
649
650 arma_applier_1_mp(=);
651 }
652 else
653 {
654 if(memory::is_aligned(out_mem))
655 {
656 memory::mark_as_aligned(out_mem);
657
658 if(x.P.is_aligned())
659 {
660 typename ProxyCube<T1>::aligned_ea_type P = x.P.get_aligned_ea();
661
662 arma_applier_1a(=);
663 }
664 else
665 {
666 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
667
668 arma_applier_1u(=);
669 }
670 }
671 else
672 {
673 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
674
675 arma_applier_1u(=);
676 }
677 }
678 }
679 else
680 {
681 const uword n_rows = x.get_n_rows();
682 const uword n_cols = x.get_n_cols();
683 const uword n_slices = x.get_n_slices();
684
685 const ProxyCube<T1>& P = x.P;
686
687 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
688 {
689 arma_applier_3_mp(=);
690 }
691 else
692 {
693 arma_applier_3(=);
694 }
695 }
696 }
697
698
699
700 template<typename eop_type>
701 template<typename T1>
702 arma_hot
703 inline
704 void
apply_inplace_plus(Cube<typename T1::elem_type> & out,const eOpCube<T1,eop_type> & x)705 eop_core<eop_type>::apply_inplace_plus(Cube<typename T1::elem_type>& out, const eOpCube<T1, eop_type>& x)
706 {
707 arma_extra_debug_sigprint();
708
709 typedef typename T1::elem_type eT;
710
711 const uword n_rows = x.get_n_rows();
712 const uword n_cols = x.get_n_cols();
713 const uword n_slices = x.get_n_slices();
714
715 arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "addition");
716
717 const eT k = x.aux;
718 eT* out_mem = out.memptr();
719
720 const bool use_mp = (arma_config::openmp) && (eOpCube<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
721
722 if(ProxyCube<T1>::use_at == false)
723 {
724 const uword n_elem = out.n_elem;
725
726 if(use_mp && mp_gate<eT>::eval(n_elem))
727 {
728 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
729
730 arma_applier_1_mp(+=);
731 }
732 else
733 {
734 if(memory::is_aligned(out_mem))
735 {
736 memory::mark_as_aligned(out_mem);
737
738 if(x.P.is_aligned())
739 {
740 typename ProxyCube<T1>::aligned_ea_type P = x.P.get_aligned_ea();
741
742 arma_applier_1a(+=);
743 }
744 else
745 {
746 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
747
748 arma_applier_1u(+=);
749 }
750 }
751 else
752 {
753 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
754
755 arma_applier_1u(+=);
756 }
757 }
758 }
759 else
760 {
761 const ProxyCube<T1>& P = x.P;
762
763 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
764 {
765 arma_applier_3_mp(+=);
766 }
767 else
768 {
769 arma_applier_3(+=);
770 }
771 }
772 }
773
774
775
776 template<typename eop_type>
777 template<typename T1>
778 arma_hot
779 inline
780 void
apply_inplace_minus(Cube<typename T1::elem_type> & out,const eOpCube<T1,eop_type> & x)781 eop_core<eop_type>::apply_inplace_minus(Cube<typename T1::elem_type>& out, const eOpCube<T1, eop_type>& x)
782 {
783 arma_extra_debug_sigprint();
784
785 typedef typename T1::elem_type eT;
786
787 const uword n_rows = x.get_n_rows();
788 const uword n_cols = x.get_n_cols();
789 const uword n_slices = x.get_n_slices();
790
791 arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "subtraction");
792
793 const eT k = x.aux;
794 eT* out_mem = out.memptr();
795
796 const bool use_mp = (arma_config::openmp) && (eOpCube<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
797
798 if(ProxyCube<T1>::use_at == false)
799 {
800 const uword n_elem = out.n_elem;
801
802 if(use_mp && mp_gate<eT>::eval(n_elem))
803 {
804 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
805
806 arma_applier_1_mp(-=);
807 }
808 else
809 {
810 if(memory::is_aligned(out_mem))
811 {
812 memory::mark_as_aligned(out_mem);
813
814 if(x.P.is_aligned())
815 {
816 typename ProxyCube<T1>::aligned_ea_type P = x.P.get_aligned_ea();
817
818 arma_applier_1a(-=);
819 }
820 else
821 {
822 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
823
824 arma_applier_1u(-=);
825 }
826 }
827 else
828 {
829 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
830
831 arma_applier_1u(-=);
832 }
833 }
834 }
835 else
836 {
837 const ProxyCube<T1>& P = x.P;
838
839 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
840 {
841 arma_applier_3_mp(-=);
842 }
843 else
844 {
845 arma_applier_3(-=);
846 }
847 }
848 }
849
850
851
852 template<typename eop_type>
853 template<typename T1>
854 arma_hot
855 inline
856 void
apply_inplace_schur(Cube<typename T1::elem_type> & out,const eOpCube<T1,eop_type> & x)857 eop_core<eop_type>::apply_inplace_schur(Cube<typename T1::elem_type>& out, const eOpCube<T1, eop_type>& x)
858 {
859 arma_extra_debug_sigprint();
860
861 typedef typename T1::elem_type eT;
862
863 const uword n_rows = x.get_n_rows();
864 const uword n_cols = x.get_n_cols();
865 const uword n_slices = x.get_n_slices();
866
867 arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "element-wise multiplication");
868
869 const eT k = x.aux;
870 eT* out_mem = out.memptr();
871
872 const bool use_mp = (arma_config::openmp) && (eOpCube<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
873
874 if(ProxyCube<T1>::use_at == false)
875 {
876 const uword n_elem = out.n_elem;
877
878 if(use_mp && mp_gate<eT>::eval(n_elem))
879 {
880 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
881
882 arma_applier_1_mp(*=);
883 }
884 else
885 {
886 if(memory::is_aligned(out_mem))
887 {
888 memory::mark_as_aligned(out_mem);
889
890 if(x.P.is_aligned())
891 {
892 typename ProxyCube<T1>::aligned_ea_type P = x.P.get_aligned_ea();
893
894 arma_applier_1a(*=);
895 }
896 else
897 {
898 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
899
900 arma_applier_1u(*=);
901 }
902 }
903 else
904 {
905 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
906
907 arma_applier_1u(*=);
908 }
909 }
910 }
911 else
912 {
913 const ProxyCube<T1>& P = x.P;
914
915 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
916 {
917 arma_applier_3_mp(*=);
918 }
919 else
920 {
921 arma_applier_3(*=);
922 }
923 }
924 }
925
926
927
928 template<typename eop_type>
929 template<typename T1>
930 arma_hot
931 inline
932 void
apply_inplace_div(Cube<typename T1::elem_type> & out,const eOpCube<T1,eop_type> & x)933 eop_core<eop_type>::apply_inplace_div(Cube<typename T1::elem_type>& out, const eOpCube<T1, eop_type>& x)
934 {
935 arma_extra_debug_sigprint();
936
937 typedef typename T1::elem_type eT;
938
939 const uword n_rows = x.get_n_rows();
940 const uword n_cols = x.get_n_cols();
941 const uword n_slices = x.get_n_slices();
942
943 arma_debug_assert_same_size(out.n_rows, out.n_cols, out.n_slices, n_rows, n_cols, n_slices, "element-wise division");
944
945 const eT k = x.aux;
946 eT* out_mem = out.memptr();
947
948 const bool use_mp = (arma_config::openmp) && (eOpCube<T1, eop_type>::use_mp || (is_same_type<eop_type, eop_pow>::value && (is_cx<eT>::yes || x.aux != eT(2))));
949
950 if(ProxyCube<T1>::use_at == false)
951 {
952 const uword n_elem = out.n_elem;
953
954 if(use_mp && mp_gate<eT>::eval(n_elem))
955 {
956 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
957
958 arma_applier_1_mp(/=);
959 }
960 else
961 {
962 if(memory::is_aligned(out_mem))
963 {
964 memory::mark_as_aligned(out_mem);
965
966 if(x.P.is_aligned())
967 {
968 typename ProxyCube<T1>::aligned_ea_type P = x.P.get_aligned_ea();
969
970 arma_applier_1a(/=);
971 }
972 else
973 {
974 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
975
976 arma_applier_1u(/=);
977 }
978 }
979 else
980 {
981 typename ProxyCube<T1>::ea_type P = x.P.get_ea();
982
983 arma_applier_1u(/=);
984 }
985 }
986 }
987 else
988 {
989 const ProxyCube<T1>& P = x.P;
990
991 if(use_mp && mp_gate<eT>::eval(x.get_n_elem()))
992 {
993 arma_applier_3_mp(/=);
994 }
995 else
996 {
997 arma_applier_3(/=);
998 }
999 }
1000 }
1001
1002
1003
1004 //
1005 // common
1006
1007
1008
1009 template<typename eop_type>
1010 template<typename eT>
1011 arma_inline
1012 eT
process(const eT,const eT)1013 eop_core<eop_type>::process(const eT, const eT)
1014 {
1015 arma_stop_logic_error("eop_core::process(): unhandled eop_type");
1016 return eT(0);
1017 }
1018
1019
1020
1021 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1022 eop_core<eop_scalar_plus >::process(const eT val, const eT k) { return val + k; }
1023
1024 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1025 eop_core<eop_scalar_minus_pre >::process(const eT val, const eT k) { return k - val; }
1026
1027 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1028 eop_core<eop_scalar_minus_post>::process(const eT val, const eT k) { return val - k; }
1029
1030 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1031 eop_core<eop_scalar_times >::process(const eT val, const eT k) { return val * k; }
1032
1033 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1034 eop_core<eop_scalar_div_pre >::process(const eT val, const eT k) { return k / val; }
1035
1036 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1037 eop_core<eop_scalar_div_post >::process(const eT val, const eT k) { return val / k; }
1038
1039 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1040 eop_core<eop_square >::process(const eT val, const eT ) { return val*val; }
1041
1042 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1043 eop_core<eop_neg >::process(const eT val, const eT ) { return eop_aux::neg(val); }
1044
1045 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1046 eop_core<eop_sqrt >::process(const eT val, const eT ) { return eop_aux::sqrt(val); }
1047
1048 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1049 eop_core<eop_log >::process(const eT val, const eT ) { return eop_aux::log(val); }
1050
1051 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1052 eop_core<eop_log2 >::process(const eT val, const eT ) { return eop_aux::log2(val); }
1053
1054 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1055 eop_core<eop_log10 >::process(const eT val, const eT ) { return eop_aux::log10(val); }
1056
1057 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1058 eop_core<eop_trunc_log >::process(const eT val, const eT ) { return arma::trunc_log(val); }
1059
1060 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1061 eop_core<eop_log1p >::process(const eT val, const eT ) { return eop_aux::log1p(val); }
1062
1063 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1064 eop_core<eop_exp >::process(const eT val, const eT ) { return eop_aux::exp(val); }
1065
1066 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1067 eop_core<eop_exp2 >::process(const eT val, const eT ) { return eop_aux::exp2(val); }
1068
1069 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1070 eop_core<eop_exp10 >::process(const eT val, const eT ) { return eop_aux::exp10(val); }
1071
1072 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1073 eop_core<eop_trunc_exp >::process(const eT val, const eT ) { return arma::trunc_exp(val); }
1074
1075 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1076 eop_core<eop_expm1 >::process(const eT val, const eT ) { return eop_aux::expm1(val); }
1077
1078 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1079 eop_core<eop_cos >::process(const eT val, const eT ) { return eop_aux::cos(val); }
1080
1081 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1082 eop_core<eop_sin >::process(const eT val, const eT ) { return eop_aux::sin(val); }
1083
1084 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1085 eop_core<eop_tan >::process(const eT val, const eT ) { return eop_aux::tan(val); }
1086
1087 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1088 eop_core<eop_acos >::process(const eT val, const eT ) { return eop_aux::acos(val); }
1089
1090 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1091 eop_core<eop_asin >::process(const eT val, const eT ) { return eop_aux::asin(val); }
1092
1093 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1094 eop_core<eop_atan >::process(const eT val, const eT ) { return eop_aux::atan(val); }
1095
1096 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1097 eop_core<eop_cosh >::process(const eT val, const eT ) { return eop_aux::cosh(val); }
1098
1099 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1100 eop_core<eop_sinh >::process(const eT val, const eT ) { return eop_aux::sinh(val); }
1101
1102 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1103 eop_core<eop_tanh >::process(const eT val, const eT ) { return eop_aux::tanh(val); }
1104
1105 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1106 eop_core<eop_acosh >::process(const eT val, const eT ) { return eop_aux::acosh(val); }
1107
1108 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1109 eop_core<eop_asinh >::process(const eT val, const eT ) { return eop_aux::asinh(val); }
1110
1111 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1112 eop_core<eop_atanh >::process(const eT val, const eT ) { return eop_aux::atanh(val); }
1113
1114 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1115 eop_core<eop_sinc >::process(const eT val, const eT ) { return arma_sinc(val); }
1116
1117 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1118 eop_core<eop_eps >::process(const eT val, const eT ) { return eop_aux::direct_eps(val); }
1119
1120 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1121 eop_core<eop_abs >::process(const eT val, const eT ) { return eop_aux::arma_abs(val); }
1122
1123 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1124 eop_core<eop_arg >::process(const eT val, const eT ) { return arma_arg<eT>::eval(val); }
1125
1126 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1127 eop_core<eop_conj >::process(const eT val, const eT ) { return eop_aux::conj(val); }
1128
1129 template<> template<typename eT> arma_inline eT
process(const eT val,const eT k)1130 eop_core<eop_pow >::process(const eT val, const eT k) { return eop_aux::pow(val, k); }
1131
1132 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1133 eop_core<eop_floor >::process(const eT val, const eT ) { return eop_aux::floor(val); }
1134
1135 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1136 eop_core<eop_ceil >::process(const eT val, const eT ) { return eop_aux::ceil(val); }
1137
1138 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1139 eop_core<eop_round >::process(const eT val, const eT ) { return eop_aux::round(val); }
1140
1141 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1142 eop_core<eop_trunc >::process(const eT val, const eT ) { return eop_aux::trunc(val); }
1143
1144 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1145 eop_core<eop_sign >::process(const eT val, const eT ) { return arma_sign(val); }
1146
1147 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1148 eop_core<eop_erf >::process(const eT val, const eT ) { return eop_aux::erf(val); }
1149
1150 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1151 eop_core<eop_erfc >::process(const eT val, const eT ) { return eop_aux::erfc(val); }
1152
1153 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1154 eop_core<eop_lgamma >::process(const eT val, const eT ) { return eop_aux::lgamma(val); }
1155
1156 template<> template<typename eT> arma_inline eT
process(const eT val,const eT)1157 eop_core<eop_tgamma >::process(const eT val, const eT ) { return eop_aux::tgamma(val); }
1158
1159
1160 #undef arma_applier_1u
1161 #undef arma_applier_1a
1162 #undef arma_applier_2
1163 #undef arma_applier_3
1164
1165 #undef arma_applier_1_mp
1166 #undef arma_applier_2_mp
1167 #undef arma_applier_3_mp
1168
1169
1170 //! @}
1171