1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 1993-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING. If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25
26 #if ! defined (octave_mx_inlines_h)
27 #define octave_mx_inlines_h 1
28
29 // This file should *not* include config.h. It is only included in other C++
30 // source files that should have included config.h before including this file.
31
32 #include <cstddef>
33 #include <cmath>
34
35 #include <algorithm>
36
37 #include "Array-util.h"
38 #include "Array.h"
39 #include "bsxfun.h"
40 #include "oct-cmplx.h"
41 #include "oct-inttypes.h"
42 #include "oct-locbuf.h"
43
44 // Provides some commonly repeated, basic loop templates.
45
46 template <typename R, typename S>
mx_inline_fill(std::size_t n,R * r,S s)47 inline void mx_inline_fill (std::size_t n, R *r, S s)
48 {
49 for (std::size_t i = 0; i < n; i++)
50 r[i] = s;
51 }
52
53 template <typename R, typename X>
54 inline void
mx_inline_uminus(std::size_t n,R * r,const X * x)55 mx_inline_uminus (std::size_t n, R *r, const X *x)
56 {
57 for (std::size_t i = 0; i < n; i++)
58 r[i] = -x[i];
59 }
60
61 template <typename R>
62 inline void
mx_inline_uminus2(std::size_t n,R * r)63 mx_inline_uminus2 (std::size_t n, R *r)
64 {
65 for (std::size_t i = 0; i < n; i++)
66 r[i] = -r[i];
67 }
68
69 template <typename X>
70 inline void
mx_inline_iszero(std::size_t n,bool * r,const X * x)71 mx_inline_iszero (std::size_t n, bool *r, const X *x)
72 {
73 const X zero = X ();
74 for (std::size_t i = 0; i < n; i++)
75 r[i] = x[i] == zero;
76 }
77
78 template <typename X>
79 inline void
mx_inline_notzero(std::size_t n,bool * r,const X * x)80 mx_inline_notzero (std::size_t n, bool *r, const X *x)
81 {
82 const X zero = X ();
83 for (std::size_t i = 0; i < n; i++)
84 r[i] = x[i] != zero;
85 }
86
87 #define DEFMXBINOP(F, OP) \
88 template <typename R, typename X, typename Y> \
89 inline void F (std::size_t n, R *r, const X *x, const Y *y) \
90 { \
91 for (std::size_t i = 0; i < n; i++) \
92 r[i] = x[i] OP y[i]; \
93 } \
94 template <typename R, typename X, typename Y> \
95 inline void F (std::size_t n, R *r, const X *x, Y y) \
96 { \
97 for (std::size_t i = 0; i < n; i++) \
98 r[i] = x[i] OP y; \
99 } \
100 template <typename R, typename X, typename Y> \
101 inline void F (std::size_t n, R *r, X x, const Y *y) \
102 { \
103 for (std::size_t i = 0; i < n; i++) \
104 r[i] = x OP y[i]; \
105 }
106
107 DEFMXBINOP (mx_inline_add, +)
108 DEFMXBINOP (mx_inline_sub, -)
109 DEFMXBINOP (mx_inline_mul, *)
110 DEFMXBINOP (mx_inline_div, /)
111
112 #define DEFMXBINOPEQ(F, OP) \
113 template <typename R, typename X> \
114 inline void F (std::size_t n, R *r, const X *x) \
115 { \
116 for (std::size_t i = 0; i < n; i++) \
117 r[i] OP x[i]; \
118 } \
119 template <typename R, typename X> \
120 inline void F (std::size_t n, R *r, X x) \
121 { \
122 for (std::size_t i = 0; i < n; i++) \
123 r[i] OP x; \
124 }
125
126 DEFMXBINOPEQ (mx_inline_add2, +=)
127 DEFMXBINOPEQ (mx_inline_sub2, -=)
128 DEFMXBINOPEQ (mx_inline_mul2, *=)
129 DEFMXBINOPEQ (mx_inline_div2, /=)
130
131 #define DEFMXCMPOP(F, OP) \
132 template <typename X, typename Y> \
133 inline void F (std::size_t n, bool *r, const X *x, const Y *y) \
134 { \
135 for (std::size_t i = 0; i < n; i++) \
136 r[i] = x[i] OP y[i]; \
137 } \
138 template <typename X, typename Y> \
139 inline void F (std::size_t n, bool *r, const X *x, Y y) \
140 { \
141 for (std::size_t i = 0; i < n; i++) \
142 r[i] = x[i] OP y; \
143 } \
144 template <typename X, typename Y> \
145 inline void F (std::size_t n, bool *r, X x, const Y *y) \
146 { \
147 for (std::size_t i = 0; i < n; i++) \
148 r[i] = x OP y[i]; \
149 }
150
151 DEFMXCMPOP (mx_inline_lt, <)
152 DEFMXCMPOP (mx_inline_le, <=)
153 DEFMXCMPOP (mx_inline_gt, >)
154 DEFMXCMPOP (mx_inline_ge, >=)
155 DEFMXCMPOP (mx_inline_eq, ==)
156 DEFMXCMPOP (mx_inline_ne, !=)
157
158 // Convert to logical value, for logical op purposes.
159 template <typename T>
160 inline bool
logical_value(T x)161 logical_value (T x)
162 {
163 return x;
164 }
165
166 template <typename T>
167 inline bool
logical_value(const std::complex<T> & x)168 logical_value (const std::complex<T>& x)
169 {
170 return x.real () != 0 || x.imag () != 0;
171 }
172
173 template <typename T>
174 inline bool
logical_value(const octave_int<T> & x)175 logical_value (const octave_int<T>& x)
176 {
177 return x.value ();
178 }
179
180 template <typename X>
mx_inline_not(std::size_t n,bool * r,const X * x)181 void mx_inline_not (std::size_t n, bool *r, const X *x)
182 {
183 for (std::size_t i = 0; i < n; i++)
184 r[i] = ! logical_value (x[i]);
185 }
186
mx_inline_not2(std::size_t n,bool * r)187 inline void mx_inline_not2 (std::size_t n, bool *r)
188 {
189 for (std::size_t i = 0; i < n; i++)
190 r[i] = ! r[i];
191 }
192
193 #define DEFMXBOOLOP(F, NOT1, OP, NOT2) \
194 template <typename X, typename Y> \
195 inline void F (std::size_t n, bool *r, const X *x, const Y *y) \
196 { \
197 for (std::size_t i = 0; i < n; i++) \
198 r[i] = ((NOT1 logical_value (x[i])) \
199 OP (NOT2 logical_value (y[i]))); \
200 } \
201 template <typename X, typename Y> \
202 inline void F (std::size_t n, bool *r, const X *x, Y y) \
203 { \
204 const bool yy = (NOT2 logical_value (y)); \
205 for (std::size_t i = 0; i < n; i++) \
206 r[i] = (NOT1 logical_value (x[i])) OP yy; \
207 } \
208 template <typename X, typename Y> \
209 inline void F (std::size_t n, bool *r, X x, const Y *y) \
210 { \
211 const bool xx = (NOT1 logical_value (x)); \
212 for (std::size_t i = 0; i < n; i++) \
213 r[i] = xx OP (NOT2 logical_value (y[i])); \
214 }
215
216 DEFMXBOOLOP (mx_inline_and, , &, )
217 DEFMXBOOLOP (mx_inline_or, , |, )
218 DEFMXBOOLOP (mx_inline_not_and, !, &, )
219 DEFMXBOOLOP (mx_inline_not_or, !, |, )
220 DEFMXBOOLOP (mx_inline_and_not, , &, !)
221 DEFMXBOOLOP (mx_inline_or_not, , |, !)
222
223 template <typename X>
224 inline void
mx_inline_and2(std::size_t n,bool * r,const X * x)225 mx_inline_and2 (std::size_t n, bool *r, const X *x)
226 {
227 for (std::size_t i = 0; i < n; i++)
228 r[i] &= logical_value (x[i]);
229 }
230
231 template <typename X>
232 inline void
mx_inline_and2(std::size_t n,bool * r,X x)233 mx_inline_and2 (std::size_t n, bool *r, X x)
234 {
235 for (std::size_t i = 0; i < n; i++)
236 r[i] &= x;
237 }
238
239 template <typename X>
240 inline void
mx_inline_or2(std::size_t n,bool * r,const X * x)241 mx_inline_or2 (std::size_t n, bool *r, const X *x)
242 {
243 for (std::size_t i = 0; i < n; i++)
244 r[i] |= logical_value (x[i]);
245 }
246
247 template <typename X>
248 inline void
mx_inline_or2(std::size_t n,bool * r,X x)249 mx_inline_or2 (std::size_t n, bool *r, X x)
250 {
251 for (std::size_t i = 0; i < n; i++)
252 r[i] |= x;
253 }
254
255 template <typename T>
256 inline bool
mx_inline_any_nan(std::size_t n,const T * x)257 mx_inline_any_nan (std::size_t n, const T *x)
258 {
259 for (std::size_t i = 0; i < n; i++)
260 {
261 if (octave::math::isnan (x[i]))
262 return true;
263 }
264
265 return false;
266 }
267
268 template <typename T>
269 inline bool
mx_inline_all_finite(std::size_t n,const T * x)270 mx_inline_all_finite (std::size_t n, const T *x)
271 {
272 for (std::size_t i = 0; i < n; i++)
273 {
274 if (! octave::math::isfinite (x[i]))
275 return false;
276 }
277
278 return true;
279 }
280
281 template <typename T>
282 inline bool
mx_inline_any_negative(std::size_t n,const T * x)283 mx_inline_any_negative (std::size_t n, const T *x)
284 {
285 for (std::size_t i = 0; i < n; i++)
286 {
287 if (x[i] < 0)
288 return true;
289 }
290
291 return false;
292 }
293
294 template <typename T>
295 inline bool
mx_inline_any_positive(std::size_t n,const T * x)296 mx_inline_any_positive (std::size_t n, const T *x)
297 {
298 for (std::size_t i = 0; i < n; i++)
299 {
300 if (x[i] > 0)
301 return true;
302 }
303
304 return false;
305 }
306
307 template <typename T>
308 inline bool
mx_inline_all_real(std::size_t n,const std::complex<T> * x)309 mx_inline_all_real (std::size_t n, const std::complex<T>* x)
310 {
311 for (std::size_t i = 0; i < n; i++)
312 {
313 if (x[i].imag () != 0)
314 return false;
315 }
316
317 return true;
318 }
319
320 template <typename T>
mx_inline_real(std::size_t n,T * r,const std::complex<T> * x)321 inline void mx_inline_real (std::size_t n, T *r, const std::complex<T>* x)
322 {
323 for (std::size_t i = 0; i < n; i++)
324 r[i] = x[i].real ();
325 }
326
327 template <typename T>
mx_inline_imag(std::size_t n,T * r,const std::complex<T> * x)328 inline void mx_inline_imag (std::size_t n, T *r, const std::complex<T>* x)
329 {
330 for (std::size_t i = 0; i < n; i++)
331 r[i] = x[i].imag ();
332 }
333
334 template <typename T>
335 inline void
mx_inline_xmin(std::size_t n,T * r,const T * x,const T * y)336 mx_inline_xmin (std::size_t n, T *r, const T *x, const T *y)
337 {
338 for (std::size_t i = 0; i < n; i++)
339 r[i] = octave::math::min (x[i], y[i]);
340 }
341
342 template <typename T>
343 inline void
mx_inline_xmin(std::size_t n,T * r,const T * x,T y)344 mx_inline_xmin (std::size_t n, T *r, const T *x, T y)
345 {
346 for (std::size_t i = 0; i < n; i++)
347 r[i] = octave::math::min (x[i], y);
348 }
349
350 template <typename T>
351 inline void
mx_inline_xmin(std::size_t n,T * r,T x,const T * y)352 mx_inline_xmin (std::size_t n, T *r, T x, const T *y)
353 {
354 for (std::size_t i = 0; i < n; i++)
355 r[i] = octave::math::min (x, y[i]);
356 }
357
358 template <typename T>
359 inline void
mx_inline_xmax(std::size_t n,T * r,const T * x,const T * y)360 mx_inline_xmax (std::size_t n, T *r, const T *x, const T *y)
361 {
362 for (std::size_t i = 0; i < n; i++)
363 r[i] = octave::math::max (x[i], y[i]);
364 }
365
366 template <typename T>
367 inline void
mx_inline_xmax(std::size_t n,T * r,const T * x,T y)368 mx_inline_xmax (std::size_t n, T *r, const T *x, T y)
369 {
370 for (std::size_t i = 0; i < n; i++)
371 r[i] = octave::math::max (x[i], y);
372 }
373
374 template <typename T>
375 inline void
mx_inline_xmax(std::size_t n,T * r,T x,const T * y)376 mx_inline_xmax (std::size_t n, T *r, T x, const T *y)
377 {
378 for (std::size_t i = 0; i < n; i++)
379 r[i] = octave::math::max (x, y[i]);
380 }
381
382 // Specialize array-scalar max/min
383 #define DEFMINMAXSPEC(T, F, OP) \
384 template <> \
385 inline void F<T> (std::size_t n, T *r, const T *x, T y) \
386 { \
387 if (octave::math::isnan (y)) \
388 std::memcpy (r, x, n * sizeof (T)); \
389 else \
390 for (std::size_t i = 0; i < n; i++) \
391 r[i] = (x[i] OP y ? x[i] : y); \
392 } \
393 template <> \
394 inline void F<T> (std::size_t n, T *r, T x, const T *y) \
395 { \
396 if (octave::math::isnan (x)) \
397 std::memcpy (r, y, n * sizeof (T)); \
398 else \
399 for (std::size_t i = 0; i < n; i++) \
400 r[i] = (y[i] OP x ? y[i] : x); \
401 }
402
403 DEFMINMAXSPEC (double, mx_inline_xmin, <=)
404 DEFMINMAXSPEC (double, mx_inline_xmax, >=)
405 DEFMINMAXSPEC (float, mx_inline_xmin, <=)
406 DEFMINMAXSPEC (float, mx_inline_xmax, >=)
407
408 // FIXME: Is this comment correct anymore? It seems like std::pow is chosen.
409 // Let the compiler decide which pow to use, whichever best matches the
410 // arguments provided.
411
412 template <typename R, typename X, typename Y>
413 inline void
mx_inline_pow(std::size_t n,R * r,const X * x,const Y * y)414 mx_inline_pow (std::size_t n, R *r, const X *x, const Y *y)
415 {
416 using std::pow;
417
418 for (std::size_t i = 0; i < n; i++)
419 r[i] = pow (x[i], y[i]);
420 }
421
422 template <typename R, typename X, typename Y>
423 inline void
mx_inline_pow(std::size_t n,R * r,const X * x,Y y)424 mx_inline_pow (std::size_t n, R *r, const X *x, Y y)
425 {
426 using std::pow;
427
428 for (std::size_t i = 0; i < n; i++)
429 r[i] = pow (x[i], y);
430 }
431
432 template <typename R, typename X, typename Y>
433 inline void
mx_inline_pow(std::size_t n,R * r,X x,const Y * y)434 mx_inline_pow (std::size_t n, R *r, X x, const Y *y)
435 {
436 using std::pow;
437
438 for (std::size_t i = 0; i < n; i++)
439 r[i] = pow (x, y[i]);
440 }
441
442 // Arbitrary function appliers.
443 // The function is a template parameter to enable inlining.
444 template <typename R, typename X, R fun (X x)>
mx_inline_map(std::size_t n,R * r,const X * x)445 inline void mx_inline_map (std::size_t n, R *r, const X *x)
446 {
447 for (std::size_t i = 0; i < n; i++)
448 r[i] = fun (x[i]);
449 }
450
451 template <typename R, typename X, R fun (const X& x)>
mx_inline_map(std::size_t n,R * r,const X * x)452 inline void mx_inline_map (std::size_t n, R *r, const X *x)
453 {
454 for (std::size_t i = 0; i < n; i++)
455 r[i] = fun (x[i]);
456 }
457
458 // Appliers. Since these call the operation just once, we pass it as
459 // a pointer, to allow the compiler reduce number of instances.
460
461 template <typename R, typename X>
462 inline Array<R>
do_mx_unary_op(const Array<X> & x,void (* op)(std::size_t,R *,const X *))463 do_mx_unary_op (const Array<X>& x,
464 void (*op) (std::size_t, R *, const X *))
465 {
466 Array<R> r (x.dims ());
467 op (r.numel (), r.fortran_vec (), x.data ());
468 return r;
469 }
470
471 // Shortcuts for applying mx_inline_map.
472
473 template <typename R, typename X, R fun (X)>
474 inline Array<R>
do_mx_unary_map(const Array<X> & x)475 do_mx_unary_map (const Array<X>& x)
476 {
477 return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>);
478 }
479
480 template <typename R, typename X, R fun (const X&)>
481 inline Array<R>
do_mx_unary_map(const Array<X> & x)482 do_mx_unary_map (const Array<X>& x)
483 {
484 return do_mx_unary_op<R, X> (x, mx_inline_map<R, X, fun>);
485 }
486
487 template <typename R>
488 inline Array<R>&
do_mx_inplace_op(Array<R> & r,void (* op)(std::size_t,R *))489 do_mx_inplace_op (Array<R>& r,
490 void (*op) (std::size_t, R *))
491 {
492 op (r.numel (), r.fortran_vec ());
493 return r;
494 }
495
496 template <typename R, typename X, typename Y>
497 inline Array<R>
do_mm_binary_op(const Array<X> & x,const Array<Y> & y,void (* op)(std::size_t,R *,const X *,const Y *),void (* op1)(std::size_t,R *,X,const Y *),void (* op2)(std::size_t,R *,const X *,Y),const char * opname)498 do_mm_binary_op (const Array<X>& x, const Array<Y>& y,
499 void (*op) (std::size_t, R *, const X *, const Y *),
500 void (*op1) (std::size_t, R *, X, const Y *),
501 void (*op2) (std::size_t, R *, const X *, Y),
502 const char *opname)
503 {
504 dim_vector dx = x.dims ();
505 dim_vector dy = y.dims ();
506 if (dx == dy)
507 {
508 Array<R> r (dx);
509 op (r.numel (), r.fortran_vec (), x.data (), y.data ());
510 return r;
511 }
512 else if (is_valid_bsxfun (opname, dx, dy))
513 {
514 return do_bsxfun_op (x, y, op, op1, op2);
515 }
516 else
517 octave::err_nonconformant (opname, dx, dy);
518 }
519
520 template <typename R, typename X, typename Y>
521 inline Array<R>
do_ms_binary_op(const Array<X> & x,const Y & y,void (* op)(std::size_t,R *,const X *,Y))522 do_ms_binary_op (const Array<X>& x, const Y& y,
523 void (*op) (std::size_t, R *, const X *, Y))
524 {
525 Array<R> r (x.dims ());
526 op (r.numel (), r.fortran_vec (), x.data (), y);
527 return r;
528 }
529
530 template <typename R, typename X, typename Y>
531 inline Array<R>
do_sm_binary_op(const X & x,const Array<Y> & y,void (* op)(std::size_t,R *,X,const Y *))532 do_sm_binary_op (const X& x, const Array<Y>& y,
533 void (*op) (std::size_t, R *, X, const Y *))
534 {
535 Array<R> r (y.dims ());
536 op (r.numel (), r.fortran_vec (), x, y.data ());
537 return r;
538 }
539
540 template <typename R, typename X>
541 inline Array<R>&
do_mm_inplace_op(Array<R> & r,const Array<X> & x,void (* op)(std::size_t,R *,const X *),void (* op1)(std::size_t,R *,X),const char * opname)542 do_mm_inplace_op (Array<R>& r, const Array<X>& x,
543 void (*op) (std::size_t, R *, const X *),
544 void (*op1) (std::size_t, R *, X),
545 const char *opname)
546 {
547 dim_vector dr = r.dims ();
548 dim_vector dx = x.dims ();
549 if (dr == dx)
550 op (r.numel (), r.fortran_vec (), x.data ());
551 else if (is_valid_inplace_bsxfun (opname, dr, dx))
552 do_inplace_bsxfun_op (r, x, op, op1);
553 else
554 octave::err_nonconformant (opname, dr, dx);
555
556 return r;
557 }
558
559 template <typename R, typename X>
560 inline Array<R>&
do_ms_inplace_op(Array<R> & r,const X & x,void (* op)(std::size_t,R *,X))561 do_ms_inplace_op (Array<R>& r, const X& x,
562 void (*op) (std::size_t, R *, X))
563 {
564 op (r.numel (), r.fortran_vec (), x);
565 return r;
566 }
567
568 template <typename T1, typename T2>
569 inline bool
mx_inline_equal(std::size_t n,const T1 * x,const T2 * y)570 mx_inline_equal (std::size_t n, const T1 *x, const T2 *y)
571 {
572 for (std::size_t i = 0; i < n; i++)
573 if (x[i] != y[i])
574 return false;
575 return true;
576 }
577
578 template <typename T>
579 inline bool
do_mx_check(const Array<T> & a,bool (* op)(std::size_t,const T *))580 do_mx_check (const Array<T>& a,
581 bool (*op) (std::size_t, const T *))
582 {
583 return op (a.numel (), a.data ());
584 }
585
586 // NOTE: we don't use std::norm because it typically does some heavyweight
587 // magic to avoid underflows, which we don't need here.
588 template <typename T>
cabsq(const std::complex<T> & c)589 inline T cabsq (const std::complex<T>& c)
590 {
591 return c.real () * c.real () + c.imag () * c.imag ();
592 }
593
594 // default. works for integers and bool.
595 template <typename T>
596 inline bool
xis_true(T x)597 xis_true (T x)
598 {
599 return x;
600 }
601
602 template <typename T>
603 inline bool
xis_false(T x)604 xis_false (T x)
605 {
606 return ! x;
607 }
608
609 // for octave_ints
610 template <typename T>
611 inline bool
xis_true(const octave_int<T> & x)612 xis_true (const octave_int<T>& x)
613 {
614 return x.value ();
615 }
616
617 template <typename T>
618 inline bool
xis_false(const octave_int<T> & x)619 xis_false (const octave_int<T>& x)
620 {
621 return ! x.value ();
622 }
623
624 // for reals, we want to ignore NaNs.
625 inline bool
xis_true(double x)626 xis_true (double x)
627 {
628 return ! octave::math::isnan (x) && x != 0.0;
629 }
630
631 inline bool
xis_false(double x)632 xis_false (double x)
633 {
634 return x == 0.0;
635 }
636
637 inline bool
xis_true(float x)638 xis_true (float x)
639 {
640 return ! octave::math::isnan (x) && x != 0.0f;
641 }
642
643 inline bool
xis_false(float x)644 xis_false (float x)
645 {
646 return x == 0.0f;
647 }
648
649 // Ditto for complex.
650 inline bool
xis_true(const Complex & x)651 xis_true (const Complex& x)
652 {
653 return ! octave::math::isnan (x) && x != 0.0;
654 }
655
656 inline bool
xis_false(const Complex & x)657 xis_false (const Complex& x)
658 {
659 return x == 0.0;
660 }
661
662 inline bool
xis_true(const FloatComplex & x)663 xis_true (const FloatComplex& x)
664 {
665 return ! octave::math::isnan (x) && x != 0.0f;
666 }
667
668 inline bool
xis_false(const FloatComplex & x)669 xis_false (const FloatComplex& x)
670 {
671 return x == 0.0f;
672 }
673
674 #define OP_RED_SUM(ac, el) ac += el
675 #define OP_RED_PROD(ac, el) ac *= el
676 #define OP_RED_SUMSQ(ac, el) ac += ((el)*(el))
677 #define OP_RED_SUMSQC(ac, el) ac += cabsq (el)
678
679 inline void
op_dble_prod(double & ac,float el)680 op_dble_prod (double& ac, float el)
681 {
682 ac *= el;
683 }
684
685 // FIXME: guaranteed?
686 inline void
op_dble_prod(Complex & ac,const FloatComplex & el)687 op_dble_prod (Complex& ac, const FloatComplex& el)
688 {
689 ac *= el;
690 }
691
692 template <typename T>
693 inline void
op_dble_prod(double & ac,const octave_int<T> & el)694 op_dble_prod (double& ac, const octave_int<T>& el)
695 {
696 ac *= el.double_value ();
697 }
698
699 inline void
op_dble_sum(double & ac,float el)700 op_dble_sum (double& ac, float el)
701 {
702 ac += el;
703 }
704
705 // FIXME: guaranteed?
706 inline void
op_dble_sum(Complex & ac,const FloatComplex & el)707 op_dble_sum (Complex& ac, const FloatComplex& el)
708 {
709 ac += el;
710 }
711
712 template <typename T>
713 inline void
op_dble_sum(double & ac,const octave_int<T> & el)714 op_dble_sum (double& ac, const octave_int<T>& el)
715 {
716 ac += el.double_value ();
717 }
718
719 // The following two implement a simple short-circuiting.
720 #define OP_RED_ANYC(ac, el) \
721 if (xis_true (el)) \
722 { \
723 ac = true; \
724 break; \
725 } \
726 else \
727 continue
728
729 #define OP_RED_ALLC(ac, el) \
730 if (xis_false (el)) \
731 { \
732 ac = false; \
733 break; \
734 } \
735 else \
736 continue
737
738 #define OP_RED_FCN(F, TSRC, TRES, OP, ZERO) \
739 template <typename T> \
740 inline TRES \
741 F (const TSRC *v, octave_idx_type n) \
742 { \
743 TRES ac = ZERO; \
744 for (octave_idx_type i = 0; i < n; i++) \
745 OP(ac, v[i]); \
746 return ac; \
747 }
748
749 #define PROMOTE_DOUBLE(T) \
750 typename subst_template_param<std::complex, T, double>::type
751
752 OP_RED_FCN (mx_inline_sum, T, T, OP_RED_SUM, 0)
753 OP_RED_FCN (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0)
754 OP_RED_FCN (mx_inline_count, bool, T, OP_RED_SUM, 0)
755 OP_RED_FCN (mx_inline_prod, T, T, OP_RED_PROD, 1)
756 OP_RED_FCN (mx_inline_dprod, T, PROMOTE_DOUBLE(T), op_dble_prod, 1)
757 OP_RED_FCN (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0)
758 OP_RED_FCN (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0)
OP_RED_FCN(mx_inline_any,T,bool,OP_RED_ANYC,false)759 OP_RED_FCN (mx_inline_any, T, bool, OP_RED_ANYC, false)
760 OP_RED_FCN (mx_inline_all, T, bool, OP_RED_ALLC, true)
761
762 #define OP_RED_FCN2(F, TSRC, TRES, OP, ZERO) \
763 template <typename T> \
764 inline void \
765 F (const TSRC *v, TRES *r, octave_idx_type m, octave_idx_type n) \
766 { \
767 for (octave_idx_type i = 0; i < m; i++) \
768 r[i] = ZERO; \
769 for (octave_idx_type j = 0; j < n; j++) \
770 { \
771 for (octave_idx_type i = 0; i < m; i++) \
772 OP(r[i], v[i]); \
773 v += m; \
774 } \
775 }
776
777 OP_RED_FCN2 (mx_inline_sum, T, T, OP_RED_SUM, 0)
778 OP_RED_FCN2 (mx_inline_dsum, T, PROMOTE_DOUBLE(T), op_dble_sum, 0.0)
779 OP_RED_FCN2 (mx_inline_count, bool, T, OP_RED_SUM, 0)
780 OP_RED_FCN2 (mx_inline_prod, T, T, OP_RED_PROD, 1)
781 OP_RED_FCN2 (mx_inline_dprod, T, PROMOTE_DOUBLE(T), op_dble_prod, 0.0)
782 OP_RED_FCN2 (mx_inline_sumsq, T, T, OP_RED_SUMSQ, 0)
783 OP_RED_FCN2 (mx_inline_sumsq, std::complex<T>, T, OP_RED_SUMSQC, 0)
784
785 #define OP_RED_ANYR(ac, el) ac |= xis_true (el)
786 #define OP_RED_ALLR(ac, el) ac &= xis_true (el)
787
788 OP_RED_FCN2 (mx_inline_any_r, T, bool, OP_RED_ANYR, false)
789 OP_RED_FCN2 (mx_inline_all_r, T, bool, OP_RED_ALLR, true)
790
791 // Using the general code for any/all would sacrifice short-circuiting.
792 // OTOH, going by rows would sacrifice cache-coherence. The following
793 // algorithm will achieve both, at the cost of a temporary octave_idx_type
794 // array.
795
796 #define OP_ROW_SHORT_CIRCUIT(F, PRED, ZERO) \
797 template <typename T> \
798 inline void \
799 F (const T *v, bool *r, octave_idx_type m, octave_idx_type n) \
800 { \
801 if (n <= 8) \
802 return F ## _r (v, r, m, n); \
803 \
804 /* FIXME: it may be sub-optimal to allocate the buffer here. */ \
805 OCTAVE_LOCAL_BUFFER (octave_idx_type, iact, m); \
806 for (octave_idx_type i = 0; i < m; i++) iact[i] = i; \
807 octave_idx_type nact = m; \
808 for (octave_idx_type j = 0; j < n; j++) \
809 { \
810 octave_idx_type k = 0; \
811 for (octave_idx_type i = 0; i < nact; i++) \
812 { \
813 octave_idx_type ia = iact[i]; \
814 if (! PRED (v[ia])) \
815 iact[k++] = ia; \
816 } \
817 nact = k; \
818 v += m; \
819 } \
820 for (octave_idx_type i = 0; i < m; i++) r[i] = ! ZERO; \
821 for (octave_idx_type i = 0; i < nact; i++) r[iact[i]] = ZERO; \
822 }
823
824 OP_ROW_SHORT_CIRCUIT (mx_inline_any, xis_true, false)
825 OP_ROW_SHORT_CIRCUIT (mx_inline_all, xis_false, true)
826
827 #define OP_RED_FCNN(F, TSRC, TRES) \
828 template <typename T> \
829 inline void \
830 F (const TSRC *v, TRES *r, octave_idx_type l, \
831 octave_idx_type n, octave_idx_type u) \
832 { \
833 if (l == 1) \
834 { \
835 for (octave_idx_type i = 0; i < u; i++) \
836 { \
837 r[i] = F<T> (v, n); \
838 v += n; \
839 } \
840 } \
841 else \
842 { \
843 for (octave_idx_type i = 0; i < u; i++) \
844 { \
845 F (v, r, l, n); \
846 v += l*n; \
847 r += l; \
848 } \
849 } \
850 }
851
852 OP_RED_FCNN (mx_inline_sum, T, T)
853 OP_RED_FCNN (mx_inline_dsum, T, PROMOTE_DOUBLE(T))
854 OP_RED_FCNN (mx_inline_count, bool, T)
855 OP_RED_FCNN (mx_inline_prod, T, T)
856 OP_RED_FCNN (mx_inline_dprod, T, PROMOTE_DOUBLE(T))
857 OP_RED_FCNN (mx_inline_sumsq, T, T)
858 OP_RED_FCNN (mx_inline_sumsq, std::complex<T>, T)
859 OP_RED_FCNN (mx_inline_any, T, bool)
860 OP_RED_FCNN (mx_inline_all, T, bool)
861
862 #define OP_CUM_FCN(F, TSRC, TRES, OP) \
863 template <typename T> \
864 inline void \
865 F (const TSRC *v, TRES *r, octave_idx_type n) \
866 { \
867 if (n) \
868 { \
869 TRES t = r[0] = v[0]; \
870 for (octave_idx_type i = 1; i < n; i++) \
871 r[i] = t = t OP v[i]; \
872 } \
873 }
874
875 OP_CUM_FCN (mx_inline_cumsum, T, T, +)
876 OP_CUM_FCN (mx_inline_cumprod, T, T, *)
877 OP_CUM_FCN (mx_inline_cumcount, bool, T, +)
878
879 #define OP_CUM_FCN2(F, TSRC, TRES, OP) \
880 template <typename T> \
881 inline void \
882 F (const TSRC *v, TRES *r, octave_idx_type m, octave_idx_type n) \
883 { \
884 if (n) \
885 { \
886 for (octave_idx_type i = 0; i < m; i++) \
887 r[i] = v[i]; \
888 const T *r0 = r; \
889 for (octave_idx_type j = 1; j < n; j++) \
890 { \
891 r += m; v += m; \
892 for (octave_idx_type i = 0; i < m; i++) \
893 r[i] = r0[i] OP v[i]; \
894 r0 += m; \
895 } \
896 } \
897 }
898
899 OP_CUM_FCN2 (mx_inline_cumsum, T, T, +)
900 OP_CUM_FCN2 (mx_inline_cumprod, T, T, *)
901 OP_CUM_FCN2 (mx_inline_cumcount, bool, T, +)
902
903 #define OP_CUM_FCNN(F, TSRC, TRES) \
904 template <typename T> \
905 inline void \
906 F (const TSRC *v, TRES *r, octave_idx_type l, \
907 octave_idx_type n, octave_idx_type u) \
908 { \
909 if (l == 1) \
910 { \
911 for (octave_idx_type i = 0; i < u; i++) \
912 { \
913 F (v, r, n); \
914 v += n; \
915 r += n; \
916 } \
917 } \
918 else \
919 { \
920 for (octave_idx_type i = 0; i < u; i++) \
921 { \
922 F (v, r, l, n); \
923 v += l*n; \
924 r += l*n; \
925 } \
926 } \
927 }
928
929 OP_CUM_FCNN (mx_inline_cumsum, T, T)
930 OP_CUM_FCNN (mx_inline_cumprod, T, T)
931 OP_CUM_FCNN (mx_inline_cumcount, bool, T)
932
933 #define OP_MINMAX_FCN(F, OP) \
934 template <typename T> \
935 void F (const T *v, T *r, octave_idx_type n) \
936 { \
937 if (! n) \
938 return; \
939 T tmp = v[0]; \
940 octave_idx_type i = 1; \
941 if (octave::math::isnan (tmp)) \
942 { \
943 for (; i < n && octave::math::isnan (v[i]); i++) ; \
944 if (i < n) \
945 tmp = v[i]; \
946 } \
947 for (; i < n; i++) \
948 if (v[i] OP tmp) \
949 tmp = v[i]; \
950 *r = tmp; \
951 } \
952 template <typename T> \
953 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \
954 { \
955 if (! n) \
956 return; \
957 T tmp = v[0]; \
958 octave_idx_type tmpi = 0; \
959 octave_idx_type i = 1; \
960 if (octave::math::isnan (tmp)) \
961 { \
962 for (; i < n && octave::math::isnan (v[i]); i++) ; \
963 if (i < n) \
964 { \
965 tmp = v[i]; \
966 tmpi = i; \
967 } \
968 } \
969 for (; i < n; i++) \
970 if (v[i] OP tmp) \
971 { \
972 tmp = v[i]; \
973 tmpi = i; \
974 } \
975 *r = tmp; \
976 *ri = tmpi; \
977 }
978
979 OP_MINMAX_FCN (mx_inline_min, <)
980 OP_MINMAX_FCN (mx_inline_max, >)
981
982 // Row reductions will be slightly complicated. We will proceed with checks
983 // for NaNs until we detect that no row will yield a NaN, in which case we
984 // proceed to a faster code.
985
986 #define OP_MINMAX_FCN2(F, OP) \
987 template <typename T> \
988 inline void \
989 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
990 { \
991 if (! n) \
992 return; \
993 bool nan = false; \
994 octave_idx_type j = 0; \
995 for (octave_idx_type i = 0; i < m; i++) \
996 { \
997 r[i] = v[i]; \
998 if (octave::math::isnan (v[i])) \
999 nan = true; \
1000 } \
1001 j++; \
1002 v += m; \
1003 while (nan && j < n) \
1004 { \
1005 nan = false; \
1006 for (octave_idx_type i = 0; i < m; i++) \
1007 { \
1008 if (octave::math::isnan (v[i])) \
1009 nan = true; \
1010 else if (octave::math::isnan (r[i]) || v[i] OP r[i]) \
1011 r[i] = v[i]; \
1012 } \
1013 j++; \
1014 v += m; \
1015 } \
1016 while (j < n) \
1017 { \
1018 for (octave_idx_type i = 0; i < m; i++) \
1019 if (v[i] OP r[i]) \
1020 r[i] = v[i]; \
1021 j++; \
1022 v += m; \
1023 } \
1024 } \
1025 template <typename T> \
1026 inline void \
1027 F (const T *v, T *r, octave_idx_type *ri, \
1028 octave_idx_type m, octave_idx_type n) \
1029 { \
1030 if (! n) \
1031 return; \
1032 bool nan = false; \
1033 octave_idx_type j = 0; \
1034 for (octave_idx_type i = 0; i < m; i++) \
1035 { \
1036 r[i] = v[i]; \
1037 ri[i] = j; \
1038 if (octave::math::isnan (v[i])) \
1039 nan = true; \
1040 } \
1041 j++; \
1042 v += m; \
1043 while (nan && j < n) \
1044 { \
1045 nan = false; \
1046 for (octave_idx_type i = 0; i < m; i++) \
1047 { \
1048 if (octave::math::isnan (v[i])) \
1049 nan = true; \
1050 else if (octave::math::isnan (r[i]) || v[i] OP r[i]) \
1051 { \
1052 r[i] = v[i]; \
1053 ri[i] = j; \
1054 } \
1055 } \
1056 j++; \
1057 v += m; \
1058 } \
1059 while (j < n) \
1060 { \
1061 for (octave_idx_type i = 0; i < m; i++) \
1062 if (v[i] OP r[i]) \
1063 { \
1064 r[i] = v[i]; \
1065 ri[i] = j; \
1066 } \
1067 j++; \
1068 v += m; \
1069 } \
1070 }
1071
1072 OP_MINMAX_FCN2 (mx_inline_min, <)
1073 OP_MINMAX_FCN2 (mx_inline_max, >)
1074
1075 #define OP_MINMAX_FCNN(F) \
1076 template <typename T> \
1077 inline void \
1078 F (const T *v, T *r, octave_idx_type l, \
1079 octave_idx_type n, octave_idx_type u) \
1080 { \
1081 if (! n) \
1082 return; \
1083 if (l == 1) \
1084 { \
1085 for (octave_idx_type i = 0; i < u; i++) \
1086 { \
1087 F (v, r, n); \
1088 v += n; \
1089 r++; \
1090 } \
1091 } \
1092 else \
1093 { \
1094 for (octave_idx_type i = 0; i < u; i++) \
1095 { \
1096 F (v, r, l, n); \
1097 v += l*n; \
1098 r += l; \
1099 } \
1100 } \
1101 } \
1102 template <typename T> \
1103 inline void \
1104 F (const T *v, T *r, octave_idx_type *ri, \
1105 octave_idx_type l, octave_idx_type n, octave_idx_type u) \
1106 { \
1107 if (! n) return; \
1108 if (l == 1) \
1109 { \
1110 for (octave_idx_type i = 0; i < u; i++) \
1111 { \
1112 F (v, r, ri, n); \
1113 v += n; \
1114 r++; \
1115 ri++; \
1116 } \
1117 } \
1118 else \
1119 { \
1120 for (octave_idx_type i = 0; i < u; i++) \
1121 { \
1122 F (v, r, ri, l, n); \
1123 v += l*n; \
1124 r += l; \
1125 ri += l; \
1126 } \
1127 } \
1128 }
1129
1130 OP_MINMAX_FCNN (mx_inline_min)
1131 OP_MINMAX_FCNN (mx_inline_max)
1132
1133 #define OP_CUMMINMAX_FCN(F, OP) \
1134 template <typename T> \
1135 void F (const T *v, T *r, octave_idx_type n) \
1136 { \
1137 if (! n) \
1138 return; \
1139 T tmp = v[0]; \
1140 octave_idx_type i = 1; \
1141 octave_idx_type j = 0; \
1142 if (octave::math::isnan (tmp)) \
1143 { \
1144 for (; i < n && octave::math::isnan (v[i]); i++) ; \
1145 for (; j < i; j++) \
1146 r[j] = tmp; \
1147 if (i < n) \
1148 tmp = v[i]; \
1149 } \
1150 for (; i < n; i++) \
1151 if (v[i] OP tmp) \
1152 { \
1153 for (; j < i; j++) \
1154 r[j] = tmp; \
1155 tmp = v[i]; \
1156 } \
1157 for (; j < i; j++) \
1158 r[j] = tmp; \
1159 } \
1160 template <typename T> \
1161 void F (const T *v, T *r, octave_idx_type *ri, octave_idx_type n) \
1162 { \
1163 if (! n) \
1164 return; \
1165 T tmp = v[0]; \
1166 octave_idx_type tmpi = 0; \
1167 octave_idx_type i = 1; \
1168 octave_idx_type j = 0; \
1169 if (octave::math::isnan (tmp)) \
1170 { \
1171 for (; i < n && octave::math::isnan (v[i]); i++) ; \
1172 for (; j < i; j++) \
1173 { \
1174 r[j] = tmp; \
1175 ri[j] = tmpi; \
1176 } \
1177 if (i < n) \
1178 { \
1179 tmp = v[i]; \
1180 tmpi = i; \
1181 } \
1182 } \
1183 for (; i < n; i++) \
1184 if (v[i] OP tmp) \
1185 { \
1186 for (; j < i; j++) \
1187 { \
1188 r[j] = tmp; \
1189 ri[j] = tmpi; \
1190 } \
1191 tmp = v[i]; \
1192 tmpi = i; \
1193 } \
1194 for (; j < i; j++) \
1195 { \
1196 r[j] = tmp; \
1197 ri[j] = tmpi; \
1198 } \
1199 }
1200
1201 OP_CUMMINMAX_FCN (mx_inline_cummin, <)
1202 OP_CUMMINMAX_FCN (mx_inline_cummax, >)
1203
1204 // Row reductions will be slightly complicated. We will proceed with checks
1205 // for NaNs until we detect that no row will yield a NaN, in which case we
1206 // proceed to a faster code.
1207
1208 #define OP_CUMMINMAX_FCN2(F, OP) \
1209 template <typename T> \
1210 inline void \
1211 F (const T *v, T *r, octave_idx_type m, octave_idx_type n) \
1212 { \
1213 if (! n) \
1214 return; \
1215 bool nan = false; \
1216 const T *r0; \
1217 octave_idx_type j = 0; \
1218 for (octave_idx_type i = 0; i < m; i++) \
1219 { \
1220 r[i] = v[i]; \
1221 if (octave::math::isnan (v[i])) \
1222 nan = true; \
1223 } \
1224 j++; \
1225 v += m; \
1226 r0 = r; \
1227 r += m; \
1228 while (nan && j < n) \
1229 { \
1230 nan = false; \
1231 for (octave_idx_type i = 0; i < m; i++) \
1232 { \
1233 if (octave::math::isnan (v[i])) \
1234 { \
1235 r[i] = r0[i]; \
1236 nan = true; \
1237 } \
1238 else if (octave::math::isnan (r0[i]) || v[i] OP r0[i]) \
1239 r[i] = v[i]; \
1240 else \
1241 r[i] = r0[i]; \
1242 } \
1243 j++; \
1244 v += m; \
1245 r0 = r; \
1246 r += m; \
1247 } \
1248 while (j < n) \
1249 { \
1250 for (octave_idx_type i = 0; i < m; i++) \
1251 if (v[i] OP r0[i]) \
1252 r[i] = v[i]; \
1253 else \
1254 r[i] = r0[i]; \
1255 j++; \
1256 v += m; \
1257 r0 = r; \
1258 r += m; \
1259 } \
1260 } \
1261 template <typename T> \
1262 inline void \
1263 F (const T *v, T *r, octave_idx_type *ri, \
1264 octave_idx_type m, octave_idx_type n) \
1265 { \
1266 if (! n) \
1267 return; \
1268 bool nan = false; \
1269 const T *r0; \
1270 const octave_idx_type *r0i; \
1271 octave_idx_type j = 0; \
1272 for (octave_idx_type i = 0; i < m; i++) \
1273 { \
1274 r[i] = v[i]; ri[i] = 0; \
1275 if (octave::math::isnan (v[i])) \
1276 nan = true; \
1277 } \
1278 j++; \
1279 v += m; \
1280 r0 = r; \
1281 r += m; \
1282 r0i = ri; \
1283 ri += m; \
1284 while (nan && j < n) \
1285 { \
1286 nan = false; \
1287 for (octave_idx_type i = 0; i < m; i++) \
1288 { \
1289 if (octave::math::isnan (v[i])) \
1290 { \
1291 r[i] = r0[i]; \
1292 ri[i] = r0i[i]; \
1293 nan = true; \
1294 } \
1295 else if (octave::math::isnan (r0[i]) || v[i] OP r0[i]) \
1296 { \
1297 r[i] = v[i]; \
1298 ri[i] = j; \
1299 } \
1300 else \
1301 { \
1302 r[i] = r0[i]; \
1303 ri[i] = r0i[i]; \
1304 } \
1305 } \
1306 j++; \
1307 v += m; \
1308 r0 = r; \
1309 r += m; \
1310 r0i = ri; \
1311 ri += m; \
1312 } \
1313 while (j < n) \
1314 { \
1315 for (octave_idx_type i = 0; i < m; i++) \
1316 if (v[i] OP r0[i]) \
1317 { \
1318 r[i] = v[i]; \
1319 ri[i] = j; \
1320 } \
1321 else \
1322 { \
1323 r[i] = r0[i]; \
1324 ri[i] = r0i[i]; \
1325 } \
1326 j++; \
1327 v += m; \
1328 r0 = r; \
1329 r += m; \
1330 r0i = ri; \
1331 ri += m; \
1332 } \
1333 }
1334
1335 OP_CUMMINMAX_FCN2 (mx_inline_cummin, <)
1336 OP_CUMMINMAX_FCN2 (mx_inline_cummax, >)
1337
1338 #define OP_CUMMINMAX_FCNN(F) \
1339 template <typename T> \
1340 inline void \
1341 F (const T *v, T *r, octave_idx_type l, \
1342 octave_idx_type n, octave_idx_type u) \
1343 { \
1344 if (! n) \
1345 return; \
1346 if (l == 1) \
1347 { \
1348 for (octave_idx_type i = 0; i < u; i++) \
1349 { \
1350 F (v, r, n); \
1351 v += n; \
1352 r += n; \
1353 } \
1354 } \
1355 else \
1356 { \
1357 for (octave_idx_type i = 0; i < u; i++) \
1358 { \
1359 F (v, r, l, n); \
1360 v += l*n; \
1361 r += l*n; \
1362 } \
1363 } \
1364 } \
1365 template <typename T> \
1366 inline void \
1367 F (const T *v, T *r, octave_idx_type *ri, \
1368 octave_idx_type l, octave_idx_type n, octave_idx_type u) \
1369 { \
1370 if (! n) \
1371 return; \
1372 if (l == 1) \
1373 { \
1374 for (octave_idx_type i = 0; i < u; i++) \
1375 { \
1376 F (v, r, ri, n); \
1377 v += n; \
1378 r += n; \
1379 ri += n; \
1380 } \
1381 } \
1382 else \
1383 { \
1384 for (octave_idx_type i = 0; i < u; i++) \
1385 { \
1386 F (v, r, ri, l, n); \
1387 v += l*n; \
1388 r += l*n; \
1389 ri += l*n; \
1390 } \
1391 } \
1392 }
1393
1394 OP_CUMMINMAX_FCNN (mx_inline_cummin)
1395 OP_CUMMINMAX_FCNN (mx_inline_cummax)
1396
1397 template <typename T>
1398 void mx_inline_diff (const T *v, T *r, octave_idx_type n,
1399 octave_idx_type order)
1400 {
1401 switch (order)
1402 {
1403 case 1:
1404 for (octave_idx_type i = 0; i < n-1; i++)
1405 r[i] = v[i+1] - v[i];
1406 break;
1407 case 2:
1408 if (n > 1)
1409 {
1410 T lst = v[1] - v[0];
1411 for (octave_idx_type i = 0; i < n-2; i++)
1412 {
1413 T dif = v[i+2] - v[i+1];
1414 r[i] = dif - lst;
1415 lst = dif;
1416 }
1417 }
1418 break;
1419 default:
1420 {
1421 OCTAVE_LOCAL_BUFFER (T, buf, n-1);
1422
1423 for (octave_idx_type i = 0; i < n-1; i++)
1424 buf[i] = v[i+1] - v[i];
1425
1426 for (octave_idx_type o = 2; o <= order; o++)
1427 {
1428 for (octave_idx_type i = 0; i < n-o; i++)
1429 buf[i] = buf[i+1] - buf[i];
1430 }
1431
1432 for (octave_idx_type i = 0; i < n-order; i++)
1433 r[i] = buf[i];
1434 }
1435 }
1436 }
1437
1438 template <typename T>
mx_inline_diff(const T * v,T * r,octave_idx_type m,octave_idx_type n,octave_idx_type order)1439 void mx_inline_diff (const T *v, T *r,
1440 octave_idx_type m, octave_idx_type n,
1441 octave_idx_type order)
1442 {
1443 switch (order)
1444 {
1445 case 1:
1446 for (octave_idx_type i = 0; i < m*(n-1); i++)
1447 r[i] = v[i+m] - v[i];
1448 break;
1449 case 2:
1450 for (octave_idx_type i = 0; i < n-2; i++)
1451 {
1452 for (octave_idx_type j = i*m; j < i*m+m; j++)
1453 r[j] = (v[j+m+m] - v[j+m]) - (v[j+m] - v[j]);
1454 }
1455 break;
1456 default:
1457 {
1458 OCTAVE_LOCAL_BUFFER (T, buf, n-1);
1459
1460 for (octave_idx_type j = 0; j < m; j++)
1461 {
1462 for (octave_idx_type i = 0; i < n-1; i++)
1463 buf[i] = v[i*m+j+m] - v[i*m+j];
1464
1465 for (octave_idx_type o = 2; o <= order; o++)
1466 {
1467 for (octave_idx_type i = 0; i < n-o; i++)
1468 buf[i] = buf[i+1] - buf[i];
1469 }
1470
1471 for (octave_idx_type i = 0; i < n-order; i++)
1472 r[i*m+j] = buf[i];
1473 }
1474 }
1475 }
1476 }
1477
1478 template <typename T>
1479 inline void
mx_inline_diff(const T * v,T * r,octave_idx_type l,octave_idx_type n,octave_idx_type u,octave_idx_type order)1480 mx_inline_diff (const T *v, T *r,
1481 octave_idx_type l, octave_idx_type n, octave_idx_type u,
1482 octave_idx_type order)
1483 {
1484 if (! n) return;
1485 if (l == 1)
1486 {
1487 for (octave_idx_type i = 0; i < u; i++)
1488 {
1489 mx_inline_diff (v, r, n, order);
1490 v += n; r += n-order;
1491 }
1492 }
1493 else
1494 {
1495 for (octave_idx_type i = 0; i < u; i++)
1496 {
1497 mx_inline_diff (v, r, l, n, order);
1498 v += l*n;
1499 r += l*(n-order);
1500 }
1501 }
1502 }
1503
1504 // Assistant function
1505
1506 inline void
get_extent_triplet(const dim_vector & dims,int & dim,octave_idx_type & l,octave_idx_type & n,octave_idx_type & u)1507 get_extent_triplet (const dim_vector& dims, int& dim,
1508 octave_idx_type& l, octave_idx_type& n,
1509 octave_idx_type& u)
1510 {
1511 octave_idx_type ndims = dims.ndims ();
1512 if (dim >= ndims)
1513 {
1514 l = dims.numel ();
1515 n = 1;
1516 u = 1;
1517 }
1518 else
1519 {
1520 if (dim < 0)
1521 dim = dims.first_non_singleton ();
1522
1523 // calculate extent triplet.
1524 l = 1, n = dims(dim), u = 1;
1525 for (octave_idx_type i = 0; i < dim; i++)
1526 l *= dims(i);
1527 for (octave_idx_type i = dim + 1; i < ndims; i++)
1528 u *= dims(i);
1529 }
1530 }
1531
1532 // Appliers.
1533 // FIXME: is this the best design? C++ gives a lot of options here...
1534 // maybe it can be done without an explicit parameter?
1535
1536 template <typename R, typename T>
1537 inline Array<R>
do_mx_red_op(const Array<T> & src,int dim,void (* mx_red_op)(const T *,R *,octave_idx_type,octave_idx_type,octave_idx_type))1538 do_mx_red_op (const Array<T>& src, int dim,
1539 void (*mx_red_op) (const T *, R *, octave_idx_type,
1540 octave_idx_type, octave_idx_type))
1541 {
1542 octave_idx_type l, n, u;
1543 dim_vector dims = src.dims ();
1544 // M*b inconsistency: sum ([]) = 0 etc.
1545 if (dims.ndims () == 2 && dims(0) == 0 && dims(1) == 0)
1546 dims(1) = 1;
1547
1548 get_extent_triplet (dims, dim, l, n, u);
1549
1550 // Reduction operation reduces the array size.
1551 if (dim < dims.ndims ()) dims(dim) = 1;
1552 dims.chop_trailing_singletons ();
1553
1554 Array<R> ret (dims);
1555 mx_red_op (src.data (), ret.fortran_vec (), l, n, u);
1556
1557 return ret;
1558 }
1559
1560 template <typename R, typename T>
1561 inline Array<R>
do_mx_cum_op(const Array<T> & src,int dim,void (* mx_cum_op)(const T *,R *,octave_idx_type,octave_idx_type,octave_idx_type))1562 do_mx_cum_op (const Array<T>& src, int dim,
1563 void (*mx_cum_op) (const T *, R *, octave_idx_type,
1564 octave_idx_type, octave_idx_type))
1565 {
1566 octave_idx_type l, n, u;
1567 dim_vector dims = src.dims ();
1568 get_extent_triplet (dims, dim, l, n, u);
1569
1570 // Cumulative operation doesn't reduce the array size.
1571 Array<R> ret (dims);
1572 mx_cum_op (src.data (), ret.fortran_vec (), l, n, u);
1573
1574 return ret;
1575 }
1576
1577 template <typename R>
1578 inline Array<R>
do_mx_minmax_op(const Array<R> & src,int dim,void (* mx_minmax_op)(const R *,R *,octave_idx_type,octave_idx_type,octave_idx_type))1579 do_mx_minmax_op (const Array<R>& src, int dim,
1580 void (*mx_minmax_op) (const R *, R *, octave_idx_type,
1581 octave_idx_type, octave_idx_type))
1582 {
1583 octave_idx_type l, n, u;
1584 dim_vector dims = src.dims ();
1585 get_extent_triplet (dims, dim, l, n, u);
1586
1587 // If the dimension is zero, we don't do anything.
1588 if (dim < dims.ndims () && dims(dim) != 0) dims(dim) = 1;
1589 dims.chop_trailing_singletons ();
1590
1591 Array<R> ret (dims);
1592 mx_minmax_op (src.data (), ret.fortran_vec (), l, n, u);
1593
1594 return ret;
1595 }
1596
1597 template <typename R>
1598 inline Array<R>
do_mx_minmax_op(const Array<R> & src,Array<octave_idx_type> & idx,int dim,void (* mx_minmax_op)(const R *,R *,octave_idx_type *,octave_idx_type,octave_idx_type,octave_idx_type))1599 do_mx_minmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim,
1600 void (*mx_minmax_op) (const R *, R *, octave_idx_type *,
1601 octave_idx_type, octave_idx_type, octave_idx_type))
1602 {
1603 octave_idx_type l, n, u;
1604 dim_vector dims = src.dims ();
1605 get_extent_triplet (dims, dim, l, n, u);
1606
1607 // If the dimension is zero, we don't do anything.
1608 if (dim < dims.ndims () && dims(dim) != 0) dims(dim) = 1;
1609 dims.chop_trailing_singletons ();
1610
1611 Array<R> ret (dims);
1612 if (idx.dims () != dims) idx = Array<octave_idx_type> (dims);
1613
1614 mx_minmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (),
1615 l, n, u);
1616
1617 return ret;
1618 }
1619
1620 template <typename R>
1621 inline Array<R>
do_mx_cumminmax_op(const Array<R> & src,int dim,void (* mx_cumminmax_op)(const R *,R *,octave_idx_type,octave_idx_type,octave_idx_type))1622 do_mx_cumminmax_op (const Array<R>& src, int dim,
1623 void (*mx_cumminmax_op) (const R *, R *, octave_idx_type,
1624 octave_idx_type, octave_idx_type))
1625 {
1626 octave_idx_type l, n, u;
1627 dim_vector dims = src.dims ();
1628 get_extent_triplet (dims, dim, l, n, u);
1629
1630 Array<R> ret (dims);
1631 mx_cumminmax_op (src.data (), ret.fortran_vec (), l, n, u);
1632
1633 return ret;
1634 }
1635
1636 template <typename R>
1637 inline Array<R>
do_mx_cumminmax_op(const Array<R> & src,Array<octave_idx_type> & idx,int dim,void (* mx_cumminmax_op)(const R *,R *,octave_idx_type *,octave_idx_type,octave_idx_type,octave_idx_type))1638 do_mx_cumminmax_op (const Array<R>& src, Array<octave_idx_type>& idx, int dim,
1639 void (*mx_cumminmax_op) (const R *, R *, octave_idx_type *,
1640 octave_idx_type, octave_idx_type, octave_idx_type))
1641 {
1642 octave_idx_type l, n, u;
1643 dim_vector dims = src.dims ();
1644 get_extent_triplet (dims, dim, l, n, u);
1645
1646 Array<R> ret (dims);
1647 if (idx.dims () != dims) idx = Array<octave_idx_type> (dims);
1648
1649 mx_cumminmax_op (src.data (), ret.fortran_vec (), idx.fortran_vec (),
1650 l, n, u);
1651
1652 return ret;
1653 }
1654
1655 template <typename R>
1656 inline Array<R>
do_mx_diff_op(const Array<R> & src,int dim,octave_idx_type order,void (* mx_diff_op)(const R *,R *,octave_idx_type,octave_idx_type,octave_idx_type,octave_idx_type))1657 do_mx_diff_op (const Array<R>& src, int dim, octave_idx_type order,
1658 void (*mx_diff_op) (const R *, R *,
1659 octave_idx_type, octave_idx_type,
1660 octave_idx_type, octave_idx_type))
1661 {
1662 octave_idx_type l, n, u;
1663 if (order <= 0)
1664 return src;
1665
1666 dim_vector dims = src.dims ();
1667
1668 get_extent_triplet (dims, dim, l, n, u);
1669 if (dim >= dims.ndims ())
1670 dims.resize (dim+1, 1);
1671
1672 if (dims(dim) <= order)
1673 {
1674 dims(dim) = 0;
1675 return Array<R> (dims);
1676 }
1677 else
1678 {
1679 dims(dim) -= order;
1680 }
1681
1682 Array<R> ret (dims);
1683 mx_diff_op (src.data (), ret.fortran_vec (), l, n, u, order);
1684
1685 return ret;
1686 }
1687
1688 // Fast extra-precise summation. According to
1689 // T. Ogita, S. M. Rump, S. Oishi:
1690 // Accurate Sum And Dot Product,
1691 // SIAM J. Sci. Computing, Vol. 26, 2005
1692
1693 template <typename T>
twosum_accum(T & s,T & e,const T & x)1694 inline void twosum_accum (T& s, T& e,
1695 const T& x)
1696 {
1697 T s1 = s + x;
1698 T t = s1 - s;
1699 T e1 = (s - (s1 - t)) + (x - t);
1700 s = s1;
1701 e += e1;
1702 }
1703
1704 template <typename T>
1705 inline T
mx_inline_xsum(const T * v,octave_idx_type n)1706 mx_inline_xsum (const T *v, octave_idx_type n)
1707 {
1708 T s, e;
1709 s = e = 0;
1710 for (octave_idx_type i = 0; i < n; i++)
1711 twosum_accum (s, e, v[i]);
1712
1713 return s + e;
1714 }
1715
1716 template <typename T>
1717 inline void
mx_inline_xsum(const T * v,T * r,octave_idx_type m,octave_idx_type n)1718 mx_inline_xsum (const T *v, T *r,
1719 octave_idx_type m, octave_idx_type n)
1720 {
1721 OCTAVE_LOCAL_BUFFER (T, e, m);
1722 for (octave_idx_type i = 0; i < m; i++)
1723 e[i] = r[i] = T ();
1724
1725 for (octave_idx_type j = 0; j < n; j++)
1726 {
1727 for (octave_idx_type i = 0; i < m; i++)
1728 twosum_accum (r[i], e[i], v[i]);
1729
1730 v += m;
1731 }
1732
1733 for (octave_idx_type i = 0; i < m; i++)
1734 r[i] += e[i];
1735 }
1736
1737 OP_RED_FCNN (mx_inline_xsum, T, T)
1738
1739 #endif
1740