1 /*************************************************************************
2 ALGLIB 3.18.0 (source code generated 2021-10-25)
3 Copyright (c) Sergey Bochkanov (ALGLIB project).
4
5 >>> SOURCE LICENSE >>>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation (www.fsf.org); either version 2 of the
9 License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 A copy of the GNU General Public License is available at
17 http://www.fsf.org/licensing/licenses
18 >>> END OF LICENSE >>>
19 *************************************************************************/
20 #ifdef _MSC_VER
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 #include "stdafx.h"
24 #include "alglibinternal.h"
25
26 // disable some irrelevant warnings
27 #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
28 #pragma warning(disable:4100)
29 #pragma warning(disable:4127)
30 #pragma warning(disable:4611)
31 #pragma warning(disable:4702)
32 #pragma warning(disable:4996)
33 #endif
34
35 /////////////////////////////////////////////////////////////////////////
36 //
37 // THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
38 //
39 /////////////////////////////////////////////////////////////////////////
40 namespace alglib
41 {
42
43
44 }
45
46 /////////////////////////////////////////////////////////////////////////
47 //
48 // THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
49 //
50 /////////////////////////////////////////////////////////////////////////
51 namespace alglib_impl
52 {
53 #if defined(AE_COMPILE_APSERV) || !defined(AE_PARTIAL_BUILD)
54
55
56 #endif
57 #if defined(AE_COMPILE_ABLASF) || !defined(AE_PARTIAL_BUILD)
58 #ifdef ALGLIB_NO_FAST_KERNELS
59 static ae_bool ablasf_rgemm32basecase(ae_int_t m,
60 ae_int_t n,
61 ae_int_t k,
62 double alpha,
63 /* Real */ ae_matrix* a,
64 ae_int_t ia,
65 ae_int_t ja,
66 ae_int_t optypea,
67 /* Real */ ae_matrix* b,
68 ae_int_t ib,
69 ae_int_t jb,
70 ae_int_t optypeb,
71 double beta,
72 /* Real */ ae_matrix* c,
73 ae_int_t ic,
74 ae_int_t jc,
75 ae_state *_state);
76 #endif
77
78
79 #endif
80 #if defined(AE_COMPILE_HBLAS) || !defined(AE_PARTIAL_BUILD)
81
82
83 #endif
84 #if defined(AE_COMPILE_CREFLECTIONS) || !defined(AE_PARTIAL_BUILD)
85
86
87 #endif
88 #if defined(AE_COMPILE_SBLAS) || !defined(AE_PARTIAL_BUILD)
89
90
91 #endif
92 #if defined(AE_COMPILE_ABLASMKL) || !defined(AE_PARTIAL_BUILD)
93
94
95 #endif
96 #if defined(AE_COMPILE_SCODES) || !defined(AE_PARTIAL_BUILD)
97
98
99 #endif
100 #if defined(AE_COMPILE_TSORT) || !defined(AE_PARTIAL_BUILD)
101 static void tsort_tagsortfastirec(/* Real */ ae_vector* a,
102 /* Integer */ ae_vector* b,
103 /* Real */ ae_vector* bufa,
104 /* Integer */ ae_vector* bufb,
105 ae_int_t i1,
106 ae_int_t i2,
107 ae_state *_state);
108 static void tsort_tagsortfastrrec(/* Real */ ae_vector* a,
109 /* Real */ ae_vector* b,
110 /* Real */ ae_vector* bufa,
111 /* Real */ ae_vector* bufb,
112 ae_int_t i1,
113 ae_int_t i2,
114 ae_state *_state);
115 static void tsort_tagsortfastrec(/* Real */ ae_vector* a,
116 /* Real */ ae_vector* bufa,
117 ae_int_t i1,
118 ae_int_t i2,
119 ae_state *_state);
120
121
122 #endif
123 #if defined(AE_COMPILE_BLAS) || !defined(AE_PARTIAL_BUILD)
124
125
126 #endif
127 #if defined(AE_COMPILE_ROTATIONS) || !defined(AE_PARTIAL_BUILD)
128
129
130 #endif
131 #if defined(AE_COMPILE_BASICSTATOPS) || !defined(AE_PARTIAL_BUILD)
132
133
134 #endif
135 #if defined(AE_COMPILE_TRLINSOLVE) || !defined(AE_PARTIAL_BUILD)
136
137
138 #endif
139 #if defined(AE_COMPILE_SAFESOLVE) || !defined(AE_PARTIAL_BUILD)
140 static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
141 ae_complex beta,
142 double lnmax,
143 double bnorm,
144 double maxgrowth,
145 double* xnorm,
146 ae_complex* x,
147 ae_state *_state);
148
149
150 #endif
151 #if defined(AE_COMPILE_XBLAS) || !defined(AE_PARTIAL_BUILD)
152 static void xblas_xsum(/* Real */ ae_vector* w,
153 double mx,
154 ae_int_t n,
155 double* r,
156 double* rerr,
157 ae_state *_state);
158 static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state);
159
160
161 #endif
162 #if defined(AE_COMPILE_LINMIN) || !defined(AE_PARTIAL_BUILD)
163 static double linmin_ftol = 0.001;
164 static double linmin_xtol = 100*ae_machineepsilon;
165 static ae_int_t linmin_maxfev = 20;
166 static double linmin_stpmin = 1.0E-50;
167 static double linmin_defstpmax = 1.0E+50;
168 static double linmin_armijofactor = 1.3;
169 static void linmin_mcstep(double* stx,
170 double* fx,
171 double* dx,
172 double* sty,
173 double* fy,
174 double* dy,
175 double* stp,
176 double fp,
177 double dp,
178 ae_bool* brackt,
179 double stmin,
180 double stmax,
181 ae_int_t* info,
182 ae_state *_state);
183
184
185 #endif
186 #if defined(AE_COMPILE_NEARUNITYUNIT) || !defined(AE_PARTIAL_BUILD)
187
188
189 #endif
190 #if defined(AE_COMPILE_NTHEORY) || !defined(AE_PARTIAL_BUILD)
191 static ae_bool ntheory_isprime(ae_int_t n, ae_state *_state);
192 static ae_int_t ntheory_modmul(ae_int_t a,
193 ae_int_t b,
194 ae_int_t n,
195 ae_state *_state);
196 static ae_int_t ntheory_modexp(ae_int_t a,
197 ae_int_t b,
198 ae_int_t n,
199 ae_state *_state);
200
201
202 #endif
203 #if defined(AE_COMPILE_FTBASE) || !defined(AE_PARTIAL_BUILD)
204 static ae_int_t ftbase_coltype = 0;
205 static ae_int_t ftbase_coloperandscnt = 1;
206 static ae_int_t ftbase_coloperandsize = 2;
207 static ae_int_t ftbase_colmicrovectorsize = 3;
208 static ae_int_t ftbase_colparam0 = 4;
209 static ae_int_t ftbase_colparam1 = 5;
210 static ae_int_t ftbase_colparam2 = 6;
211 static ae_int_t ftbase_colparam3 = 7;
212 static ae_int_t ftbase_colscnt = 8;
213 static ae_int_t ftbase_opend = 0;
214 static ae_int_t ftbase_opcomplexreffft = 1;
215 static ae_int_t ftbase_opbluesteinsfft = 2;
216 static ae_int_t ftbase_opcomplexcodeletfft = 3;
217 static ae_int_t ftbase_opcomplexcodelettwfft = 4;
218 static ae_int_t ftbase_opradersfft = 5;
219 static ae_int_t ftbase_opcomplextranspose = -1;
220 static ae_int_t ftbase_opcomplexfftfactors = -2;
221 static ae_int_t ftbase_opstart = -3;
222 static ae_int_t ftbase_opjmp = -4;
223 static ae_int_t ftbase_opparallelcall = -5;
224 static ae_int_t ftbase_maxradix = 6;
225 static ae_int_t ftbase_updatetw = 16;
226 static ae_int_t ftbase_recursivethreshold = 1024;
227 static ae_int_t ftbase_raderthreshold = 19;
228 static ae_int_t ftbase_ftbasecodeletrecommended = 5;
229 static double ftbase_ftbaseinefficiencyfactor = 1.3;
230 static ae_int_t ftbase_ftbasemaxsmoothfactor = 5;
231 static void ftbase_ftdeterminespacerequirements(ae_int_t n,
232 ae_int_t* precrsize,
233 ae_int_t* precisize,
234 ae_state *_state);
235 static void ftbase_ftcomplexfftplanrec(ae_int_t n,
236 ae_int_t k,
237 ae_bool childplan,
238 ae_bool topmostplan,
239 ae_int_t* rowptr,
240 ae_int_t* bluesteinsize,
241 ae_int_t* precrptr,
242 ae_int_t* preciptr,
243 fasttransformplan* plan,
244 ae_state *_state);
245 static void ftbase_ftpushentry(fasttransformplan* plan,
246 ae_int_t* rowptr,
247 ae_int_t etype,
248 ae_int_t eopcnt,
249 ae_int_t eopsize,
250 ae_int_t emcvsize,
251 ae_int_t eparam0,
252 ae_state *_state);
253 static void ftbase_ftpushentry2(fasttransformplan* plan,
254 ae_int_t* rowptr,
255 ae_int_t etype,
256 ae_int_t eopcnt,
257 ae_int_t eopsize,
258 ae_int_t emcvsize,
259 ae_int_t eparam0,
260 ae_int_t eparam1,
261 ae_state *_state);
262 static void ftbase_ftpushentry4(fasttransformplan* plan,
263 ae_int_t* rowptr,
264 ae_int_t etype,
265 ae_int_t eopcnt,
266 ae_int_t eopsize,
267 ae_int_t emcvsize,
268 ae_int_t eparam0,
269 ae_int_t eparam1,
270 ae_int_t eparam2,
271 ae_int_t eparam3,
272 ae_state *_state);
273 static void ftbase_ftapplysubplan(fasttransformplan* plan,
274 ae_int_t subplan,
275 /* Real */ ae_vector* a,
276 ae_int_t abase,
277 ae_int_t aoffset,
278 /* Real */ ae_vector* buf,
279 ae_int_t repcnt,
280 ae_state *_state);
281 static void ftbase_ftapplycomplexreffft(/* Real */ ae_vector* a,
282 ae_int_t offs,
283 ae_int_t operandscnt,
284 ae_int_t operandsize,
285 ae_int_t microvectorsize,
286 /* Real */ ae_vector* buf,
287 ae_state *_state);
288 static void ftbase_ftapplycomplexcodeletfft(/* Real */ ae_vector* a,
289 ae_int_t offs,
290 ae_int_t operandscnt,
291 ae_int_t operandsize,
292 ae_int_t microvectorsize,
293 ae_state *_state);
294 static void ftbase_ftapplycomplexcodelettwfft(/* Real */ ae_vector* a,
295 ae_int_t offs,
296 ae_int_t operandscnt,
297 ae_int_t operandsize,
298 ae_int_t microvectorsize,
299 ae_state *_state);
300 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n,
301 ae_int_t m,
302 /* Real */ ae_vector* precr,
303 ae_int_t offs,
304 ae_state *_state);
305 static void ftbase_ftbluesteinsfft(fasttransformplan* plan,
306 /* Real */ ae_vector* a,
307 ae_int_t abase,
308 ae_int_t aoffset,
309 ae_int_t operandscnt,
310 ae_int_t n,
311 ae_int_t m,
312 ae_int_t precoffs,
313 ae_int_t subplan,
314 /* Real */ ae_vector* bufa,
315 /* Real */ ae_vector* bufb,
316 /* Real */ ae_vector* bufc,
317 /* Real */ ae_vector* bufd,
318 ae_state *_state);
319 static void ftbase_ftprecomputeradersfft(ae_int_t n,
320 ae_int_t rq,
321 ae_int_t riq,
322 /* Real */ ae_vector* precr,
323 ae_int_t offs,
324 ae_state *_state);
325 static void ftbase_ftradersfft(fasttransformplan* plan,
326 /* Real */ ae_vector* a,
327 ae_int_t abase,
328 ae_int_t aoffset,
329 ae_int_t operandscnt,
330 ae_int_t n,
331 ae_int_t subplan,
332 ae_int_t rq,
333 ae_int_t riq,
334 ae_int_t precoffs,
335 /* Real */ ae_vector* buf,
336 ae_state *_state);
337 static void ftbase_ftfactorize(ae_int_t n,
338 ae_bool isroot,
339 ae_int_t* n1,
340 ae_int_t* n2,
341 ae_state *_state);
342 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state);
343 static void ftbase_ffttwcalc(/* Real */ ae_vector* a,
344 ae_int_t aoffset,
345 ae_int_t n1,
346 ae_int_t n2,
347 ae_state *_state);
348 static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a,
349 ae_int_t m,
350 ae_int_t n,
351 ae_int_t astart,
352 /* Real */ ae_vector* buf,
353 ae_state *_state);
354 static void ftbase_ffticltrec(/* Real */ ae_vector* a,
355 ae_int_t astart,
356 ae_int_t astride,
357 /* Real */ ae_vector* b,
358 ae_int_t bstart,
359 ae_int_t bstride,
360 ae_int_t m,
361 ae_int_t n,
362 ae_state *_state);
363 static void ftbase_ftbasefindsmoothrec(ae_int_t n,
364 ae_int_t seed,
365 ae_int_t leastfactor,
366 ae_int_t* best,
367 ae_state *_state);
368
369
370 #endif
371 #if defined(AE_COMPILE_HPCCORES) || !defined(AE_PARTIAL_BUILD)
372 static ae_bool hpccores_hpcpreparechunkedgradientx(/* Real */ ae_vector* weights,
373 ae_int_t wcount,
374 /* Real */ ae_vector* hpcbuf,
375 ae_state *_state);
376 static ae_bool hpccores_hpcfinalizechunkedgradientx(/* Real */ ae_vector* buf,
377 ae_int_t wcount,
378 /* Real */ ae_vector* grad,
379 ae_state *_state);
380
381
382 #endif
383 #if defined(AE_COMPILE_ALGLIBBASICS) || !defined(AE_PARTIAL_BUILD)
384
385
386 #endif
387
388 #if defined(AE_COMPILE_APSERV) || !defined(AE_PARTIAL_BUILD)
389
390
391 /*************************************************************************
392 Internally calls SetErrorFlag() with condition:
393
394 Abs(Val-RefVal)>Tol*Max(Abs(RefVal),S)
395
396 This function is used to test relative error in Val against RefVal, with
397 relative error being replaced by absolute when scale of RefVal is less
398 than S.
399
400 This function returns value of COND.
401 *************************************************************************/
seterrorflagdiff(ae_bool * flag,double val,double refval,double tol,double s,ae_state * _state)402 void seterrorflagdiff(ae_bool* flag,
403 double val,
404 double refval,
405 double tol,
406 double s,
407 ae_state *_state)
408 {
409
410
411 ae_set_error_flag(flag, ae_fp_greater(ae_fabs(val-refval, _state),tol*ae_maxreal(ae_fabs(refval, _state), s, _state)), __FILE__, __LINE__, "apserv.ap:162");
412 }
413
414
415 /*************************************************************************
416 The function always returns False.
417 It may be used sometimes to prevent spurious warnings.
418
419 -- ALGLIB --
420 Copyright 17.09.2012 by Bochkanov Sergey
421 *************************************************************************/
alwaysfalse(ae_state * _state)422 ae_bool alwaysfalse(ae_state *_state)
423 {
424 ae_bool result;
425
426
427 result = ae_false;
428 return result;
429 }
430
431
432 /*************************************************************************
433 The function "touches" integer - it is used to avoid compiler messages
434 about unused variables (in rare cases when we do NOT want to remove these
435 variables).
436
437 -- ALGLIB --
438 Copyright 17.09.2012 by Bochkanov Sergey
439 *************************************************************************/
touchint(ae_int_t * a,ae_state * _state)440 void touchint(ae_int_t* a, ae_state *_state)
441 {
442
443
444 }
445
446
447 /*************************************************************************
448 The function "touches" real - it is used to avoid compiler messages
449 about unused variables (in rare cases when we do NOT want to remove these
450 variables).
451
452 -- ALGLIB --
453 Copyright 17.09.2012 by Bochkanov Sergey
454 *************************************************************************/
touchreal(double * a,ae_state * _state)455 void touchreal(double* a, ae_state *_state)
456 {
457
458
459 }
460
461
462 /*************************************************************************
463 The function performs zero-coalescing on real value.
464
465 NOTE: no check is performed for B<>0
466
467 -- ALGLIB --
468 Copyright 18.05.2015 by Bochkanov Sergey
469 *************************************************************************/
coalesce(double a,double b,ae_state * _state)470 double coalesce(double a, double b, ae_state *_state)
471 {
472 double result;
473
474
475 result = a;
476 if( ae_fp_eq(a,0.0) )
477 {
478 result = b;
479 }
480 return result;
481 }
482
483
484 /*************************************************************************
485 The function performs zero-coalescing on integer value.
486
487 NOTE: no check is performed for B<>0
488
489 -- ALGLIB --
490 Copyright 18.05.2015 by Bochkanov Sergey
491 *************************************************************************/
coalescei(ae_int_t a,ae_int_t b,ae_state * _state)492 ae_int_t coalescei(ae_int_t a, ae_int_t b, ae_state *_state)
493 {
494 ae_int_t result;
495
496
497 result = a;
498 if( a==0 )
499 {
500 result = b;
501 }
502 return result;
503 }
504
505
506 /*************************************************************************
507 The function convert integer value to real value.
508
509 -- ALGLIB --
510 Copyright 17.09.2012 by Bochkanov Sergey
511 *************************************************************************/
inttoreal(ae_int_t a,ae_state * _state)512 double inttoreal(ae_int_t a, ae_state *_state)
513 {
514 double result;
515
516
517 result = (double)(a);
518 return result;
519 }
520
521
522 /*************************************************************************
523 The function calculates binary logarithm.
524
525 NOTE: it costs twice as much as Ln(x)
526
527 -- ALGLIB --
528 Copyright 17.09.2012 by Bochkanov Sergey
529 *************************************************************************/
logbase2(double x,ae_state * _state)530 double logbase2(double x, ae_state *_state)
531 {
532 double result;
533
534
535 result = ae_log(x, _state)/ae_log((double)(2), _state);
536 return result;
537 }
538
539
540 /*************************************************************************
541 This function compares two numbers for approximate equality, with tolerance
542 to errors as large as tol.
543
544
545 -- ALGLIB --
546 Copyright 02.12.2009 by Bochkanov Sergey
547 *************************************************************************/
approxequal(double a,double b,double tol,ae_state * _state)548 ae_bool approxequal(double a, double b, double tol, ae_state *_state)
549 {
550 ae_bool result;
551
552
553 result = ae_fp_less_eq(ae_fabs(a-b, _state),tol);
554 return result;
555 }
556
557
558 /*************************************************************************
559 This function compares two numbers for approximate equality, with tolerance
560 to errors as large as max(|a|,|b|)*tol.
561
562
563 -- ALGLIB --
564 Copyright 02.12.2009 by Bochkanov Sergey
565 *************************************************************************/
approxequalrel(double a,double b,double tol,ae_state * _state)566 ae_bool approxequalrel(double a, double b, double tol, ae_state *_state)
567 {
568 ae_bool result;
569
570
571 result = ae_fp_less_eq(ae_fabs(a-b, _state),ae_maxreal(ae_fabs(a, _state), ae_fabs(b, _state), _state)*tol);
572 return result;
573 }
574
575
576 /*************************************************************************
577 This function generates 1-dimensional general interpolation task with
578 moderate Lipshitz constant (close to 1.0)
579
580 If N=1 then suborutine generates only one point at the middle of [A,B]
581
582 -- ALGLIB --
583 Copyright 02.12.2009 by Bochkanov Sergey
584 *************************************************************************/
taskgenint1d(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)585 void taskgenint1d(double a,
586 double b,
587 ae_int_t n,
588 /* Real */ ae_vector* x,
589 /* Real */ ae_vector* y,
590 ae_state *_state)
591 {
592 ae_int_t i;
593 double h;
594
595 ae_vector_clear(x);
596 ae_vector_clear(y);
597
598 ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
599 ae_vector_set_length(x, n, _state);
600 ae_vector_set_length(y, n, _state);
601 if( n>1 )
602 {
603 x->ptr.p_double[0] = a;
604 y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
605 h = (b-a)/(n-1);
606 for(i=1; i<=n-1; i++)
607 {
608 if( i!=n-1 )
609 {
610 x->ptr.p_double[i] = a+(i+0.2*(2*ae_randomreal(_state)-1))*h;
611 }
612 else
613 {
614 x->ptr.p_double[i] = b;
615 }
616 y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
617 }
618 }
619 else
620 {
621 x->ptr.p_double[0] = 0.5*(a+b);
622 y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
623 }
624 }
625
626
627 /*************************************************************************
628 This function generates 1-dimensional equidistant interpolation task with
629 moderate Lipshitz constant (close to 1.0)
630
631 If N=1 then suborutine generates only one point at the middle of [A,B]
632
633 -- ALGLIB --
634 Copyright 02.12.2009 by Bochkanov Sergey
635 *************************************************************************/
taskgenint1dequidist(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)636 void taskgenint1dequidist(double a,
637 double b,
638 ae_int_t n,
639 /* Real */ ae_vector* x,
640 /* Real */ ae_vector* y,
641 ae_state *_state)
642 {
643 ae_int_t i;
644 double h;
645
646 ae_vector_clear(x);
647 ae_vector_clear(y);
648
649 ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
650 ae_vector_set_length(x, n, _state);
651 ae_vector_set_length(y, n, _state);
652 if( n>1 )
653 {
654 x->ptr.p_double[0] = a;
655 y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
656 h = (b-a)/(n-1);
657 for(i=1; i<=n-1; i++)
658 {
659 x->ptr.p_double[i] = a+i*h;
660 y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*h;
661 }
662 }
663 else
664 {
665 x->ptr.p_double[0] = 0.5*(a+b);
666 y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
667 }
668 }
669
670
671 /*************************************************************************
672 This function generates 1-dimensional Chebyshev-1 interpolation task with
673 moderate Lipshitz constant (close to 1.0)
674
675 If N=1 then suborutine generates only one point at the middle of [A,B]
676
677 -- ALGLIB --
678 Copyright 02.12.2009 by Bochkanov Sergey
679 *************************************************************************/
taskgenint1dcheb1(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)680 void taskgenint1dcheb1(double a,
681 double b,
682 ae_int_t n,
683 /* Real */ ae_vector* x,
684 /* Real */ ae_vector* y,
685 ae_state *_state)
686 {
687 ae_int_t i;
688
689 ae_vector_clear(x);
690 ae_vector_clear(y);
691
692 ae_assert(n>=1, "TaskGenInterpolation1DCheb1: N<1!", _state);
693 ae_vector_set_length(x, n, _state);
694 ae_vector_set_length(y, n, _state);
695 if( n>1 )
696 {
697 for(i=0; i<=n-1; i++)
698 {
699 x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*(2*i+1)/(2*n), _state);
700 if( i==0 )
701 {
702 y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
703 }
704 else
705 {
706 y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
707 }
708 }
709 }
710 else
711 {
712 x->ptr.p_double[0] = 0.5*(a+b);
713 y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
714 }
715 }
716
717
718 /*************************************************************************
719 This function generates 1-dimensional Chebyshev-2 interpolation task with
720 moderate Lipshitz constant (close to 1.0)
721
722 If N=1 then suborutine generates only one point at the middle of [A,B]
723
724 -- ALGLIB --
725 Copyright 02.12.2009 by Bochkanov Sergey
726 *************************************************************************/
taskgenint1dcheb2(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)727 void taskgenint1dcheb2(double a,
728 double b,
729 ae_int_t n,
730 /* Real */ ae_vector* x,
731 /* Real */ ae_vector* y,
732 ae_state *_state)
733 {
734 ae_int_t i;
735
736 ae_vector_clear(x);
737 ae_vector_clear(y);
738
739 ae_assert(n>=1, "TaskGenInterpolation1DCheb2: N<1!", _state);
740 ae_vector_set_length(x, n, _state);
741 ae_vector_set_length(y, n, _state);
742 if( n>1 )
743 {
744 for(i=0; i<=n-1; i++)
745 {
746 x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*i/(n-1), _state);
747 if( i==0 )
748 {
749 y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
750 }
751 else
752 {
753 y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
754 }
755 }
756 }
757 else
758 {
759 x->ptr.p_double[0] = 0.5*(a+b);
760 y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
761 }
762 }
763
764
765 /*************************************************************************
766 This function checks that all values from X[] are distinct. It does more
767 than just usual floating point comparison:
768 * first, it calculates max(X) and min(X)
769 * second, it maps X[] from [min,max] to [1,2]
770 * only at this stage actual comparison is done
771
772 The meaning of such check is to ensure that all values are "distinct enough"
773 and will not cause interpolation subroutine to fail.
774
775 NOTE:
776 X[] must be sorted by ascending (subroutine ASSERT's it)
777
778 -- ALGLIB --
779 Copyright 02.12.2009 by Bochkanov Sergey
780 *************************************************************************/
aredistinct(ae_vector * x,ae_int_t n,ae_state * _state)781 ae_bool aredistinct(/* Real */ ae_vector* x,
782 ae_int_t n,
783 ae_state *_state)
784 {
785 double a;
786 double b;
787 ae_int_t i;
788 ae_bool nonsorted;
789 ae_bool result;
790
791
792 ae_assert(n>=1, "APSERVAreDistinct: internal error (N<1)", _state);
793 if( n==1 )
794 {
795
796 /*
797 * everything is alright, it is up to caller to decide whether it
798 * can interpolate something with just one point
799 */
800 result = ae_true;
801 return result;
802 }
803 a = x->ptr.p_double[0];
804 b = x->ptr.p_double[0];
805 nonsorted = ae_false;
806 for(i=1; i<=n-1; i++)
807 {
808 a = ae_minreal(a, x->ptr.p_double[i], _state);
809 b = ae_maxreal(b, x->ptr.p_double[i], _state);
810 nonsorted = nonsorted||ae_fp_greater_eq(x->ptr.p_double[i-1],x->ptr.p_double[i]);
811 }
812 ae_assert(!nonsorted, "APSERVAreDistinct: internal error (not sorted)", _state);
813 for(i=1; i<=n-1; i++)
814 {
815 if( ae_fp_eq((x->ptr.p_double[i]-a)/(b-a)+1,(x->ptr.p_double[i-1]-a)/(b-a)+1) )
816 {
817 result = ae_false;
818 return result;
819 }
820 }
821 result = ae_true;
822 return result;
823 }
824
825
826 /*************************************************************************
827 This function checks that two boolean values are the same (both are True
828 or both are False).
829
830 -- ALGLIB --
831 Copyright 02.12.2009 by Bochkanov Sergey
832 *************************************************************************/
aresameboolean(ae_bool v1,ae_bool v2,ae_state * _state)833 ae_bool aresameboolean(ae_bool v1, ae_bool v2, ae_state *_state)
834 {
835 ae_bool result;
836
837
838 result = (v1&&v2)||(!v1&&!v2);
839 return result;
840 }
841
842
843 /*************************************************************************
844 Resizes X and fills by zeros
845
846 -- ALGLIB --
847 Copyright 20.03.2009 by Bochkanov Sergey
848 *************************************************************************/
setlengthzero(ae_vector * x,ae_int_t n,ae_state * _state)849 void setlengthzero(/* Real */ ae_vector* x,
850 ae_int_t n,
851 ae_state *_state)
852 {
853 ae_int_t i;
854
855
856 ae_assert(n>=0, "SetLengthZero: N<0", _state);
857 ae_vector_set_length(x, n, _state);
858 for(i=0; i<=n-1; i++)
859 {
860 x->ptr.p_double[i] = (double)(0);
861 }
862 }
863
864
865 /*************************************************************************
866 If Length(X)<N, resizes X
867
868 -- ALGLIB --
869 Copyright 20.03.2009 by Bochkanov Sergey
870 *************************************************************************/
bvectorsetlengthatleast(ae_vector * x,ae_int_t n,ae_state * _state)871 void bvectorsetlengthatleast(/* Boolean */ ae_vector* x,
872 ae_int_t n,
873 ae_state *_state)
874 {
875
876
877 if( x->cnt<n )
878 {
879 ae_vector_set_length(x, n, _state);
880 }
881 }
882
883
884 /*************************************************************************
885 If Length(X)<N, resizes X
886
887 -- ALGLIB --
888 Copyright 20.03.2009 by Bochkanov Sergey
889 *************************************************************************/
ivectorsetlengthatleast(ae_vector * x,ae_int_t n,ae_state * _state)890 void ivectorsetlengthatleast(/* Integer */ ae_vector* x,
891 ae_int_t n,
892 ae_state *_state)
893 {
894
895
896 if( x->cnt<n )
897 {
898 ae_vector_set_length(x, n, _state);
899 }
900 }
901
902
903 /*************************************************************************
904 If Length(X)<N, resizes X
905
906 -- ALGLIB --
907 Copyright 20.03.2009 by Bochkanov Sergey
908 *************************************************************************/
rvectorsetlengthatleast(ae_vector * x,ae_int_t n,ae_state * _state)909 void rvectorsetlengthatleast(/* Real */ ae_vector* x,
910 ae_int_t n,
911 ae_state *_state)
912 {
913
914
915 if( x->cnt<n )
916 {
917 ae_vector_set_length(x, n, _state);
918 }
919 }
920
921
922 /*************************************************************************
923 If Cols(X)<N or Rows(X)<M, resizes X
924
925 -- ALGLIB --
926 Copyright 20.03.2009 by Bochkanov Sergey
927 *************************************************************************/
rmatrixsetlengthatleast(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)928 void rmatrixsetlengthatleast(/* Real */ ae_matrix* x,
929 ae_int_t m,
930 ae_int_t n,
931 ae_state *_state)
932 {
933
934
935 if( m>0&&n>0 )
936 {
937 if( x->rows<m||x->cols<n )
938 {
939 ae_matrix_set_length(x, m, n, _state);
940 }
941 }
942 }
943
944
945 /*************************************************************************
946 If Cols(X)<N or Rows(X)<M, resizes X
947
948 -- ALGLIB --
949 Copyright 20.03.2009 by Bochkanov Sergey
950 *************************************************************************/
bmatrixsetlengthatleast(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)951 void bmatrixsetlengthatleast(/* Boolean */ ae_matrix* x,
952 ae_int_t m,
953 ae_int_t n,
954 ae_state *_state)
955 {
956
957
958 if( m>0&&n>0 )
959 {
960 if( x->rows<m||x->cols<n )
961 {
962 ae_matrix_set_length(x, m, n, _state);
963 }
964 }
965 }
966
967
968 /*************************************************************************
969 Grows X, i.e. changes its size in such a way that:
970 a) contents is preserved
971 b) new size is at least N
972 c) new size can be larger than N, so subsequent grow() calls can return
973 without reallocation
974
975 -- ALGLIB --
976 Copyright 20.03.2009 by Bochkanov Sergey
977 *************************************************************************/
bvectorgrowto(ae_vector * x,ae_int_t n,ae_state * _state)978 void bvectorgrowto(/* Boolean */ ae_vector* x,
979 ae_int_t n,
980 ae_state *_state)
981 {
982 ae_frame _frame_block;
983 ae_vector oldx;
984 ae_int_t i;
985 ae_int_t n2;
986
987 ae_frame_make(_state, &_frame_block);
988 memset(&oldx, 0, sizeof(oldx));
989 ae_vector_init(&oldx, 0, DT_BOOL, _state, ae_true);
990
991
992 /*
993 * Enough place
994 */
995 if( x->cnt>=n )
996 {
997 ae_frame_leave(_state);
998 return;
999 }
1000
1001 /*
1002 * Choose new size
1003 */
1004 n = ae_maxint(n, ae_round(1.8*x->cnt+1, _state), _state);
1005
1006 /*
1007 * Grow
1008 */
1009 n2 = x->cnt;
1010 ae_swap_vectors(x, &oldx);
1011 ae_vector_set_length(x, n, _state);
1012 for(i=0; i<=n-1; i++)
1013 {
1014 if( i<n2 )
1015 {
1016 x->ptr.p_bool[i] = oldx.ptr.p_bool[i];
1017 }
1018 else
1019 {
1020 x->ptr.p_bool[i] = ae_false;
1021 }
1022 }
1023 ae_frame_leave(_state);
1024 }
1025
1026
1027 /*************************************************************************
1028 Grows X, i.e. changes its size in such a way that:
1029 a) contents is preserved
1030 b) new size is at least N
1031 c) new size can be larger than N, so subsequent grow() calls can return
1032 without reallocation
1033
1034 -- ALGLIB --
1035 Copyright 20.03.2009 by Bochkanov Sergey
1036 *************************************************************************/
ivectorgrowto(ae_vector * x,ae_int_t n,ae_state * _state)1037 void ivectorgrowto(/* Integer */ ae_vector* x,
1038 ae_int_t n,
1039 ae_state *_state)
1040 {
1041 ae_frame _frame_block;
1042 ae_vector oldx;
1043 ae_int_t i;
1044 ae_int_t n2;
1045
1046 ae_frame_make(_state, &_frame_block);
1047 memset(&oldx, 0, sizeof(oldx));
1048 ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
1049
1050
1051 /*
1052 * Enough place
1053 */
1054 if( x->cnt>=n )
1055 {
1056 ae_frame_leave(_state);
1057 return;
1058 }
1059
1060 /*
1061 * Choose new size
1062 */
1063 n = ae_maxint(n, ae_round(1.8*x->cnt+1, _state), _state);
1064
1065 /*
1066 * Grow
1067 */
1068 n2 = x->cnt;
1069 ae_swap_vectors(x, &oldx);
1070 ae_vector_set_length(x, n, _state);
1071 for(i=0; i<=n-1; i++)
1072 {
1073 if( i<n2 )
1074 {
1075 x->ptr.p_int[i] = oldx.ptr.p_int[i];
1076 }
1077 else
1078 {
1079 x->ptr.p_int[i] = 0;
1080 }
1081 }
1082 ae_frame_leave(_state);
1083 }
1084
1085
1086 /*************************************************************************
1087 Grows X, i.e. appends rows in such a way that:
1088 a) contents is preserved
1089 b) new row count is at least N
1090 c) new row count can be larger than N, so subsequent grow() calls can return
1091 without reallocation
1092 d) new matrix has at least MinCols columns (if less than specified amount
1093 of columns is present, new columns are added with undefined contents);
1094 MinCols can be 0 or negative value = ignored
1095
1096 -- ALGLIB --
1097 Copyright 20.03.2009 by Bochkanov Sergey
1098 *************************************************************************/
rmatrixgrowrowsto(ae_matrix * a,ae_int_t n,ae_int_t mincols,ae_state * _state)1099 void rmatrixgrowrowsto(/* Real */ ae_matrix* a,
1100 ae_int_t n,
1101 ae_int_t mincols,
1102 ae_state *_state)
1103 {
1104 ae_frame _frame_block;
1105 ae_matrix olda;
1106 ae_int_t i;
1107 ae_int_t j;
1108 ae_int_t n2;
1109 ae_int_t m;
1110
1111 ae_frame_make(_state, &_frame_block);
1112 memset(&olda, 0, sizeof(olda));
1113 ae_matrix_init(&olda, 0, 0, DT_REAL, _state, ae_true);
1114
1115
1116 /*
1117 * Enough place?
1118 */
1119 if( a->rows>=n&&a->cols>=mincols )
1120 {
1121 ae_frame_leave(_state);
1122 return;
1123 }
1124
1125 /*
1126 * Sizes and metrics
1127 */
1128 if( a->rows<n )
1129 {
1130 n = ae_maxint(n, ae_round(1.8*a->rows+1, _state), _state);
1131 }
1132 n2 = ae_minint(a->rows, n, _state);
1133 m = a->cols;
1134
1135 /*
1136 * Grow
1137 */
1138 ae_swap_matrices(a, &olda);
1139 ae_matrix_set_length(a, n, ae_maxint(m, mincols, _state), _state);
1140 for(i=0; i<=n2-1; i++)
1141 {
1142 for(j=0; j<=m-1; j++)
1143 {
1144 a->ptr.pp_double[i][j] = olda.ptr.pp_double[i][j];
1145 }
1146 }
1147 ae_frame_leave(_state);
1148 }
1149
1150
1151 /*************************************************************************
1152 Grows X, i.e. appends cols in such a way that:
1153 a) contents is preserved
1154 b) new col count is at least N
1155 c) new col count can be larger than N, so subsequent grow() calls can return
1156 without reallocation
1157 d) new matrix has at least MinRows row (if less than specified amount
1158 of rows is present, new rows are added with undefined contents);
1159 MinRows can be 0 or negative value = ignored
1160
1161 -- ALGLIB --
1162 Copyright 20.03.2009 by Bochkanov Sergey
1163 *************************************************************************/
rmatrixgrowcolsto(ae_matrix * a,ae_int_t n,ae_int_t minrows,ae_state * _state)1164 void rmatrixgrowcolsto(/* Real */ ae_matrix* a,
1165 ae_int_t n,
1166 ae_int_t minrows,
1167 ae_state *_state)
1168 {
1169 ae_frame _frame_block;
1170 ae_matrix olda;
1171 ae_int_t i;
1172 ae_int_t j;
1173 ae_int_t n2;
1174 ae_int_t m;
1175
1176 ae_frame_make(_state, &_frame_block);
1177 memset(&olda, 0, sizeof(olda));
1178 ae_matrix_init(&olda, 0, 0, DT_REAL, _state, ae_true);
1179
1180
1181 /*
1182 * Enough place?
1183 */
1184 if( a->cols>=n&&a->rows>=minrows )
1185 {
1186 ae_frame_leave(_state);
1187 return;
1188 }
1189
1190 /*
1191 * Sizes and metrics
1192 */
1193 if( a->cols<n )
1194 {
1195 n = ae_maxint(n, ae_round(1.8*a->cols+1, _state), _state);
1196 }
1197 n2 = ae_minint(a->cols, n, _state);
1198 m = a->rows;
1199
1200 /*
1201 * Grow
1202 */
1203 ae_swap_matrices(a, &olda);
1204 ae_matrix_set_length(a, ae_maxint(m, minrows, _state), n, _state);
1205 for(i=0; i<=m-1; i++)
1206 {
1207 for(j=0; j<=n2-1; j++)
1208 {
1209 a->ptr.pp_double[i][j] = olda.ptr.pp_double[i][j];
1210 }
1211 }
1212 ae_frame_leave(_state);
1213 }
1214
1215
1216 /*************************************************************************
1217 Grows X, i.e. changes its size in such a way that:
1218 a) contents is preserved
1219 b) new size is at least N
1220 c) new size can be larger than N, so subsequent grow() calls can return
1221 without reallocation
1222
1223 -- ALGLIB --
1224 Copyright 20.03.2009 by Bochkanov Sergey
1225 *************************************************************************/
rvectorgrowto(ae_vector * x,ae_int_t n,ae_state * _state)1226 void rvectorgrowto(/* Real */ ae_vector* x,
1227 ae_int_t n,
1228 ae_state *_state)
1229 {
1230 ae_frame _frame_block;
1231 ae_vector oldx;
1232 ae_int_t i;
1233 ae_int_t n2;
1234
1235 ae_frame_make(_state, &_frame_block);
1236 memset(&oldx, 0, sizeof(oldx));
1237 ae_vector_init(&oldx, 0, DT_REAL, _state, ae_true);
1238
1239
1240 /*
1241 * Enough place
1242 */
1243 if( x->cnt>=n )
1244 {
1245 ae_frame_leave(_state);
1246 return;
1247 }
1248
1249 /*
1250 * Choose new size
1251 */
1252 n = ae_maxint(n, ae_round(1.8*x->cnt+1, _state), _state);
1253
1254 /*
1255 * Grow
1256 */
1257 n2 = x->cnt;
1258 ae_swap_vectors(x, &oldx);
1259 ae_vector_set_length(x, n, _state);
1260 for(i=0; i<=n-1; i++)
1261 {
1262 if( i<n2 )
1263 {
1264 x->ptr.p_double[i] = oldx.ptr.p_double[i];
1265 }
1266 else
1267 {
1268 x->ptr.p_double[i] = (double)(0);
1269 }
1270 }
1271 ae_frame_leave(_state);
1272 }
1273
1274
1275 /*************************************************************************
1276 Resizes X and:
1277 * preserves old contents of X
1278 * fills new elements by zeros
1279
1280 -- ALGLIB --
1281 Copyright 20.03.2009 by Bochkanov Sergey
1282 *************************************************************************/
ivectorresize(ae_vector * x,ae_int_t n,ae_state * _state)1283 void ivectorresize(/* Integer */ ae_vector* x,
1284 ae_int_t n,
1285 ae_state *_state)
1286 {
1287 ae_frame _frame_block;
1288 ae_vector oldx;
1289 ae_int_t i;
1290 ae_int_t n2;
1291
1292 ae_frame_make(_state, &_frame_block);
1293 memset(&oldx, 0, sizeof(oldx));
1294 ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
1295
1296 n2 = x->cnt;
1297 ae_swap_vectors(x, &oldx);
1298 ae_vector_set_length(x, n, _state);
1299 for(i=0; i<=n-1; i++)
1300 {
1301 if( i<n2 )
1302 {
1303 x->ptr.p_int[i] = oldx.ptr.p_int[i];
1304 }
1305 else
1306 {
1307 x->ptr.p_int[i] = 0;
1308 }
1309 }
1310 ae_frame_leave(_state);
1311 }
1312
1313
1314 /*************************************************************************
1315 Resizes X and:
1316 * preserves old contents of X
1317 * fills new elements by zeros
1318
1319 -- ALGLIB --
1320 Copyright 20.03.2009 by Bochkanov Sergey
1321 *************************************************************************/
rvectorresize(ae_vector * x,ae_int_t n,ae_state * _state)1322 void rvectorresize(/* Real */ ae_vector* x,
1323 ae_int_t n,
1324 ae_state *_state)
1325 {
1326 ae_frame _frame_block;
1327 ae_vector oldx;
1328 ae_int_t i;
1329 ae_int_t n2;
1330
1331 ae_frame_make(_state, &_frame_block);
1332 memset(&oldx, 0, sizeof(oldx));
1333 ae_vector_init(&oldx, 0, DT_REAL, _state, ae_true);
1334
1335 n2 = x->cnt;
1336 ae_swap_vectors(x, &oldx);
1337 ae_vector_set_length(x, n, _state);
1338 for(i=0; i<=n-1; i++)
1339 {
1340 if( i<n2 )
1341 {
1342 x->ptr.p_double[i] = oldx.ptr.p_double[i];
1343 }
1344 else
1345 {
1346 x->ptr.p_double[i] = (double)(0);
1347 }
1348 }
1349 ae_frame_leave(_state);
1350 }
1351
1352
1353 /*************************************************************************
1354 Resizes X and:
1355 * preserves old contents of X
1356 * fills new elements by zeros
1357
1358 -- ALGLIB --
1359 Copyright 20.03.2009 by Bochkanov Sergey
1360 *************************************************************************/
rmatrixresize(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1361 void rmatrixresize(/* Real */ ae_matrix* x,
1362 ae_int_t m,
1363 ae_int_t n,
1364 ae_state *_state)
1365 {
1366 ae_frame _frame_block;
1367 ae_matrix oldx;
1368 ae_int_t i;
1369 ae_int_t j;
1370 ae_int_t m2;
1371 ae_int_t n2;
1372
1373 ae_frame_make(_state, &_frame_block);
1374 memset(&oldx, 0, sizeof(oldx));
1375 ae_matrix_init(&oldx, 0, 0, DT_REAL, _state, ae_true);
1376
1377 m2 = x->rows;
1378 n2 = x->cols;
1379 ae_swap_matrices(x, &oldx);
1380 ae_matrix_set_length(x, m, n, _state);
1381 for(i=0; i<=m-1; i++)
1382 {
1383 for(j=0; j<=n-1; j++)
1384 {
1385 if( i<m2&&j<n2 )
1386 {
1387 x->ptr.pp_double[i][j] = oldx.ptr.pp_double[i][j];
1388 }
1389 else
1390 {
1391 x->ptr.pp_double[i][j] = 0.0;
1392 }
1393 }
1394 }
1395 ae_frame_leave(_state);
1396 }
1397
1398
1399 /*************************************************************************
1400 Resizes X and:
1401 * preserves old contents of X
1402 * fills new elements by zeros
1403
1404 -- ALGLIB --
1405 Copyright 20.03.2009 by Bochkanov Sergey
1406 *************************************************************************/
imatrixresize(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1407 void imatrixresize(/* Integer */ ae_matrix* x,
1408 ae_int_t m,
1409 ae_int_t n,
1410 ae_state *_state)
1411 {
1412 ae_frame _frame_block;
1413 ae_matrix oldx;
1414 ae_int_t i;
1415 ae_int_t j;
1416 ae_int_t m2;
1417 ae_int_t n2;
1418
1419 ae_frame_make(_state, &_frame_block);
1420 memset(&oldx, 0, sizeof(oldx));
1421 ae_matrix_init(&oldx, 0, 0, DT_INT, _state, ae_true);
1422
1423 m2 = x->rows;
1424 n2 = x->cols;
1425 ae_swap_matrices(x, &oldx);
1426 ae_matrix_set_length(x, m, n, _state);
1427 for(i=0; i<=m-1; i++)
1428 {
1429 for(j=0; j<=n-1; j++)
1430 {
1431 if( i<m2&&j<n2 )
1432 {
1433 x->ptr.pp_int[i][j] = oldx.ptr.pp_int[i][j];
1434 }
1435 else
1436 {
1437 x->ptr.pp_int[i][j] = 0;
1438 }
1439 }
1440 }
1441 ae_frame_leave(_state);
1442 }
1443
1444
1445 /*************************************************************************
1446 appends element to X
1447
1448 -- ALGLIB --
1449 Copyright 20.03.2009 by Bochkanov Sergey
1450 *************************************************************************/
ivectorappend(ae_vector * x,ae_int_t v,ae_state * _state)1451 void ivectorappend(/* Integer */ ae_vector* x,
1452 ae_int_t v,
1453 ae_state *_state)
1454 {
1455 ae_frame _frame_block;
1456 ae_vector oldx;
1457 ae_int_t i;
1458 ae_int_t n;
1459
1460 ae_frame_make(_state, &_frame_block);
1461 memset(&oldx, 0, sizeof(oldx));
1462 ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
1463
1464 n = x->cnt;
1465 ae_swap_vectors(x, &oldx);
1466 ae_vector_set_length(x, n+1, _state);
1467 for(i=0; i<=n-1; i++)
1468 {
1469 x->ptr.p_int[i] = oldx.ptr.p_int[i];
1470 }
1471 x->ptr.p_int[n] = v;
1472 ae_frame_leave(_state);
1473 }
1474
1475
1476 /*************************************************************************
1477 This function checks that length(X) is at least N and first N values from
1478 X[] are finite
1479
1480 -- ALGLIB --
1481 Copyright 18.06.2010 by Bochkanov Sergey
1482 *************************************************************************/
isfinitevector(ae_vector * x,ae_int_t n,ae_state * _state)1483 ae_bool isfinitevector(/* Real */ ae_vector* x,
1484 ae_int_t n,
1485 ae_state *_state)
1486 {
1487 ae_int_t i;
1488 double v;
1489 ae_bool result;
1490
1491
1492 ae_assert(n>=0, "APSERVIsFiniteVector: internal error (N<0)", _state);
1493 if( n==0 )
1494 {
1495 result = ae_true;
1496 return result;
1497 }
1498 if( x->cnt<n )
1499 {
1500 result = ae_false;
1501 return result;
1502 }
1503 v = (double)(0);
1504 for(i=0; i<=n-1; i++)
1505 {
1506 v = 0.01*v+x->ptr.p_double[i];
1507 }
1508 result = ae_isfinite(v, _state);
1509 return result;
1510 }
1511
1512
1513 /*************************************************************************
1514 This function checks that first N values from X[] are finite
1515
1516 -- ALGLIB --
1517 Copyright 18.06.2010 by Bochkanov Sergey
1518 *************************************************************************/
isfinitecvector(ae_vector * z,ae_int_t n,ae_state * _state)1519 ae_bool isfinitecvector(/* Complex */ ae_vector* z,
1520 ae_int_t n,
1521 ae_state *_state)
1522 {
1523 ae_int_t i;
1524 ae_bool result;
1525
1526
1527 ae_assert(n>=0, "APSERVIsFiniteCVector: internal error (N<0)", _state);
1528 for(i=0; i<=n-1; i++)
1529 {
1530 if( !ae_isfinite(z->ptr.p_complex[i].x, _state)||!ae_isfinite(z->ptr.p_complex[i].y, _state) )
1531 {
1532 result = ae_false;
1533 return result;
1534 }
1535 }
1536 result = ae_true;
1537 return result;
1538 }
1539
1540
1541 /*************************************************************************
1542 This function checks that size of X is at least MxN and values from
1543 X[0..M-1,0..N-1] are finite.
1544
1545 -- ALGLIB --
1546 Copyright 18.06.2010 by Bochkanov Sergey
1547 *************************************************************************/
apservisfinitematrix(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1548 ae_bool apservisfinitematrix(/* Real */ ae_matrix* x,
1549 ae_int_t m,
1550 ae_int_t n,
1551 ae_state *_state)
1552 {
1553 ae_int_t i;
1554 ae_int_t j;
1555 ae_bool result;
1556
1557
1558 ae_assert(n>=0, "APSERVIsFiniteMatrix: internal error (N<0)", _state);
1559 ae_assert(m>=0, "APSERVIsFiniteMatrix: internal error (M<0)", _state);
1560 if( m==0||n==0 )
1561 {
1562 result = ae_true;
1563 return result;
1564 }
1565 if( x->rows<m||x->cols<n )
1566 {
1567 result = ae_false;
1568 return result;
1569 }
1570 for(i=0; i<=m-1; i++)
1571 {
1572 for(j=0; j<=n-1; j++)
1573 {
1574 if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
1575 {
1576 result = ae_false;
1577 return result;
1578 }
1579 }
1580 }
1581 result = ae_true;
1582 return result;
1583 }
1584
1585
1586 /*************************************************************************
1587 This function checks that all values from X[0..M-1,0..N-1] are finite
1588
1589 -- ALGLIB --
1590 Copyright 18.06.2010 by Bochkanov Sergey
1591 *************************************************************************/
apservisfinitecmatrix(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1592 ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x,
1593 ae_int_t m,
1594 ae_int_t n,
1595 ae_state *_state)
1596 {
1597 ae_int_t i;
1598 ae_int_t j;
1599 ae_bool result;
1600
1601
1602 ae_assert(n>=0, "APSERVIsFiniteCMatrix: internal error (N<0)", _state);
1603 ae_assert(m>=0, "APSERVIsFiniteCMatrix: internal error (M<0)", _state);
1604 for(i=0; i<=m-1; i++)
1605 {
1606 for(j=0; j<=n-1; j++)
1607 {
1608 if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
1609 {
1610 result = ae_false;
1611 return result;
1612 }
1613 }
1614 }
1615 result = ae_true;
1616 return result;
1617 }
1618
1619
1620 /*************************************************************************
1621 This function checks that size of X is at least NxN and all values from
1622 upper/lower triangle of X[0..N-1,0..N-1] are finite
1623
1624 -- ALGLIB --
1625 Copyright 18.06.2010 by Bochkanov Sergey
1626 *************************************************************************/
isfinitertrmatrix(ae_matrix * x,ae_int_t n,ae_bool isupper,ae_state * _state)1627 ae_bool isfinitertrmatrix(/* Real */ ae_matrix* x,
1628 ae_int_t n,
1629 ae_bool isupper,
1630 ae_state *_state)
1631 {
1632 ae_int_t i;
1633 ae_int_t j1;
1634 ae_int_t j2;
1635 ae_int_t j;
1636 ae_bool result;
1637
1638
1639 ae_assert(n>=0, "APSERVIsFiniteRTRMatrix: internal error (N<0)", _state);
1640 if( n==0 )
1641 {
1642 result = ae_true;
1643 return result;
1644 }
1645 if( x->rows<n||x->cols<n )
1646 {
1647 result = ae_false;
1648 return result;
1649 }
1650 for(i=0; i<=n-1; i++)
1651 {
1652 if( isupper )
1653 {
1654 j1 = i;
1655 j2 = n-1;
1656 }
1657 else
1658 {
1659 j1 = 0;
1660 j2 = i;
1661 }
1662 for(j=j1; j<=j2; j++)
1663 {
1664 if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
1665 {
1666 result = ae_false;
1667 return result;
1668 }
1669 }
1670 }
1671 result = ae_true;
1672 return result;
1673 }
1674
1675
1676 /*************************************************************************
1677 This function checks that all values from upper/lower triangle of
1678 X[0..N-1,0..N-1] are finite
1679
1680 -- ALGLIB --
1681 Copyright 18.06.2010 by Bochkanov Sergey
1682 *************************************************************************/
apservisfinitectrmatrix(ae_matrix * x,ae_int_t n,ae_bool isupper,ae_state * _state)1683 ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x,
1684 ae_int_t n,
1685 ae_bool isupper,
1686 ae_state *_state)
1687 {
1688 ae_int_t i;
1689 ae_int_t j1;
1690 ae_int_t j2;
1691 ae_int_t j;
1692 ae_bool result;
1693
1694
1695 ae_assert(n>=0, "APSERVIsFiniteCTRMatrix: internal error (N<0)", _state);
1696 for(i=0; i<=n-1; i++)
1697 {
1698 if( isupper )
1699 {
1700 j1 = i;
1701 j2 = n-1;
1702 }
1703 else
1704 {
1705 j1 = 0;
1706 j2 = i;
1707 }
1708 for(j=j1; j<=j2; j++)
1709 {
1710 if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
1711 {
1712 result = ae_false;
1713 return result;
1714 }
1715 }
1716 }
1717 result = ae_true;
1718 return result;
1719 }
1720
1721
1722 /*************************************************************************
1723 This function checks that all values from X[0..M-1,0..N-1] are finite or
1724 NaN's.
1725
1726 -- ALGLIB --
1727 Copyright 18.06.2010 by Bochkanov Sergey
1728 *************************************************************************/
apservisfiniteornanmatrix(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1729 ae_bool apservisfiniteornanmatrix(/* Real */ ae_matrix* x,
1730 ae_int_t m,
1731 ae_int_t n,
1732 ae_state *_state)
1733 {
1734 ae_int_t i;
1735 ae_int_t j;
1736 ae_bool result;
1737
1738
1739 ae_assert(n>=0, "APSERVIsFiniteOrNaNMatrix: internal error (N<0)", _state);
1740 ae_assert(m>=0, "APSERVIsFiniteOrNaNMatrix: internal error (M<0)", _state);
1741 for(i=0; i<=m-1; i++)
1742 {
1743 for(j=0; j<=n-1; j++)
1744 {
1745 if( !(ae_isfinite(x->ptr.pp_double[i][j], _state)||ae_isnan(x->ptr.pp_double[i][j], _state)) )
1746 {
1747 result = ae_false;
1748 return result;
1749 }
1750 }
1751 }
1752 result = ae_true;
1753 return result;
1754 }
1755
1756
1757 /*************************************************************************
1758 Safe sqrt(x^2+y^2)
1759
1760 -- ALGLIB --
1761 Copyright by Bochkanov Sergey
1762 *************************************************************************/
safepythag2(double x,double y,ae_state * _state)1763 double safepythag2(double x, double y, ae_state *_state)
1764 {
1765 double w;
1766 double xabs;
1767 double yabs;
1768 double z;
1769 double result;
1770
1771
1772 xabs = ae_fabs(x, _state);
1773 yabs = ae_fabs(y, _state);
1774 w = ae_maxreal(xabs, yabs, _state);
1775 z = ae_minreal(xabs, yabs, _state);
1776 if( ae_fp_eq(z,(double)(0)) )
1777 {
1778 result = w;
1779 }
1780 else
1781 {
1782 result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
1783 }
1784 return result;
1785 }
1786
1787
1788 /*************************************************************************
1789 Safe sqrt(x^2+y^2)
1790
1791 -- ALGLIB --
1792 Copyright by Bochkanov Sergey
1793 *************************************************************************/
safepythag3(double x,double y,double z,ae_state * _state)1794 double safepythag3(double x, double y, double z, ae_state *_state)
1795 {
1796 double w;
1797 double result;
1798
1799
1800 w = ae_maxreal(ae_fabs(x, _state), ae_maxreal(ae_fabs(y, _state), ae_fabs(z, _state), _state), _state);
1801 if( ae_fp_eq(w,(double)(0)) )
1802 {
1803 result = (double)(0);
1804 return result;
1805 }
1806 x = x/w;
1807 y = y/w;
1808 z = z/w;
1809 result = w*ae_sqrt(ae_sqr(x, _state)+ae_sqr(y, _state)+ae_sqr(z, _state), _state);
1810 return result;
1811 }
1812
1813
1814 /*************************************************************************
1815 Safe division.
1816
1817 This function attempts to calculate R=X/Y without overflow.
1818
1819 It returns:
1820 * +1, if abs(X/Y)>=MaxRealNumber or undefined - overflow-like situation
1821 (no overlfow is generated, R is either NAN, PosINF, NegINF)
1822 * 0, if MinRealNumber<abs(X/Y)<MaxRealNumber or X=0, Y<>0
1823 (R contains result, may be zero)
1824 * -1, if 0<abs(X/Y)<MinRealNumber - underflow-like situation
1825 (R contains zero; it corresponds to underflow)
1826
1827 No overflow is generated in any case.
1828
1829 -- ALGLIB --
1830 Copyright by Bochkanov Sergey
1831 *************************************************************************/
saferdiv(double x,double y,double * r,ae_state * _state)1832 ae_int_t saferdiv(double x, double y, double* r, ae_state *_state)
1833 {
1834 ae_int_t result;
1835
1836 *r = 0;
1837
1838
1839 /*
1840 * Two special cases:
1841 * * Y=0
1842 * * X=0 and Y<>0
1843 */
1844 if( ae_fp_eq(y,(double)(0)) )
1845 {
1846 result = 1;
1847 if( ae_fp_eq(x,(double)(0)) )
1848 {
1849 *r = _state->v_nan;
1850 }
1851 if( ae_fp_greater(x,(double)(0)) )
1852 {
1853 *r = _state->v_posinf;
1854 }
1855 if( ae_fp_less(x,(double)(0)) )
1856 {
1857 *r = _state->v_neginf;
1858 }
1859 return result;
1860 }
1861 if( ae_fp_eq(x,(double)(0)) )
1862 {
1863 *r = (double)(0);
1864 result = 0;
1865 return result;
1866 }
1867
1868 /*
1869 * make Y>0
1870 */
1871 if( ae_fp_less(y,(double)(0)) )
1872 {
1873 x = -x;
1874 y = -y;
1875 }
1876
1877 /*
1878 *
1879 */
1880 if( ae_fp_greater_eq(y,(double)(1)) )
1881 {
1882 *r = x/y;
1883 if( ae_fp_less_eq(ae_fabs(*r, _state),ae_minrealnumber) )
1884 {
1885 result = -1;
1886 *r = (double)(0);
1887 }
1888 else
1889 {
1890 result = 0;
1891 }
1892 }
1893 else
1894 {
1895 if( ae_fp_greater_eq(ae_fabs(x, _state),ae_maxrealnumber*y) )
1896 {
1897 if( ae_fp_greater(x,(double)(0)) )
1898 {
1899 *r = _state->v_posinf;
1900 }
1901 else
1902 {
1903 *r = _state->v_neginf;
1904 }
1905 result = 1;
1906 }
1907 else
1908 {
1909 *r = x/y;
1910 result = 0;
1911 }
1912 }
1913 return result;
1914 }
1915
1916
1917 /*************************************************************************
1918 This function calculates "safe" min(X/Y,V) for positive finite X, Y, V.
1919 No overflow is generated in any case.
1920
1921 -- ALGLIB --
1922 Copyright by Bochkanov Sergey
1923 *************************************************************************/
safeminposrv(double x,double y,double v,ae_state * _state)1924 double safeminposrv(double x, double y, double v, ae_state *_state)
1925 {
1926 double r;
1927 double result;
1928
1929
1930 if( ae_fp_greater_eq(y,(double)(1)) )
1931 {
1932
1933 /*
1934 * Y>=1, we can safely divide by Y
1935 */
1936 r = x/y;
1937 result = v;
1938 if( ae_fp_greater(v,r) )
1939 {
1940 result = r;
1941 }
1942 else
1943 {
1944 result = v;
1945 }
1946 }
1947 else
1948 {
1949
1950 /*
1951 * Y<1, we can safely multiply by Y
1952 */
1953 if( ae_fp_less(x,v*y) )
1954 {
1955 result = x/y;
1956 }
1957 else
1958 {
1959 result = v;
1960 }
1961 }
1962 return result;
1963 }
1964
1965
1966 /*************************************************************************
1967 This function makes periodic mapping of X to [A,B].
1968
1969 It accepts X, A, B (A>B). It returns T which lies in [A,B] and integer K,
1970 such that X = T + K*(B-A).
1971
1972 NOTES:
1973 * K is represented as real value, although actually it is integer
1974 * T is guaranteed to be in [A,B]
1975 * T replaces X
1976
1977 -- ALGLIB --
1978 Copyright by Bochkanov Sergey
1979 *************************************************************************/
apperiodicmap(double * x,double a,double b,double * k,ae_state * _state)1980 void apperiodicmap(double* x,
1981 double a,
1982 double b,
1983 double* k,
1984 ae_state *_state)
1985 {
1986
1987 *k = 0;
1988
1989 ae_assert(ae_fp_less(a,b), "APPeriodicMap: internal error!", _state);
1990 *k = (double)(ae_ifloor((*x-a)/(b-a), _state));
1991 *x = *x-*k*(b-a);
1992 while(ae_fp_less(*x,a))
1993 {
1994 *x = *x+(b-a);
1995 *k = *k-1;
1996 }
1997 while(ae_fp_greater(*x,b))
1998 {
1999 *x = *x-(b-a);
2000 *k = *k+1;
2001 }
2002 *x = ae_maxreal(*x, a, _state);
2003 *x = ae_minreal(*x, b, _state);
2004 }
2005
2006
2007 /*************************************************************************
2008 Returns random normal number using low-quality system-provided generator
2009
2010 -- ALGLIB --
2011 Copyright 20.03.2009 by Bochkanov Sergey
2012 *************************************************************************/
randomnormal(ae_state * _state)2013 double randomnormal(ae_state *_state)
2014 {
2015 double u;
2016 double v;
2017 double s;
2018 double result;
2019
2020
2021 for(;;)
2022 {
2023 u = 2*ae_randomreal(_state)-1;
2024 v = 2*ae_randomreal(_state)-1;
2025 s = ae_sqr(u, _state)+ae_sqr(v, _state);
2026 if( ae_fp_greater(s,(double)(0))&&ae_fp_less(s,(double)(1)) )
2027 {
2028
2029 /*
2030 * two Sqrt's instead of one to
2031 * avoid overflow when S is too small
2032 */
2033 s = ae_sqrt(-2*ae_log(s, _state), _state)/ae_sqrt(s, _state);
2034 result = u*s;
2035 break;
2036 }
2037 }
2038 return result;
2039 }
2040
2041
2042 /*************************************************************************
2043 Generates random unit vector using low-quality system-provided generator.
2044 Reallocates array if its size is too short.
2045
2046 -- ALGLIB --
2047 Copyright 20.03.2009 by Bochkanov Sergey
2048 *************************************************************************/
randomunit(ae_int_t n,ae_vector * x,ae_state * _state)2049 void randomunit(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
2050 {
2051 ae_int_t i;
2052 double v;
2053 double vv;
2054
2055
2056 ae_assert(n>0, "RandomUnit: N<=0", _state);
2057 if( x->cnt<n )
2058 {
2059 ae_vector_set_length(x, n, _state);
2060 }
2061 do
2062 {
2063 v = 0.0;
2064 for(i=0; i<=n-1; i++)
2065 {
2066 vv = randomnormal(_state);
2067 x->ptr.p_double[i] = vv;
2068 v = v+vv*vv;
2069 }
2070 }
2071 while(ae_fp_less_eq(v,(double)(0)));
2072 v = 1/ae_sqrt(v, _state);
2073 for(i=0; i<=n-1; i++)
2074 {
2075 x->ptr.p_double[i] = x->ptr.p_double[i]*v;
2076 }
2077 }
2078
2079
2080 /*************************************************************************
2081 This function is used to swap two integer values
2082 *************************************************************************/
swapi(ae_int_t * v0,ae_int_t * v1,ae_state * _state)2083 void swapi(ae_int_t* v0, ae_int_t* v1, ae_state *_state)
2084 {
2085 ae_int_t v;
2086
2087
2088 v = *v0;
2089 *v0 = *v1;
2090 *v1 = v;
2091 }
2092
2093
2094 /*************************************************************************
2095 This function is used to swap two real values
2096 *************************************************************************/
swapr(double * v0,double * v1,ae_state * _state)2097 void swapr(double* v0, double* v1, ae_state *_state)
2098 {
2099 double v;
2100
2101
2102 v = *v0;
2103 *v0 = *v1;
2104 *v1 = v;
2105 }
2106
2107
2108 /*************************************************************************
2109 This function is used to swap two rows of the matrix; if NCols<0, automatically
2110 determined from the matrix size.
2111 *************************************************************************/
swaprows(ae_matrix * a,ae_int_t i0,ae_int_t i1,ae_int_t ncols,ae_state * _state)2112 void swaprows(/* Real */ ae_matrix* a,
2113 ae_int_t i0,
2114 ae_int_t i1,
2115 ae_int_t ncols,
2116 ae_state *_state)
2117 {
2118 ae_int_t j;
2119 double v;
2120
2121
2122 if( i0==i1 )
2123 {
2124 return;
2125 }
2126 if( ncols<0 )
2127 {
2128 ncols = a->cols;
2129 }
2130 for(j=0; j<=ncols-1; j++)
2131 {
2132 v = a->ptr.pp_double[i0][j];
2133 a->ptr.pp_double[i0][j] = a->ptr.pp_double[i1][j];
2134 a->ptr.pp_double[i1][j] = v;
2135 }
2136 }
2137
2138
2139 /*************************************************************************
2140 This function is used to swap two cols of the matrix; if NRows<0, automatically
2141 determined from the matrix size.
2142 *************************************************************************/
swapcols(ae_matrix * a,ae_int_t j0,ae_int_t j1,ae_int_t nrows,ae_state * _state)2143 void swapcols(/* Real */ ae_matrix* a,
2144 ae_int_t j0,
2145 ae_int_t j1,
2146 ae_int_t nrows,
2147 ae_state *_state)
2148 {
2149 ae_int_t i;
2150 double v;
2151
2152
2153 if( j0==j1 )
2154 {
2155 return;
2156 }
2157 if( nrows<0 )
2158 {
2159 nrows = a->rows;
2160 }
2161 for(i=0; i<=nrows-1; i++)
2162 {
2163 v = a->ptr.pp_double[i][j0];
2164 a->ptr.pp_double[i][j0] = a->ptr.pp_double[i][j1];
2165 a->ptr.pp_double[i][j1] = v;
2166 }
2167 }
2168
2169
2170 /*************************************************************************
2171 This function is used to swap two "entries" in 1-dimensional array composed
2172 from D-element entries
2173 *************************************************************************/
swapentries(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_int_t entrywidth,ae_state * _state)2174 void swapentries(/* Real */ ae_vector* a,
2175 ae_int_t i0,
2176 ae_int_t i1,
2177 ae_int_t entrywidth,
2178 ae_state *_state)
2179 {
2180 ae_int_t offs0;
2181 ae_int_t offs1;
2182 ae_int_t j;
2183 double v;
2184
2185
2186 if( i0==i1 )
2187 {
2188 return;
2189 }
2190 offs0 = i0*entrywidth;
2191 offs1 = i1*entrywidth;
2192 for(j=0; j<=entrywidth-1; j++)
2193 {
2194 v = a->ptr.p_double[offs0+j];
2195 a->ptr.p_double[offs0+j] = a->ptr.p_double[offs1+j];
2196 a->ptr.p_double[offs1+j] = v;
2197 }
2198 }
2199
2200
2201 /*************************************************************************
2202 This function is used to swap two elements of the vector
2203 *************************************************************************/
swapelements(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)2204 void swapelements(/* Real */ ae_vector* a,
2205 ae_int_t i0,
2206 ae_int_t i1,
2207 ae_state *_state)
2208 {
2209 double v;
2210
2211
2212 if( i0==i1 )
2213 {
2214 return;
2215 }
2216 v = a->ptr.p_double[i0];
2217 a->ptr.p_double[i0] = a->ptr.p_double[i1];
2218 a->ptr.p_double[i1] = v;
2219 }
2220
2221
2222 /*************************************************************************
2223 This function is used to swap two elements of the vector
2224 *************************************************************************/
swapelementsi(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)2225 void swapelementsi(/* Integer */ ae_vector* a,
2226 ae_int_t i0,
2227 ae_int_t i1,
2228 ae_state *_state)
2229 {
2230 ae_int_t v;
2231
2232
2233 if( i0==i1 )
2234 {
2235 return;
2236 }
2237 v = a->ptr.p_int[i0];
2238 a->ptr.p_int[i0] = a->ptr.p_int[i1];
2239 a->ptr.p_int[i1] = v;
2240 }
2241
2242
2243 /*************************************************************************
2244 This function is used to return maximum of three real values
2245 *************************************************************************/
maxreal3(double v0,double v1,double v2,ae_state * _state)2246 double maxreal3(double v0, double v1, double v2, ae_state *_state)
2247 {
2248 double result;
2249
2250
2251 result = v0;
2252 if( ae_fp_less(result,v1) )
2253 {
2254 result = v1;
2255 }
2256 if( ae_fp_less(result,v2) )
2257 {
2258 result = v2;
2259 }
2260 return result;
2261 }
2262
2263
2264 /*************************************************************************
2265 This function is used to increment value of integer variable
2266 *************************************************************************/
inc(ae_int_t * v,ae_state * _state)2267 void inc(ae_int_t* v, ae_state *_state)
2268 {
2269
2270
2271 *v = *v+1;
2272 }
2273
2274
2275 /*************************************************************************
2276 This function is used to decrement value of integer variable
2277 *************************************************************************/
dec(ae_int_t * v,ae_state * _state)2278 void dec(ae_int_t* v, ae_state *_state)
2279 {
2280
2281
2282 *v = *v-1;
2283 }
2284
2285
2286 /*************************************************************************
2287 This function is used to increment value of integer variable; name of the
2288 function suggests that increment is done in multithreaded setting in the
2289 thread-unsafe manner (optional progress reports which do not need guaranteed
2290 correctness)
2291 *************************************************************************/
threadunsafeinc(ae_int_t * v,ae_state * _state)2292 void threadunsafeinc(ae_int_t* v, ae_state *_state)
2293 {
2294
2295
2296 *v = *v+1;
2297 }
2298
2299
2300 /*************************************************************************
2301 This function is used to increment value of integer variable; name of the
2302 function suggests that increment is done in multithreaded setting in the
2303 thread-unsafe manner (optional progress reports which do not need guaranteed
2304 correctness)
2305 *************************************************************************/
threadunsafeincby(ae_int_t * v,ae_int_t k,ae_state * _state)2306 void threadunsafeincby(ae_int_t* v, ae_int_t k, ae_state *_state)
2307 {
2308
2309
2310 *v = *v+k;
2311 }
2312
2313
2314 /*************************************************************************
2315 This function performs two operations:
2316 1. decrements value of integer variable, if it is positive
2317 2. explicitly sets variable to zero if it is non-positive
2318 It is used by some algorithms to decrease value of internal counters.
2319 *************************************************************************/
countdown(ae_int_t * v,ae_state * _state)2320 void countdown(ae_int_t* v, ae_state *_state)
2321 {
2322
2323
2324 if( *v>0 )
2325 {
2326 *v = *v-1;
2327 }
2328 else
2329 {
2330 *v = 0;
2331 }
2332 }
2333
2334
2335 /*************************************************************************
2336 This function returns +1 or -1 depending on sign of X.
2337 x=0 results in +1 being returned.
2338 *************************************************************************/
possign(double x,ae_state * _state)2339 double possign(double x, ae_state *_state)
2340 {
2341 double result;
2342
2343
2344 if( ae_fp_greater_eq(x,(double)(0)) )
2345 {
2346 result = (double)(1);
2347 }
2348 else
2349 {
2350 result = (double)(-1);
2351 }
2352 return result;
2353 }
2354
2355
2356 /*************************************************************************
2357 This function returns product of two real numbers. It is convenient when
2358 you have to perform typecast-and-product of two INTEGERS.
2359 *************************************************************************/
rmul2(double v0,double v1,ae_state * _state)2360 double rmul2(double v0, double v1, ae_state *_state)
2361 {
2362 double result;
2363
2364
2365 result = v0*v1;
2366 return result;
2367 }
2368
2369
2370 /*************************************************************************
2371 This function returns product of three real numbers. It is convenient when
2372 you have to perform typecast-and-product of two INTEGERS.
2373 *************************************************************************/
rmul3(double v0,double v1,double v2,ae_state * _state)2374 double rmul3(double v0, double v1, double v2, ae_state *_state)
2375 {
2376 double result;
2377
2378
2379 result = v0*v1*v2;
2380 return result;
2381 }
2382
2383
2384 /*************************************************************************
2385 This function returns (A div B) rounded up; it expects that A>0, B>0, but
2386 does not check it.
2387 *************************************************************************/
idivup(ae_int_t a,ae_int_t b,ae_state * _state)2388 ae_int_t idivup(ae_int_t a, ae_int_t b, ae_state *_state)
2389 {
2390 ae_int_t result;
2391
2392
2393 result = a/b;
2394 if( a%b>0 )
2395 {
2396 result = result+1;
2397 }
2398 return result;
2399 }
2400
2401
2402 /*************************************************************************
2403 This function returns min(i0,i1)
2404 *************************************************************************/
imin2(ae_int_t i0,ae_int_t i1,ae_state * _state)2405 ae_int_t imin2(ae_int_t i0, ae_int_t i1, ae_state *_state)
2406 {
2407 ae_int_t result;
2408
2409
2410 result = i0;
2411 if( i1<result )
2412 {
2413 result = i1;
2414 }
2415 return result;
2416 }
2417
2418
2419 /*************************************************************************
2420 This function returns min(i0,i1,i2)
2421 *************************************************************************/
imin3(ae_int_t i0,ae_int_t i1,ae_int_t i2,ae_state * _state)2422 ae_int_t imin3(ae_int_t i0, ae_int_t i1, ae_int_t i2, ae_state *_state)
2423 {
2424 ae_int_t result;
2425
2426
2427 result = i0;
2428 if( i1<result )
2429 {
2430 result = i1;
2431 }
2432 if( i2<result )
2433 {
2434 result = i2;
2435 }
2436 return result;
2437 }
2438
2439
2440 /*************************************************************************
2441 This function returns max(i0,i1)
2442 *************************************************************************/
imax2(ae_int_t i0,ae_int_t i1,ae_state * _state)2443 ae_int_t imax2(ae_int_t i0, ae_int_t i1, ae_state *_state)
2444 {
2445 ae_int_t result;
2446
2447
2448 result = i0;
2449 if( i1>result )
2450 {
2451 result = i1;
2452 }
2453 return result;
2454 }
2455
2456
2457 /*************************************************************************
2458 This function returns max(i0,i1,i2)
2459 *************************************************************************/
imax3(ae_int_t i0,ae_int_t i1,ae_int_t i2,ae_state * _state)2460 ae_int_t imax3(ae_int_t i0, ae_int_t i1, ae_int_t i2, ae_state *_state)
2461 {
2462 ae_int_t result;
2463
2464
2465 result = i0;
2466 if( i1>result )
2467 {
2468 result = i1;
2469 }
2470 if( i2>result )
2471 {
2472 result = i2;
2473 }
2474 return result;
2475 }
2476
2477
2478 /*************************************************************************
2479 This function returns max(r0,r1,r2)
2480 *************************************************************************/
rmax3(double r0,double r1,double r2,ae_state * _state)2481 double rmax3(double r0, double r1, double r2, ae_state *_state)
2482 {
2483 double result;
2484
2485
2486 result = r0;
2487 if( ae_fp_greater(r1,result) )
2488 {
2489 result = r1;
2490 }
2491 if( ae_fp_greater(r2,result) )
2492 {
2493 result = r2;
2494 }
2495 return result;
2496 }
2497
2498
2499 /*************************************************************************
2500 This function returns max(|r0|,|r1|,|r2|)
2501 *************************************************************************/
rmaxabs3(double r0,double r1,double r2,ae_state * _state)2502 double rmaxabs3(double r0, double r1, double r2, ae_state *_state)
2503 {
2504 double result;
2505
2506
2507 r0 = ae_fabs(r0, _state);
2508 r1 = ae_fabs(r1, _state);
2509 r2 = ae_fabs(r2, _state);
2510 result = r0;
2511 if( ae_fp_greater(r1,result) )
2512 {
2513 result = r1;
2514 }
2515 if( ae_fp_greater(r2,result) )
2516 {
2517 result = r2;
2518 }
2519 return result;
2520 }
2521
2522
2523 /*************************************************************************
2524 'bounds' value: maps X to [B1,B2]
2525
2526 -- ALGLIB --
2527 Copyright 20.03.2009 by Bochkanov Sergey
2528 *************************************************************************/
boundval(double x,double b1,double b2,ae_state * _state)2529 double boundval(double x, double b1, double b2, ae_state *_state)
2530 {
2531 double result;
2532
2533
2534 if( ae_fp_less_eq(x,b1) )
2535 {
2536 result = b1;
2537 return result;
2538 }
2539 if( ae_fp_greater_eq(x,b2) )
2540 {
2541 result = b2;
2542 return result;
2543 }
2544 result = x;
2545 return result;
2546 }
2547
2548
2549 /*************************************************************************
2550 'bounds' value: maps X to [B1,B2]
2551
2552 -- ALGLIB --
2553 Copyright 20.03.2009 by Bochkanov Sergey
2554 *************************************************************************/
iboundval(ae_int_t x,ae_int_t b1,ae_int_t b2,ae_state * _state)2555 ae_int_t iboundval(ae_int_t x, ae_int_t b1, ae_int_t b2, ae_state *_state)
2556 {
2557 ae_int_t result;
2558
2559
2560 if( x<=b1 )
2561 {
2562 result = b1;
2563 return result;
2564 }
2565 if( x>=b2 )
2566 {
2567 result = b2;
2568 return result;
2569 }
2570 result = x;
2571 return result;
2572 }
2573
2574
2575 /*************************************************************************
2576 'bounds' value: maps X to [B1,B2]
2577
2578 -- ALGLIB --
2579 Copyright 20.03.2009 by Bochkanov Sergey
2580 *************************************************************************/
rboundval(double x,double b1,double b2,ae_state * _state)2581 double rboundval(double x, double b1, double b2, ae_state *_state)
2582 {
2583 double result;
2584
2585
2586 if( ae_fp_less_eq(x,b1) )
2587 {
2588 result = b1;
2589 return result;
2590 }
2591 if( ae_fp_greater_eq(x,b2) )
2592 {
2593 result = b2;
2594 return result;
2595 }
2596 result = x;
2597 return result;
2598 }
2599
2600
2601 /*************************************************************************
2602 Returns number of non-zeros
2603 *************************************************************************/
countnz1(ae_vector * v,ae_int_t n,ae_state * _state)2604 ae_int_t countnz1(/* Real */ ae_vector* v,
2605 ae_int_t n,
2606 ae_state *_state)
2607 {
2608 ae_int_t i;
2609 ae_int_t result;
2610
2611
2612 result = 0;
2613 for(i=0; i<=n-1; i++)
2614 {
2615 if( !(v->ptr.p_double[i]==0) )
2616 {
2617 result = result+1;
2618 }
2619 }
2620 return result;
2621 }
2622
2623
2624 /*************************************************************************
2625 Returns number of non-zeros
2626 *************************************************************************/
countnz2(ae_matrix * v,ae_int_t m,ae_int_t n,ae_state * _state)2627 ae_int_t countnz2(/* Real */ ae_matrix* v,
2628 ae_int_t m,
2629 ae_int_t n,
2630 ae_state *_state)
2631 {
2632 ae_int_t i;
2633 ae_int_t j;
2634 ae_int_t result;
2635
2636
2637 result = 0;
2638 for(i=0; i<=m-1; i++)
2639 {
2640 for(j=0; j<=n-1; j++)
2641 {
2642 if( !(v->ptr.pp_double[i][j]==0) )
2643 {
2644 result = result+1;
2645 }
2646 }
2647 }
2648 return result;
2649 }
2650
2651
2652 /*************************************************************************
2653 Allocation of serializer: complex value
2654 *************************************************************************/
alloccomplex(ae_serializer * s,ae_complex v,ae_state * _state)2655 void alloccomplex(ae_serializer* s, ae_complex v, ae_state *_state)
2656 {
2657
2658
2659 ae_serializer_alloc_entry(s);
2660 ae_serializer_alloc_entry(s);
2661 }
2662
2663
2664 /*************************************************************************
2665 Serialization: complex value
2666 *************************************************************************/
serializecomplex(ae_serializer * s,ae_complex v,ae_state * _state)2667 void serializecomplex(ae_serializer* s, ae_complex v, ae_state *_state)
2668 {
2669
2670
2671 ae_serializer_serialize_double(s, v.x, _state);
2672 ae_serializer_serialize_double(s, v.y, _state);
2673 }
2674
2675
2676 /*************************************************************************
2677 Unserialization: complex value
2678 *************************************************************************/
unserializecomplex(ae_serializer * s,ae_state * _state)2679 ae_complex unserializecomplex(ae_serializer* s, ae_state *_state)
2680 {
2681 ae_complex result;
2682
2683
2684 ae_serializer_unserialize_double(s, &result.x, _state);
2685 ae_serializer_unserialize_double(s, &result.y, _state);
2686 return result;
2687 }
2688
2689
2690 /*************************************************************************
2691 Allocation of serializer: real array
2692 *************************************************************************/
allocrealarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2693 void allocrealarray(ae_serializer* s,
2694 /* Real */ ae_vector* v,
2695 ae_int_t n,
2696 ae_state *_state)
2697 {
2698 ae_int_t i;
2699
2700
2701 if( n<0 )
2702 {
2703 n = v->cnt;
2704 }
2705 ae_serializer_alloc_entry(s);
2706 for(i=0; i<=n-1; i++)
2707 {
2708 ae_serializer_alloc_entry(s);
2709 }
2710 }
2711
2712
2713 /*************************************************************************
2714 Serialization: complex value
2715 *************************************************************************/
serializerealarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2716 void serializerealarray(ae_serializer* s,
2717 /* Real */ ae_vector* v,
2718 ae_int_t n,
2719 ae_state *_state)
2720 {
2721 ae_int_t i;
2722
2723
2724 if( n<0 )
2725 {
2726 n = v->cnt;
2727 }
2728 ae_serializer_serialize_int(s, n, _state);
2729 for(i=0; i<=n-1; i++)
2730 {
2731 ae_serializer_serialize_double(s, v->ptr.p_double[i], _state);
2732 }
2733 }
2734
2735
2736 /*************************************************************************
2737 Unserialization: complex value
2738 *************************************************************************/
unserializerealarray(ae_serializer * s,ae_vector * v,ae_state * _state)2739 void unserializerealarray(ae_serializer* s,
2740 /* Real */ ae_vector* v,
2741 ae_state *_state)
2742 {
2743 ae_int_t n;
2744 ae_int_t i;
2745 double t;
2746
2747 ae_vector_clear(v);
2748
2749 ae_serializer_unserialize_int(s, &n, _state);
2750 if( n==0 )
2751 {
2752 return;
2753 }
2754 ae_vector_set_length(v, n, _state);
2755 for(i=0; i<=n-1; i++)
2756 {
2757 ae_serializer_unserialize_double(s, &t, _state);
2758 v->ptr.p_double[i] = t;
2759 }
2760 }
2761
2762
2763 /*************************************************************************
2764 Allocation of serializer: Integer array
2765 *************************************************************************/
allocintegerarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2766 void allocintegerarray(ae_serializer* s,
2767 /* Integer */ ae_vector* v,
2768 ae_int_t n,
2769 ae_state *_state)
2770 {
2771 ae_int_t i;
2772
2773
2774 if( n<0 )
2775 {
2776 n = v->cnt;
2777 }
2778 ae_serializer_alloc_entry(s);
2779 for(i=0; i<=n-1; i++)
2780 {
2781 ae_serializer_alloc_entry(s);
2782 }
2783 }
2784
2785
2786 /*************************************************************************
2787 Serialization: Integer array
2788 *************************************************************************/
serializeintegerarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2789 void serializeintegerarray(ae_serializer* s,
2790 /* Integer */ ae_vector* v,
2791 ae_int_t n,
2792 ae_state *_state)
2793 {
2794 ae_int_t i;
2795
2796
2797 if( n<0 )
2798 {
2799 n = v->cnt;
2800 }
2801 ae_serializer_serialize_int(s, n, _state);
2802 for(i=0; i<=n-1; i++)
2803 {
2804 ae_serializer_serialize_int(s, v->ptr.p_int[i], _state);
2805 }
2806 }
2807
2808
2809 /*************************************************************************
2810 Unserialization: complex value
2811 *************************************************************************/
unserializeintegerarray(ae_serializer * s,ae_vector * v,ae_state * _state)2812 void unserializeintegerarray(ae_serializer* s,
2813 /* Integer */ ae_vector* v,
2814 ae_state *_state)
2815 {
2816 ae_int_t n;
2817 ae_int_t i;
2818 ae_int_t t;
2819
2820 ae_vector_clear(v);
2821
2822 ae_serializer_unserialize_int(s, &n, _state);
2823 if( n==0 )
2824 {
2825 return;
2826 }
2827 ae_vector_set_length(v, n, _state);
2828 for(i=0; i<=n-1; i++)
2829 {
2830 ae_serializer_unserialize_int(s, &t, _state);
2831 v->ptr.p_int[i] = t;
2832 }
2833 }
2834
2835
2836 /*************************************************************************
2837 Allocation of serializer: real matrix
2838 *************************************************************************/
allocrealmatrix(ae_serializer * s,ae_matrix * v,ae_int_t n0,ae_int_t n1,ae_state * _state)2839 void allocrealmatrix(ae_serializer* s,
2840 /* Real */ ae_matrix* v,
2841 ae_int_t n0,
2842 ae_int_t n1,
2843 ae_state *_state)
2844 {
2845 ae_int_t i;
2846 ae_int_t j;
2847
2848
2849 if( n0<0 )
2850 {
2851 n0 = v->rows;
2852 }
2853 if( n1<0 )
2854 {
2855 n1 = v->cols;
2856 }
2857 ae_serializer_alloc_entry(s);
2858 ae_serializer_alloc_entry(s);
2859 for(i=0; i<=n0-1; i++)
2860 {
2861 for(j=0; j<=n1-1; j++)
2862 {
2863 ae_serializer_alloc_entry(s);
2864 }
2865 }
2866 }
2867
2868
2869 /*************************************************************************
2870 Serialization: complex value
2871 *************************************************************************/
serializerealmatrix(ae_serializer * s,ae_matrix * v,ae_int_t n0,ae_int_t n1,ae_state * _state)2872 void serializerealmatrix(ae_serializer* s,
2873 /* Real */ ae_matrix* v,
2874 ae_int_t n0,
2875 ae_int_t n1,
2876 ae_state *_state)
2877 {
2878 ae_int_t i;
2879 ae_int_t j;
2880
2881
2882 if( n0<0 )
2883 {
2884 n0 = v->rows;
2885 }
2886 if( n1<0 )
2887 {
2888 n1 = v->cols;
2889 }
2890 ae_serializer_serialize_int(s, n0, _state);
2891 ae_serializer_serialize_int(s, n1, _state);
2892 for(i=0; i<=n0-1; i++)
2893 {
2894 for(j=0; j<=n1-1; j++)
2895 {
2896 ae_serializer_serialize_double(s, v->ptr.pp_double[i][j], _state);
2897 }
2898 }
2899 }
2900
2901
2902 /*************************************************************************
2903 Unserialization: complex value
2904 *************************************************************************/
unserializerealmatrix(ae_serializer * s,ae_matrix * v,ae_state * _state)2905 void unserializerealmatrix(ae_serializer* s,
2906 /* Real */ ae_matrix* v,
2907 ae_state *_state)
2908 {
2909 ae_int_t i;
2910 ae_int_t j;
2911 ae_int_t n0;
2912 ae_int_t n1;
2913 double t;
2914
2915 ae_matrix_clear(v);
2916
2917 ae_serializer_unserialize_int(s, &n0, _state);
2918 ae_serializer_unserialize_int(s, &n1, _state);
2919 if( n0==0||n1==0 )
2920 {
2921 return;
2922 }
2923 ae_matrix_set_length(v, n0, n1, _state);
2924 for(i=0; i<=n0-1; i++)
2925 {
2926 for(j=0; j<=n1-1; j++)
2927 {
2928 ae_serializer_unserialize_double(s, &t, _state);
2929 v->ptr.pp_double[i][j] = t;
2930 }
2931 }
2932 }
2933
2934
2935 /*************************************************************************
2936 Copy boolean array
2937 *************************************************************************/
copybooleanarray(ae_vector * src,ae_vector * dst,ae_state * _state)2938 void copybooleanarray(/* Boolean */ ae_vector* src,
2939 /* Boolean */ ae_vector* dst,
2940 ae_state *_state)
2941 {
2942 ae_int_t i;
2943
2944 ae_vector_clear(dst);
2945
2946 if( src->cnt>0 )
2947 {
2948 ae_vector_set_length(dst, src->cnt, _state);
2949 for(i=0; i<=src->cnt-1; i++)
2950 {
2951 dst->ptr.p_bool[i] = src->ptr.p_bool[i];
2952 }
2953 }
2954 }
2955
2956
2957 /*************************************************************************
2958 Copy integer array
2959 *************************************************************************/
copyintegerarray(ae_vector * src,ae_vector * dst,ae_state * _state)2960 void copyintegerarray(/* Integer */ ae_vector* src,
2961 /* Integer */ ae_vector* dst,
2962 ae_state *_state)
2963 {
2964 ae_int_t i;
2965
2966 ae_vector_clear(dst);
2967
2968 if( src->cnt>0 )
2969 {
2970 ae_vector_set_length(dst, src->cnt, _state);
2971 for(i=0; i<=src->cnt-1; i++)
2972 {
2973 dst->ptr.p_int[i] = src->ptr.p_int[i];
2974 }
2975 }
2976 }
2977
2978
2979 /*************************************************************************
2980 Copy real array
2981 *************************************************************************/
copyrealarray(ae_vector * src,ae_vector * dst,ae_state * _state)2982 void copyrealarray(/* Real */ ae_vector* src,
2983 /* Real */ ae_vector* dst,
2984 ae_state *_state)
2985 {
2986 ae_int_t i;
2987
2988 ae_vector_clear(dst);
2989
2990 if( src->cnt>0 )
2991 {
2992 ae_vector_set_length(dst, src->cnt, _state);
2993 for(i=0; i<=src->cnt-1; i++)
2994 {
2995 dst->ptr.p_double[i] = src->ptr.p_double[i];
2996 }
2997 }
2998 }
2999
3000
3001 /*************************************************************************
3002 Copy real matrix
3003 *************************************************************************/
copyrealmatrix(ae_matrix * src,ae_matrix * dst,ae_state * _state)3004 void copyrealmatrix(/* Real */ ae_matrix* src,
3005 /* Real */ ae_matrix* dst,
3006 ae_state *_state)
3007 {
3008 ae_int_t i;
3009 ae_int_t j;
3010
3011 ae_matrix_clear(dst);
3012
3013 if( src->rows>0&&src->cols>0 )
3014 {
3015 ae_matrix_set_length(dst, src->rows, src->cols, _state);
3016 for(i=0; i<=src->rows-1; i++)
3017 {
3018 for(j=0; j<=src->cols-1; j++)
3019 {
3020 dst->ptr.pp_double[i][j] = src->ptr.pp_double[i][j];
3021 }
3022 }
3023 }
3024 }
3025
3026
3027 /*************************************************************************
3028 Clears integer array
3029 *************************************************************************/
unsetintegerarray(ae_vector * a,ae_state * _state)3030 void unsetintegerarray(/* Integer */ ae_vector* a, ae_state *_state)
3031 {
3032
3033 ae_vector_clear(a);
3034
3035 }
3036
3037
3038 /*************************************************************************
3039 Clears real array
3040 *************************************************************************/
unsetrealarray(ae_vector * a,ae_state * _state)3041 void unsetrealarray(/* Real */ ae_vector* a, ae_state *_state)
3042 {
3043
3044 ae_vector_clear(a);
3045
3046 }
3047
3048
3049 /*************************************************************************
3050 Clears real matrix
3051 *************************************************************************/
unsetrealmatrix(ae_matrix * a,ae_state * _state)3052 void unsetrealmatrix(/* Real */ ae_matrix* a, ae_state *_state)
3053 {
3054
3055 ae_matrix_clear(a);
3056
3057 }
3058
3059
3060 /*************************************************************************
3061 This function is used in parallel functions for recurrent division of large
3062 task into two smaller tasks.
3063
3064 It has following properties:
3065 * it works only for TaskSize>=2 and TaskSize>TileSize (assertion is thrown otherwise)
3066 * Task0+Task1=TaskSize, Task0>0, Task1>0
3067 * Task0 and Task1 are close to each other
3068 * Task0>=Task1
3069 * Task0 is always divisible by TileSize
3070
3071 -- ALGLIB --
3072 Copyright 07.04.2013 by Bochkanov Sergey
3073 *************************************************************************/
tiledsplit(ae_int_t tasksize,ae_int_t tilesize,ae_int_t * task0,ae_int_t * task1,ae_state * _state)3074 void tiledsplit(ae_int_t tasksize,
3075 ae_int_t tilesize,
3076 ae_int_t* task0,
3077 ae_int_t* task1,
3078 ae_state *_state)
3079 {
3080 ae_int_t cc;
3081
3082 *task0 = 0;
3083 *task1 = 0;
3084
3085 ae_assert(tasksize>=2, "TiledSplit: TaskSize<2", _state);
3086 ae_assert(tasksize>tilesize, "TiledSplit: TaskSize<=TileSize", _state);
3087 cc = chunkscount(tasksize, tilesize, _state);
3088 ae_assert(cc>=2, "TiledSplit: integrity check failed", _state);
3089 *task0 = idivup(cc, 2, _state)*tilesize;
3090 *task1 = tasksize-(*task0);
3091 ae_assert(*task0>=1, "TiledSplit: internal error", _state);
3092 ae_assert(*task1>=1, "TiledSplit: internal error", _state);
3093 ae_assert(*task0%tilesize==0, "TiledSplit: internal error", _state);
3094 ae_assert(*task0>=(*task1), "TiledSplit: internal error", _state);
3095 }
3096
3097
3098 /*************************************************************************
3099 This function searches integer array. Elements in this array are actually
3100 records, each NRec elements wide. Each record has unique header - NHeader
3101 integer values, which identify it. Records are lexicographically sorted by
3102 header.
3103
3104 Records are identified by their index, not offset (offset = NRec*index).
3105
3106 This function searches A (records with indices [I0,I1)) for a record with
3107 header B. It returns index of this record (not offset!), or -1 on failure.
3108
3109 -- ALGLIB --
3110 Copyright 28.03.2011 by Bochkanov Sergey
3111 *************************************************************************/
recsearch(ae_vector * a,ae_int_t nrec,ae_int_t nheader,ae_int_t i0,ae_int_t i1,ae_vector * b,ae_state * _state)3112 ae_int_t recsearch(/* Integer */ ae_vector* a,
3113 ae_int_t nrec,
3114 ae_int_t nheader,
3115 ae_int_t i0,
3116 ae_int_t i1,
3117 /* Integer */ ae_vector* b,
3118 ae_state *_state)
3119 {
3120 ae_int_t mididx;
3121 ae_int_t cflag;
3122 ae_int_t k;
3123 ae_int_t offs;
3124 ae_int_t result;
3125
3126
3127 result = -1;
3128 for(;;)
3129 {
3130 if( i0>=i1 )
3131 {
3132 break;
3133 }
3134 mididx = (i0+i1)/2;
3135 offs = nrec*mididx;
3136 cflag = 0;
3137 for(k=0; k<=nheader-1; k++)
3138 {
3139 if( a->ptr.p_int[offs+k]<b->ptr.p_int[k] )
3140 {
3141 cflag = -1;
3142 break;
3143 }
3144 if( a->ptr.p_int[offs+k]>b->ptr.p_int[k] )
3145 {
3146 cflag = 1;
3147 break;
3148 }
3149 }
3150 if( cflag==0 )
3151 {
3152 result = mididx;
3153 return result;
3154 }
3155 if( cflag<0 )
3156 {
3157 i0 = mididx+1;
3158 }
3159 else
3160 {
3161 i1 = mididx;
3162 }
3163 }
3164 return result;
3165 }
3166
3167
3168 /*************************************************************************
3169 This function is used in parallel functions for recurrent division of large
3170 task into two smaller tasks.
3171
3172 It has following properties:
3173 * it works only for TaskSize>=2 (assertion is thrown otherwise)
3174 * for TaskSize=2, it returns Task0=1, Task1=1
3175 * in case TaskSize is odd, Task0=TaskSize-1, Task1=1
3176 * in case TaskSize is even, Task0 and Task1 are approximately TaskSize/2
3177 and both Task0 and Task1 are even, Task0>=Task1
3178
3179 -- ALGLIB --
3180 Copyright 07.04.2013 by Bochkanov Sergey
3181 *************************************************************************/
splitlengtheven(ae_int_t tasksize,ae_int_t * task0,ae_int_t * task1,ae_state * _state)3182 void splitlengtheven(ae_int_t tasksize,
3183 ae_int_t* task0,
3184 ae_int_t* task1,
3185 ae_state *_state)
3186 {
3187
3188 *task0 = 0;
3189 *task1 = 0;
3190
3191 ae_assert(tasksize>=2, "SplitLengthEven: TaskSize<2", _state);
3192 if( tasksize==2 )
3193 {
3194 *task0 = 1;
3195 *task1 = 1;
3196 return;
3197 }
3198 if( tasksize%2==0 )
3199 {
3200
3201 /*
3202 * Even division
3203 */
3204 *task0 = tasksize/2;
3205 *task1 = tasksize/2;
3206 if( *task0%2!=0 )
3207 {
3208 *task0 = *task0+1;
3209 *task1 = *task1-1;
3210 }
3211 }
3212 else
3213 {
3214
3215 /*
3216 * Odd task size, split trailing odd part from it.
3217 */
3218 *task0 = tasksize-1;
3219 *task1 = 1;
3220 }
3221 ae_assert(*task0>=1, "SplitLengthEven: internal error", _state);
3222 ae_assert(*task1>=1, "SplitLengthEven: internal error", _state);
3223 }
3224
3225
3226 /*************************************************************************
3227 This function is used to calculate number of chunks (including partial,
3228 non-complete chunks) in some set. It expects that ChunkSize>=1, TaskSize>=0.
3229 Assertion is thrown otherwise.
3230
3231 Function result is equivalent to Ceil(TaskSize/ChunkSize), but with guarantees
3232 that rounding errors won't ruin results.
3233
3234 -- ALGLIB --
3235 Copyright 21.01.2015 by Bochkanov Sergey
3236 *************************************************************************/
chunkscount(ae_int_t tasksize,ae_int_t chunksize,ae_state * _state)3237 ae_int_t chunkscount(ae_int_t tasksize,
3238 ae_int_t chunksize,
3239 ae_state *_state)
3240 {
3241 ae_int_t result;
3242
3243
3244 ae_assert(tasksize>=0, "ChunksCount: TaskSize<0", _state);
3245 ae_assert(chunksize>=1, "ChunksCount: ChunkSize<1", _state);
3246 result = tasksize/chunksize;
3247 if( tasksize%chunksize!=0 )
3248 {
3249 result = result+1;
3250 }
3251 return result;
3252 }
3253
3254
3255 /*************************************************************************
3256 Returns maximum density for level 2 sparse/dense functions. Density values
3257 below one returned by this function are better to handle via sparse Level 2
3258 functionality.
3259
3260 -- ALGLIB routine --
3261 10.01.2019
3262 Bochkanov Sergey
3263 *************************************************************************/
sparselevel2density(ae_state * _state)3264 double sparselevel2density(ae_state *_state)
3265 {
3266 double result;
3267
3268
3269 result = 0.1;
3270 return result;
3271 }
3272
3273
3274 /*************************************************************************
3275 Returns A-tile size for a matrix.
3276
3277 A-tiles are smallest tiles (32x32), suitable for processing by ALGLIB own
3278 implementation of Level 3 linear algebra.
3279
3280 -- ALGLIB routine --
3281 10.01.2019
3282 Bochkanov Sergey
3283 *************************************************************************/
matrixtilesizea(ae_state * _state)3284 ae_int_t matrixtilesizea(ae_state *_state)
3285 {
3286 ae_int_t result;
3287
3288
3289 result = 32;
3290 return result;
3291 }
3292
3293
3294 /*************************************************************************
3295 Returns B-tile size for a matrix.
3296
3297 B-tiles are larger tiles (64x64), suitable for parallel execution or for
3298 processing by vendor's implementation of Level 3 linear algebra.
3299
3300 -- ALGLIB routine --
3301 10.01.2019
3302 Bochkanov Sergey
3303 *************************************************************************/
matrixtilesizeb(ae_state * _state)3304 ae_int_t matrixtilesizeb(ae_state *_state)
3305 {
3306 #ifndef ALGLIB_INTERCEPTS_MKL
3307 ae_int_t result;
3308
3309
3310 result = 64;
3311 return result;
3312 #else
3313 return _ialglib_i_matrixtilesizeb();
3314 #endif
3315 }
3316
3317
3318 /*************************************************************************
3319 This function returns minimum cost of task which is feasible for
3320 multithreaded processing. It returns real number in order to avoid overflow
3321 problems.
3322
3323 -- ALGLIB --
3324 Copyright 10.01.2018 by Bochkanov Sergey
3325 *************************************************************************/
smpactivationlevel(ae_state * _state)3326 double smpactivationlevel(ae_state *_state)
3327 {
3328 double nn;
3329 double result;
3330
3331
3332 nn = (double)(2*matrixtilesizeb(_state));
3333 result = ae_maxreal(0.95*2*nn*nn*nn, 1.0E7, _state);
3334 return result;
3335 }
3336
3337
3338 /*************************************************************************
3339 This function returns minimum cost of task which is feasible for
3340 spawn (given that multithreading is active).
3341
3342 It returns real number in order to avoid overflow problems.
3343
3344 -- ALGLIB --
3345 Copyright 10.01.2018 by Bochkanov Sergey
3346 *************************************************************************/
spawnlevel(ae_state * _state)3347 double spawnlevel(ae_state *_state)
3348 {
3349 double nn;
3350 double result;
3351
3352
3353 nn = (double)(2*matrixtilesizea(_state));
3354 result = 0.95*2*nn*nn*nn;
3355 return result;
3356 }
3357
3358
3359 /*************************************************************************
3360 --- OBSOLETE FUNCTION, USE TILED SPLIT INSTEAD ---
3361
3362 This function is used in parallel functions for recurrent division of large
3363 task into two smaller tasks.
3364
3365 It has following properties:
3366 * it works only for TaskSize>=2 and ChunkSize>=2
3367 (assertion is thrown otherwise)
3368 * Task0+Task1=TaskSize, Task0>0, Task1>0
3369 * Task0 and Task1 are close to each other
3370 * in case TaskSize>ChunkSize, Task0 is always divisible by ChunkSize
3371
3372 -- ALGLIB --
3373 Copyright 07.04.2013 by Bochkanov Sergey
3374 *************************************************************************/
splitlength(ae_int_t tasksize,ae_int_t chunksize,ae_int_t * task0,ae_int_t * task1,ae_state * _state)3375 void splitlength(ae_int_t tasksize,
3376 ae_int_t chunksize,
3377 ae_int_t* task0,
3378 ae_int_t* task1,
3379 ae_state *_state)
3380 {
3381
3382 *task0 = 0;
3383 *task1 = 0;
3384
3385 ae_assert(chunksize>=2, "SplitLength: ChunkSize<2", _state);
3386 ae_assert(tasksize>=2, "SplitLength: TaskSize<2", _state);
3387 *task0 = tasksize/2;
3388 if( *task0>chunksize&&*task0%chunksize!=0 )
3389 {
3390 *task0 = *task0-*task0%chunksize;
3391 }
3392 *task1 = tasksize-(*task0);
3393 ae_assert(*task0>=1, "SplitLength: internal error", _state);
3394 ae_assert(*task1>=1, "SplitLength: internal error", _state);
3395 }
3396
3397
3398 /*************************************************************************
3399 Outputs vector A[I0,I1-1] to trace log using either:
3400 a) 6-digit exponential format (no trace flags is set)
3401 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3402 b) 6-ditit fixed-point format ('PREC.F6' trace flag is set)
3403
3404 This function checks trace flags every time it is called.
3405 *************************************************************************/
tracevectorautoprec(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)3406 void tracevectorautoprec(/* Real */ ae_vector* a,
3407 ae_int_t i0,
3408 ae_int_t i1,
3409 ae_state *_state)
3410 {
3411 ae_int_t i;
3412 ae_int_t prectouse;
3413
3414
3415
3416 /*
3417 * Determine precision to use
3418 */
3419 prectouse = 0;
3420 if( ae_is_trace_enabled("PREC.E15") )
3421 {
3422 prectouse = 1;
3423 }
3424 if( ae_is_trace_enabled("PREC.F6") )
3425 {
3426 prectouse = 2;
3427 }
3428
3429 /*
3430 * Output
3431 */
3432 ae_trace("[ ");
3433 for(i=i0; i<=i1-1; i++)
3434 {
3435 if( prectouse==0 )
3436 {
3437 ae_trace("%14.6e",
3438 (double)(a->ptr.p_double[i]));
3439 }
3440 if( prectouse==1 )
3441 {
3442 ae_trace("%23.15e",
3443 (double)(a->ptr.p_double[i]));
3444 }
3445 if( prectouse==2 )
3446 {
3447 ae_trace("%13.6f",
3448 (double)(a->ptr.p_double[i]));
3449 }
3450 if( i<i1-1 )
3451 {
3452 ae_trace(" ");
3453 }
3454 }
3455 ae_trace(" ]");
3456 }
3457
3458
3459 /*************************************************************************
3460 Outputs row A[I,J0..J1-1] to trace log using either:
3461 a) 6-digit exponential format (no trace flags is set)
3462 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3463 b) 6-ditit fixed-point format ('PREC.F6' trace flag is set)
3464
3465 This function checks trace flags every time it is called.
3466 *************************************************************************/
tracerowautoprec(ae_matrix * a,ae_int_t i,ae_int_t j0,ae_int_t j1,ae_state * _state)3467 void tracerowautoprec(/* Real */ ae_matrix* a,
3468 ae_int_t i,
3469 ae_int_t j0,
3470 ae_int_t j1,
3471 ae_state *_state)
3472 {
3473 ae_int_t j;
3474 ae_int_t prectouse;
3475
3476
3477
3478 /*
3479 * Determine precision to use
3480 */
3481 prectouse = 0;
3482 if( ae_is_trace_enabled("PREC.E15") )
3483 {
3484 prectouse = 1;
3485 }
3486 if( ae_is_trace_enabled("PREC.F6") )
3487 {
3488 prectouse = 2;
3489 }
3490
3491 /*
3492 * Output
3493 */
3494 ae_trace("[ ");
3495 for(j=j0; j<=j1-1; j++)
3496 {
3497 if( prectouse==0 )
3498 {
3499 ae_trace("%14.6e",
3500 (double)(a->ptr.pp_double[i][j]));
3501 }
3502 if( prectouse==1 )
3503 {
3504 ae_trace("%23.15e",
3505 (double)(a->ptr.pp_double[i][j]));
3506 }
3507 if( prectouse==2 )
3508 {
3509 ae_trace("%13.6f",
3510 (double)(a->ptr.pp_double[i][j]));
3511 }
3512 if( j<j1-1 )
3513 {
3514 ae_trace(" ");
3515 }
3516 }
3517 ae_trace(" ]");
3518 }
3519
3520
3521 /*************************************************************************
3522 Unscales/unshifts vector A[N] by computing A*Scl+Sft and outputs result to
3523 trace log using either:
3524 a) 6-digit exponential format (no trace flags is set)
3525 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3526 b) 6-ditit fixed-point format ('PREC.F6' trace flag is set)
3527
3528 This function checks trace flags every time it is called.
3529 Both Scl and Sft can be omitted.
3530 *************************************************************************/
tracevectorunscaledunshiftedautoprec(ae_vector * x,ae_int_t n,ae_vector * scl,ae_bool applyscl,ae_vector * sft,ae_bool applysft,ae_state * _state)3531 void tracevectorunscaledunshiftedautoprec(/* Real */ ae_vector* x,
3532 ae_int_t n,
3533 /* Real */ ae_vector* scl,
3534 ae_bool applyscl,
3535 /* Real */ ae_vector* sft,
3536 ae_bool applysft,
3537 ae_state *_state)
3538 {
3539 ae_int_t i;
3540 ae_int_t prectouse;
3541 double v;
3542
3543
3544
3545 /*
3546 * Determine precision to use
3547 */
3548 prectouse = 0;
3549 if( ae_is_trace_enabled("PREC.E15") )
3550 {
3551 prectouse = 1;
3552 }
3553 if( ae_is_trace_enabled("PREC.F6") )
3554 {
3555 prectouse = 2;
3556 }
3557
3558 /*
3559 * Output
3560 */
3561 ae_trace("[ ");
3562 for(i=0; i<=n-1; i++)
3563 {
3564 v = x->ptr.p_double[i];
3565 if( applyscl )
3566 {
3567 v = v*scl->ptr.p_double[i];
3568 }
3569 if( applysft )
3570 {
3571 v = v+sft->ptr.p_double[i];
3572 }
3573 if( prectouse==0 )
3574 {
3575 ae_trace("%14.6e",
3576 (double)(v));
3577 }
3578 if( prectouse==1 )
3579 {
3580 ae_trace("%23.15e",
3581 (double)(v));
3582 }
3583 if( prectouse==2 )
3584 {
3585 ae_trace("%13.6f",
3586 (double)(v));
3587 }
3588 if( i<n-1 )
3589 {
3590 ae_trace(" ");
3591 }
3592 }
3593 ae_trace(" ]");
3594 }
3595
3596
3597 /*************************************************************************
3598 Outputs vector of 1-norms of rows [I0,I1-1] of A[I0...I1-1,J0...J1-1] to
3599 trace log using either:
3600 a) 6-digit exponential format (no trace flags is set)
3601 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3602 b) 6-ditit fixed-point format ('PREC.F6' trace flag is set)
3603
3604 This function checks trace flags every time it is called.
3605 *************************************************************************/
tracerownrm1autoprec(ae_matrix * a,ae_int_t i0,ae_int_t i1,ae_int_t j0,ae_int_t j1,ae_state * _state)3606 void tracerownrm1autoprec(/* Real */ ae_matrix* a,
3607 ae_int_t i0,
3608 ae_int_t i1,
3609 ae_int_t j0,
3610 ae_int_t j1,
3611 ae_state *_state)
3612 {
3613 ae_int_t i;
3614 ae_int_t j;
3615 double v;
3616 ae_int_t prectouse;
3617
3618
3619
3620 /*
3621 * Determine precision to use
3622 */
3623 prectouse = 0;
3624 if( ae_is_trace_enabled("PREC.E15") )
3625 {
3626 prectouse = 1;
3627 }
3628 if( ae_is_trace_enabled("PREC.F6") )
3629 {
3630 prectouse = 2;
3631 }
3632
3633 /*
3634 * Output
3635 */
3636 ae_trace("[ ");
3637 for(i=i0; i<=i1-1; i++)
3638 {
3639 v = (double)(0);
3640 for(j=j0; j<=j1-1; j++)
3641 {
3642 v = ae_maxreal(v, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
3643 }
3644 if( prectouse==0 )
3645 {
3646 ae_trace("%14.6e",
3647 (double)(v));
3648 }
3649 if( prectouse==1 )
3650 {
3651 ae_trace("%23.15e",
3652 (double)(v));
3653 }
3654 if( prectouse==2 )
3655 {
3656 ae_trace("%13.6f",
3657 (double)(v));
3658 }
3659 if( i<i1-1 )
3660 {
3661 ae_trace(" ");
3662 }
3663 }
3664 ae_trace(" ]");
3665 }
3666
3667
3668 /*************************************************************************
3669 Outputs vector A[I0,I1-1] to trace log using E8 precision
3670 *************************************************************************/
tracevectore6(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)3671 void tracevectore6(/* Real */ ae_vector* a,
3672 ae_int_t i0,
3673 ae_int_t i1,
3674 ae_state *_state)
3675 {
3676 ae_int_t i;
3677
3678
3679 ae_trace("[ ");
3680 for(i=i0; i<=i1-1; i++)
3681 {
3682 ae_trace("%14.6e",
3683 (double)(a->ptr.p_double[i]));
3684 if( i<i1-1 )
3685 {
3686 ae_trace(" ");
3687 }
3688 }
3689 ae_trace(" ]");
3690 }
3691
3692
3693 /*************************************************************************
3694 Outputs vector A[I0,I1-1] to trace log using E8 or E15 precision
3695 *************************************************************************/
tracevectore615(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_bool usee15,ae_state * _state)3696 void tracevectore615(/* Real */ ae_vector* a,
3697 ae_int_t i0,
3698 ae_int_t i1,
3699 ae_bool usee15,
3700 ae_state *_state)
3701 {
3702 ae_int_t i;
3703
3704
3705 ae_trace("[ ");
3706 for(i=i0; i<=i1-1; i++)
3707 {
3708 if( usee15 )
3709 {
3710 ae_trace("%23.15e",
3711 (double)(a->ptr.p_double[i]));
3712 }
3713 else
3714 {
3715 ae_trace("%14.6e",
3716 (double)(a->ptr.p_double[i]));
3717 }
3718 if( i<i1-1 )
3719 {
3720 ae_trace(" ");
3721 }
3722 }
3723 ae_trace(" ]");
3724 }
3725
3726
3727 /*************************************************************************
3728 Outputs vector of 1-norms of rows [I0,I1-1] of A[I0...I1-1,J0...J1-1] to
3729 trace log using E8 precision
3730 *************************************************************************/
tracerownrm1e6(ae_matrix * a,ae_int_t i0,ae_int_t i1,ae_int_t j0,ae_int_t j1,ae_state * _state)3731 void tracerownrm1e6(/* Real */ ae_matrix* a,
3732 ae_int_t i0,
3733 ae_int_t i1,
3734 ae_int_t j0,
3735 ae_int_t j1,
3736 ae_state *_state)
3737 {
3738 ae_int_t i;
3739 ae_int_t j;
3740 double v;
3741
3742
3743 ae_trace("[ ");
3744 for(i=i0; i<=i1-1; i++)
3745 {
3746 v = (double)(0);
3747 for(j=j0; j<=j1-1; j++)
3748 {
3749 v = ae_maxreal(v, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
3750 }
3751 ae_trace("%14.6e",
3752 (double)(v));
3753 if( i<i1-1 )
3754 {
3755 ae_trace(" ");
3756 }
3757 }
3758 ae_trace(" ]");
3759 }
3760
3761
_apbuffers_init(void * _p,ae_state * _state,ae_bool make_automatic)3762 void _apbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
3763 {
3764 apbuffers *p = (apbuffers*)_p;
3765 ae_touch_ptr((void*)p);
3766 ae_vector_init(&p->ba0, 0, DT_BOOL, _state, make_automatic);
3767 ae_vector_init(&p->ia0, 0, DT_INT, _state, make_automatic);
3768 ae_vector_init(&p->ia1, 0, DT_INT, _state, make_automatic);
3769 ae_vector_init(&p->ia2, 0, DT_INT, _state, make_automatic);
3770 ae_vector_init(&p->ia3, 0, DT_INT, _state, make_automatic);
3771 ae_vector_init(&p->ra0, 0, DT_REAL, _state, make_automatic);
3772 ae_vector_init(&p->ra1, 0, DT_REAL, _state, make_automatic);
3773 ae_vector_init(&p->ra2, 0, DT_REAL, _state, make_automatic);
3774 ae_vector_init(&p->ra3, 0, DT_REAL, _state, make_automatic);
3775 ae_matrix_init(&p->rm0, 0, 0, DT_REAL, _state, make_automatic);
3776 ae_matrix_init(&p->rm1, 0, 0, DT_REAL, _state, make_automatic);
3777 }
3778
3779
_apbuffers_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3780 void _apbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3781 {
3782 apbuffers *dst = (apbuffers*)_dst;
3783 apbuffers *src = (apbuffers*)_src;
3784 ae_vector_init_copy(&dst->ba0, &src->ba0, _state, make_automatic);
3785 ae_vector_init_copy(&dst->ia0, &src->ia0, _state, make_automatic);
3786 ae_vector_init_copy(&dst->ia1, &src->ia1, _state, make_automatic);
3787 ae_vector_init_copy(&dst->ia2, &src->ia2, _state, make_automatic);
3788 ae_vector_init_copy(&dst->ia3, &src->ia3, _state, make_automatic);
3789 ae_vector_init_copy(&dst->ra0, &src->ra0, _state, make_automatic);
3790 ae_vector_init_copy(&dst->ra1, &src->ra1, _state, make_automatic);
3791 ae_vector_init_copy(&dst->ra2, &src->ra2, _state, make_automatic);
3792 ae_vector_init_copy(&dst->ra3, &src->ra3, _state, make_automatic);
3793 ae_matrix_init_copy(&dst->rm0, &src->rm0, _state, make_automatic);
3794 ae_matrix_init_copy(&dst->rm1, &src->rm1, _state, make_automatic);
3795 }
3796
3797
_apbuffers_clear(void * _p)3798 void _apbuffers_clear(void* _p)
3799 {
3800 apbuffers *p = (apbuffers*)_p;
3801 ae_touch_ptr((void*)p);
3802 ae_vector_clear(&p->ba0);
3803 ae_vector_clear(&p->ia0);
3804 ae_vector_clear(&p->ia1);
3805 ae_vector_clear(&p->ia2);
3806 ae_vector_clear(&p->ia3);
3807 ae_vector_clear(&p->ra0);
3808 ae_vector_clear(&p->ra1);
3809 ae_vector_clear(&p->ra2);
3810 ae_vector_clear(&p->ra3);
3811 ae_matrix_clear(&p->rm0);
3812 ae_matrix_clear(&p->rm1);
3813 }
3814
3815
_apbuffers_destroy(void * _p)3816 void _apbuffers_destroy(void* _p)
3817 {
3818 apbuffers *p = (apbuffers*)_p;
3819 ae_touch_ptr((void*)p);
3820 ae_vector_destroy(&p->ba0);
3821 ae_vector_destroy(&p->ia0);
3822 ae_vector_destroy(&p->ia1);
3823 ae_vector_destroy(&p->ia2);
3824 ae_vector_destroy(&p->ia3);
3825 ae_vector_destroy(&p->ra0);
3826 ae_vector_destroy(&p->ra1);
3827 ae_vector_destroy(&p->ra2);
3828 ae_vector_destroy(&p->ra3);
3829 ae_matrix_destroy(&p->rm0);
3830 ae_matrix_destroy(&p->rm1);
3831 }
3832
3833
_sboolean_init(void * _p,ae_state * _state,ae_bool make_automatic)3834 void _sboolean_init(void* _p, ae_state *_state, ae_bool make_automatic)
3835 {
3836 sboolean *p = (sboolean*)_p;
3837 ae_touch_ptr((void*)p);
3838 }
3839
3840
_sboolean_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3841 void _sboolean_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3842 {
3843 sboolean *dst = (sboolean*)_dst;
3844 sboolean *src = (sboolean*)_src;
3845 dst->val = src->val;
3846 }
3847
3848
_sboolean_clear(void * _p)3849 void _sboolean_clear(void* _p)
3850 {
3851 sboolean *p = (sboolean*)_p;
3852 ae_touch_ptr((void*)p);
3853 }
3854
3855
_sboolean_destroy(void * _p)3856 void _sboolean_destroy(void* _p)
3857 {
3858 sboolean *p = (sboolean*)_p;
3859 ae_touch_ptr((void*)p);
3860 }
3861
3862
_sbooleanarray_init(void * _p,ae_state * _state,ae_bool make_automatic)3863 void _sbooleanarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
3864 {
3865 sbooleanarray *p = (sbooleanarray*)_p;
3866 ae_touch_ptr((void*)p);
3867 ae_vector_init(&p->val, 0, DT_BOOL, _state, make_automatic);
3868 }
3869
3870
_sbooleanarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3871 void _sbooleanarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3872 {
3873 sbooleanarray *dst = (sbooleanarray*)_dst;
3874 sbooleanarray *src = (sbooleanarray*)_src;
3875 ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
3876 }
3877
3878
_sbooleanarray_clear(void * _p)3879 void _sbooleanarray_clear(void* _p)
3880 {
3881 sbooleanarray *p = (sbooleanarray*)_p;
3882 ae_touch_ptr((void*)p);
3883 ae_vector_clear(&p->val);
3884 }
3885
3886
_sbooleanarray_destroy(void * _p)3887 void _sbooleanarray_destroy(void* _p)
3888 {
3889 sbooleanarray *p = (sbooleanarray*)_p;
3890 ae_touch_ptr((void*)p);
3891 ae_vector_destroy(&p->val);
3892 }
3893
3894
_sinteger_init(void * _p,ae_state * _state,ae_bool make_automatic)3895 void _sinteger_init(void* _p, ae_state *_state, ae_bool make_automatic)
3896 {
3897 sinteger *p = (sinteger*)_p;
3898 ae_touch_ptr((void*)p);
3899 }
3900
3901
_sinteger_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3902 void _sinteger_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3903 {
3904 sinteger *dst = (sinteger*)_dst;
3905 sinteger *src = (sinteger*)_src;
3906 dst->val = src->val;
3907 }
3908
3909
_sinteger_clear(void * _p)3910 void _sinteger_clear(void* _p)
3911 {
3912 sinteger *p = (sinteger*)_p;
3913 ae_touch_ptr((void*)p);
3914 }
3915
3916
_sinteger_destroy(void * _p)3917 void _sinteger_destroy(void* _p)
3918 {
3919 sinteger *p = (sinteger*)_p;
3920 ae_touch_ptr((void*)p);
3921 }
3922
3923
_sintegerarray_init(void * _p,ae_state * _state,ae_bool make_automatic)3924 void _sintegerarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
3925 {
3926 sintegerarray *p = (sintegerarray*)_p;
3927 ae_touch_ptr((void*)p);
3928 ae_vector_init(&p->val, 0, DT_INT, _state, make_automatic);
3929 }
3930
3931
_sintegerarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3932 void _sintegerarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3933 {
3934 sintegerarray *dst = (sintegerarray*)_dst;
3935 sintegerarray *src = (sintegerarray*)_src;
3936 ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
3937 }
3938
3939
_sintegerarray_clear(void * _p)3940 void _sintegerarray_clear(void* _p)
3941 {
3942 sintegerarray *p = (sintegerarray*)_p;
3943 ae_touch_ptr((void*)p);
3944 ae_vector_clear(&p->val);
3945 }
3946
3947
_sintegerarray_destroy(void * _p)3948 void _sintegerarray_destroy(void* _p)
3949 {
3950 sintegerarray *p = (sintegerarray*)_p;
3951 ae_touch_ptr((void*)p);
3952 ae_vector_destroy(&p->val);
3953 }
3954
3955
_sreal_init(void * _p,ae_state * _state,ae_bool make_automatic)3956 void _sreal_init(void* _p, ae_state *_state, ae_bool make_automatic)
3957 {
3958 sreal *p = (sreal*)_p;
3959 ae_touch_ptr((void*)p);
3960 }
3961
3962
_sreal_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3963 void _sreal_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3964 {
3965 sreal *dst = (sreal*)_dst;
3966 sreal *src = (sreal*)_src;
3967 dst->val = src->val;
3968 }
3969
3970
_sreal_clear(void * _p)3971 void _sreal_clear(void* _p)
3972 {
3973 sreal *p = (sreal*)_p;
3974 ae_touch_ptr((void*)p);
3975 }
3976
3977
_sreal_destroy(void * _p)3978 void _sreal_destroy(void* _p)
3979 {
3980 sreal *p = (sreal*)_p;
3981 ae_touch_ptr((void*)p);
3982 }
3983
3984
_srealarray_init(void * _p,ae_state * _state,ae_bool make_automatic)3985 void _srealarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
3986 {
3987 srealarray *p = (srealarray*)_p;
3988 ae_touch_ptr((void*)p);
3989 ae_vector_init(&p->val, 0, DT_REAL, _state, make_automatic);
3990 }
3991
3992
_srealarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3993 void _srealarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3994 {
3995 srealarray *dst = (srealarray*)_dst;
3996 srealarray *src = (srealarray*)_src;
3997 ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
3998 }
3999
4000
_srealarray_clear(void * _p)4001 void _srealarray_clear(void* _p)
4002 {
4003 srealarray *p = (srealarray*)_p;
4004 ae_touch_ptr((void*)p);
4005 ae_vector_clear(&p->val);
4006 }
4007
4008
_srealarray_destroy(void * _p)4009 void _srealarray_destroy(void* _p)
4010 {
4011 srealarray *p = (srealarray*)_p;
4012 ae_touch_ptr((void*)p);
4013 ae_vector_destroy(&p->val);
4014 }
4015
4016
_scomplex_init(void * _p,ae_state * _state,ae_bool make_automatic)4017 void _scomplex_init(void* _p, ae_state *_state, ae_bool make_automatic)
4018 {
4019 scomplex *p = (scomplex*)_p;
4020 ae_touch_ptr((void*)p);
4021 }
4022
4023
_scomplex_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)4024 void _scomplex_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
4025 {
4026 scomplex *dst = (scomplex*)_dst;
4027 scomplex *src = (scomplex*)_src;
4028 dst->val = src->val;
4029 }
4030
4031
_scomplex_clear(void * _p)4032 void _scomplex_clear(void* _p)
4033 {
4034 scomplex *p = (scomplex*)_p;
4035 ae_touch_ptr((void*)p);
4036 }
4037
4038
_scomplex_destroy(void * _p)4039 void _scomplex_destroy(void* _p)
4040 {
4041 scomplex *p = (scomplex*)_p;
4042 ae_touch_ptr((void*)p);
4043 }
4044
4045
_scomplexarray_init(void * _p,ae_state * _state,ae_bool make_automatic)4046 void _scomplexarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
4047 {
4048 scomplexarray *p = (scomplexarray*)_p;
4049 ae_touch_ptr((void*)p);
4050 ae_vector_init(&p->val, 0, DT_COMPLEX, _state, make_automatic);
4051 }
4052
4053
_scomplexarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)4054 void _scomplexarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
4055 {
4056 scomplexarray *dst = (scomplexarray*)_dst;
4057 scomplexarray *src = (scomplexarray*)_src;
4058 ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
4059 }
4060
4061
_scomplexarray_clear(void * _p)4062 void _scomplexarray_clear(void* _p)
4063 {
4064 scomplexarray *p = (scomplexarray*)_p;
4065 ae_touch_ptr((void*)p);
4066 ae_vector_clear(&p->val);
4067 }
4068
4069
_scomplexarray_destroy(void * _p)4070 void _scomplexarray_destroy(void* _p)
4071 {
4072 scomplexarray *p = (scomplexarray*)_p;
4073 ae_touch_ptr((void*)p);
4074 ae_vector_destroy(&p->val);
4075 }
4076
4077
4078 #endif
4079 #if defined(AE_COMPILE_ABLASF) || !defined(AE_PARTIAL_BUILD)
4080
4081
4082 #ifdef ALGLIB_NO_FAST_KERNELS
4083 /*************************************************************************
4084 Computes dot product (X,Y) for elements [0,N) of X[] and Y[]
4085
4086 INPUT PARAMETERS:
4087 N - vector length
4088 X - array[N], vector to process
4089 Y - array[N], vector to process
4090
4091 RESULT:
4092 (X,Y)
4093
4094 -- ALGLIB --
4095 Copyright 20.01.2020 by Bochkanov Sergey
4096 *************************************************************************/
rdotv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)4097 double rdotv(ae_int_t n,
4098 /* Real */ ae_vector* x,
4099 /* Real */ ae_vector* y,
4100 ae_state *_state)
4101 {
4102 ae_int_t i;
4103 double result;
4104
4105
4106 result = (double)(0);
4107 for(i=0; i<=n-1; i++)
4108 {
4109 result = result+x->ptr.p_double[i]*y->ptr.p_double[i];
4110 }
4111 return result;
4112 }
4113 #endif
4114
4115
4116 #ifdef ALGLIB_NO_FAST_KERNELS
4117 /*************************************************************************
4118 Computes dot product (X,A[i]) for elements [0,N) of vector X[] and row A[i,*]
4119
4120 INPUT PARAMETERS:
4121 N - vector length
4122 X - array[N], vector to process
4123 A - array[?,N], matrix to process
4124 I - row index
4125
4126 RESULT:
4127 (X,Ai)
4128
4129 -- ALGLIB --
4130 Copyright 20.01.2020 by Bochkanov Sergey
4131 *************************************************************************/
rdotvr(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t i,ae_state * _state)4132 double rdotvr(ae_int_t n,
4133 /* Real */ ae_vector* x,
4134 /* Real */ ae_matrix* a,
4135 ae_int_t i,
4136 ae_state *_state)
4137 {
4138 ae_int_t j;
4139 double result;
4140
4141
4142 result = (double)(0);
4143 for(j=0; j<=n-1; j++)
4144 {
4145 result = result+x->ptr.p_double[j]*a->ptr.pp_double[i][j];
4146 }
4147 return result;
4148 }
4149 #endif
4150
4151
4152 #ifdef ALGLIB_NO_FAST_KERNELS
4153 /*************************************************************************
4154 Computes dot product (X,A[i]) for rows A[ia,*] and B[ib,*]
4155
4156 INPUT PARAMETERS:
4157 N - vector length
4158 X - array[N], vector to process
4159 A - array[?,N], matrix to process
4160 I - row index
4161
4162 RESULT:
4163 (X,Ai)
4164
4165 -- ALGLIB --
4166 Copyright 20.01.2020 by Bochkanov Sergey
4167 *************************************************************************/
rdotrr(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_matrix * b,ae_int_t ib,ae_state * _state)4168 double rdotrr(ae_int_t n,
4169 /* Real */ ae_matrix* a,
4170 ae_int_t ia,
4171 /* Real */ ae_matrix* b,
4172 ae_int_t ib,
4173 ae_state *_state)
4174 {
4175 ae_int_t j;
4176 double result;
4177
4178
4179 result = (double)(0);
4180 for(j=0; j<=n-1; j++)
4181 {
4182 result = result+a->ptr.pp_double[ia][j]*b->ptr.pp_double[ib][j];
4183 }
4184 return result;
4185 }
4186 #endif
4187
4188
4189 #ifdef ALGLIB_NO_FAST_KERNELS
4190 /*************************************************************************
4191 Computes dot product (X,X) for elements [0,N) of X[]
4192
4193 INPUT PARAMETERS:
4194 N - vector length
4195 X - array[N], vector to process
4196
4197 RESULT:
4198 (X,X)
4199
4200 -- ALGLIB --
4201 Copyright 20.01.2020 by Bochkanov Sergey
4202 *************************************************************************/
rdotv2(ae_int_t n,ae_vector * x,ae_state * _state)4203 double rdotv2(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
4204 {
4205 ae_int_t i;
4206 double v;
4207 double result;
4208
4209
4210 result = (double)(0);
4211 for(i=0; i<=n-1; i++)
4212 {
4213 v = x->ptr.p_double[i];
4214 result = result+v*v;
4215 }
4216 return result;
4217 }
4218 #endif
4219
4220
4221 #ifdef ALGLIB_NO_FAST_KERNELS
4222 /*************************************************************************
4223 Performs inplace addition of Y[] to X[]
4224
4225 INPUT PARAMETERS:
4226 N - vector length
4227 Alpha - multiplier
4228 Y - array[N], vector to process
4229 X - array[N], vector to process
4230
4231 RESULT:
4232 X := X + alpha*Y
4233
4234 -- ALGLIB --
4235 Copyright 20.01.2020 by Bochkanov Sergey
4236 *************************************************************************/
raddv(ae_int_t n,double alpha,ae_vector * y,ae_vector * x,ae_state * _state)4237 void raddv(ae_int_t n,
4238 double alpha,
4239 /* Real */ ae_vector* y,
4240 /* Real */ ae_vector* x,
4241 ae_state *_state)
4242 {
4243 ae_int_t i;
4244
4245
4246 for(i=0; i<=n-1; i++)
4247 {
4248 x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.p_double[i];
4249 }
4250 }
4251 #endif
4252
4253
4254 #ifdef ALGLIB_NO_FAST_KERNELS
4255 /*************************************************************************
4256 Performs inplace addition of Y[] to X[]
4257
4258 INPUT PARAMETERS:
4259 N - vector length
4260 Alpha - multiplier
4261 Y - source vector
4262 OffsY - source offset
4263 X - destination vector
4264 OffsX - destination offset
4265
4266 RESULT:
4267 X := X + alpha*Y
4268
4269 -- ALGLIB --
4270 Copyright 20.01.2020 by Bochkanov Sergey
4271 *************************************************************************/
raddvx(ae_int_t n,double alpha,ae_vector * y,ae_int_t offsy,ae_vector * x,ae_int_t offsx,ae_state * _state)4272 void raddvx(ae_int_t n,
4273 double alpha,
4274 /* Real */ ae_vector* y,
4275 ae_int_t offsy,
4276 /* Real */ ae_vector* x,
4277 ae_int_t offsx,
4278 ae_state *_state)
4279 {
4280 ae_int_t i;
4281
4282
4283 for(i=0; i<=n-1; i++)
4284 {
4285 x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]+alpha*y->ptr.p_double[offsy+i];
4286 }
4287 }
4288 #endif
4289
4290
4291 /*************************************************************************
4292 Performs inplace addition of vector Y[] to column X[]
4293
4294 INPUT PARAMETERS:
4295 N - vector length
4296 Alpha - multiplier
4297 Y - vector to add
4298 X - target column ColIdx
4299
4300 RESULT:
4301 X := X + alpha*Y
4302
4303 -- ALGLIB --
4304 Copyright 20.01.2020 by Bochkanov Sergey
4305 *************************************************************************/
raddvc(ae_int_t n,double alpha,ae_vector * y,ae_matrix * x,ae_int_t colidx,ae_state * _state)4306 void raddvc(ae_int_t n,
4307 double alpha,
4308 /* Real */ ae_vector* y,
4309 /* Real */ ae_matrix* x,
4310 ae_int_t colidx,
4311 ae_state *_state)
4312 {
4313 ae_int_t i;
4314
4315
4316 for(i=0; i<=n-1; i++)
4317 {
4318 x->ptr.pp_double[i][colidx] = x->ptr.pp_double[i][colidx]+alpha*y->ptr.p_double[i];
4319 }
4320 }
4321
4322
4323 #ifdef ALGLIB_NO_FAST_KERNELS
4324 /*************************************************************************
4325 Performs inplace addition of vector Y[] to row X[]
4326
4327 INPUT PARAMETERS:
4328 N - vector length
4329 Alpha - multiplier
4330 Y - vector to add
4331 X - target row RowIdx
4332
4333 RESULT:
4334 X := X + alpha*Y
4335
4336 -- ALGLIB --
4337 Copyright 20.01.2020 by Bochkanov Sergey
4338 *************************************************************************/
raddvr(ae_int_t n,double alpha,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4339 void raddvr(ae_int_t n,
4340 double alpha,
4341 /* Real */ ae_vector* y,
4342 /* Real */ ae_matrix* x,
4343 ae_int_t rowidx,
4344 ae_state *_state)
4345 {
4346 ae_int_t i;
4347
4348
4349 for(i=0; i<=n-1; i++)
4350 {
4351 x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]+alpha*y->ptr.p_double[i];
4352 }
4353 }
4354 #endif
4355
4356
4357 #ifdef ALGLIB_NO_FAST_KERNELS
4358 /*************************************************************************
4359 Performs componentwise multiplication of vector X[] by vector Y[]
4360
4361 INPUT PARAMETERS:
4362 N - vector length
4363 Y - vector to multiply by
4364 X - target vector
4365
4366 RESULT:
4367 X := componentwise(X*Y)
4368
4369 -- ALGLIB --
4370 Copyright 20.01.2020 by Bochkanov Sergey
4371 *************************************************************************/
rmergemulv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)4372 void rmergemulv(ae_int_t n,
4373 /* Real */ ae_vector* y,
4374 /* Real */ ae_vector* x,
4375 ae_state *_state)
4376 {
4377 ae_int_t i;
4378
4379
4380 for(i=0; i<=n-1; i++)
4381 {
4382 x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.p_double[i];
4383 }
4384 }
4385 #endif
4386
4387
4388 #ifdef ALGLIB_NO_FAST_KERNELS
4389 /*************************************************************************
4390 Performs componentwise multiplication of row X[] by vector Y[]
4391
4392 INPUT PARAMETERS:
4393 N - vector length
4394 Y - vector to multiply by
4395 X - target row RowIdx
4396
4397 RESULT:
4398 X := componentwise(X*Y)
4399
4400 -- ALGLIB --
4401 Copyright 20.01.2020 by Bochkanov Sergey
4402 *************************************************************************/
rmergemulvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4403 void rmergemulvr(ae_int_t n,
4404 /* Real */ ae_vector* y,
4405 /* Real */ ae_matrix* x,
4406 ae_int_t rowidx,
4407 ae_state *_state)
4408 {
4409 ae_int_t i;
4410
4411
4412 for(i=0; i<=n-1; i++)
4413 {
4414 x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*y->ptr.p_double[i];
4415 }
4416 }
4417 #endif
4418
4419
4420 #ifdef ALGLIB_NO_FAST_KERNELS
4421 /*************************************************************************
4422 Performs componentwise multiplication of row X[] by vector Y[]
4423
4424 INPUT PARAMETERS:
4425 N - vector length
4426 Y - vector to multiply by
4427 X - target row RowIdx
4428
4429 RESULT:
4430 X := componentwise(X*Y)
4431
4432 -- ALGLIB --
4433 Copyright 20.01.2020 by Bochkanov Sergey
4434 *************************************************************************/
rmergemulrv(ae_int_t n,ae_matrix * y,ae_int_t rowidx,ae_vector * x,ae_state * _state)4435 void rmergemulrv(ae_int_t n,
4436 /* Real */ ae_matrix* y,
4437 ae_int_t rowidx,
4438 /* Real */ ae_vector* x,
4439 ae_state *_state)
4440 {
4441 ae_int_t i;
4442
4443
4444 for(i=0; i<=n-1; i++)
4445 {
4446 x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.pp_double[rowidx][i];
4447 }
4448 }
4449 #endif
4450
4451
4452 #ifdef ALGLIB_NO_FAST_KERNELS
4453 /*************************************************************************
4454 Performs componentwise max of vector X[] and vector Y[]
4455
4456 INPUT PARAMETERS:
4457 N - vector length
4458 Y - vector to multiply by
4459 X - target vector
4460
4461 RESULT:
4462 X := componentwise_max(X,Y)
4463
4464 -- ALGLIB --
4465 Copyright 20.01.2020 by Bochkanov Sergey
4466 *************************************************************************/
rmergemaxv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)4467 void rmergemaxv(ae_int_t n,
4468 /* Real */ ae_vector* y,
4469 /* Real */ ae_vector* x,
4470 ae_state *_state)
4471 {
4472 ae_int_t i;
4473
4474
4475 for(i=0; i<=n-1; i++)
4476 {
4477 x->ptr.p_double[i] = ae_maxreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
4478 }
4479 }
4480 #endif
4481
4482
4483 #ifdef ALGLIB_NO_FAST_KERNELS
4484 /*************************************************************************
4485 Performs componentwise max of row X[] and vector Y[]
4486
4487 INPUT PARAMETERS:
4488 N - vector length
4489 Y - vector to multiply by
4490 X - target row RowIdx
4491
4492 RESULT:
4493 X := componentwise_max(X,Y)
4494
4495 -- ALGLIB --
4496 Copyright 20.01.2020 by Bochkanov Sergey
4497 *************************************************************************/
rmergemaxvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4498 void rmergemaxvr(ae_int_t n,
4499 /* Real */ ae_vector* y,
4500 /* Real */ ae_matrix* x,
4501 ae_int_t rowidx,
4502 ae_state *_state)
4503 {
4504 ae_int_t i;
4505
4506
4507 for(i=0; i<=n-1; i++)
4508 {
4509 x->ptr.pp_double[rowidx][i] = ae_maxreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
4510 }
4511 }
4512 #endif
4513
4514
4515 #ifdef ALGLIB_NO_FAST_KERNELS
4516 /*************************************************************************
4517 Performs componentwise max of row X[I] and vector Y[]
4518
4519 INPUT PARAMETERS:
4520 N - vector length
4521 X - matrix, I-th row is source
4522 X - target row RowIdx
4523
4524 RESULT:
4525 Y := componentwise_max(Y,X)
4526
4527 -- ALGLIB --
4528 Copyright 20.01.2020 by Bochkanov Sergey
4529 *************************************************************************/
rmergemaxrv(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_vector * y,ae_state * _state)4530 void rmergemaxrv(ae_int_t n,
4531 /* Real */ ae_matrix* x,
4532 ae_int_t rowidx,
4533 /* Real */ ae_vector* y,
4534 ae_state *_state)
4535 {
4536 ae_int_t i;
4537
4538
4539 for(i=0; i<=n-1; i++)
4540 {
4541 y->ptr.p_double[i] = ae_maxreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
4542 }
4543 }
4544 #endif
4545
4546
4547 #ifdef ALGLIB_NO_FAST_KERNELS
4548 /*************************************************************************
4549 Performs componentwise max of vector X[] and vector Y[]
4550
4551 INPUT PARAMETERS:
4552 N - vector length
4553 Y - vector to multiply by
4554 X - target vector
4555
4556 RESULT:
4557 X := componentwise_max(X,Y)
4558
4559 -- ALGLIB --
4560 Copyright 20.01.2020 by Bochkanov Sergey
4561 *************************************************************************/
rmergeminv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)4562 void rmergeminv(ae_int_t n,
4563 /* Real */ ae_vector* y,
4564 /* Real */ ae_vector* x,
4565 ae_state *_state)
4566 {
4567 ae_int_t i;
4568
4569
4570 for(i=0; i<=n-1; i++)
4571 {
4572 x->ptr.p_double[i] = ae_minreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
4573 }
4574 }
4575 #endif
4576
4577
4578 #ifdef ALGLIB_NO_FAST_KERNELS
4579 /*************************************************************************
4580 Performs componentwise max of row X[] and vector Y[]
4581
4582 INPUT PARAMETERS:
4583 N - vector length
4584 Y - vector to multiply by
4585 X - target row RowIdx
4586
4587 RESULT:
4588 X := componentwise_max(X,Y)
4589
4590 -- ALGLIB --
4591 Copyright 20.01.2020 by Bochkanov Sergey
4592 *************************************************************************/
rmergeminvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4593 void rmergeminvr(ae_int_t n,
4594 /* Real */ ae_vector* y,
4595 /* Real */ ae_matrix* x,
4596 ae_int_t rowidx,
4597 ae_state *_state)
4598 {
4599 ae_int_t i;
4600
4601
4602 for(i=0; i<=n-1; i++)
4603 {
4604 x->ptr.pp_double[rowidx][i] = ae_minreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
4605 }
4606 }
4607 #endif
4608
4609
4610 #ifdef ALGLIB_NO_FAST_KERNELS
4611 /*************************************************************************
4612 Performs componentwise max of row X[I] and vector Y[]
4613
4614 INPUT PARAMETERS:
4615 N - vector length
4616 X - matrix, I-th row is source
4617 X - target row RowIdx
4618
4619 RESULT:
4620 X := componentwise_max(X,Y)
4621
4622 -- ALGLIB --
4623 Copyright 20.01.2020 by Bochkanov Sergey
4624 *************************************************************************/
rmergeminrv(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_vector * y,ae_state * _state)4625 void rmergeminrv(ae_int_t n,
4626 /* Real */ ae_matrix* x,
4627 ae_int_t rowidx,
4628 /* Real */ ae_vector* y,
4629 ae_state *_state)
4630 {
4631 ae_int_t i;
4632
4633
4634 for(i=0; i<=n-1; i++)
4635 {
4636 y->ptr.p_double[i] = ae_minreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
4637 }
4638 }
4639 #endif
4640
4641
4642 #ifdef ALGLIB_NO_FAST_KERNELS
4643 /*************************************************************************
4644 Performs inplace addition of Y[RIdx,...] to X[]
4645
4646 INPUT PARAMETERS:
4647 N - vector length
4648 Alpha - multiplier
4649 Y - array[?,N], matrix whose RIdx-th row is added
4650 RIdx - row index
4651 X - array[N], vector to process
4652
4653 RESULT:
4654 X := X + alpha*Y
4655
4656 -- ALGLIB --
4657 Copyright 20.01.2020 by Bochkanov Sergey
4658 *************************************************************************/
raddrv(ae_int_t n,double alpha,ae_matrix * y,ae_int_t ridx,ae_vector * x,ae_state * _state)4659 void raddrv(ae_int_t n,
4660 double alpha,
4661 /* Real */ ae_matrix* y,
4662 ae_int_t ridx,
4663 /* Real */ ae_vector* x,
4664 ae_state *_state)
4665 {
4666 ae_int_t i;
4667
4668
4669 for(i=0; i<=n-1; i++)
4670 {
4671 x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.pp_double[ridx][i];
4672 }
4673 }
4674 #endif
4675
4676
4677 #ifdef ALGLIB_NO_FAST_KERNELS
4678 /*************************************************************************
4679 Performs inplace addition of Y[RIdx,...] to X[RIdxDst]
4680
4681 INPUT PARAMETERS:
4682 N - vector length
4683 Alpha - multiplier
4684 Y - array[?,N], matrix whose RIdxSrc-th row is added
4685 RIdxSrc - source row index
4686 X - array[?,N], matrix whose RIdxDst-th row is target
4687 RIdxDst - destination row index
4688
4689 RESULT:
4690 X := X + alpha*Y
4691
4692 -- ALGLIB --
4693 Copyright 20.01.2020 by Bochkanov Sergey
4694 *************************************************************************/
raddrr(ae_int_t n,double alpha,ae_matrix * y,ae_int_t ridxsrc,ae_matrix * x,ae_int_t ridxdst,ae_state * _state)4695 void raddrr(ae_int_t n,
4696 double alpha,
4697 /* Real */ ae_matrix* y,
4698 ae_int_t ridxsrc,
4699 /* Real */ ae_matrix* x,
4700 ae_int_t ridxdst,
4701 ae_state *_state)
4702 {
4703 ae_int_t i;
4704
4705
4706 for(i=0; i<=n-1; i++)
4707 {
4708 x->ptr.pp_double[ridxdst][i] = x->ptr.pp_double[ridxdst][i]+alpha*y->ptr.pp_double[ridxsrc][i];
4709 }
4710 }
4711 #endif
4712
4713
4714 #ifdef ALGLIB_NO_FAST_KERNELS
4715 /*************************************************************************
4716 Performs inplace multiplication of X[] by V
4717
4718 INPUT PARAMETERS:
4719 N - vector length
4720 X - array[N], vector to process
4721 V - multiplier
4722
4723 OUTPUT PARAMETERS:
4724 X - elements 0...N-1 multiplied by V
4725
4726 -- ALGLIB --
4727 Copyright 20.01.2020 by Bochkanov Sergey
4728 *************************************************************************/
rmulv(ae_int_t n,double v,ae_vector * x,ae_state * _state)4729 void rmulv(ae_int_t n,
4730 double v,
4731 /* Real */ ae_vector* x,
4732 ae_state *_state)
4733 {
4734 ae_int_t i;
4735
4736
4737 for(i=0; i<=n-1; i++)
4738 {
4739 x->ptr.p_double[i] = x->ptr.p_double[i]*v;
4740 }
4741 }
4742 #endif
4743
4744
4745 #ifdef ALGLIB_NO_FAST_KERNELS
4746 /*************************************************************************
4747 Performs inplace multiplication of X[] by V
4748
4749 INPUT PARAMETERS:
4750 N - row length
4751 X - array[?,N], row to process
4752 V - multiplier
4753
4754 OUTPUT PARAMETERS:
4755 X - elements 0...N-1 of row RowIdx are multiplied by V
4756
4757 -- ALGLIB --
4758 Copyright 20.01.2020 by Bochkanov Sergey
4759 *************************************************************************/
rmulr(ae_int_t n,double v,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4760 void rmulr(ae_int_t n,
4761 double v,
4762 /* Real */ ae_matrix* x,
4763 ae_int_t rowidx,
4764 ae_state *_state)
4765 {
4766 ae_int_t i;
4767
4768
4769 for(i=0; i<=n-1; i++)
4770 {
4771 x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*v;
4772 }
4773 }
4774 #endif
4775
4776
4777 #ifdef ALGLIB_NO_FAST_KERNELS
4778 /*************************************************************************
4779 Performs inplace multiplication of X[OffsX:OffsX+N-1] by V
4780
4781 INPUT PARAMETERS:
4782 N - subvector length
4783 X - vector to process
4784 V - multiplier
4785
4786 OUTPUT PARAMETERS:
4787 X - elements OffsX:OffsX+N-1 multiplied by V
4788
4789 -- ALGLIB --
4790 Copyright 20.01.2020 by Bochkanov Sergey
4791 *************************************************************************/
rmulvx(ae_int_t n,double v,ae_vector * x,ae_int_t offsx,ae_state * _state)4792 void rmulvx(ae_int_t n,
4793 double v,
4794 /* Real */ ae_vector* x,
4795 ae_int_t offsx,
4796 ae_state *_state)
4797 {
4798 ae_int_t i;
4799
4800
4801 for(i=0; i<=n-1; i++)
4802 {
4803 x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]*v;
4804 }
4805 }
4806 #endif
4807
4808
4809 #ifdef ALGLIB_NO_FAST_KERNELS
4810 /*************************************************************************
4811 Returns maximum X
4812
4813 INPUT PARAMETERS:
4814 N - vector length
4815 X - array[N], vector to process
4816
4817 OUTPUT PARAMETERS:
4818 max(X[i])
4819 zero for N=0
4820
4821 -- ALGLIB --
4822 Copyright 20.01.2020 by Bochkanov Sergey
4823 *************************************************************************/
rmaxv(ae_int_t n,ae_vector * x,ae_state * _state)4824 double rmaxv(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
4825 {
4826 ae_int_t i;
4827 double v;
4828 double result;
4829
4830
4831 if( n<=0 )
4832 {
4833 result = (double)(0);
4834 return result;
4835 }
4836 result = x->ptr.p_double[0];
4837 for(i=1; i<=n-1; i++)
4838 {
4839 v = x->ptr.p_double[i];
4840 if( v>result )
4841 {
4842 result = v;
4843 }
4844 }
4845 return result;
4846 }
4847 #endif
4848
4849
4850 #ifdef ALGLIB_NO_FAST_KERNELS
4851 /*************************************************************************
4852 Returns maximum |X|
4853
4854 INPUT PARAMETERS:
4855 N - vector length
4856 X - array[N], vector to process
4857
4858 OUTPUT PARAMETERS:
4859 max(|X[i]|)
4860 zero for N=0
4861
4862 -- ALGLIB --
4863 Copyright 20.01.2020 by Bochkanov Sergey
4864 *************************************************************************/
rmaxabsv(ae_int_t n,ae_vector * x,ae_state * _state)4865 double rmaxabsv(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
4866 {
4867 ae_int_t i;
4868 double v;
4869 double result;
4870
4871
4872 result = (double)(0);
4873 for(i=0; i<=n-1; i++)
4874 {
4875 v = ae_fabs(x->ptr.p_double[i], _state);
4876 if( v>result )
4877 {
4878 result = v;
4879 }
4880 }
4881 return result;
4882 }
4883 #endif
4884
4885
4886 #ifdef ALGLIB_NO_FAST_KERNELS
4887 /*************************************************************************
4888 Returns maximum X
4889
4890 INPUT PARAMETERS:
4891 N - vector length
4892 X - matrix to process, RowIdx-th row is processed
4893
4894 OUTPUT PARAMETERS:
4895 max(X[RowIdx,i])
4896 zero for N=0
4897
4898 -- ALGLIB --
4899 Copyright 20.01.2020 by Bochkanov Sergey
4900 *************************************************************************/
rmaxr(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4901 double rmaxr(ae_int_t n,
4902 /* Real */ ae_matrix* x,
4903 ae_int_t rowidx,
4904 ae_state *_state)
4905 {
4906 ae_int_t i;
4907 double v;
4908 double result;
4909
4910
4911 if( n<=0 )
4912 {
4913 result = (double)(0);
4914 return result;
4915 }
4916 result = x->ptr.pp_double[rowidx][0];
4917 for(i=1; i<=n-1; i++)
4918 {
4919 v = x->ptr.pp_double[rowidx][i];
4920 if( v>result )
4921 {
4922 result = v;
4923 }
4924 }
4925 return result;
4926 }
4927 #endif
4928
4929
4930 #ifdef ALGLIB_NO_FAST_KERNELS
4931 /*************************************************************************
4932 Returns maximum |X|
4933
4934 INPUT PARAMETERS:
4935 N - vector length
4936 X - matrix to process, RowIdx-th row is processed
4937
4938 OUTPUT PARAMETERS:
4939 max(|X[RowIdx,i]|)
4940 zero for N=0
4941
4942 -- ALGLIB --
4943 Copyright 20.01.2020 by Bochkanov Sergey
4944 *************************************************************************/
rmaxabsr(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4945 double rmaxabsr(ae_int_t n,
4946 /* Real */ ae_matrix* x,
4947 ae_int_t rowidx,
4948 ae_state *_state)
4949 {
4950 ae_int_t i;
4951 double v;
4952 double result;
4953
4954
4955 result = (double)(0);
4956 for(i=0; i<=n-1; i++)
4957 {
4958 v = ae_fabs(x->ptr.pp_double[rowidx][i], _state);
4959 if( v>result )
4960 {
4961 result = v;
4962 }
4963 }
4964 return result;
4965 }
4966 #endif
4967
4968
4969 #ifdef ALGLIB_NO_FAST_KERNELS
4970 /*************************************************************************
4971 Sets vector X[] to V
4972
4973 INPUT PARAMETERS:
4974 N - vector length
4975 V - value to set
4976 X - array[N]
4977
4978 OUTPUT PARAMETERS:
4979 X - leading N elements are replaced by V
4980
4981 -- ALGLIB --
4982 Copyright 20.01.2020 by Bochkanov Sergey
4983 *************************************************************************/
rsetv(ae_int_t n,double v,ae_vector * x,ae_state * _state)4984 void rsetv(ae_int_t n,
4985 double v,
4986 /* Real */ ae_vector* x,
4987 ae_state *_state)
4988 {
4989 ae_int_t j;
4990
4991
4992 for(j=0; j<=n-1; j++)
4993 {
4994 x->ptr.p_double[j] = v;
4995 }
4996 }
4997 #endif
4998
4999
5000 #ifdef ALGLIB_NO_FAST_KERNELS
5001 /*************************************************************************
5002 Sets X[OffsX:OffsX+N-1] to V
5003
5004 INPUT PARAMETERS:
5005 N - subvector length
5006 V - value to set
5007 X - array[N]
5008
5009 OUTPUT PARAMETERS:
5010 X - X[OffsX:OffsX+N-1] is replaced by V
5011
5012 -- ALGLIB --
5013 Copyright 20.01.2020 by Bochkanov Sergey
5014 *************************************************************************/
rsetvx(ae_int_t n,double v,ae_vector * x,ae_int_t offsx,ae_state * _state)5015 void rsetvx(ae_int_t n,
5016 double v,
5017 /* Real */ ae_vector* x,
5018 ae_int_t offsx,
5019 ae_state *_state)
5020 {
5021 ae_int_t j;
5022
5023
5024 for(j=0; j<=n-1; j++)
5025 {
5026 x->ptr.p_double[offsx+j] = v;
5027 }
5028 }
5029 #endif
5030
5031
5032 #ifdef ALGLIB_NO_FAST_KERNELS
5033 /*************************************************************************
5034 Sets vector X[] to V
5035
5036 INPUT PARAMETERS:
5037 N - vector length
5038 V - value to set
5039 X - array[N]
5040
5041 OUTPUT PARAMETERS:
5042 X - leading N elements are replaced by V
5043
5044 -- ALGLIB --
5045 Copyright 20.01.2020 by Bochkanov Sergey
5046 *************************************************************************/
isetv(ae_int_t n,ae_int_t v,ae_vector * x,ae_state * _state)5047 void isetv(ae_int_t n,
5048 ae_int_t v,
5049 /* Integer */ ae_vector* x,
5050 ae_state *_state)
5051 {
5052 ae_int_t j;
5053
5054
5055 for(j=0; j<=n-1; j++)
5056 {
5057 x->ptr.p_int[j] = v;
5058 }
5059 }
5060 #endif
5061
5062
5063 #ifdef ALGLIB_NO_FAST_KERNELS
5064 /*************************************************************************
5065 Sets vector X[] to V
5066
5067 INPUT PARAMETERS:
5068 N - vector length
5069 V - value to set
5070 X - array[N]
5071
5072 OUTPUT PARAMETERS:
5073 X - leading N elements are replaced by V
5074
5075 -- ALGLIB --
5076 Copyright 20.01.2020 by Bochkanov Sergey
5077 *************************************************************************/
bsetv(ae_int_t n,ae_bool v,ae_vector * x,ae_state * _state)5078 void bsetv(ae_int_t n,
5079 ae_bool v,
5080 /* Boolean */ ae_vector* x,
5081 ae_state *_state)
5082 {
5083 ae_int_t j;
5084
5085
5086 for(j=0; j<=n-1; j++)
5087 {
5088 x->ptr.p_bool[j] = v;
5089 }
5090 }
5091 #endif
5092
5093
5094 #ifdef ALGLIB_NO_FAST_KERNELS
5095 /*************************************************************************
5096 Sets matrix A[] to V
5097
5098 INPUT PARAMETERS:
5099 M, N - rows/cols count
5100 V - value to set
5101 A - array[M,N]
5102
5103 OUTPUT PARAMETERS:
5104 A - leading M rows, N cols are replaced by V
5105
5106 -- ALGLIB --
5107 Copyright 20.01.2020 by Bochkanov Sergey
5108 *************************************************************************/
rsetm(ae_int_t m,ae_int_t n,double v,ae_matrix * a,ae_state * _state)5109 void rsetm(ae_int_t m,
5110 ae_int_t n,
5111 double v,
5112 /* Real */ ae_matrix* a,
5113 ae_state *_state)
5114 {
5115 ae_int_t i;
5116 ae_int_t j;
5117
5118
5119 for(i=0; i<=m-1; i++)
5120 {
5121 for(j=0; j<=n-1; j++)
5122 {
5123 a->ptr.pp_double[i][j] = v;
5124 }
5125 }
5126 }
5127 #endif
5128
5129
5130 /*************************************************************************
5131 Sets vector X[] to V, reallocating X[] if too small
5132
5133 INPUT PARAMETERS:
5134 N - vector length
5135 V - value to set
5136 X - possibly preallocated array
5137
5138 OUTPUT PARAMETERS:
5139 X - leading N elements are replaced by V; array is reallocated
5140 if its length is less than N.
5141
5142 -- ALGLIB --
5143 Copyright 20.01.2020 by Bochkanov Sergey
5144 *************************************************************************/
rsetallocv(ae_int_t n,double v,ae_vector * x,ae_state * _state)5145 void rsetallocv(ae_int_t n,
5146 double v,
5147 /* Real */ ae_vector* x,
5148 ae_state *_state)
5149 {
5150
5151
5152 if( x->cnt<n )
5153 {
5154 ae_vector_set_length(x, n, _state);
5155 }
5156 rsetv(n, v, x, _state);
5157 }
5158
5159
5160 /*************************************************************************
5161 Sets vector A[] to V, reallocating A[] if too small.
5162
5163 INPUT PARAMETERS:
5164 M - rows count
5165 N - cols count
5166 V - value to set
5167 A - possibly preallocated matrix
5168
5169 OUTPUT PARAMETERS:
5170 A - leading M rows, N cols are replaced by V; the matrix is
5171 reallocated if its rows/cols count is less than M/N.
5172
5173 -- ALGLIB --
5174 Copyright 20.01.2020 by Bochkanov Sergey
5175 *************************************************************************/
rsetallocm(ae_int_t m,ae_int_t n,double v,ae_matrix * a,ae_state * _state)5176 void rsetallocm(ae_int_t m,
5177 ae_int_t n,
5178 double v,
5179 /* Real */ ae_matrix* a,
5180 ae_state *_state)
5181 {
5182
5183
5184 if( a->rows<m||a->cols<n )
5185 {
5186 ae_matrix_set_length(a, m, n, _state);
5187 }
5188 rsetm(m, n, v, a, _state);
5189 }
5190
5191
5192 /*************************************************************************
5193 Reallocates X[] if its length is less than required value. Does not change
5194 its length and contents if it is large enough.
5195
5196 INPUT PARAMETERS:
5197 N - desired vector length
5198 X - possibly preallocated array
5199
5200 OUTPUT PARAMETERS:
5201 X - length(X)>=N
5202
5203 -- ALGLIB --
5204 Copyright 20.01.2020 by Bochkanov Sergey
5205 *************************************************************************/
rallocv(ae_int_t n,ae_vector * x,ae_state * _state)5206 void rallocv(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
5207 {
5208
5209
5210 if( x->cnt<n )
5211 {
5212 ae_vector_set_length(x, n, _state);
5213 }
5214 }
5215
5216
5217 /*************************************************************************
5218 Reallocates X[] if its length is less than required value. Does not change
5219 its length and contents if it is large enough.
5220
5221 INPUT PARAMETERS:
5222 N - desired vector length
5223 X - possibly preallocated array
5224
5225 OUTPUT PARAMETERS:
5226 X - length(X)>=N
5227
5228 -- ALGLIB --
5229 Copyright 20.01.2020 by Bochkanov Sergey
5230 *************************************************************************/
iallocv(ae_int_t n,ae_vector * x,ae_state * _state)5231 void iallocv(ae_int_t n, /* Integer */ ae_vector* x, ae_state *_state)
5232 {
5233
5234
5235 if( x->cnt<n )
5236 {
5237 ae_vector_set_length(x, n, _state);
5238 }
5239 }
5240
5241
5242 /*************************************************************************
5243 Reallocates X[] if its length is less than required value. Does not change
5244 its length and contents if it is large enough.
5245
5246 INPUT PARAMETERS:
5247 N - desired vector length
5248 X - possibly preallocated array
5249
5250 OUTPUT PARAMETERS:
5251 X - length(X)>=N
5252
5253 -- ALGLIB --
5254 Copyright 20.01.2020 by Bochkanov Sergey
5255 *************************************************************************/
ballocv(ae_int_t n,ae_vector * x,ae_state * _state)5256 void ballocv(ae_int_t n, /* Boolean */ ae_vector* x, ae_state *_state)
5257 {
5258
5259
5260 if( x->cnt<n )
5261 {
5262 ae_vector_set_length(x, n, _state);
5263 }
5264 }
5265
5266
5267 /*************************************************************************
5268 Reallocates matrix if its rows or cols count is less than required. Does
5269 not change its size if it is exactly that size or larger.
5270
5271 INPUT PARAMETERS:
5272 M - rows count
5273 N - cols count
5274 A - possibly preallocated matrix
5275
5276 OUTPUT PARAMETERS:
5277 A - size is at least M*N
5278
5279 -- ALGLIB --
5280 Copyright 20.01.2020 by Bochkanov Sergey
5281 *************************************************************************/
rallocm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_state * _state)5282 void rallocm(ae_int_t m,
5283 ae_int_t n,
5284 /* Real */ ae_matrix* a,
5285 ae_state *_state)
5286 {
5287
5288
5289 if( a->rows<m||a->cols<n )
5290 {
5291 ae_matrix_set_length(a, m, n, _state);
5292 }
5293 }
5294
5295
5296 /*************************************************************************
5297 Sets vector X[] to V, reallocating X[] if too small
5298
5299 INPUT PARAMETERS:
5300 N - vector length
5301 V - value to set
5302 X - possibly preallocated array
5303
5304 OUTPUT PARAMETERS:
5305 X - leading N elements are replaced by V; array is reallocated
5306 if its length is less than N.
5307
5308 -- ALGLIB --
5309 Copyright 20.01.2020 by Bochkanov Sergey
5310 *************************************************************************/
isetallocv(ae_int_t n,ae_int_t v,ae_vector * x,ae_state * _state)5311 void isetallocv(ae_int_t n,
5312 ae_int_t v,
5313 /* Integer */ ae_vector* x,
5314 ae_state *_state)
5315 {
5316
5317
5318 if( x->cnt<n )
5319 {
5320 ae_vector_set_length(x, n, _state);
5321 }
5322 isetv(n, v, x, _state);
5323 }
5324
5325
5326 /*************************************************************************
5327 Sets vector X[] to V, reallocating X[] if too small
5328
5329 INPUT PARAMETERS:
5330 N - vector length
5331 V - value to set
5332 X - possibly preallocated array
5333
5334 OUTPUT PARAMETERS:
5335 X - leading N elements are replaced by V; array is reallocated
5336 if its length is less than N.
5337
5338 -- ALGLIB --
5339 Copyright 20.01.2020 by Bochkanov Sergey
5340 *************************************************************************/
bsetallocv(ae_int_t n,ae_bool v,ae_vector * x,ae_state * _state)5341 void bsetallocv(ae_int_t n,
5342 ae_bool v,
5343 /* Boolean */ ae_vector* x,
5344 ae_state *_state)
5345 {
5346
5347
5348 if( x->cnt<n )
5349 {
5350 ae_vector_set_length(x, n, _state);
5351 }
5352 bsetv(n, v, x, _state);
5353 }
5354
5355
5356 #ifdef ALGLIB_NO_FAST_KERNELS
5357 /*************************************************************************
5358 Sets row I of A[,] to V
5359
5360 INPUT PARAMETERS:
5361 N - vector length
5362 V - value to set
5363 A - array[N,N] or larger
5364 I - row index
5365
5366 OUTPUT PARAMETERS:
5367 A - leading N elements of I-th row are replaced by V
5368
5369 -- ALGLIB --
5370 Copyright 20.01.2020 by Bochkanov Sergey
5371 *************************************************************************/
rsetr(ae_int_t n,double v,ae_matrix * a,ae_int_t i,ae_state * _state)5372 void rsetr(ae_int_t n,
5373 double v,
5374 /* Real */ ae_matrix* a,
5375 ae_int_t i,
5376 ae_state *_state)
5377 {
5378 ae_int_t j;
5379
5380
5381 for(j=0; j<=n-1; j++)
5382 {
5383 a->ptr.pp_double[i][j] = v;
5384 }
5385 }
5386 #endif
5387
5388
5389 /*************************************************************************
5390 Sets col J of A[,] to V
5391
5392 INPUT PARAMETERS:
5393 N - vector length
5394 V - value to set
5395 A - array[N,N] or larger
5396 J - col index
5397
5398 OUTPUT PARAMETERS:
5399 A - leading N elements of I-th col are replaced by V
5400
5401 -- ALGLIB --
5402 Copyright 20.01.2020 by Bochkanov Sergey
5403 *************************************************************************/
rsetc(ae_int_t n,double v,ae_matrix * a,ae_int_t j,ae_state * _state)5404 void rsetc(ae_int_t n,
5405 double v,
5406 /* Real */ ae_matrix* a,
5407 ae_int_t j,
5408 ae_state *_state)
5409 {
5410 ae_int_t i;
5411
5412
5413 for(i=0; i<=n-1; i++)
5414 {
5415 a->ptr.pp_double[i][j] = v;
5416 }
5417 }
5418
5419
5420 #ifdef ALGLIB_NO_FAST_KERNELS
5421 /*************************************************************************
5422 Copies vector X[] to Y[]
5423
5424 INPUT PARAMETERS:
5425 N - vector length
5426 X - array[N], source
5427 Y - preallocated array[N]
5428
5429 OUTPUT PARAMETERS:
5430 Y - leading N elements are replaced by X
5431
5432
5433 NOTE: destination and source should NOT overlap
5434
5435 -- ALGLIB --
5436 Copyright 20.01.2020 by Bochkanov Sergey
5437 *************************************************************************/
rcopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5438 void rcopyv(ae_int_t n,
5439 /* Real */ ae_vector* x,
5440 /* Real */ ae_vector* y,
5441 ae_state *_state)
5442 {
5443 ae_int_t j;
5444
5445
5446 for(j=0; j<=n-1; j++)
5447 {
5448 y->ptr.p_double[j] = x->ptr.p_double[j];
5449 }
5450 }
5451 #endif
5452
5453
5454 #ifdef ALGLIB_NO_FAST_KERNELS
5455 /*************************************************************************
5456 Copies vector X[] to Y[]
5457
5458 INPUT PARAMETERS:
5459 N - vector length
5460 X - array[N], source
5461 Y - preallocated array[N]
5462
5463 OUTPUT PARAMETERS:
5464 Y - leading N elements are replaced by X
5465
5466
5467 NOTE: destination and source should NOT overlap
5468
5469 -- ALGLIB --
5470 Copyright 20.01.2020 by Bochkanov Sergey
5471 *************************************************************************/
bcopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5472 void bcopyv(ae_int_t n,
5473 /* Boolean */ ae_vector* x,
5474 /* Boolean */ ae_vector* y,
5475 ae_state *_state)
5476 {
5477 ae_int_t j;
5478
5479
5480 for(j=0; j<=n-1; j++)
5481 {
5482 y->ptr.p_bool[j] = x->ptr.p_bool[j];
5483 }
5484 }
5485 #endif
5486
5487
5488 #ifdef ALGLIB_NO_FAST_KERNELS
5489 /*************************************************************************
5490 Copies vector X[] to Y[], extended version
5491
5492 INPUT PARAMETERS:
5493 N - vector length
5494 X - source array
5495 OffsX - source offset
5496 Y - preallocated array[N]
5497 OffsY - destination offset
5498
5499 OUTPUT PARAMETERS:
5500 Y - N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
5501
5502 NOTE: destination and source should NOT overlap
5503
5504 -- ALGLIB --
5505 Copyright 20.01.2020 by Bochkanov Sergey
5506 *************************************************************************/
rcopyvx(ae_int_t n,ae_vector * x,ae_int_t offsx,ae_vector * y,ae_int_t offsy,ae_state * _state)5507 void rcopyvx(ae_int_t n,
5508 /* Real */ ae_vector* x,
5509 ae_int_t offsx,
5510 /* Real */ ae_vector* y,
5511 ae_int_t offsy,
5512 ae_state *_state)
5513 {
5514 ae_int_t j;
5515
5516
5517 for(j=0; j<=n-1; j++)
5518 {
5519 y->ptr.p_double[offsy+j] = x->ptr.p_double[offsx+j];
5520 }
5521 }
5522 #endif
5523
5524
5525 /*************************************************************************
5526 Copies vector X[] to Y[], resizing Y[] if needed.
5527
5528 INPUT PARAMETERS:
5529 N - vector length
5530 X - array[N], source
5531 Y - possibly preallocated array[N] (resized if needed)
5532
5533 OUTPUT PARAMETERS:
5534 Y - leading N elements are replaced by X
5535
5536 -- ALGLIB --
5537 Copyright 20.01.2020 by Bochkanov Sergey
5538 *************************************************************************/
rcopyallocv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5539 void rcopyallocv(ae_int_t n,
5540 /* Real */ ae_vector* x,
5541 /* Real */ ae_vector* y,
5542 ae_state *_state)
5543 {
5544
5545
5546 if( y->cnt<n )
5547 {
5548 ae_vector_set_length(y, n, _state);
5549 }
5550 rcopyv(n, x, y, _state);
5551 }
5552
5553
5554 /*************************************************************************
5555 Copies matrix X[] to Y[], resizing Y[] if needed. On resize, dimensions of
5556 Y[] are increased - but not decreased.
5557
5558 INPUT PARAMETERS:
5559 M - rows count
5560 N - cols count
5561 X - array[M,N], source
5562 Y - possibly preallocated array[M,N] (resized if needed)
5563
5564 OUTPUT PARAMETERS:
5565 Y - leading [M,N] elements are replaced by X
5566
5567 -- ALGLIB --
5568 Copyright 20.01.2020 by Bochkanov Sergey
5569 *************************************************************************/
rcopym(ae_int_t m,ae_int_t n,ae_matrix * x,ae_matrix * y,ae_state * _state)5570 void rcopym(ae_int_t m,
5571 ae_int_t n,
5572 /* Real */ ae_matrix* x,
5573 /* Real */ ae_matrix* y,
5574 ae_state *_state)
5575 {
5576 ae_int_t i;
5577 ae_int_t j;
5578
5579
5580 if( m==0||n==0 )
5581 {
5582 return;
5583 }
5584 for(i=0; i<=m-1; i++)
5585 {
5586 for(j=0; j<=n-1; j++)
5587 {
5588 y->ptr.pp_double[i][j] = x->ptr.pp_double[i][j];
5589 }
5590 }
5591 }
5592
5593
5594 /*************************************************************************
5595 Copies matrix X[] to Y[], resizing Y[] if needed. On resize, dimensions of
5596 Y[] are increased - but not decreased.
5597
5598 INPUT PARAMETERS:
5599 M - rows count
5600 N - cols count
5601 X - array[M,N], source
5602 Y - possibly preallocated array[M,N] (resized if needed)
5603
5604 OUTPUT PARAMETERS:
5605 Y - leading [M,N] elements are replaced by X
5606
5607 -- ALGLIB --
5608 Copyright 20.01.2020 by Bochkanov Sergey
5609 *************************************************************************/
rcopyallocm(ae_int_t m,ae_int_t n,ae_matrix * x,ae_matrix * y,ae_state * _state)5610 void rcopyallocm(ae_int_t m,
5611 ae_int_t n,
5612 /* Real */ ae_matrix* x,
5613 /* Real */ ae_matrix* y,
5614 ae_state *_state)
5615 {
5616
5617
5618 if( m==0||n==0 )
5619 {
5620 return;
5621 }
5622 if( y->rows<m||y->cols<n )
5623 {
5624 ae_matrix_set_length(y, ae_maxint(m, y->rows, _state), ae_maxint(n, y->cols, _state), _state);
5625 }
5626 rcopym(m, n, x, y, _state);
5627 }
5628
5629
5630 /*************************************************************************
5631 Copies vector X[] to Y[], resizing Y[] if needed.
5632
5633 INPUT PARAMETERS:
5634 N - vector length
5635 X - array[N], source
5636 Y - possibly preallocated array[N] (resized if needed)
5637
5638 OUTPUT PARAMETERS:
5639 Y - leading N elements are replaced by X
5640
5641 -- ALGLIB --
5642 Copyright 20.01.2020 by Bochkanov Sergey
5643 *************************************************************************/
icopyallocv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5644 void icopyallocv(ae_int_t n,
5645 /* Integer */ ae_vector* x,
5646 /* Integer */ ae_vector* y,
5647 ae_state *_state)
5648 {
5649
5650
5651 if( y->cnt<n )
5652 {
5653 ae_vector_set_length(y, n, _state);
5654 }
5655 icopyv(n, x, y, _state);
5656 }
5657
5658
5659 /*************************************************************************
5660 Copies vector X[] to Y[], resizing Y[] if needed.
5661
5662 INPUT PARAMETERS:
5663 N - vector length
5664 X - array[N], source
5665 Y - possibly preallocated array[N] (resized if needed)
5666
5667 OUTPUT PARAMETERS:
5668 Y - leading N elements are replaced by X
5669
5670 -- ALGLIB --
5671 Copyright 20.01.2020 by Bochkanov Sergey
5672 *************************************************************************/
bcopyallocv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5673 void bcopyallocv(ae_int_t n,
5674 /* Boolean */ ae_vector* x,
5675 /* Boolean */ ae_vector* y,
5676 ae_state *_state)
5677 {
5678
5679
5680 if( y->cnt<n )
5681 {
5682 ae_vector_set_length(y, n, _state);
5683 }
5684 bcopyv(n, x, y, _state);
5685 }
5686
5687
5688 #ifdef ALGLIB_NO_FAST_KERNELS
5689 /*************************************************************************
5690 Copies vector X[] to Y[]
5691
5692 INPUT PARAMETERS:
5693 N - vector length
5694 X - source array
5695 Y - preallocated array[N]
5696
5697 OUTPUT PARAMETERS:
5698 Y - X copied to Y
5699
5700 -- ALGLIB --
5701 Copyright 20.01.2020 by Bochkanov Sergey
5702 *************************************************************************/
icopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5703 void icopyv(ae_int_t n,
5704 /* Integer */ ae_vector* x,
5705 /* Integer */ ae_vector* y,
5706 ae_state *_state)
5707 {
5708 ae_int_t j;
5709
5710
5711 for(j=0; j<=n-1; j++)
5712 {
5713 y->ptr.p_int[j] = x->ptr.p_int[j];
5714 }
5715 }
5716 #endif
5717
5718
5719 #ifdef ALGLIB_NO_FAST_KERNELS
5720 /*************************************************************************
5721 Copies vector X[] to Y[], extended version
5722
5723 INPUT PARAMETERS:
5724 N - vector length
5725 X - source array
5726 OffsX - source offset
5727 Y - preallocated array[N]
5728 OffsY - destination offset
5729
5730 OUTPUT PARAMETERS:
5731 Y - N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
5732
5733 NOTE: destination and source should NOT overlap
5734
5735 -- ALGLIB --
5736 Copyright 20.01.2020 by Bochkanov Sergey
5737 *************************************************************************/
icopyvx(ae_int_t n,ae_vector * x,ae_int_t offsx,ae_vector * y,ae_int_t offsy,ae_state * _state)5738 void icopyvx(ae_int_t n,
5739 /* Integer */ ae_vector* x,
5740 ae_int_t offsx,
5741 /* Integer */ ae_vector* y,
5742 ae_int_t offsy,
5743 ae_state *_state)
5744 {
5745 ae_int_t j;
5746
5747
5748 for(j=0; j<=n-1; j++)
5749 {
5750 y->ptr.p_int[offsy+j] = x->ptr.p_int[offsx+j];
5751 }
5752 }
5753 #endif
5754
5755
5756 /*************************************************************************
5757 Grows X, i.e. changes its size in such a way that:
5758 a) contents is preserved
5759 b) new size is at least N
5760 c) actual size can be larger than N, so subsequent grow() calls can return
5761 without reallocation
5762
5763 -- ALGLIB --
5764 Copyright 20.03.2009 by Bochkanov Sergey
5765 *************************************************************************/
igrowv(ae_int_t newn,ae_vector * x,ae_state * _state)5766 void igrowv(ae_int_t newn, /* Integer */ ae_vector* x, ae_state *_state)
5767 {
5768 ae_frame _frame_block;
5769 ae_vector oldx;
5770 ae_int_t oldn;
5771
5772 ae_frame_make(_state, &_frame_block);
5773 memset(&oldx, 0, sizeof(oldx));
5774 ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
5775
5776 if( x->cnt>=newn )
5777 {
5778 ae_frame_leave(_state);
5779 return;
5780 }
5781 oldn = x->cnt;
5782 newn = ae_maxint(newn, ae_round(1.8*oldn+1, _state), _state);
5783 ae_swap_vectors(x, &oldx);
5784 ae_vector_set_length(x, newn, _state);
5785 icopyv(oldn, &oldx, x, _state);
5786 ae_frame_leave(_state);
5787 }
5788
5789
5790 /*************************************************************************
5791 Grows X, i.e. changes its size in such a way that:
5792 a) contents is preserved
5793 b) new size is at least N
5794 c) actual size can be larger than N, so subsequent grow() calls can return
5795 without reallocation
5796
5797 -- ALGLIB --
5798 Copyright 20.03.2009 by Bochkanov Sergey
5799 *************************************************************************/
rgrowv(ae_int_t newn,ae_vector * x,ae_state * _state)5800 void rgrowv(ae_int_t newn, /* Real */ ae_vector* x, ae_state *_state)
5801 {
5802 ae_frame _frame_block;
5803 ae_vector oldx;
5804 ae_int_t oldn;
5805
5806 ae_frame_make(_state, &_frame_block);
5807 memset(&oldx, 0, sizeof(oldx));
5808 ae_vector_init(&oldx, 0, DT_REAL, _state, ae_true);
5809
5810 if( x->cnt>=newn )
5811 {
5812 ae_frame_leave(_state);
5813 return;
5814 }
5815 oldn = x->cnt;
5816 newn = ae_maxint(newn, ae_round(1.8*oldn+1, _state), _state);
5817 ae_swap_vectors(x, &oldx);
5818 ae_vector_set_length(x, newn, _state);
5819 rcopyv(oldn, &oldx, x, _state);
5820 ae_frame_leave(_state);
5821 }
5822
5823
5824 #ifdef ALGLIB_NO_FAST_KERNELS
5825 /*************************************************************************
5826 Performs copying with multiplication of V*X[] to Y[]
5827
5828 INPUT PARAMETERS:
5829 N - vector length
5830 V - multiplier
5831 X - array[N], source
5832 Y - preallocated array[N]
5833
5834 OUTPUT PARAMETERS:
5835 Y - array[N], Y = V*X
5836
5837 -- ALGLIB --
5838 Copyright 20.01.2020 by Bochkanov Sergey
5839 *************************************************************************/
rcopymulv(ae_int_t n,double v,ae_vector * x,ae_vector * y,ae_state * _state)5840 void rcopymulv(ae_int_t n,
5841 double v,
5842 /* Real */ ae_vector* x,
5843 /* Real */ ae_vector* y,
5844 ae_state *_state)
5845 {
5846 ae_int_t i;
5847
5848
5849 for(i=0; i<=n-1; i++)
5850 {
5851 y->ptr.p_double[i] = v*x->ptr.p_double[i];
5852 }
5853 }
5854 #endif
5855
5856
5857 #ifdef ALGLIB_NO_FAST_KERNELS
5858 /*************************************************************************
5859 Performs copying with multiplication of V*X[] to Y[I,*]
5860
5861 INPUT PARAMETERS:
5862 N - vector length
5863 V - multiplier
5864 X - array[N], source
5865 Y - preallocated array[?,N]
5866 RIdx - destination row index
5867
5868 OUTPUT PARAMETERS:
5869 Y - Y[RIdx,...] = V*X
5870
5871 -- ALGLIB --
5872 Copyright 20.01.2020 by Bochkanov Sergey
5873 *************************************************************************/
rcopymulvr(ae_int_t n,double v,ae_vector * x,ae_matrix * y,ae_int_t ridx,ae_state * _state)5874 void rcopymulvr(ae_int_t n,
5875 double v,
5876 /* Real */ ae_vector* x,
5877 /* Real */ ae_matrix* y,
5878 ae_int_t ridx,
5879 ae_state *_state)
5880 {
5881 ae_int_t i;
5882
5883
5884 for(i=0; i<=n-1; i++)
5885 {
5886 y->ptr.pp_double[ridx][i] = v*x->ptr.p_double[i];
5887 }
5888 }
5889 #endif
5890
5891
5892 /*************************************************************************
5893 Performs copying with multiplication of V*X[] to Y[*,J]
5894
5895 INPUT PARAMETERS:
5896 N - vector length
5897 V - multiplier
5898 X - array[N], source
5899 Y - preallocated array[N,?]
5900 CIdx - destination rocol index
5901
5902 OUTPUT PARAMETERS:
5903 Y - Y[RIdx,...] = V*X
5904
5905 -- ALGLIB --
5906 Copyright 20.01.2020 by Bochkanov Sergey
5907 *************************************************************************/
rcopymulvc(ae_int_t n,double v,ae_vector * x,ae_matrix * y,ae_int_t cidx,ae_state * _state)5908 void rcopymulvc(ae_int_t n,
5909 double v,
5910 /* Real */ ae_vector* x,
5911 /* Real */ ae_matrix* y,
5912 ae_int_t cidx,
5913 ae_state *_state)
5914 {
5915 ae_int_t i;
5916
5917
5918 for(i=0; i<=n-1; i++)
5919 {
5920 y->ptr.pp_double[i][cidx] = v*x->ptr.p_double[i];
5921 }
5922 }
5923
5924
5925 #ifdef ALGLIB_NO_FAST_KERNELS
5926 /*************************************************************************
5927 Copies vector X[] to row I of A[,]
5928
5929 INPUT PARAMETERS:
5930 N - vector length
5931 X - array[N], source
5932 A - preallocated 2D array large enough to store result
5933 I - destination row index
5934
5935 OUTPUT PARAMETERS:
5936 A - leading N elements of I-th row are replaced by X
5937
5938 -- ALGLIB --
5939 Copyright 20.01.2020 by Bochkanov Sergey
5940 *************************************************************************/
rcopyvr(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t i,ae_state * _state)5941 void rcopyvr(ae_int_t n,
5942 /* Real */ ae_vector* x,
5943 /* Real */ ae_matrix* a,
5944 ae_int_t i,
5945 ae_state *_state)
5946 {
5947 ae_int_t j;
5948
5949
5950 for(j=0; j<=n-1; j++)
5951 {
5952 a->ptr.pp_double[i][j] = x->ptr.p_double[j];
5953 }
5954 }
5955 #endif
5956
5957
5958 #ifdef ALGLIB_NO_FAST_KERNELS
5959 /*************************************************************************
5960 Copies row I of A[,] to vector X[]
5961
5962 INPUT PARAMETERS:
5963 N - vector length
5964 A - 2D array, source
5965 I - source row index
5966 X - preallocated destination
5967
5968 OUTPUT PARAMETERS:
5969 X - array[N], destination
5970
5971 -- ALGLIB --
5972 Copyright 20.01.2020 by Bochkanov Sergey
5973 *************************************************************************/
rcopyrv(ae_int_t n,ae_matrix * a,ae_int_t i,ae_vector * x,ae_state * _state)5974 void rcopyrv(ae_int_t n,
5975 /* Real */ ae_matrix* a,
5976 ae_int_t i,
5977 /* Real */ ae_vector* x,
5978 ae_state *_state)
5979 {
5980 ae_int_t j;
5981
5982
5983 for(j=0; j<=n-1; j++)
5984 {
5985 x->ptr.p_double[j] = a->ptr.pp_double[i][j];
5986 }
5987 }
5988 #endif
5989
5990
5991 #ifdef ALGLIB_NO_FAST_KERNELS
5992 /*************************************************************************
5993 Copies row I of A[,] to row K of B[,].
5994
5995 A[i,...] and B[k,...] may overlap.
5996
5997 INPUT PARAMETERS:
5998 N - vector length
5999 A - 2D array, source
6000 I - source row index
6001 B - preallocated destination
6002 K - destination row index
6003
6004 OUTPUT PARAMETERS:
6005 B - row K overwritten
6006
6007 -- ALGLIB --
6008 Copyright 20.01.2020 by Bochkanov Sergey
6009 *************************************************************************/
rcopyrr(ae_int_t n,ae_matrix * a,ae_int_t i,ae_matrix * b,ae_int_t k,ae_state * _state)6010 void rcopyrr(ae_int_t n,
6011 /* Real */ ae_matrix* a,
6012 ae_int_t i,
6013 /* Real */ ae_matrix* b,
6014 ae_int_t k,
6015 ae_state *_state)
6016 {
6017 ae_int_t j;
6018
6019
6020 for(j=0; j<=n-1; j++)
6021 {
6022 b->ptr.pp_double[k][j] = a->ptr.pp_double[i][j];
6023 }
6024 }
6025 #endif
6026
6027
6028 /*************************************************************************
6029 Copies vector X[] to column J of A[,]
6030
6031 INPUT PARAMETERS:
6032 N - vector length
6033 X - array[N], source
6034 A - preallocated 2D array large enough to store result
6035 J - destination col index
6036
6037 OUTPUT PARAMETERS:
6038 A - leading N elements of J-th column are replaced by X
6039
6040 -- ALGLIB --
6041 Copyright 20.01.2020 by Bochkanov Sergey
6042 *************************************************************************/
rcopyvc(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t j,ae_state * _state)6043 void rcopyvc(ae_int_t n,
6044 /* Real */ ae_vector* x,
6045 /* Real */ ae_matrix* a,
6046 ae_int_t j,
6047 ae_state *_state)
6048 {
6049 ae_int_t i;
6050
6051
6052 for(i=0; i<=n-1; i++)
6053 {
6054 a->ptr.pp_double[i][j] = x->ptr.p_double[i];
6055 }
6056 }
6057
6058
6059 /*************************************************************************
6060 Copies column J of A[,] to vector X[]
6061
6062 INPUT PARAMETERS:
6063 N - vector length
6064 A - source 2D array
6065 J - source col index
6066
6067 OUTPUT PARAMETERS:
6068 X - preallocated array[N], destination
6069
6070 -- ALGLIB --
6071 Copyright 20.01.2020 by Bochkanov Sergey
6072 *************************************************************************/
rcopycv(ae_int_t n,ae_matrix * a,ae_int_t j,ae_vector * x,ae_state * _state)6073 void rcopycv(ae_int_t n,
6074 /* Real */ ae_matrix* a,
6075 ae_int_t j,
6076 /* Real */ ae_vector* x,
6077 ae_state *_state)
6078 {
6079 ae_int_t i;
6080
6081
6082 for(i=0; i<=n-1; i++)
6083 {
6084 x->ptr.p_double[i] = a->ptr.pp_double[i][j];
6085 }
6086 }
6087
6088
6089 #ifdef ALGLIB_NO_FAST_KERNELS
6090 /*************************************************************************
6091 Matrix-vector product: y := alpha*op(A)*x + beta*y
6092
6093 NOTE: this function expects Y to be large enough to store result. No
6094 automatic preallocation happens for smaller arrays. No integrity
6095 checks is performed for sizes of A, x, y.
6096
6097 INPUT PARAMETERS:
6098 M - number of rows of op(A)
6099 N - number of columns of op(A)
6100 Alpha- coefficient
6101 A - source matrix
6102 OpA - operation type:
6103 * OpA=0 => op(A) = A
6104 * OpA=1 => op(A) = A^T
6105 X - input vector, has at least N elements
6106 Beta- coefficient
6107 Y - preallocated output array, has at least M elements
6108
6109 OUTPUT PARAMETERS:
6110 Y - vector which stores result
6111
6112 HANDLING OF SPECIAL CASES:
6113 * if M=0, then subroutine does nothing. It does not even touch arrays.
6114 * if N=0 or Alpha=0.0, then:
6115 * if Beta=0, then Y is filled by zeros. A and X are not referenced
6116 at all. Initial values of Y are ignored (we do not multiply Y by
6117 zero, we just rewrite it by zeros)
6118 * if Beta<>0, then Y is replaced by Beta*Y
6119 * if M>0, N>0, Alpha<>0, but Beta=0, then Y is replaced by A*x;
6120 initial state of Y is ignored (rewritten by A*x, without initial
6121 multiplication by zeros).
6122
6123
6124 -- ALGLIB routine --
6125
6126 01.09.2021
6127 Bochkanov Sergey
6128 *************************************************************************/
rgemv(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t opa,ae_vector * x,double beta,ae_vector * y,ae_state * _state)6129 void rgemv(ae_int_t m,
6130 ae_int_t n,
6131 double alpha,
6132 /* Real */ ae_matrix* a,
6133 ae_int_t opa,
6134 /* Real */ ae_vector* x,
6135 double beta,
6136 /* Real */ ae_vector* y,
6137 ae_state *_state)
6138 {
6139 ae_int_t i;
6140 ae_int_t j;
6141 double v;
6142
6143
6144
6145 /*
6146 * Properly premultiply Y by Beta.
6147 *
6148 * Quick exit for M=0, N=0 or Alpha=0.
6149 * After this block we have M>0, N>0, Alpha<>0.
6150 */
6151 if( m<=0 )
6152 {
6153 return;
6154 }
6155 if( ae_fp_neq(beta,(double)(0)) )
6156 {
6157 rmulv(m, beta, y, _state);
6158 }
6159 else
6160 {
6161 rsetv(m, 0.0, y, _state);
6162 }
6163 if( n<=0||ae_fp_eq(alpha,0.0) )
6164 {
6165 return;
6166 }
6167
6168 /*
6169 * Generic code
6170 */
6171 if( opa==0 )
6172 {
6173
6174 /*
6175 * y += A*x
6176 */
6177 for(i=0; i<=m-1; i++)
6178 {
6179 v = (double)(0);
6180 for(j=0; j<=n-1; j++)
6181 {
6182 v = v+a->ptr.pp_double[i][j]*x->ptr.p_double[j];
6183 }
6184 y->ptr.p_double[i] = alpha*v+y->ptr.p_double[i];
6185 }
6186 return;
6187 }
6188 if( opa==1 )
6189 {
6190
6191 /*
6192 * y += A^T*x
6193 */
6194 for(i=0; i<=n-1; i++)
6195 {
6196 v = alpha*x->ptr.p_double[i];
6197 for(j=0; j<=m-1; j++)
6198 {
6199 y->ptr.p_double[j] = y->ptr.p_double[j]+v*a->ptr.pp_double[i][j];
6200 }
6201 }
6202 return;
6203 }
6204 }
6205 #endif
6206
6207
6208 #ifdef ALGLIB_NO_FAST_KERNELS
6209 /*************************************************************************
6210 Matrix-vector product: y := alpha*op(A)*x + beta*y
6211
6212 Here x, y, A are subvectors/submatrices of larger vectors/matrices.
6213
6214 NOTE: this function expects Y to be large enough to store result. No
6215 automatic preallocation happens for smaller arrays. No integrity
6216 checks is performed for sizes of A, x, y.
6217
6218 INPUT PARAMETERS:
6219 M - number of rows of op(A)
6220 N - number of columns of op(A)
6221 Alpha- coefficient
6222 A - source matrix
6223 IA - submatrix offset (row index)
6224 JA - submatrix offset (column index)
6225 OpA - operation type:
6226 * OpA=0 => op(A) = A
6227 * OpA=1 => op(A) = A^T
6228 X - input vector, has at least N+IX elements
6229 IX - subvector offset
6230 Beta- coefficient
6231 Y - preallocated output array, has at least M+IY elements
6232 IY - subvector offset
6233
6234 OUTPUT PARAMETERS:
6235 Y - vector which stores result
6236
6237 HANDLING OF SPECIAL CASES:
6238 * if M=0, then subroutine does nothing. It does not even touch arrays.
6239 * if N=0 or Alpha=0.0, then:
6240 * if Beta=0, then Y is filled by zeros. A and X are not referenced
6241 at all. Initial values of Y are ignored (we do not multiply Y by
6242 zero, we just rewrite it by zeros)
6243 * if Beta<>0, then Y is replaced by Beta*Y
6244 * if M>0, N>0, Alpha<>0, but Beta=0, then Y is replaced by A*x;
6245 initial state of Y is ignored (rewritten by A*x, without initial
6246 multiplication by zeros).
6247
6248
6249 -- ALGLIB routine --
6250
6251 01.09.2021
6252 Bochkanov Sergey
6253 *************************************************************************/
rgemvx(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)6254 void rgemvx(ae_int_t m,
6255 ae_int_t n,
6256 double alpha,
6257 /* Real */ ae_matrix* a,
6258 ae_int_t ia,
6259 ae_int_t ja,
6260 ae_int_t opa,
6261 /* Real */ ae_vector* x,
6262 ae_int_t ix,
6263 double beta,
6264 /* Real */ ae_vector* y,
6265 ae_int_t iy,
6266 ae_state *_state)
6267 {
6268 ae_int_t i;
6269 ae_int_t j;
6270 double v;
6271
6272
6273
6274 /*
6275 * Properly premultiply Y by Beta.
6276 *
6277 * Quick exit for M=0, N=0 or Alpha=0.
6278 * After this block we have M>0, N>0, Alpha<>0.
6279 */
6280 if( m<=0 )
6281 {
6282 return;
6283 }
6284 if( ae_fp_neq(beta,(double)(0)) )
6285 {
6286 rmulvx(m, beta, y, iy, _state);
6287 }
6288 else
6289 {
6290 rsetvx(m, 0.0, y, iy, _state);
6291 }
6292 if( n<=0||ae_fp_eq(alpha,0.0) )
6293 {
6294 return;
6295 }
6296
6297 /*
6298 * Generic code
6299 */
6300 if( opa==0 )
6301 {
6302
6303 /*
6304 * y += A*x
6305 */
6306 for(i=0; i<=m-1; i++)
6307 {
6308 v = (double)(0);
6309 for(j=0; j<=n-1; j++)
6310 {
6311 v = v+a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
6312 }
6313 y->ptr.p_double[iy+i] = alpha*v+y->ptr.p_double[iy+i];
6314 }
6315 return;
6316 }
6317 if( opa==1 )
6318 {
6319
6320 /*
6321 * y += A^T*x
6322 */
6323 for(i=0; i<=n-1; i++)
6324 {
6325 v = alpha*x->ptr.p_double[ix+i];
6326 for(j=0; j<=m-1; j++)
6327 {
6328 y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*a->ptr.pp_double[ia+i][ja+j];
6329 }
6330 }
6331 return;
6332 }
6333 }
6334 #endif
6335
6336
6337 #ifdef ALGLIB_NO_FAST_KERNELS
6338 /*************************************************************************
6339 Rank-1 correction: A := A + alpha*u*v'
6340
6341 NOTE: this function expects A to be large enough to store result. No
6342 automatic preallocation happens for smaller arrays. No integrity
6343 checks is performed for sizes of A, u, v.
6344
6345 INPUT PARAMETERS:
6346 M - number of rows
6347 N - number of columns
6348 A - target MxN matrix
6349 Alpha- coefficient
6350 U - vector #1
6351 V - vector #2
6352
6353
6354 -- ALGLIB routine --
6355 07.09.2021
6356 Bochkanov Sergey
6357 *************************************************************************/
rger(ae_int_t m,ae_int_t n,double alpha,ae_vector * u,ae_vector * v,ae_matrix * a,ae_state * _state)6358 void rger(ae_int_t m,
6359 ae_int_t n,
6360 double alpha,
6361 /* Real */ ae_vector* u,
6362 /* Real */ ae_vector* v,
6363 /* Real */ ae_matrix* a,
6364 ae_state *_state)
6365 {
6366 ae_int_t i;
6367 ae_int_t j;
6368 double s;
6369
6370
6371 if( (m<=0||n<=0)||ae_fp_eq(alpha,(double)(0)) )
6372 {
6373 return;
6374 }
6375 for(i=0; i<=m-1; i++)
6376 {
6377 s = alpha*u->ptr.p_double[i];
6378 for(j=0; j<=n-1; j++)
6379 {
6380 a->ptr.pp_double[i][j] = a->ptr.pp_double[i][j]+s*v->ptr.p_double[j];
6381 }
6382 }
6383 }
6384 #endif
6385
6386
6387 #ifdef ALGLIB_NO_FAST_KERNELS
6388 /*************************************************************************
6389 This subroutine solves linear system op(A)*x=b where:
6390 * A is NxN upper/lower triangular/unitriangular matrix
6391 * X and B are Nx1 vectors
6392 * "op" may be identity transformation or transposition
6393
6394 Solution replaces X.
6395
6396 IMPORTANT: * no overflow/underflow/denegeracy tests is performed.
6397 * no integrity checks for operand sizes, out-of-bounds accesses
6398 and so on is performed
6399
6400 INPUT PARAMETERS
6401 N - matrix size, N>=0
6402 A - matrix, actial matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
6403 IA - submatrix offset
6404 JA - submatrix offset
6405 IsUpper - whether matrix is upper triangular
6406 IsUnit - whether matrix is unitriangular
6407 OpType - transformation type:
6408 * 0 - no transformation
6409 * 1 - transposition
6410 X - right part, actual vector is stored in X[IX:IX+N-1]
6411 IX - offset
6412
6413 OUTPUT PARAMETERS
6414 X - solution replaces elements X[IX:IX+N-1]
6415
6416 -- ALGLIB routine --
6417 (c) 07.09.2021 Bochkanov Sergey
6418 *************************************************************************/
rtrsvx(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_int_t ix,ae_state * _state)6419 void rtrsvx(ae_int_t n,
6420 /* Real */ ae_matrix* a,
6421 ae_int_t ia,
6422 ae_int_t ja,
6423 ae_bool isupper,
6424 ae_bool isunit,
6425 ae_int_t optype,
6426 /* Real */ ae_vector* x,
6427 ae_int_t ix,
6428 ae_state *_state)
6429 {
6430 ae_int_t i;
6431 ae_int_t j;
6432 double v;
6433
6434
6435 if( n<=0 )
6436 {
6437 return;
6438 }
6439 if( optype==0&&isupper )
6440 {
6441 for(i=n-1; i>=0; i--)
6442 {
6443 v = x->ptr.p_double[ix+i];
6444 for(j=i+1; j<=n-1; j++)
6445 {
6446 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
6447 }
6448 if( !isunit )
6449 {
6450 v = v/a->ptr.pp_double[ia+i][ja+i];
6451 }
6452 x->ptr.p_double[ix+i] = v;
6453 }
6454 return;
6455 }
6456 if( optype==0&&!isupper )
6457 {
6458 for(i=0; i<=n-1; i++)
6459 {
6460 v = x->ptr.p_double[ix+i];
6461 for(j=0; j<=i-1; j++)
6462 {
6463 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
6464 }
6465 if( !isunit )
6466 {
6467 v = v/a->ptr.pp_double[ia+i][ja+i];
6468 }
6469 x->ptr.p_double[ix+i] = v;
6470 }
6471 return;
6472 }
6473 if( optype==1&&isupper )
6474 {
6475 for(i=0; i<=n-1; i++)
6476 {
6477 v = x->ptr.p_double[ix+i];
6478 if( !isunit )
6479 {
6480 v = v/a->ptr.pp_double[ia+i][ja+i];
6481 }
6482 x->ptr.p_double[ix+i] = v;
6483 if( v==0 )
6484 {
6485 continue;
6486 }
6487 for(j=i+1; j<=n-1; j++)
6488 {
6489 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
6490 }
6491 }
6492 return;
6493 }
6494 if( optype==1&&!isupper )
6495 {
6496 for(i=n-1; i>=0; i--)
6497 {
6498 v = x->ptr.p_double[ix+i];
6499 if( !isunit )
6500 {
6501 v = v/a->ptr.pp_double[ia+i][ja+i];
6502 }
6503 x->ptr.p_double[ix+i] = v;
6504 if( v==0 )
6505 {
6506 continue;
6507 }
6508 for(j=0; j<=i-1; j++)
6509 {
6510 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
6511 }
6512 }
6513 return;
6514 }
6515 ae_assert(ae_false, "rTRSVX: unexpected operation type", _state);
6516 }
6517 #endif
6518
6519
6520 /*************************************************************************
6521 Fast kernel
6522
6523 -- ALGLIB routine --
6524 19.01.2010
6525 Bochkanov Sergey
6526 *************************************************************************/
rmatrixgerf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double ralpha,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)6527 ae_bool rmatrixgerf(ae_int_t m,
6528 ae_int_t n,
6529 /* Real */ ae_matrix* a,
6530 ae_int_t ia,
6531 ae_int_t ja,
6532 double ralpha,
6533 /* Real */ ae_vector* u,
6534 ae_int_t iu,
6535 /* Real */ ae_vector* v,
6536 ae_int_t iv,
6537 ae_state *_state)
6538 {
6539 #ifndef ALGLIB_INTERCEPTS_ABLAS
6540 ae_bool result;
6541
6542
6543 result = ae_false;
6544 return result;
6545 #else
6546 return _ialglib_i_rmatrixgerf(m, n, a, ia, ja, ralpha, u, iu, v, iv);
6547 #endif
6548 }
6549
6550
6551 /*************************************************************************
6552 Fast kernel
6553
6554 -- ALGLIB routine --
6555 19.01.2010
6556 Bochkanov Sergey
6557 *************************************************************************/
cmatrixrank1f(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)6558 ae_bool cmatrixrank1f(ae_int_t m,
6559 ae_int_t n,
6560 /* Complex */ ae_matrix* a,
6561 ae_int_t ia,
6562 ae_int_t ja,
6563 /* Complex */ ae_vector* u,
6564 ae_int_t iu,
6565 /* Complex */ ae_vector* v,
6566 ae_int_t iv,
6567 ae_state *_state)
6568 {
6569 #ifndef ALGLIB_INTERCEPTS_ABLAS
6570 ae_bool result;
6571
6572
6573 result = ae_false;
6574 return result;
6575 #else
6576 return _ialglib_i_cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
6577 #endif
6578 }
6579
6580
6581 /*************************************************************************
6582 Fast kernel
6583
6584 -- ALGLIB routine --
6585 19.01.2010
6586 Bochkanov Sergey
6587 *************************************************************************/
rmatrixrank1f(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)6588 ae_bool rmatrixrank1f(ae_int_t m,
6589 ae_int_t n,
6590 /* Real */ ae_matrix* a,
6591 ae_int_t ia,
6592 ae_int_t ja,
6593 /* Real */ ae_vector* u,
6594 ae_int_t iu,
6595 /* Real */ ae_vector* v,
6596 ae_int_t iv,
6597 ae_state *_state)
6598 {
6599 #ifndef ALGLIB_INTERCEPTS_ABLAS
6600 ae_bool result;
6601
6602
6603 result = ae_false;
6604 return result;
6605 #else
6606 return _ialglib_i_rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
6607 #endif
6608 }
6609
6610
6611 /*************************************************************************
6612 Fast kernel
6613
6614 -- ALGLIB routine --
6615 19.01.2010
6616 Bochkanov Sergey
6617 *************************************************************************/
cmatrixrighttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6618 ae_bool cmatrixrighttrsmf(ae_int_t m,
6619 ae_int_t n,
6620 /* Complex */ ae_matrix* a,
6621 ae_int_t i1,
6622 ae_int_t j1,
6623 ae_bool isupper,
6624 ae_bool isunit,
6625 ae_int_t optype,
6626 /* Complex */ ae_matrix* x,
6627 ae_int_t i2,
6628 ae_int_t j2,
6629 ae_state *_state)
6630 {
6631 #ifndef ALGLIB_INTERCEPTS_ABLAS
6632 ae_bool result;
6633
6634
6635 result = ae_false;
6636 return result;
6637 #else
6638 return _ialglib_i_cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6639 #endif
6640 }
6641
6642
6643 /*************************************************************************
6644 Fast kernel
6645
6646 -- ALGLIB routine --
6647 19.01.2010
6648 Bochkanov Sergey
6649 *************************************************************************/
cmatrixlefttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6650 ae_bool cmatrixlefttrsmf(ae_int_t m,
6651 ae_int_t n,
6652 /* Complex */ ae_matrix* a,
6653 ae_int_t i1,
6654 ae_int_t j1,
6655 ae_bool isupper,
6656 ae_bool isunit,
6657 ae_int_t optype,
6658 /* Complex */ ae_matrix* x,
6659 ae_int_t i2,
6660 ae_int_t j2,
6661 ae_state *_state)
6662 {
6663 #ifndef ALGLIB_INTERCEPTS_ABLAS
6664 ae_bool result;
6665
6666
6667 result = ae_false;
6668 return result;
6669 #else
6670 return _ialglib_i_cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6671 #endif
6672 }
6673
6674
6675 /*************************************************************************
6676 Fast kernel
6677
6678 -- ALGLIB routine --
6679 19.01.2010
6680 Bochkanov Sergey
6681 *************************************************************************/
rmatrixrighttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6682 ae_bool rmatrixrighttrsmf(ae_int_t m,
6683 ae_int_t n,
6684 /* Real */ ae_matrix* a,
6685 ae_int_t i1,
6686 ae_int_t j1,
6687 ae_bool isupper,
6688 ae_bool isunit,
6689 ae_int_t optype,
6690 /* Real */ ae_matrix* x,
6691 ae_int_t i2,
6692 ae_int_t j2,
6693 ae_state *_state)
6694 {
6695 #ifndef ALGLIB_INTERCEPTS_ABLAS
6696 ae_bool result;
6697
6698
6699 result = ae_false;
6700 return result;
6701 #else
6702 return _ialglib_i_rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6703 #endif
6704 }
6705
6706
6707 /*************************************************************************
6708 Fast kernel
6709
6710 -- ALGLIB routine --
6711 19.01.2010
6712 Bochkanov Sergey
6713 *************************************************************************/
rmatrixlefttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6714 ae_bool rmatrixlefttrsmf(ae_int_t m,
6715 ae_int_t n,
6716 /* Real */ ae_matrix* a,
6717 ae_int_t i1,
6718 ae_int_t j1,
6719 ae_bool isupper,
6720 ae_bool isunit,
6721 ae_int_t optype,
6722 /* Real */ ae_matrix* x,
6723 ae_int_t i2,
6724 ae_int_t j2,
6725 ae_state *_state)
6726 {
6727 #ifndef ALGLIB_INTERCEPTS_ABLAS
6728 ae_bool result;
6729
6730
6731 result = ae_false;
6732 return result;
6733 #else
6734 return _ialglib_i_rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6735 #endif
6736 }
6737
6738
6739 /*************************************************************************
6740 Fast kernel
6741
6742 -- ALGLIB routine --
6743 19.01.2010
6744 Bochkanov Sergey
6745 *************************************************************************/
cmatrixherkf(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)6746 ae_bool cmatrixherkf(ae_int_t n,
6747 ae_int_t k,
6748 double alpha,
6749 /* Complex */ ae_matrix* a,
6750 ae_int_t ia,
6751 ae_int_t ja,
6752 ae_int_t optypea,
6753 double beta,
6754 /* Complex */ ae_matrix* c,
6755 ae_int_t ic,
6756 ae_int_t jc,
6757 ae_bool isupper,
6758 ae_state *_state)
6759 {
6760 #ifndef ALGLIB_INTERCEPTS_ABLAS
6761 ae_bool result;
6762
6763
6764 result = ae_false;
6765 return result;
6766 #else
6767 return _ialglib_i_cmatrixherkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
6768 #endif
6769 }
6770
6771
6772 /*************************************************************************
6773 Fast kernel
6774
6775 -- ALGLIB routine --
6776 19.01.2010
6777 Bochkanov Sergey
6778 *************************************************************************/
rmatrixsyrkf(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)6779 ae_bool rmatrixsyrkf(ae_int_t n,
6780 ae_int_t k,
6781 double alpha,
6782 /* Real */ ae_matrix* a,
6783 ae_int_t ia,
6784 ae_int_t ja,
6785 ae_int_t optypea,
6786 double beta,
6787 /* Real */ ae_matrix* c,
6788 ae_int_t ic,
6789 ae_int_t jc,
6790 ae_bool isupper,
6791 ae_state *_state)
6792 {
6793 #ifndef ALGLIB_INTERCEPTS_ABLAS
6794 ae_bool result;
6795
6796
6797 result = ae_false;
6798 return result;
6799 #else
6800 return _ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
6801 #endif
6802 }
6803
6804
6805 /*************************************************************************
6806 Fast kernel
6807
6808 -- ALGLIB routine --
6809 19.01.2010
6810 Bochkanov Sergey
6811 *************************************************************************/
cmatrixgemmf(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)6812 ae_bool cmatrixgemmf(ae_int_t m,
6813 ae_int_t n,
6814 ae_int_t k,
6815 ae_complex alpha,
6816 /* Complex */ ae_matrix* a,
6817 ae_int_t ia,
6818 ae_int_t ja,
6819 ae_int_t optypea,
6820 /* Complex */ ae_matrix* b,
6821 ae_int_t ib,
6822 ae_int_t jb,
6823 ae_int_t optypeb,
6824 ae_complex beta,
6825 /* Complex */ ae_matrix* c,
6826 ae_int_t ic,
6827 ae_int_t jc,
6828 ae_state *_state)
6829 {
6830 #ifndef ALGLIB_INTERCEPTS_ABLAS
6831 ae_bool result;
6832
6833
6834 result = ae_false;
6835 return result;
6836 #else
6837 return _ialglib_i_cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
6838 #endif
6839 }
6840
6841
6842 /*************************************************************************
6843 CMatrixGEMM kernel, basecase code for CMatrixGEMM.
6844
6845 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
6846 * C is MxN general matrix
6847 * op1(A) is MxK matrix
6848 * op2(B) is KxN matrix
6849 * "op" may be identity transformation, transposition, conjugate transposition
6850
6851 Additional info:
6852 * multiplication result replaces C. If Beta=0, C elements are not used in
6853 calculations (not multiplied by zero - just not referenced)
6854 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
6855 * if both Beta and Alpha are zero, C is filled by zeros.
6856
6857 IMPORTANT:
6858
6859 This function does NOT preallocate output matrix C, it MUST be preallocated
6860 by caller prior to calling this function. In case C does not have enough
6861 space to store result, exception will be generated.
6862
6863 INPUT PARAMETERS
6864 M - matrix size, M>0
6865 N - matrix size, N>0
6866 K - matrix size, K>0
6867 Alpha - coefficient
6868 A - matrix
6869 IA - submatrix offset
6870 JA - submatrix offset
6871 OpTypeA - transformation type:
6872 * 0 - no transformation
6873 * 1 - transposition
6874 * 2 - conjugate transposition
6875 B - matrix
6876 IB - submatrix offset
6877 JB - submatrix offset
6878 OpTypeB - transformation type:
6879 * 0 - no transformation
6880 * 1 - transposition
6881 * 2 - conjugate transposition
6882 Beta - coefficient
6883 C - PREALLOCATED output matrix
6884 IC - submatrix offset
6885 JC - submatrix offset
6886
6887 -- ALGLIB routine --
6888 27.03.2013
6889 Bochkanov Sergey
6890 *************************************************************************/
cmatrixgemmk(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)6891 void cmatrixgemmk(ae_int_t m,
6892 ae_int_t n,
6893 ae_int_t k,
6894 ae_complex alpha,
6895 /* Complex */ ae_matrix* a,
6896 ae_int_t ia,
6897 ae_int_t ja,
6898 ae_int_t optypea,
6899 /* Complex */ ae_matrix* b,
6900 ae_int_t ib,
6901 ae_int_t jb,
6902 ae_int_t optypeb,
6903 ae_complex beta,
6904 /* Complex */ ae_matrix* c,
6905 ae_int_t ic,
6906 ae_int_t jc,
6907 ae_state *_state)
6908 {
6909 ae_int_t i;
6910 ae_int_t j;
6911 ae_complex v;
6912 ae_complex v00;
6913 ae_complex v01;
6914 ae_complex v10;
6915 ae_complex v11;
6916 double v00x;
6917 double v00y;
6918 double v01x;
6919 double v01y;
6920 double v10x;
6921 double v10y;
6922 double v11x;
6923 double v11y;
6924 double a0x;
6925 double a0y;
6926 double a1x;
6927 double a1y;
6928 double b0x;
6929 double b0y;
6930 double b1x;
6931 double b1y;
6932 ae_int_t idxa0;
6933 ae_int_t idxa1;
6934 ae_int_t idxb0;
6935 ae_int_t idxb1;
6936 ae_int_t i0;
6937 ae_int_t i1;
6938 ae_int_t ik;
6939 ae_int_t j0;
6940 ae_int_t j1;
6941 ae_int_t jk;
6942 ae_int_t t;
6943 ae_int_t offsa;
6944 ae_int_t offsb;
6945
6946
6947
6948 /*
6949 * if matrix size is zero
6950 */
6951 if( m==0||n==0 )
6952 {
6953 return;
6954 }
6955
6956 /*
6957 * Try optimized code
6958 */
6959 if( cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
6960 {
6961 return;
6962 }
6963
6964 /*
6965 * if K=0 or Alpha=0, then C=Beta*C
6966 */
6967 if( k==0||ae_c_eq_d(alpha,(double)(0)) )
6968 {
6969 if( ae_c_neq_d(beta,(double)(1)) )
6970 {
6971 if( ae_c_neq_d(beta,(double)(0)) )
6972 {
6973 for(i=0; i<=m-1; i++)
6974 {
6975 for(j=0; j<=n-1; j++)
6976 {
6977 c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]);
6978 }
6979 }
6980 }
6981 else
6982 {
6983 for(i=0; i<=m-1; i++)
6984 {
6985 for(j=0; j<=n-1; j++)
6986 {
6987 c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_i(0);
6988 }
6989 }
6990 }
6991 }
6992 return;
6993 }
6994
6995 /*
6996 * This phase is not really necessary, but compiler complains
6997 * about "possibly uninitialized variables"
6998 */
6999 a0x = (double)(0);
7000 a0y = (double)(0);
7001 a1x = (double)(0);
7002 a1y = (double)(0);
7003 b0x = (double)(0);
7004 b0y = (double)(0);
7005 b1x = (double)(0);
7006 b1y = (double)(0);
7007
7008 /*
7009 * General case
7010 */
7011 i = 0;
7012 while(i<m)
7013 {
7014 j = 0;
7015 while(j<n)
7016 {
7017
7018 /*
7019 * Choose between specialized 4x4 code and general code
7020 */
7021 if( i+2<=m&&j+2<=n )
7022 {
7023
7024 /*
7025 * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
7026 *
7027 * This submatrix is calculated as sum of K rank-1 products,
7028 * with operands cached in local variables in order to speed
7029 * up operations with arrays.
7030 */
7031 v00x = 0.0;
7032 v00y = 0.0;
7033 v01x = 0.0;
7034 v01y = 0.0;
7035 v10x = 0.0;
7036 v10y = 0.0;
7037 v11x = 0.0;
7038 v11y = 0.0;
7039 if( optypea==0 )
7040 {
7041 idxa0 = ia+i+0;
7042 idxa1 = ia+i+1;
7043 offsa = ja;
7044 }
7045 else
7046 {
7047 idxa0 = ja+i+0;
7048 idxa1 = ja+i+1;
7049 offsa = ia;
7050 }
7051 if( optypeb==0 )
7052 {
7053 idxb0 = jb+j+0;
7054 idxb1 = jb+j+1;
7055 offsb = ib;
7056 }
7057 else
7058 {
7059 idxb0 = ib+j+0;
7060 idxb1 = ib+j+1;
7061 offsb = jb;
7062 }
7063 for(t=0; t<=k-1; t++)
7064 {
7065 if( optypea==0 )
7066 {
7067 a0x = a->ptr.pp_complex[idxa0][offsa].x;
7068 a0y = a->ptr.pp_complex[idxa0][offsa].y;
7069 a1x = a->ptr.pp_complex[idxa1][offsa].x;
7070 a1y = a->ptr.pp_complex[idxa1][offsa].y;
7071 }
7072 if( optypea==1 )
7073 {
7074 a0x = a->ptr.pp_complex[offsa][idxa0].x;
7075 a0y = a->ptr.pp_complex[offsa][idxa0].y;
7076 a1x = a->ptr.pp_complex[offsa][idxa1].x;
7077 a1y = a->ptr.pp_complex[offsa][idxa1].y;
7078 }
7079 if( optypea==2 )
7080 {
7081 a0x = a->ptr.pp_complex[offsa][idxa0].x;
7082 a0y = -a->ptr.pp_complex[offsa][idxa0].y;
7083 a1x = a->ptr.pp_complex[offsa][idxa1].x;
7084 a1y = -a->ptr.pp_complex[offsa][idxa1].y;
7085 }
7086 if( optypeb==0 )
7087 {
7088 b0x = b->ptr.pp_complex[offsb][idxb0].x;
7089 b0y = b->ptr.pp_complex[offsb][idxb0].y;
7090 b1x = b->ptr.pp_complex[offsb][idxb1].x;
7091 b1y = b->ptr.pp_complex[offsb][idxb1].y;
7092 }
7093 if( optypeb==1 )
7094 {
7095 b0x = b->ptr.pp_complex[idxb0][offsb].x;
7096 b0y = b->ptr.pp_complex[idxb0][offsb].y;
7097 b1x = b->ptr.pp_complex[idxb1][offsb].x;
7098 b1y = b->ptr.pp_complex[idxb1][offsb].y;
7099 }
7100 if( optypeb==2 )
7101 {
7102 b0x = b->ptr.pp_complex[idxb0][offsb].x;
7103 b0y = -b->ptr.pp_complex[idxb0][offsb].y;
7104 b1x = b->ptr.pp_complex[idxb1][offsb].x;
7105 b1y = -b->ptr.pp_complex[idxb1][offsb].y;
7106 }
7107 v00x = v00x+a0x*b0x-a0y*b0y;
7108 v00y = v00y+a0x*b0y+a0y*b0x;
7109 v01x = v01x+a0x*b1x-a0y*b1y;
7110 v01y = v01y+a0x*b1y+a0y*b1x;
7111 v10x = v10x+a1x*b0x-a1y*b0y;
7112 v10y = v10y+a1x*b0y+a1y*b0x;
7113 v11x = v11x+a1x*b1x-a1y*b1y;
7114 v11y = v11y+a1x*b1y+a1y*b1x;
7115 offsa = offsa+1;
7116 offsb = offsb+1;
7117 }
7118 v00.x = v00x;
7119 v00.y = v00y;
7120 v10.x = v10x;
7121 v10.y = v10y;
7122 v01.x = v01x;
7123 v01.y = v01y;
7124 v11.x = v11x;
7125 v11.y = v11y;
7126 if( ae_c_eq_d(beta,(double)(0)) )
7127 {
7128 c->ptr.pp_complex[ic+i+0][jc+j+0] = ae_c_mul(alpha,v00);
7129 c->ptr.pp_complex[ic+i+0][jc+j+1] = ae_c_mul(alpha,v01);
7130 c->ptr.pp_complex[ic+i+1][jc+j+0] = ae_c_mul(alpha,v10);
7131 c->ptr.pp_complex[ic+i+1][jc+j+1] = ae_c_mul(alpha,v11);
7132 }
7133 else
7134 {
7135 c->ptr.pp_complex[ic+i+0][jc+j+0] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+0][jc+j+0]),ae_c_mul(alpha,v00));
7136 c->ptr.pp_complex[ic+i+0][jc+j+1] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+0][jc+j+1]),ae_c_mul(alpha,v01));
7137 c->ptr.pp_complex[ic+i+1][jc+j+0] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+1][jc+j+0]),ae_c_mul(alpha,v10));
7138 c->ptr.pp_complex[ic+i+1][jc+j+1] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+1][jc+j+1]),ae_c_mul(alpha,v11));
7139 }
7140 }
7141 else
7142 {
7143
7144 /*
7145 * Determine submatrix [I0..I1]x[J0..J1] to process
7146 */
7147 i0 = i;
7148 i1 = ae_minint(i+1, m-1, _state);
7149 j0 = j;
7150 j1 = ae_minint(j+1, n-1, _state);
7151
7152 /*
7153 * Process submatrix
7154 */
7155 for(ik=i0; ik<=i1; ik++)
7156 {
7157 for(jk=j0; jk<=j1; jk++)
7158 {
7159 if( k==0||ae_c_eq_d(alpha,(double)(0)) )
7160 {
7161 v = ae_complex_from_i(0);
7162 }
7163 else
7164 {
7165 v = ae_complex_from_d(0.0);
7166 if( optypea==0&&optypeb==0 )
7167 {
7168 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ja,ja+k-1));
7169 }
7170 if( optypea==0&&optypeb==1 )
7171 {
7172 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ja,ja+k-1));
7173 }
7174 if( optypea==0&&optypeb==2 )
7175 {
7176 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ja,ja+k-1));
7177 }
7178 if( optypea==1&&optypeb==0 )
7179 {
7180 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ia,ia+k-1));
7181 }
7182 if( optypea==1&&optypeb==1 )
7183 {
7184 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ia,ia+k-1));
7185 }
7186 if( optypea==1&&optypeb==2 )
7187 {
7188 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
7189 }
7190 if( optypea==2&&optypeb==0 )
7191 {
7192 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ia,ia+k-1));
7193 }
7194 if( optypea==2&&optypeb==1 )
7195 {
7196 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ia,ia+k-1));
7197 }
7198 if( optypea==2&&optypeb==2 )
7199 {
7200 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
7201 }
7202 }
7203 if( ae_c_eq_d(beta,(double)(0)) )
7204 {
7205 c->ptr.pp_complex[ic+ik][jc+jk] = ae_c_mul(alpha,v);
7206 }
7207 else
7208 {
7209 c->ptr.pp_complex[ic+ik][jc+jk] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+ik][jc+jk]),ae_c_mul(alpha,v));
7210 }
7211 }
7212 }
7213 }
7214 j = j+2;
7215 }
7216 i = i+2;
7217 }
7218 }
7219
7220
7221 /*************************************************************************
7222 RMatrixGEMM kernel, basecase code for RMatrixGEMM.
7223
7224 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
7225 * C is MxN general matrix
7226 * op1(A) is MxK matrix
7227 * op2(B) is KxN matrix
7228 * "op" may be identity transformation, transposition
7229
7230 Additional info:
7231 * multiplication result replaces C. If Beta=0, C elements are not used in
7232 calculations (not multiplied by zero - just not referenced)
7233 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
7234 * if both Beta and Alpha are zero, C is filled by zeros.
7235
7236 IMPORTANT:
7237
7238 This function does NOT preallocate output matrix C, it MUST be preallocated
7239 by caller prior to calling this function. In case C does not have enough
7240 space to store result, exception will be generated.
7241
7242 INPUT PARAMETERS
7243 M - matrix size, M>0
7244 N - matrix size, N>0
7245 K - matrix size, K>0
7246 Alpha - coefficient
7247 A - matrix
7248 IA - submatrix offset
7249 JA - submatrix offset
7250 OpTypeA - transformation type:
7251 * 0 - no transformation
7252 * 1 - transposition
7253 B - matrix
7254 IB - submatrix offset
7255 JB - submatrix offset
7256 OpTypeB - transformation type:
7257 * 0 - no transformation
7258 * 1 - transposition
7259 Beta - coefficient
7260 C - PREALLOCATED output matrix
7261 IC - submatrix offset
7262 JC - submatrix offset
7263
7264 -- ALGLIB routine --
7265 27.03.2013
7266 Bochkanov Sergey
7267 *************************************************************************/
rmatrixgemmk(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7268 void rmatrixgemmk(ae_int_t m,
7269 ae_int_t n,
7270 ae_int_t k,
7271 double alpha,
7272 /* Real */ ae_matrix* a,
7273 ae_int_t ia,
7274 ae_int_t ja,
7275 ae_int_t optypea,
7276 /* Real */ ae_matrix* b,
7277 ae_int_t ib,
7278 ae_int_t jb,
7279 ae_int_t optypeb,
7280 double beta,
7281 /* Real */ ae_matrix* c,
7282 ae_int_t ic,
7283 ae_int_t jc,
7284 ae_state *_state)
7285 {
7286 ae_int_t i;
7287 ae_int_t j;
7288
7289
7290
7291 /*
7292 * if matrix size is zero
7293 */
7294 if( m==0||n==0 )
7295 {
7296 return;
7297 }
7298
7299 /*
7300 * Try optimized code
7301 */
7302 if( ablasf_rgemm32basecase(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
7303 {
7304 return;
7305 }
7306
7307 /*
7308 * if K=0 or Alpha=0, then C=Beta*C
7309 */
7310 if( k==0||ae_fp_eq(alpha,(double)(0)) )
7311 {
7312 if( ae_fp_neq(beta,(double)(1)) )
7313 {
7314 if( ae_fp_neq(beta,(double)(0)) )
7315 {
7316 for(i=0; i<=m-1; i++)
7317 {
7318 for(j=0; j<=n-1; j++)
7319 {
7320 c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j];
7321 }
7322 }
7323 }
7324 else
7325 {
7326 for(i=0; i<=m-1; i++)
7327 {
7328 for(j=0; j<=n-1; j++)
7329 {
7330 c->ptr.pp_double[ic+i][jc+j] = (double)(0);
7331 }
7332 }
7333 }
7334 }
7335 return;
7336 }
7337
7338 /*
7339 * Call specialized code.
7340 *
7341 * NOTE: specialized code was moved to separate function because of strange
7342 * issues with instructions cache on some systems; Having too long
7343 * functions significantly slows down internal loop of the algorithm.
7344 */
7345 if( optypea==0&&optypeb==0 )
7346 {
7347 rmatrixgemmk44v00(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7348 }
7349 if( optypea==0&&optypeb!=0 )
7350 {
7351 rmatrixgemmk44v01(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7352 }
7353 if( optypea!=0&&optypeb==0 )
7354 {
7355 rmatrixgemmk44v10(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7356 }
7357 if( optypea!=0&&optypeb!=0 )
7358 {
7359 rmatrixgemmk44v11(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7360 }
7361 }
7362
7363
7364 /*************************************************************************
7365 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
7366 with OpTypeA=0 and OpTypeB=0.
7367
7368 Additional info:
7369 * this function requires that Alpha<>0 (assertion is thrown otherwise)
7370
7371 INPUT PARAMETERS
7372 M - matrix size, M>0
7373 N - matrix size, N>0
7374 K - matrix size, K>0
7375 Alpha - coefficient
7376 A - matrix
7377 IA - submatrix offset
7378 JA - submatrix offset
7379 B - matrix
7380 IB - submatrix offset
7381 JB - submatrix offset
7382 Beta - coefficient
7383 C - PREALLOCATED output matrix
7384 IC - submatrix offset
7385 JC - submatrix offset
7386
7387 -- ALGLIB routine --
7388 27.03.2013
7389 Bochkanov Sergey
7390 *************************************************************************/
rmatrixgemmk44v00(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7391 void rmatrixgemmk44v00(ae_int_t m,
7392 ae_int_t n,
7393 ae_int_t k,
7394 double alpha,
7395 /* Real */ ae_matrix* a,
7396 ae_int_t ia,
7397 ae_int_t ja,
7398 /* Real */ ae_matrix* b,
7399 ae_int_t ib,
7400 ae_int_t jb,
7401 double beta,
7402 /* Real */ ae_matrix* c,
7403 ae_int_t ic,
7404 ae_int_t jc,
7405 ae_state *_state)
7406 {
7407 ae_int_t i;
7408 ae_int_t j;
7409 double v;
7410 double v00;
7411 double v01;
7412 double v02;
7413 double v03;
7414 double v10;
7415 double v11;
7416 double v12;
7417 double v13;
7418 double v20;
7419 double v21;
7420 double v22;
7421 double v23;
7422 double v30;
7423 double v31;
7424 double v32;
7425 double v33;
7426 double a0;
7427 double a1;
7428 double a2;
7429 double a3;
7430 double b0;
7431 double b1;
7432 double b2;
7433 double b3;
7434 ae_int_t idxa0;
7435 ae_int_t idxa1;
7436 ae_int_t idxa2;
7437 ae_int_t idxa3;
7438 ae_int_t idxb0;
7439 ae_int_t idxb1;
7440 ae_int_t idxb2;
7441 ae_int_t idxb3;
7442 ae_int_t i0;
7443 ae_int_t i1;
7444 ae_int_t ik;
7445 ae_int_t j0;
7446 ae_int_t j1;
7447 ae_int_t jk;
7448 ae_int_t t;
7449 ae_int_t offsa;
7450 ae_int_t offsb;
7451
7452
7453 ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
7454
7455 /*
7456 * if matrix size is zero
7457 */
7458 if( m==0||n==0 )
7459 {
7460 return;
7461 }
7462
7463 /*
7464 * A*B
7465 */
7466 i = 0;
7467 while(i<m)
7468 {
7469 j = 0;
7470 while(j<n)
7471 {
7472
7473 /*
7474 * Choose between specialized 4x4 code and general code
7475 */
7476 if( i+4<=m&&j+4<=n )
7477 {
7478
7479 /*
7480 * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
7481 *
7482 * This submatrix is calculated as sum of K rank-1 products,
7483 * with operands cached in local variables in order to speed
7484 * up operations with arrays.
7485 */
7486 idxa0 = ia+i+0;
7487 idxa1 = ia+i+1;
7488 idxa2 = ia+i+2;
7489 idxa3 = ia+i+3;
7490 offsa = ja;
7491 idxb0 = jb+j+0;
7492 idxb1 = jb+j+1;
7493 idxb2 = jb+j+2;
7494 idxb3 = jb+j+3;
7495 offsb = ib;
7496 v00 = 0.0;
7497 v01 = 0.0;
7498 v02 = 0.0;
7499 v03 = 0.0;
7500 v10 = 0.0;
7501 v11 = 0.0;
7502 v12 = 0.0;
7503 v13 = 0.0;
7504 v20 = 0.0;
7505 v21 = 0.0;
7506 v22 = 0.0;
7507 v23 = 0.0;
7508 v30 = 0.0;
7509 v31 = 0.0;
7510 v32 = 0.0;
7511 v33 = 0.0;
7512
7513 /*
7514 * Different variants of internal loop
7515 */
7516 for(t=0; t<=k-1; t++)
7517 {
7518 a0 = a->ptr.pp_double[idxa0][offsa];
7519 a1 = a->ptr.pp_double[idxa1][offsa];
7520 b0 = b->ptr.pp_double[offsb][idxb0];
7521 b1 = b->ptr.pp_double[offsb][idxb1];
7522 v00 = v00+a0*b0;
7523 v01 = v01+a0*b1;
7524 v10 = v10+a1*b0;
7525 v11 = v11+a1*b1;
7526 a2 = a->ptr.pp_double[idxa2][offsa];
7527 a3 = a->ptr.pp_double[idxa3][offsa];
7528 v20 = v20+a2*b0;
7529 v21 = v21+a2*b1;
7530 v30 = v30+a3*b0;
7531 v31 = v31+a3*b1;
7532 b2 = b->ptr.pp_double[offsb][idxb2];
7533 b3 = b->ptr.pp_double[offsb][idxb3];
7534 v22 = v22+a2*b2;
7535 v23 = v23+a2*b3;
7536 v32 = v32+a3*b2;
7537 v33 = v33+a3*b3;
7538 v02 = v02+a0*b2;
7539 v03 = v03+a0*b3;
7540 v12 = v12+a1*b2;
7541 v13 = v13+a1*b3;
7542 offsa = offsa+1;
7543 offsb = offsb+1;
7544 }
7545 if( ae_fp_eq(beta,(double)(0)) )
7546 {
7547 c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
7548 c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
7549 c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
7550 c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
7551 c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
7552 c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
7553 c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
7554 c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
7555 c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
7556 c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
7557 c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
7558 c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
7559 c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
7560 c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
7561 c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
7562 c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
7563 }
7564 else
7565 {
7566 c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
7567 c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
7568 c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
7569 c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
7570 c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
7571 c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
7572 c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
7573 c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
7574 c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
7575 c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
7576 c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
7577 c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
7578 c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
7579 c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
7580 c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
7581 c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
7582 }
7583 }
7584 else
7585 {
7586
7587 /*
7588 * Determine submatrix [I0..I1]x[J0..J1] to process
7589 */
7590 i0 = i;
7591 i1 = ae_minint(i+3, m-1, _state);
7592 j0 = j;
7593 j1 = ae_minint(j+3, n-1, _state);
7594
7595 /*
7596 * Process submatrix
7597 */
7598 for(ik=i0; ik<=i1; ik++)
7599 {
7600 for(jk=j0; jk<=j1; jk++)
7601 {
7602 if( k==0||ae_fp_eq(alpha,(double)(0)) )
7603 {
7604 v = (double)(0);
7605 }
7606 else
7607 {
7608 v = ae_v_dotproduct(&a->ptr.pp_double[ia+ik][ja], 1, &b->ptr.pp_double[ib][jb+jk], b->stride, ae_v_len(ja,ja+k-1));
7609 }
7610 if( ae_fp_eq(beta,(double)(0)) )
7611 {
7612 c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
7613 }
7614 else
7615 {
7616 c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
7617 }
7618 }
7619 }
7620 }
7621 j = j+4;
7622 }
7623 i = i+4;
7624 }
7625 }
7626
7627
7628 /*************************************************************************
7629 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
7630 with OpTypeA=0 and OpTypeB=1.
7631
7632 Additional info:
7633 * this function requires that Alpha<>0 (assertion is thrown otherwise)
7634
7635 INPUT PARAMETERS
7636 M - matrix size, M>0
7637 N - matrix size, N>0
7638 K - matrix size, K>0
7639 Alpha - coefficient
7640 A - matrix
7641 IA - submatrix offset
7642 JA - submatrix offset
7643 B - matrix
7644 IB - submatrix offset
7645 JB - submatrix offset
7646 Beta - coefficient
7647 C - PREALLOCATED output matrix
7648 IC - submatrix offset
7649 JC - submatrix offset
7650
7651 -- ALGLIB routine --
7652 27.03.2013
7653 Bochkanov Sergey
7654 *************************************************************************/
rmatrixgemmk44v01(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7655 void rmatrixgemmk44v01(ae_int_t m,
7656 ae_int_t n,
7657 ae_int_t k,
7658 double alpha,
7659 /* Real */ ae_matrix* a,
7660 ae_int_t ia,
7661 ae_int_t ja,
7662 /* Real */ ae_matrix* b,
7663 ae_int_t ib,
7664 ae_int_t jb,
7665 double beta,
7666 /* Real */ ae_matrix* c,
7667 ae_int_t ic,
7668 ae_int_t jc,
7669 ae_state *_state)
7670 {
7671 ae_int_t i;
7672 ae_int_t j;
7673 double v;
7674 double v00;
7675 double v01;
7676 double v02;
7677 double v03;
7678 double v10;
7679 double v11;
7680 double v12;
7681 double v13;
7682 double v20;
7683 double v21;
7684 double v22;
7685 double v23;
7686 double v30;
7687 double v31;
7688 double v32;
7689 double v33;
7690 double a0;
7691 double a1;
7692 double a2;
7693 double a3;
7694 double b0;
7695 double b1;
7696 double b2;
7697 double b3;
7698 ae_int_t idxa0;
7699 ae_int_t idxa1;
7700 ae_int_t idxa2;
7701 ae_int_t idxa3;
7702 ae_int_t idxb0;
7703 ae_int_t idxb1;
7704 ae_int_t idxb2;
7705 ae_int_t idxb3;
7706 ae_int_t i0;
7707 ae_int_t i1;
7708 ae_int_t ik;
7709 ae_int_t j0;
7710 ae_int_t j1;
7711 ae_int_t jk;
7712 ae_int_t t;
7713 ae_int_t offsa;
7714 ae_int_t offsb;
7715
7716
7717 ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
7718
7719 /*
7720 * if matrix size is zero
7721 */
7722 if( m==0||n==0 )
7723 {
7724 return;
7725 }
7726
7727 /*
7728 * A*B'
7729 */
7730 i = 0;
7731 while(i<m)
7732 {
7733 j = 0;
7734 while(j<n)
7735 {
7736
7737 /*
7738 * Choose between specialized 4x4 code and general code
7739 */
7740 if( i+4<=m&&j+4<=n )
7741 {
7742
7743 /*
7744 * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
7745 *
7746 * This submatrix is calculated as sum of K rank-1 products,
7747 * with operands cached in local variables in order to speed
7748 * up operations with arrays.
7749 */
7750 idxa0 = ia+i+0;
7751 idxa1 = ia+i+1;
7752 idxa2 = ia+i+2;
7753 idxa3 = ia+i+3;
7754 offsa = ja;
7755 idxb0 = ib+j+0;
7756 idxb1 = ib+j+1;
7757 idxb2 = ib+j+2;
7758 idxb3 = ib+j+3;
7759 offsb = jb;
7760 v00 = 0.0;
7761 v01 = 0.0;
7762 v02 = 0.0;
7763 v03 = 0.0;
7764 v10 = 0.0;
7765 v11 = 0.0;
7766 v12 = 0.0;
7767 v13 = 0.0;
7768 v20 = 0.0;
7769 v21 = 0.0;
7770 v22 = 0.0;
7771 v23 = 0.0;
7772 v30 = 0.0;
7773 v31 = 0.0;
7774 v32 = 0.0;
7775 v33 = 0.0;
7776 for(t=0; t<=k-1; t++)
7777 {
7778 a0 = a->ptr.pp_double[idxa0][offsa];
7779 a1 = a->ptr.pp_double[idxa1][offsa];
7780 b0 = b->ptr.pp_double[idxb0][offsb];
7781 b1 = b->ptr.pp_double[idxb1][offsb];
7782 v00 = v00+a0*b0;
7783 v01 = v01+a0*b1;
7784 v10 = v10+a1*b0;
7785 v11 = v11+a1*b1;
7786 a2 = a->ptr.pp_double[idxa2][offsa];
7787 a3 = a->ptr.pp_double[idxa3][offsa];
7788 v20 = v20+a2*b0;
7789 v21 = v21+a2*b1;
7790 v30 = v30+a3*b0;
7791 v31 = v31+a3*b1;
7792 b2 = b->ptr.pp_double[idxb2][offsb];
7793 b3 = b->ptr.pp_double[idxb3][offsb];
7794 v22 = v22+a2*b2;
7795 v23 = v23+a2*b3;
7796 v32 = v32+a3*b2;
7797 v33 = v33+a3*b3;
7798 v02 = v02+a0*b2;
7799 v03 = v03+a0*b3;
7800 v12 = v12+a1*b2;
7801 v13 = v13+a1*b3;
7802 offsa = offsa+1;
7803 offsb = offsb+1;
7804 }
7805 if( ae_fp_eq(beta,(double)(0)) )
7806 {
7807 c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
7808 c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
7809 c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
7810 c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
7811 c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
7812 c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
7813 c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
7814 c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
7815 c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
7816 c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
7817 c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
7818 c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
7819 c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
7820 c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
7821 c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
7822 c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
7823 }
7824 else
7825 {
7826 c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
7827 c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
7828 c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
7829 c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
7830 c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
7831 c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
7832 c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
7833 c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
7834 c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
7835 c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
7836 c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
7837 c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
7838 c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
7839 c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
7840 c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
7841 c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
7842 }
7843 }
7844 else
7845 {
7846
7847 /*
7848 * Determine submatrix [I0..I1]x[J0..J1] to process
7849 */
7850 i0 = i;
7851 i1 = ae_minint(i+3, m-1, _state);
7852 j0 = j;
7853 j1 = ae_minint(j+3, n-1, _state);
7854
7855 /*
7856 * Process submatrix
7857 */
7858 for(ik=i0; ik<=i1; ik++)
7859 {
7860 for(jk=j0; jk<=j1; jk++)
7861 {
7862 if( k==0||ae_fp_eq(alpha,(double)(0)) )
7863 {
7864 v = (double)(0);
7865 }
7866 else
7867 {
7868 v = ae_v_dotproduct(&a->ptr.pp_double[ia+ik][ja], 1, &b->ptr.pp_double[ib+jk][jb], 1, ae_v_len(ja,ja+k-1));
7869 }
7870 if( ae_fp_eq(beta,(double)(0)) )
7871 {
7872 c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
7873 }
7874 else
7875 {
7876 c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
7877 }
7878 }
7879 }
7880 }
7881 j = j+4;
7882 }
7883 i = i+4;
7884 }
7885 }
7886
7887
7888 /*************************************************************************
7889 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
7890 with OpTypeA=1 and OpTypeB=0.
7891
7892 Additional info:
7893 * this function requires that Alpha<>0 (assertion is thrown otherwise)
7894
7895 INPUT PARAMETERS
7896 M - matrix size, M>0
7897 N - matrix size, N>0
7898 K - matrix size, K>0
7899 Alpha - coefficient
7900 A - matrix
7901 IA - submatrix offset
7902 JA - submatrix offset
7903 B - matrix
7904 IB - submatrix offset
7905 JB - submatrix offset
7906 Beta - coefficient
7907 C - PREALLOCATED output matrix
7908 IC - submatrix offset
7909 JC - submatrix offset
7910
7911 -- ALGLIB routine --
7912 27.03.2013
7913 Bochkanov Sergey
7914 *************************************************************************/
rmatrixgemmk44v10(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7915 void rmatrixgemmk44v10(ae_int_t m,
7916 ae_int_t n,
7917 ae_int_t k,
7918 double alpha,
7919 /* Real */ ae_matrix* a,
7920 ae_int_t ia,
7921 ae_int_t ja,
7922 /* Real */ ae_matrix* b,
7923 ae_int_t ib,
7924 ae_int_t jb,
7925 double beta,
7926 /* Real */ ae_matrix* c,
7927 ae_int_t ic,
7928 ae_int_t jc,
7929 ae_state *_state)
7930 {
7931 ae_int_t i;
7932 ae_int_t j;
7933 double v;
7934 double v00;
7935 double v01;
7936 double v02;
7937 double v03;
7938 double v10;
7939 double v11;
7940 double v12;
7941 double v13;
7942 double v20;
7943 double v21;
7944 double v22;
7945 double v23;
7946 double v30;
7947 double v31;
7948 double v32;
7949 double v33;
7950 double a0;
7951 double a1;
7952 double a2;
7953 double a3;
7954 double b0;
7955 double b1;
7956 double b2;
7957 double b3;
7958 ae_int_t idxa0;
7959 ae_int_t idxa1;
7960 ae_int_t idxa2;
7961 ae_int_t idxa3;
7962 ae_int_t idxb0;
7963 ae_int_t idxb1;
7964 ae_int_t idxb2;
7965 ae_int_t idxb3;
7966 ae_int_t i0;
7967 ae_int_t i1;
7968 ae_int_t ik;
7969 ae_int_t j0;
7970 ae_int_t j1;
7971 ae_int_t jk;
7972 ae_int_t t;
7973 ae_int_t offsa;
7974 ae_int_t offsb;
7975
7976
7977 ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
7978
7979 /*
7980 * if matrix size is zero
7981 */
7982 if( m==0||n==0 )
7983 {
7984 return;
7985 }
7986
7987 /*
7988 * A'*B
7989 */
7990 i = 0;
7991 while(i<m)
7992 {
7993 j = 0;
7994 while(j<n)
7995 {
7996
7997 /*
7998 * Choose between specialized 4x4 code and general code
7999 */
8000 if( i+4<=m&&j+4<=n )
8001 {
8002
8003 /*
8004 * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
8005 *
8006 * This submatrix is calculated as sum of K rank-1 products,
8007 * with operands cached in local variables in order to speed
8008 * up operations with arrays.
8009 */
8010 idxa0 = ja+i+0;
8011 idxa1 = ja+i+1;
8012 idxa2 = ja+i+2;
8013 idxa3 = ja+i+3;
8014 offsa = ia;
8015 idxb0 = jb+j+0;
8016 idxb1 = jb+j+1;
8017 idxb2 = jb+j+2;
8018 idxb3 = jb+j+3;
8019 offsb = ib;
8020 v00 = 0.0;
8021 v01 = 0.0;
8022 v02 = 0.0;
8023 v03 = 0.0;
8024 v10 = 0.0;
8025 v11 = 0.0;
8026 v12 = 0.0;
8027 v13 = 0.0;
8028 v20 = 0.0;
8029 v21 = 0.0;
8030 v22 = 0.0;
8031 v23 = 0.0;
8032 v30 = 0.0;
8033 v31 = 0.0;
8034 v32 = 0.0;
8035 v33 = 0.0;
8036 for(t=0; t<=k-1; t++)
8037 {
8038 a0 = a->ptr.pp_double[offsa][idxa0];
8039 a1 = a->ptr.pp_double[offsa][idxa1];
8040 b0 = b->ptr.pp_double[offsb][idxb0];
8041 b1 = b->ptr.pp_double[offsb][idxb1];
8042 v00 = v00+a0*b0;
8043 v01 = v01+a0*b1;
8044 v10 = v10+a1*b0;
8045 v11 = v11+a1*b1;
8046 a2 = a->ptr.pp_double[offsa][idxa2];
8047 a3 = a->ptr.pp_double[offsa][idxa3];
8048 v20 = v20+a2*b0;
8049 v21 = v21+a2*b1;
8050 v30 = v30+a3*b0;
8051 v31 = v31+a3*b1;
8052 b2 = b->ptr.pp_double[offsb][idxb2];
8053 b3 = b->ptr.pp_double[offsb][idxb3];
8054 v22 = v22+a2*b2;
8055 v23 = v23+a2*b3;
8056 v32 = v32+a3*b2;
8057 v33 = v33+a3*b3;
8058 v02 = v02+a0*b2;
8059 v03 = v03+a0*b3;
8060 v12 = v12+a1*b2;
8061 v13 = v13+a1*b3;
8062 offsa = offsa+1;
8063 offsb = offsb+1;
8064 }
8065 if( ae_fp_eq(beta,(double)(0)) )
8066 {
8067 c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
8068 c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
8069 c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
8070 c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
8071 c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
8072 c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
8073 c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
8074 c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
8075 c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
8076 c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
8077 c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
8078 c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
8079 c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
8080 c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
8081 c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
8082 c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
8083 }
8084 else
8085 {
8086 c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
8087 c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
8088 c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
8089 c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
8090 c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
8091 c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
8092 c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
8093 c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
8094 c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
8095 c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
8096 c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
8097 c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
8098 c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
8099 c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
8100 c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
8101 c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
8102 }
8103 }
8104 else
8105 {
8106
8107 /*
8108 * Determine submatrix [I0..I1]x[J0..J1] to process
8109 */
8110 i0 = i;
8111 i1 = ae_minint(i+3, m-1, _state);
8112 j0 = j;
8113 j1 = ae_minint(j+3, n-1, _state);
8114
8115 /*
8116 * Process submatrix
8117 */
8118 for(ik=i0; ik<=i1; ik++)
8119 {
8120 for(jk=j0; jk<=j1; jk++)
8121 {
8122 if( k==0||ae_fp_eq(alpha,(double)(0)) )
8123 {
8124 v = (double)(0);
8125 }
8126 else
8127 {
8128 v = 0.0;
8129 v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+ik], a->stride, &b->ptr.pp_double[ib][jb+jk], b->stride, ae_v_len(ia,ia+k-1));
8130 }
8131 if( ae_fp_eq(beta,(double)(0)) )
8132 {
8133 c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
8134 }
8135 else
8136 {
8137 c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
8138 }
8139 }
8140 }
8141 }
8142 j = j+4;
8143 }
8144 i = i+4;
8145 }
8146 }
8147
8148
8149 /*************************************************************************
8150 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
8151 with OpTypeA=1 and OpTypeB=1.
8152
8153 Additional info:
8154 * this function requires that Alpha<>0 (assertion is thrown otherwise)
8155
8156 INPUT PARAMETERS
8157 M - matrix size, M>0
8158 N - matrix size, N>0
8159 K - matrix size, K>0
8160 Alpha - coefficient
8161 A - matrix
8162 IA - submatrix offset
8163 JA - submatrix offset
8164 B - matrix
8165 IB - submatrix offset
8166 JB - submatrix offset
8167 Beta - coefficient
8168 C - PREALLOCATED output matrix
8169 IC - submatrix offset
8170 JC - submatrix offset
8171
8172 -- ALGLIB routine --
8173 27.03.2013
8174 Bochkanov Sergey
8175 *************************************************************************/
rmatrixgemmk44v11(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)8176 void rmatrixgemmk44v11(ae_int_t m,
8177 ae_int_t n,
8178 ae_int_t k,
8179 double alpha,
8180 /* Real */ ae_matrix* a,
8181 ae_int_t ia,
8182 ae_int_t ja,
8183 /* Real */ ae_matrix* b,
8184 ae_int_t ib,
8185 ae_int_t jb,
8186 double beta,
8187 /* Real */ ae_matrix* c,
8188 ae_int_t ic,
8189 ae_int_t jc,
8190 ae_state *_state)
8191 {
8192 ae_int_t i;
8193 ae_int_t j;
8194 double v;
8195 double v00;
8196 double v01;
8197 double v02;
8198 double v03;
8199 double v10;
8200 double v11;
8201 double v12;
8202 double v13;
8203 double v20;
8204 double v21;
8205 double v22;
8206 double v23;
8207 double v30;
8208 double v31;
8209 double v32;
8210 double v33;
8211 double a0;
8212 double a1;
8213 double a2;
8214 double a3;
8215 double b0;
8216 double b1;
8217 double b2;
8218 double b3;
8219 ae_int_t idxa0;
8220 ae_int_t idxa1;
8221 ae_int_t idxa2;
8222 ae_int_t idxa3;
8223 ae_int_t idxb0;
8224 ae_int_t idxb1;
8225 ae_int_t idxb2;
8226 ae_int_t idxb3;
8227 ae_int_t i0;
8228 ae_int_t i1;
8229 ae_int_t ik;
8230 ae_int_t j0;
8231 ae_int_t j1;
8232 ae_int_t jk;
8233 ae_int_t t;
8234 ae_int_t offsa;
8235 ae_int_t offsb;
8236
8237
8238 ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
8239
8240 /*
8241 * if matrix size is zero
8242 */
8243 if( m==0||n==0 )
8244 {
8245 return;
8246 }
8247
8248 /*
8249 * A'*B'
8250 */
8251 i = 0;
8252 while(i<m)
8253 {
8254 j = 0;
8255 while(j<n)
8256 {
8257
8258 /*
8259 * Choose between specialized 4x4 code and general code
8260 */
8261 if( i+4<=m&&j+4<=n )
8262 {
8263
8264 /*
8265 * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
8266 *
8267 * This submatrix is calculated as sum of K rank-1 products,
8268 * with operands cached in local variables in order to speed
8269 * up operations with arrays.
8270 */
8271 idxa0 = ja+i+0;
8272 idxa1 = ja+i+1;
8273 idxa2 = ja+i+2;
8274 idxa3 = ja+i+3;
8275 offsa = ia;
8276 idxb0 = ib+j+0;
8277 idxb1 = ib+j+1;
8278 idxb2 = ib+j+2;
8279 idxb3 = ib+j+3;
8280 offsb = jb;
8281 v00 = 0.0;
8282 v01 = 0.0;
8283 v02 = 0.0;
8284 v03 = 0.0;
8285 v10 = 0.0;
8286 v11 = 0.0;
8287 v12 = 0.0;
8288 v13 = 0.0;
8289 v20 = 0.0;
8290 v21 = 0.0;
8291 v22 = 0.0;
8292 v23 = 0.0;
8293 v30 = 0.0;
8294 v31 = 0.0;
8295 v32 = 0.0;
8296 v33 = 0.0;
8297 for(t=0; t<=k-1; t++)
8298 {
8299 a0 = a->ptr.pp_double[offsa][idxa0];
8300 a1 = a->ptr.pp_double[offsa][idxa1];
8301 b0 = b->ptr.pp_double[idxb0][offsb];
8302 b1 = b->ptr.pp_double[idxb1][offsb];
8303 v00 = v00+a0*b0;
8304 v01 = v01+a0*b1;
8305 v10 = v10+a1*b0;
8306 v11 = v11+a1*b1;
8307 a2 = a->ptr.pp_double[offsa][idxa2];
8308 a3 = a->ptr.pp_double[offsa][idxa3];
8309 v20 = v20+a2*b0;
8310 v21 = v21+a2*b1;
8311 v30 = v30+a3*b0;
8312 v31 = v31+a3*b1;
8313 b2 = b->ptr.pp_double[idxb2][offsb];
8314 b3 = b->ptr.pp_double[idxb3][offsb];
8315 v22 = v22+a2*b2;
8316 v23 = v23+a2*b3;
8317 v32 = v32+a3*b2;
8318 v33 = v33+a3*b3;
8319 v02 = v02+a0*b2;
8320 v03 = v03+a0*b3;
8321 v12 = v12+a1*b2;
8322 v13 = v13+a1*b3;
8323 offsa = offsa+1;
8324 offsb = offsb+1;
8325 }
8326 if( ae_fp_eq(beta,(double)(0)) )
8327 {
8328 c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
8329 c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
8330 c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
8331 c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
8332 c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
8333 c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
8334 c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
8335 c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
8336 c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
8337 c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
8338 c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
8339 c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
8340 c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
8341 c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
8342 c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
8343 c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
8344 }
8345 else
8346 {
8347 c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
8348 c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
8349 c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
8350 c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
8351 c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
8352 c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
8353 c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
8354 c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
8355 c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
8356 c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
8357 c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
8358 c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
8359 c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
8360 c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
8361 c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
8362 c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
8363 }
8364 }
8365 else
8366 {
8367
8368 /*
8369 * Determine submatrix [I0..I1]x[J0..J1] to process
8370 */
8371 i0 = i;
8372 i1 = ae_minint(i+3, m-1, _state);
8373 j0 = j;
8374 j1 = ae_minint(j+3, n-1, _state);
8375
8376 /*
8377 * Process submatrix
8378 */
8379 for(ik=i0; ik<=i1; ik++)
8380 {
8381 for(jk=j0; jk<=j1; jk++)
8382 {
8383 if( k==0||ae_fp_eq(alpha,(double)(0)) )
8384 {
8385 v = (double)(0);
8386 }
8387 else
8388 {
8389 v = 0.0;
8390 v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+ik], a->stride, &b->ptr.pp_double[ib+jk][jb], 1, ae_v_len(ia,ia+k-1));
8391 }
8392 if( ae_fp_eq(beta,(double)(0)) )
8393 {
8394 c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
8395 }
8396 else
8397 {
8398 c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
8399 }
8400 }
8401 }
8402 }
8403 j = j+4;
8404 }
8405 i = i+4;
8406 }
8407 }
8408
8409
8410 #ifdef ALGLIB_NO_FAST_KERNELS
8411 /*************************************************************************
8412 Fast kernel (new version with AVX2/SSE2)
8413
8414 -- ALGLIB routine --
8415 19.01.2010
8416 Bochkanov Sergey
8417 *************************************************************************/
ablasf_rgemm32basecase(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)8418 static ae_bool ablasf_rgemm32basecase(ae_int_t m,
8419 ae_int_t n,
8420 ae_int_t k,
8421 double alpha,
8422 /* Real */ ae_matrix* a,
8423 ae_int_t ia,
8424 ae_int_t ja,
8425 ae_int_t optypea,
8426 /* Real */ ae_matrix* b,
8427 ae_int_t ib,
8428 ae_int_t jb,
8429 ae_int_t optypeb,
8430 double beta,
8431 /* Real */ ae_matrix* c,
8432 ae_int_t ic,
8433 ae_int_t jc,
8434 ae_state *_state)
8435 {
8436 ae_bool result;
8437
8438
8439 result = ae_false;
8440 return result;
8441 }
8442 #endif
8443
8444
8445 #endif
8446 #if defined(AE_COMPILE_HBLAS) || !defined(AE_PARTIAL_BUILD)
8447
8448
hermitianmatrixvectormultiply(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,ae_complex alpha,ae_vector * y,ae_state * _state)8449 void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a,
8450 ae_bool isupper,
8451 ae_int_t i1,
8452 ae_int_t i2,
8453 /* Complex */ ae_vector* x,
8454 ae_complex alpha,
8455 /* Complex */ ae_vector* y,
8456 ae_state *_state)
8457 {
8458 ae_int_t i;
8459 ae_int_t ba1;
8460 ae_int_t by1;
8461 ae_int_t by2;
8462 ae_int_t bx1;
8463 ae_int_t bx2;
8464 ae_int_t n;
8465 ae_complex v;
8466
8467
8468 n = i2-i1+1;
8469 if( n<=0 )
8470 {
8471 return;
8472 }
8473
8474 /*
8475 * Let A = L + D + U, where
8476 * L is strictly lower triangular (main diagonal is zero)
8477 * D is diagonal
8478 * U is strictly upper triangular (main diagonal is zero)
8479 *
8480 * A*x = L*x + D*x + U*x
8481 *
8482 * Calculate D*x first
8483 */
8484 for(i=i1; i<=i2; i++)
8485 {
8486 y->ptr.p_complex[i-i1+1] = ae_c_mul(a->ptr.pp_complex[i][i],x->ptr.p_complex[i-i1+1]);
8487 }
8488
8489 /*
8490 * Add L*x + U*x
8491 */
8492 if( isupper )
8493 {
8494 for(i=i1; i<=i2-1; i++)
8495 {
8496
8497 /*
8498 * Add L*x to the result
8499 */
8500 v = x->ptr.p_complex[i-i1+1];
8501 by1 = i-i1+2;
8502 by2 = n;
8503 ba1 = i+1;
8504 ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
8505
8506 /*
8507 * Add U*x to the result
8508 */
8509 bx1 = i-i1+2;
8510 bx2 = n;
8511 ba1 = i+1;
8512 v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
8513 y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
8514 }
8515 }
8516 else
8517 {
8518 for(i=i1+1; i<=i2; i++)
8519 {
8520
8521 /*
8522 * Add L*x to the result
8523 */
8524 bx1 = 1;
8525 bx2 = i-i1;
8526 ba1 = i1;
8527 v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
8528 y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
8529
8530 /*
8531 * Add U*x to the result
8532 */
8533 v = x->ptr.p_complex[i-i1+1];
8534 by1 = 1;
8535 by2 = i-i1;
8536 ba1 = i1;
8537 ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
8538 }
8539 }
8540 ae_v_cmulc(&y->ptr.p_complex[1], 1, ae_v_len(1,n), alpha);
8541 }
8542
8543
hermitianrank2update(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,ae_vector * y,ae_vector * t,ae_complex alpha,ae_state * _state)8544 void hermitianrank2update(/* Complex */ ae_matrix* a,
8545 ae_bool isupper,
8546 ae_int_t i1,
8547 ae_int_t i2,
8548 /* Complex */ ae_vector* x,
8549 /* Complex */ ae_vector* y,
8550 /* Complex */ ae_vector* t,
8551 ae_complex alpha,
8552 ae_state *_state)
8553 {
8554 ae_int_t i;
8555 ae_int_t tp1;
8556 ae_int_t tp2;
8557 ae_complex v;
8558
8559
8560 if( isupper )
8561 {
8562 for(i=i1; i<=i2; i++)
8563 {
8564 tp1 = i+1-i1;
8565 tp2 = i2-i1+1;
8566 v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
8567 ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8568 v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
8569 ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8570 ae_v_cadd(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i,i2));
8571 }
8572 }
8573 else
8574 {
8575 for(i=i1; i<=i2; i++)
8576 {
8577 tp1 = 1;
8578 tp2 = i+1-i1;
8579 v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
8580 ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8581 v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
8582 ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8583 ae_v_cadd(&a->ptr.pp_complex[i][i1], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i1,i));
8584 }
8585 }
8586 }
8587
8588
8589 #endif
8590 #if defined(AE_COMPILE_CREFLECTIONS) || !defined(AE_PARTIAL_BUILD)
8591
8592
8593 /*************************************************************************
8594 Generation of an elementary complex reflection transformation
8595
8596 The subroutine generates elementary complex reflection H of order N, so
8597 that, for a given X, the following equality holds true:
8598
8599 ( X(1) ) ( Beta )
8600 H' * ( .. ) = ( 0 ), H'*H = I, Beta is a real number
8601 ( X(n) ) ( 0 )
8602
8603 where
8604
8605 ( V(1) )
8606 H = 1 - Tau * ( .. ) * ( conj(V(1)), ..., conj(V(n)) )
8607 ( V(n) )
8608
8609 where the first component of vector V equals 1.
8610
8611 Input parameters:
8612 X - vector. Array with elements [1..N].
8613 N - reflection order.
8614
8615 Output parameters:
8616 X - components from 2 to N are replaced by vector V.
8617 The first component is replaced with parameter Beta.
8618 Tau - scalar value Tau.
8619
8620 This subroutine is the modification of CLARFG subroutines from the LAPACK
8621 library. It has similar functionality except for the fact that it doesn't
8622 handle errors when intermediate results cause an overflow.
8623
8624 -- LAPACK auxiliary routine (version 3.0) --
8625 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8626 Courant Institute, Argonne National Lab, and Rice University
8627 September 30, 1994
8628 *************************************************************************/
complexgeneratereflection(ae_vector * x,ae_int_t n,ae_complex * tau,ae_state * _state)8629 void complexgeneratereflection(/* Complex */ ae_vector* x,
8630 ae_int_t n,
8631 ae_complex* tau,
8632 ae_state *_state)
8633 {
8634 ae_int_t j;
8635 ae_complex alpha;
8636 double alphi;
8637 double alphr;
8638 double beta;
8639 double xnorm;
8640 double mx;
8641 ae_complex t;
8642 double s;
8643 ae_complex v;
8644
8645 tau->x = 0;
8646 tau->y = 0;
8647
8648 if( n<=0 )
8649 {
8650 *tau = ae_complex_from_i(0);
8651 return;
8652 }
8653
8654 /*
8655 * Scale if needed (to avoid overflow/underflow during intermediate
8656 * calculations).
8657 */
8658 mx = (double)(0);
8659 for(j=1; j<=n; j++)
8660 {
8661 mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
8662 }
8663 s = (double)(1);
8664 if( ae_fp_neq(mx,(double)(0)) )
8665 {
8666 if( ae_fp_less(mx,(double)(1)) )
8667 {
8668 s = ae_sqrt(ae_minrealnumber, _state);
8669 v = ae_complex_from_d(1/s);
8670 ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
8671 }
8672 else
8673 {
8674 s = ae_sqrt(ae_maxrealnumber, _state);
8675 v = ae_complex_from_d(1/s);
8676 ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
8677 }
8678 }
8679
8680 /*
8681 * calculate
8682 */
8683 alpha = x->ptr.p_complex[1];
8684 mx = (double)(0);
8685 for(j=2; j<=n; j++)
8686 {
8687 mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
8688 }
8689 xnorm = (double)(0);
8690 if( ae_fp_neq(mx,(double)(0)) )
8691 {
8692 for(j=2; j<=n; j++)
8693 {
8694 t = ae_c_div_d(x->ptr.p_complex[j],mx);
8695 xnorm = xnorm+ae_c_mul(t,ae_c_conj(t, _state)).x;
8696 }
8697 xnorm = ae_sqrt(xnorm, _state)*mx;
8698 }
8699 alphr = alpha.x;
8700 alphi = alpha.y;
8701 if( ae_fp_eq(xnorm,(double)(0))&&ae_fp_eq(alphi,(double)(0)) )
8702 {
8703 *tau = ae_complex_from_i(0);
8704 x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
8705 return;
8706 }
8707 mx = ae_maxreal(ae_fabs(alphr, _state), ae_fabs(alphi, _state), _state);
8708 mx = ae_maxreal(mx, ae_fabs(xnorm, _state), _state);
8709 beta = -mx*ae_sqrt(ae_sqr(alphr/mx, _state)+ae_sqr(alphi/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
8710 if( ae_fp_less(alphr,(double)(0)) )
8711 {
8712 beta = -beta;
8713 }
8714 tau->x = (beta-alphr)/beta;
8715 tau->y = -alphi/beta;
8716 alpha = ae_c_d_div(1,ae_c_sub_d(alpha,beta));
8717 if( n>1 )
8718 {
8719 ae_v_cmulc(&x->ptr.p_complex[2], 1, ae_v_len(2,n), alpha);
8720 }
8721 alpha = ae_complex_from_d(beta);
8722 x->ptr.p_complex[1] = alpha;
8723
8724 /*
8725 * Scale back
8726 */
8727 x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
8728 }
8729
8730
8731 /*************************************************************************
8732 Application of an elementary reflection to a rectangular matrix of size MxN
8733
8734 The algorithm pre-multiplies the matrix by an elementary reflection
8735 transformation which is given by column V and scalar Tau (see the
8736 description of the GenerateReflection). Not the whole matrix but only a
8737 part of it is transformed (rows from M1 to M2, columns from N1 to N2). Only
8738 the elements of this submatrix are changed.
8739
8740 Note: the matrix is multiplied by H, not by H'. If it is required to
8741 multiply the matrix by H', it is necessary to pass Conj(Tau) instead of Tau.
8742
8743 Input parameters:
8744 C - matrix to be transformed.
8745 Tau - scalar defining transformation.
8746 V - column defining transformation.
8747 Array whose index ranges within [1..M2-M1+1]
8748 M1, M2 - range of rows to be transformed.
8749 N1, N2 - range of columns to be transformed.
8750 WORK - working array whose index goes from N1 to N2.
8751
8752 Output parameters:
8753 C - the result of multiplying the input matrix C by the
8754 transformation matrix which is given by Tau and V.
8755 If N1>N2 or M1>M2, C is not modified.
8756
8757 -- LAPACK auxiliary routine (version 3.0) --
8758 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8759 Courant Institute, Argonne National Lab, and Rice University
8760 September 30, 1994
8761 *************************************************************************/
complexapplyreflectionfromtheleft(ae_matrix * c,ae_complex tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)8762 void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c,
8763 ae_complex tau,
8764 /* Complex */ ae_vector* v,
8765 ae_int_t m1,
8766 ae_int_t m2,
8767 ae_int_t n1,
8768 ae_int_t n2,
8769 /* Complex */ ae_vector* work,
8770 ae_state *_state)
8771 {
8772 ae_complex t;
8773 ae_int_t i;
8774
8775
8776 if( (ae_c_eq_d(tau,(double)(0))||n1>n2)||m1>m2 )
8777 {
8778 return;
8779 }
8780
8781 /*
8782 * w := C^T * conj(v)
8783 */
8784 for(i=n1; i<=n2; i++)
8785 {
8786 work->ptr.p_complex[i] = ae_complex_from_i(0);
8787 }
8788 for(i=m1; i<=m2; i++)
8789 {
8790 t = ae_c_conj(v->ptr.p_complex[i+1-m1], _state);
8791 ae_v_caddc(&work->ptr.p_complex[n1], 1, &c->ptr.pp_complex[i][n1], 1, "N", ae_v_len(n1,n2), t);
8792 }
8793
8794 /*
8795 * C := C - tau * v * w^T
8796 */
8797 for(i=m1; i<=m2; i++)
8798 {
8799 t = ae_c_mul(v->ptr.p_complex[i-m1+1],tau);
8800 ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &work->ptr.p_complex[n1], 1, "N", ae_v_len(n1,n2), t);
8801 }
8802 }
8803
8804
8805 /*************************************************************************
8806 Application of an elementary reflection to a rectangular matrix of size MxN
8807
8808 The algorithm post-multiplies the matrix by an elementary reflection
8809 transformation which is given by column V and scalar Tau (see the
8810 description of the GenerateReflection). Not the whole matrix but only a
8811 part of it is transformed (rows from M1 to M2, columns from N1 to N2).
8812 Only the elements of this submatrix are changed.
8813
8814 Input parameters:
8815 C - matrix to be transformed.
8816 Tau - scalar defining transformation.
8817 V - column defining transformation.
8818 Array whose index ranges within [1..N2-N1+1]
8819 M1, M2 - range of rows to be transformed.
8820 N1, N2 - range of columns to be transformed.
8821 WORK - working array whose index goes from M1 to M2.
8822
8823 Output parameters:
8824 C - the result of multiplying the input matrix C by the
8825 transformation matrix which is given by Tau and V.
8826 If N1>N2 or M1>M2, C is not modified.
8827
8828 -- LAPACK auxiliary routine (version 3.0) --
8829 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8830 Courant Institute, Argonne National Lab, and Rice University
8831 September 30, 1994
8832 *************************************************************************/
complexapplyreflectionfromtheright(ae_matrix * c,ae_complex tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)8833 void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c,
8834 ae_complex tau,
8835 /* Complex */ ae_vector* v,
8836 ae_int_t m1,
8837 ae_int_t m2,
8838 ae_int_t n1,
8839 ae_int_t n2,
8840 /* Complex */ ae_vector* work,
8841 ae_state *_state)
8842 {
8843 ae_complex t;
8844 ae_int_t i;
8845 ae_int_t vm;
8846
8847
8848 if( (ae_c_eq_d(tau,(double)(0))||n1>n2)||m1>m2 )
8849 {
8850 return;
8851 }
8852
8853 /*
8854 * w := C * v
8855 */
8856 vm = n2-n1+1;
8857 for(i=m1; i<=m2; i++)
8858 {
8859 t = ae_v_cdotproduct(&c->ptr.pp_complex[i][n1], 1, "N", &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2));
8860 work->ptr.p_complex[i] = t;
8861 }
8862
8863 /*
8864 * C := C - w * conj(v^T)
8865 */
8866 ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
8867 for(i=m1; i<=m2; i++)
8868 {
8869 t = ae_c_mul(work->ptr.p_complex[i],tau);
8870 ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2), t);
8871 }
8872 ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
8873 }
8874
8875
8876 #endif
8877 #if defined(AE_COMPILE_SBLAS) || !defined(AE_PARTIAL_BUILD)
8878
8879
symmetricmatrixvectormultiply(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,double alpha,ae_vector * y,ae_state * _state)8880 void symmetricmatrixvectormultiply(/* Real */ ae_matrix* a,
8881 ae_bool isupper,
8882 ae_int_t i1,
8883 ae_int_t i2,
8884 /* Real */ ae_vector* x,
8885 double alpha,
8886 /* Real */ ae_vector* y,
8887 ae_state *_state)
8888 {
8889 ae_int_t i;
8890 ae_int_t ba1;
8891 ae_int_t ba2;
8892 ae_int_t by1;
8893 ae_int_t by2;
8894 ae_int_t bx1;
8895 ae_int_t bx2;
8896 ae_int_t n;
8897 double v;
8898
8899
8900 n = i2-i1+1;
8901 if( n<=0 )
8902 {
8903 return;
8904 }
8905
8906 /*
8907 * Let A = L + D + U, where
8908 * L is strictly lower triangular (main diagonal is zero)
8909 * D is diagonal
8910 * U is strictly upper triangular (main diagonal is zero)
8911 *
8912 * A*x = L*x + D*x + U*x
8913 *
8914 * Calculate D*x first
8915 */
8916 for(i=i1; i<=i2; i++)
8917 {
8918 y->ptr.p_double[i-i1+1] = a->ptr.pp_double[i][i]*x->ptr.p_double[i-i1+1];
8919 }
8920
8921 /*
8922 * Add L*x + U*x
8923 */
8924 if( isupper )
8925 {
8926 for(i=i1; i<=i2-1; i++)
8927 {
8928
8929 /*
8930 * Add L*x to the result
8931 */
8932 v = x->ptr.p_double[i-i1+1];
8933 by1 = i-i1+2;
8934 by2 = n;
8935 ba1 = i+1;
8936 ba2 = i2;
8937 ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
8938
8939 /*
8940 * Add U*x to the result
8941 */
8942 bx1 = i-i1+2;
8943 bx2 = n;
8944 ba1 = i+1;
8945 ba2 = i2;
8946 v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
8947 y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
8948 }
8949 }
8950 else
8951 {
8952 for(i=i1+1; i<=i2; i++)
8953 {
8954
8955 /*
8956 * Add L*x to the result
8957 */
8958 bx1 = 1;
8959 bx2 = i-i1;
8960 ba1 = i1;
8961 ba2 = i-1;
8962 v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
8963 y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
8964
8965 /*
8966 * Add U*x to the result
8967 */
8968 v = x->ptr.p_double[i-i1+1];
8969 by1 = 1;
8970 by2 = i-i1;
8971 ba1 = i1;
8972 ba2 = i-1;
8973 ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
8974 }
8975 }
8976 ae_v_muld(&y->ptr.p_double[1], 1, ae_v_len(1,n), alpha);
8977 touchint(&ba2, _state);
8978 }
8979
8980
symmetricrank2update(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,ae_vector * y,ae_vector * t,double alpha,ae_state * _state)8981 void symmetricrank2update(/* Real */ ae_matrix* a,
8982 ae_bool isupper,
8983 ae_int_t i1,
8984 ae_int_t i2,
8985 /* Real */ ae_vector* x,
8986 /* Real */ ae_vector* y,
8987 /* Real */ ae_vector* t,
8988 double alpha,
8989 ae_state *_state)
8990 {
8991 ae_int_t i;
8992 ae_int_t tp1;
8993 ae_int_t tp2;
8994 double v;
8995
8996
8997 if( isupper )
8998 {
8999 for(i=i1; i<=i2; i++)
9000 {
9001 tp1 = i+1-i1;
9002 tp2 = i2-i1+1;
9003 v = x->ptr.p_double[i+1-i1];
9004 ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9005 v = y->ptr.p_double[i+1-i1];
9006 ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9007 ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
9008 ae_v_add(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i,i2));
9009 }
9010 }
9011 else
9012 {
9013 for(i=i1; i<=i2; i++)
9014 {
9015 tp1 = 1;
9016 tp2 = i+1-i1;
9017 v = x->ptr.p_double[i+1-i1];
9018 ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9019 v = y->ptr.p_double[i+1-i1];
9020 ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9021 ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
9022 ae_v_add(&a->ptr.pp_double[i][i1], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i1,i));
9023 }
9024 }
9025 }
9026
9027
9028 #endif
9029 #if defined(AE_COMPILE_ABLASMKL) || !defined(AE_PARTIAL_BUILD)
9030
9031
9032 /*************************************************************************
9033 MKL-based kernel
9034
9035 -- ALGLIB routine --
9036 12.10.2017
9037 Bochkanov Sergey
9038 *************************************************************************/
rmatrixgermkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double alpha,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)9039 ae_bool rmatrixgermkl(ae_int_t m,
9040 ae_int_t n,
9041 /* Real */ ae_matrix* a,
9042 ae_int_t ia,
9043 ae_int_t ja,
9044 double alpha,
9045 /* Real */ ae_vector* u,
9046 ae_int_t iu,
9047 /* Real */ ae_vector* v,
9048 ae_int_t iv,
9049 ae_state *_state)
9050 {
9051 #ifndef ALGLIB_INTERCEPTS_MKL
9052 ae_bool result;
9053
9054
9055 result = ae_false;
9056 return result;
9057 #else
9058 return _ialglib_i_rmatrixgermkl(m, n, a, ia, ja, alpha, u, iu, v, iv);
9059 #endif
9060 }
9061
9062
9063 /*************************************************************************
9064 MKL-based kernel
9065
9066 -- ALGLIB routine --
9067 12.10.2017
9068 Bochkanov Sergey
9069 *************************************************************************/
cmatrixrank1mkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)9070 ae_bool cmatrixrank1mkl(ae_int_t m,
9071 ae_int_t n,
9072 /* Complex */ ae_matrix* a,
9073 ae_int_t ia,
9074 ae_int_t ja,
9075 /* Complex */ ae_vector* u,
9076 ae_int_t iu,
9077 /* Complex */ ae_vector* v,
9078 ae_int_t iv,
9079 ae_state *_state)
9080 {
9081 #ifndef ALGLIB_INTERCEPTS_MKL
9082 ae_bool result;
9083
9084
9085 result = ae_false;
9086 return result;
9087 #else
9088 return _ialglib_i_cmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv);
9089 #endif
9090 }
9091
9092
9093 /*************************************************************************
9094 MKL-based kernel
9095
9096 -- ALGLIB routine --
9097 12.10.2017
9098 Bochkanov Sergey
9099 *************************************************************************/
rmatrixrank1mkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)9100 ae_bool rmatrixrank1mkl(ae_int_t m,
9101 ae_int_t n,
9102 /* Real */ ae_matrix* a,
9103 ae_int_t ia,
9104 ae_int_t ja,
9105 /* Real */ ae_vector* u,
9106 ae_int_t iu,
9107 /* Real */ ae_vector* v,
9108 ae_int_t iv,
9109 ae_state *_state)
9110 {
9111 #ifndef ALGLIB_INTERCEPTS_MKL
9112 ae_bool result;
9113
9114
9115 result = ae_false;
9116 return result;
9117 #else
9118 return _ialglib_i_rmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv);
9119 #endif
9120 }
9121
9122
9123 /*************************************************************************
9124 MKL-based kernel
9125
9126 -- ALGLIB routine --
9127 12.10.2017
9128 Bochkanov Sergey
9129 *************************************************************************/
cmatrixmvmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)9130 ae_bool cmatrixmvmkl(ae_int_t m,
9131 ae_int_t n,
9132 /* Complex */ ae_matrix* a,
9133 ae_int_t ia,
9134 ae_int_t ja,
9135 ae_int_t opa,
9136 /* Complex */ ae_vector* x,
9137 ae_int_t ix,
9138 /* Complex */ ae_vector* y,
9139 ae_int_t iy,
9140 ae_state *_state)
9141 {
9142 #ifndef ALGLIB_INTERCEPTS_MKL
9143 ae_bool result;
9144
9145
9146 result = ae_false;
9147 return result;
9148 #else
9149 return _ialglib_i_cmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy);
9150 #endif
9151 }
9152
9153
9154 /*************************************************************************
9155 MKL-based kernel
9156
9157 -- ALGLIB routine --
9158 12.10.2017
9159 Bochkanov Sergey
9160 *************************************************************************/
rmatrixmvmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)9161 ae_bool rmatrixmvmkl(ae_int_t m,
9162 ae_int_t n,
9163 /* Real */ ae_matrix* a,
9164 ae_int_t ia,
9165 ae_int_t ja,
9166 ae_int_t opa,
9167 /* Real */ ae_vector* x,
9168 ae_int_t ix,
9169 /* Real */ ae_vector* y,
9170 ae_int_t iy,
9171 ae_state *_state)
9172 {
9173 #ifndef ALGLIB_INTERCEPTS_MKL
9174 ae_bool result;
9175
9176
9177 result = ae_false;
9178 return result;
9179 #else
9180 return _ialglib_i_rmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy);
9181 #endif
9182 }
9183
9184
9185 /*************************************************************************
9186 MKL-based kernel
9187
9188 -- ALGLIB routine --
9189 12.10.2017
9190 Bochkanov Sergey
9191 *************************************************************************/
rmatrixgemvmkl(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)9192 ae_bool rmatrixgemvmkl(ae_int_t m,
9193 ae_int_t n,
9194 double alpha,
9195 /* Real */ ae_matrix* a,
9196 ae_int_t ia,
9197 ae_int_t ja,
9198 ae_int_t opa,
9199 /* Real */ ae_vector* x,
9200 ae_int_t ix,
9201 double beta,
9202 /* Real */ ae_vector* y,
9203 ae_int_t iy,
9204 ae_state *_state)
9205 {
9206 #ifndef ALGLIB_INTERCEPTS_MKL
9207 ae_bool result;
9208
9209
9210 result = ae_false;
9211 return result;
9212 #else
9213 return _ialglib_i_rmatrixgemvmkl(m, n, alpha, a, ia, ja, opa, x, ix, beta, y, iy);
9214 #endif
9215 }
9216
9217
9218 /*************************************************************************
9219 MKL-based kernel
9220
9221 -- ALGLIB routine --
9222 12.10.2017
9223 Bochkanov Sergey
9224 *************************************************************************/
rmatrixtrsvmkl(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_int_t ix,ae_state * _state)9225 ae_bool rmatrixtrsvmkl(ae_int_t n,
9226 /* Real */ ae_matrix* a,
9227 ae_int_t ia,
9228 ae_int_t ja,
9229 ae_bool isupper,
9230 ae_bool isunit,
9231 ae_int_t optype,
9232 /* Real */ ae_vector* x,
9233 ae_int_t ix,
9234 ae_state *_state)
9235 {
9236 #ifndef ALGLIB_INTERCEPTS_MKL
9237 ae_bool result;
9238
9239
9240 result = ae_false;
9241 return result;
9242 #else
9243 return _ialglib_i_rmatrixtrsvmkl(n, a, ia, ja, isupper, isunit, optype, x, ix);
9244 #endif
9245 }
9246
9247
9248 /*************************************************************************
9249 MKL-based kernel
9250
9251 -- ALGLIB routine --
9252 01.10.2013
9253 Bochkanov Sergey
9254 *************************************************************************/
rmatrixsyrkmkl(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)9255 ae_bool rmatrixsyrkmkl(ae_int_t n,
9256 ae_int_t k,
9257 double alpha,
9258 /* Real */ ae_matrix* a,
9259 ae_int_t ia,
9260 ae_int_t ja,
9261 ae_int_t optypea,
9262 double beta,
9263 /* Real */ ae_matrix* c,
9264 ae_int_t ic,
9265 ae_int_t jc,
9266 ae_bool isupper,
9267 ae_state *_state)
9268 {
9269 #ifndef ALGLIB_INTERCEPTS_MKL
9270 ae_bool result;
9271
9272
9273 result = ae_false;
9274 return result;
9275 #else
9276 return _ialglib_i_rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
9277 #endif
9278 }
9279
9280
9281 /*************************************************************************
9282 MKL-based kernel
9283
9284 -- ALGLIB routine --
9285 01.10.2013
9286 Bochkanov Sergey
9287 *************************************************************************/
cmatrixherkmkl(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)9288 ae_bool cmatrixherkmkl(ae_int_t n,
9289 ae_int_t k,
9290 double alpha,
9291 /* Complex */ ae_matrix* a,
9292 ae_int_t ia,
9293 ae_int_t ja,
9294 ae_int_t optypea,
9295 double beta,
9296 /* Complex */ ae_matrix* c,
9297 ae_int_t ic,
9298 ae_int_t jc,
9299 ae_bool isupper,
9300 ae_state *_state)
9301 {
9302 #ifndef ALGLIB_INTERCEPTS_MKL
9303 ae_bool result;
9304
9305
9306 result = ae_false;
9307 return result;
9308 #else
9309 return _ialglib_i_cmatrixherkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
9310 #endif
9311 }
9312
9313
9314 /*************************************************************************
9315 MKL-based kernel
9316
9317 -- ALGLIB routine --
9318 01.10.2013
9319 Bochkanov Sergey
9320 *************************************************************************/
rmatrixgemmmkl(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)9321 ae_bool rmatrixgemmmkl(ae_int_t m,
9322 ae_int_t n,
9323 ae_int_t k,
9324 double alpha,
9325 /* Real */ ae_matrix* a,
9326 ae_int_t ia,
9327 ae_int_t ja,
9328 ae_int_t optypea,
9329 /* Real */ ae_matrix* b,
9330 ae_int_t ib,
9331 ae_int_t jb,
9332 ae_int_t optypeb,
9333 double beta,
9334 /* Real */ ae_matrix* c,
9335 ae_int_t ic,
9336 ae_int_t jc,
9337 ae_state *_state)
9338 {
9339 #ifndef ALGLIB_INTERCEPTS_MKL
9340 ae_bool result;
9341
9342
9343 result = ae_false;
9344 return result;
9345 #else
9346 return _ialglib_i_rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
9347 #endif
9348 }
9349
9350
9351 /*************************************************************************
9352 MKL-based kernel
9353
9354 -- ALGLIB routine --
9355 01.10.2017
9356 Bochkanov Sergey
9357 *************************************************************************/
rmatrixsymvmkl(ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)9358 ae_bool rmatrixsymvmkl(ae_int_t n,
9359 double alpha,
9360 /* Real */ ae_matrix* a,
9361 ae_int_t ia,
9362 ae_int_t ja,
9363 ae_bool isupper,
9364 /* Real */ ae_vector* x,
9365 ae_int_t ix,
9366 double beta,
9367 /* Real */ ae_vector* y,
9368 ae_int_t iy,
9369 ae_state *_state)
9370 {
9371 #ifndef ALGLIB_INTERCEPTS_MKL
9372 ae_bool result;
9373
9374
9375 result = ae_false;
9376 return result;
9377 #else
9378 return _ialglib_i_rmatrixsymvmkl(n, alpha, a, ia, ja, isupper, x, ix, beta, y, iy);
9379 #endif
9380 }
9381
9382
9383 /*************************************************************************
9384 MKL-based kernel
9385
9386 -- ALGLIB routine --
9387 16.10.2014
9388 Bochkanov Sergey
9389 *************************************************************************/
cmatrixgemmmkl(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)9390 ae_bool cmatrixgemmmkl(ae_int_t m,
9391 ae_int_t n,
9392 ae_int_t k,
9393 ae_complex alpha,
9394 /* Complex */ ae_matrix* a,
9395 ae_int_t ia,
9396 ae_int_t ja,
9397 ae_int_t optypea,
9398 /* Complex */ ae_matrix* b,
9399 ae_int_t ib,
9400 ae_int_t jb,
9401 ae_int_t optypeb,
9402 ae_complex beta,
9403 /* Complex */ ae_matrix* c,
9404 ae_int_t ic,
9405 ae_int_t jc,
9406 ae_state *_state)
9407 {
9408 #ifndef ALGLIB_INTERCEPTS_MKL
9409 ae_bool result;
9410
9411
9412 result = ae_false;
9413 return result;
9414 #else
9415 return _ialglib_i_cmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
9416 #endif
9417 }
9418
9419
9420 /*************************************************************************
9421 MKL-based kernel
9422
9423 -- ALGLIB routine --
9424 16.10.2014
9425 Bochkanov Sergey
9426 *************************************************************************/
cmatrixlefttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9427 ae_bool cmatrixlefttrsmmkl(ae_int_t m,
9428 ae_int_t n,
9429 /* Complex */ ae_matrix* a,
9430 ae_int_t i1,
9431 ae_int_t j1,
9432 ae_bool isupper,
9433 ae_bool isunit,
9434 ae_int_t optype,
9435 /* Complex */ ae_matrix* x,
9436 ae_int_t i2,
9437 ae_int_t j2,
9438 ae_state *_state)
9439 {
9440 #ifndef ALGLIB_INTERCEPTS_MKL
9441 ae_bool result;
9442
9443
9444 result = ae_false;
9445 return result;
9446 #else
9447 return _ialglib_i_cmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9448 #endif
9449 }
9450
9451
9452 /*************************************************************************
9453 MKL-based kernel
9454
9455 -- ALGLIB routine --
9456 16.10.2014
9457 Bochkanov Sergey
9458 *************************************************************************/
cmatrixrighttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9459 ae_bool cmatrixrighttrsmmkl(ae_int_t m,
9460 ae_int_t n,
9461 /* Complex */ ae_matrix* a,
9462 ae_int_t i1,
9463 ae_int_t j1,
9464 ae_bool isupper,
9465 ae_bool isunit,
9466 ae_int_t optype,
9467 /* Complex */ ae_matrix* x,
9468 ae_int_t i2,
9469 ae_int_t j2,
9470 ae_state *_state)
9471 {
9472 #ifndef ALGLIB_INTERCEPTS_MKL
9473 ae_bool result;
9474
9475
9476 result = ae_false;
9477 return result;
9478 #else
9479 return _ialglib_i_cmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9480 #endif
9481 }
9482
9483
9484 /*************************************************************************
9485 MKL-based kernel
9486
9487 -- ALGLIB routine --
9488 16.10.2014
9489 Bochkanov Sergey
9490 *************************************************************************/
rmatrixlefttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9491 ae_bool rmatrixlefttrsmmkl(ae_int_t m,
9492 ae_int_t n,
9493 /* Real */ ae_matrix* a,
9494 ae_int_t i1,
9495 ae_int_t j1,
9496 ae_bool isupper,
9497 ae_bool isunit,
9498 ae_int_t optype,
9499 /* Real */ ae_matrix* x,
9500 ae_int_t i2,
9501 ae_int_t j2,
9502 ae_state *_state)
9503 {
9504 #ifndef ALGLIB_INTERCEPTS_MKL
9505 ae_bool result;
9506
9507
9508 result = ae_false;
9509 return result;
9510 #else
9511 return _ialglib_i_rmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9512 #endif
9513 }
9514
9515
9516 /*************************************************************************
9517 MKL-based kernel
9518
9519 -- ALGLIB routine --
9520 16.10.2014
9521 Bochkanov Sergey
9522 *************************************************************************/
rmatrixrighttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9523 ae_bool rmatrixrighttrsmmkl(ae_int_t m,
9524 ae_int_t n,
9525 /* Real */ ae_matrix* a,
9526 ae_int_t i1,
9527 ae_int_t j1,
9528 ae_bool isupper,
9529 ae_bool isunit,
9530 ae_int_t optype,
9531 /* Real */ ae_matrix* x,
9532 ae_int_t i2,
9533 ae_int_t j2,
9534 ae_state *_state)
9535 {
9536 #ifndef ALGLIB_INTERCEPTS_MKL
9537 ae_bool result;
9538
9539
9540 result = ae_false;
9541 return result;
9542 #else
9543 return _ialglib_i_rmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9544 #endif
9545 }
9546
9547
9548 /*************************************************************************
9549 MKL-based kernel.
9550
9551 NOTE:
9552
9553 if function returned False, CholResult is NOT modified. Not ever referenced!
9554 if function returned True, CholResult is set to status of Cholesky decomposition
9555 (True on succeess).
9556
9557 -- ALGLIB routine --
9558 16.10.2014
9559 Bochkanov Sergey
9560 *************************************************************************/
spdmatrixcholeskymkl(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_bool * cholresult,ae_state * _state)9561 ae_bool spdmatrixcholeskymkl(/* Real */ ae_matrix* a,
9562 ae_int_t offs,
9563 ae_int_t n,
9564 ae_bool isupper,
9565 ae_bool* cholresult,
9566 ae_state *_state)
9567 {
9568 #ifndef ALGLIB_INTERCEPTS_MKL
9569 ae_bool result;
9570
9571
9572 result = ae_false;
9573 return result;
9574 #else
9575 return _ialglib_i_spdmatrixcholeskymkl(a, offs, n, isupper, cholresult);
9576 #endif
9577 }
9578
9579
9580 /*************************************************************************
9581 MKL-based kernel.
9582
9583 -- ALGLIB routine --
9584 20.10.2014
9585 Bochkanov Sergey
9586 *************************************************************************/
rmatrixplumkl(ae_matrix * a,ae_int_t offs,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)9587 ae_bool rmatrixplumkl(/* Real */ ae_matrix* a,
9588 ae_int_t offs,
9589 ae_int_t m,
9590 ae_int_t n,
9591 /* Integer */ ae_vector* pivots,
9592 ae_state *_state)
9593 {
9594 #ifndef ALGLIB_INTERCEPTS_MKL
9595 ae_bool result;
9596
9597
9598 result = ae_false;
9599 return result;
9600 #else
9601 return _ialglib_i_rmatrixplumkl(a, offs, m, n, pivots);
9602 #endif
9603 }
9604
9605
9606 /*************************************************************************
9607 MKL-based kernel.
9608
9609 NOTE: this function needs preallocated output/temporary arrays.
9610 D and E must be at least max(M,N)-wide.
9611
9612 -- ALGLIB routine --
9613 20.10.2014
9614 Bochkanov Sergey
9615 *************************************************************************/
rmatrixbdmkl(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * d,ae_vector * e,ae_vector * tauq,ae_vector * taup,ae_state * _state)9616 ae_bool rmatrixbdmkl(/* Real */ ae_matrix* a,
9617 ae_int_t m,
9618 ae_int_t n,
9619 /* Real */ ae_vector* d,
9620 /* Real */ ae_vector* e,
9621 /* Real */ ae_vector* tauq,
9622 /* Real */ ae_vector* taup,
9623 ae_state *_state)
9624 {
9625 #ifndef ALGLIB_INTERCEPTS_MKL
9626 ae_bool result;
9627
9628
9629 result = ae_false;
9630 return result;
9631 #else
9632 return _ialglib_i_rmatrixbdmkl(a, m, n, d, e, tauq, taup);
9633 #endif
9634 }
9635
9636
9637 /*************************************************************************
9638 MKL-based kernel.
9639
9640 If ByQ is True, TauP is not used (can be empty array).
9641 If ByQ is False, TauQ is not used (can be empty array).
9642
9643 -- ALGLIB routine --
9644 20.10.2014
9645 Bochkanov Sergey
9646 *************************************************************************/
rmatrixbdmultiplybymkl(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_vector * taup,ae_matrix * z,ae_int_t zrows,ae_int_t zcolumns,ae_bool byq,ae_bool fromtheright,ae_bool dotranspose,ae_state * _state)9647 ae_bool rmatrixbdmultiplybymkl(/* Real */ ae_matrix* qp,
9648 ae_int_t m,
9649 ae_int_t n,
9650 /* Real */ ae_vector* tauq,
9651 /* Real */ ae_vector* taup,
9652 /* Real */ ae_matrix* z,
9653 ae_int_t zrows,
9654 ae_int_t zcolumns,
9655 ae_bool byq,
9656 ae_bool fromtheright,
9657 ae_bool dotranspose,
9658 ae_state *_state)
9659 {
9660 #ifndef ALGLIB_INTERCEPTS_MKL
9661 ae_bool result;
9662
9663
9664 result = ae_false;
9665 return result;
9666 #else
9667 return _ialglib_i_rmatrixbdmultiplybymkl(qp, m, n, tauq, taup, z, zrows, zcolumns, byq, fromtheright, dotranspose);
9668 #endif
9669 }
9670
9671
9672 /*************************************************************************
9673 MKL-based kernel.
9674
9675 NOTE: Tau must be preallocated array with at least N-1 elements.
9676
9677 -- ALGLIB routine --
9678 20.10.2014
9679 Bochkanov Sergey
9680 *************************************************************************/
rmatrixhessenbergmkl(ae_matrix * a,ae_int_t n,ae_vector * tau,ae_state * _state)9681 ae_bool rmatrixhessenbergmkl(/* Real */ ae_matrix* a,
9682 ae_int_t n,
9683 /* Real */ ae_vector* tau,
9684 ae_state *_state)
9685 {
9686 #ifndef ALGLIB_INTERCEPTS_MKL
9687 ae_bool result;
9688
9689
9690 result = ae_false;
9691 return result;
9692 #else
9693 return _ialglib_i_rmatrixhessenbergmkl(a, n, tau);
9694 #endif
9695 }
9696
9697
9698 /*************************************************************************
9699 MKL-based kernel.
9700
9701 NOTE: Q must be preallocated N*N array
9702
9703 -- ALGLIB routine --
9704 20.10.2014
9705 Bochkanov Sergey
9706 *************************************************************************/
rmatrixhessenbergunpackqmkl(ae_matrix * a,ae_int_t n,ae_vector * tau,ae_matrix * q,ae_state * _state)9707 ae_bool rmatrixhessenbergunpackqmkl(/* Real */ ae_matrix* a,
9708 ae_int_t n,
9709 /* Real */ ae_vector* tau,
9710 /* Real */ ae_matrix* q,
9711 ae_state *_state)
9712 {
9713 #ifndef ALGLIB_INTERCEPTS_MKL
9714 ae_bool result;
9715
9716
9717 result = ae_false;
9718 return result;
9719 #else
9720 return _ialglib_i_rmatrixhessenbergunpackqmkl(a, n, tau, q);
9721 #endif
9722 }
9723
9724
9725 /*************************************************************************
9726 MKL-based kernel.
9727
9728 NOTE: Tau, D, E must be preallocated arrays;
9729 length(E)=length(Tau)=N-1 (or larger)
9730 length(D)=N (or larger)
9731
9732 -- ALGLIB routine --
9733 20.10.2014
9734 Bochkanov Sergey
9735 *************************************************************************/
smatrixtdmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_vector * d,ae_vector * e,ae_state * _state)9736 ae_bool smatrixtdmkl(/* Real */ ae_matrix* a,
9737 ae_int_t n,
9738 ae_bool isupper,
9739 /* Real */ ae_vector* tau,
9740 /* Real */ ae_vector* d,
9741 /* Real */ ae_vector* e,
9742 ae_state *_state)
9743 {
9744 #ifndef ALGLIB_INTERCEPTS_MKL
9745 ae_bool result;
9746
9747
9748 result = ae_false;
9749 return result;
9750 #else
9751 return _ialglib_i_smatrixtdmkl(a, n, isupper, tau, d, e);
9752 #endif
9753 }
9754
9755
9756 /*************************************************************************
9757 MKL-based kernel.
9758
9759 NOTE: Q must be preallocated N*N array
9760
9761 -- ALGLIB routine --
9762 20.10.2014
9763 Bochkanov Sergey
9764 *************************************************************************/
smatrixtdunpackqmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_matrix * q,ae_state * _state)9765 ae_bool smatrixtdunpackqmkl(/* Real */ ae_matrix* a,
9766 ae_int_t n,
9767 ae_bool isupper,
9768 /* Real */ ae_vector* tau,
9769 /* Real */ ae_matrix* q,
9770 ae_state *_state)
9771 {
9772 #ifndef ALGLIB_INTERCEPTS_MKL
9773 ae_bool result;
9774
9775
9776 result = ae_false;
9777 return result;
9778 #else
9779 return _ialglib_i_smatrixtdunpackqmkl(a, n, isupper, tau, q);
9780 #endif
9781 }
9782
9783
9784 /*************************************************************************
9785 MKL-based kernel.
9786
9787 NOTE: Tau, D, E must be preallocated arrays;
9788 length(E)=length(Tau)=N-1 (or larger)
9789 length(D)=N (or larger)
9790
9791 -- ALGLIB routine --
9792 20.10.2014
9793 Bochkanov Sergey
9794 *************************************************************************/
hmatrixtdmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_vector * d,ae_vector * e,ae_state * _state)9795 ae_bool hmatrixtdmkl(/* Complex */ ae_matrix* a,
9796 ae_int_t n,
9797 ae_bool isupper,
9798 /* Complex */ ae_vector* tau,
9799 /* Real */ ae_vector* d,
9800 /* Real */ ae_vector* e,
9801 ae_state *_state)
9802 {
9803 #ifndef ALGLIB_INTERCEPTS_MKL
9804 ae_bool result;
9805
9806
9807 result = ae_false;
9808 return result;
9809 #else
9810 return _ialglib_i_hmatrixtdmkl(a, n, isupper, tau, d, e);
9811 #endif
9812 }
9813
9814
9815 /*************************************************************************
9816 MKL-based kernel.
9817
9818 NOTE: Q must be preallocated N*N array
9819
9820 -- ALGLIB routine --
9821 20.10.2014
9822 Bochkanov Sergey
9823 *************************************************************************/
hmatrixtdunpackqmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_matrix * q,ae_state * _state)9824 ae_bool hmatrixtdunpackqmkl(/* Complex */ ae_matrix* a,
9825 ae_int_t n,
9826 ae_bool isupper,
9827 /* Complex */ ae_vector* tau,
9828 /* Complex */ ae_matrix* q,
9829 ae_state *_state)
9830 {
9831 #ifndef ALGLIB_INTERCEPTS_MKL
9832 ae_bool result;
9833
9834
9835 result = ae_false;
9836 return result;
9837 #else
9838 return _ialglib_i_hmatrixtdunpackqmkl(a, n, isupper, tau, q);
9839 #endif
9840 }
9841
9842
9843 /*************************************************************************
9844 MKL-based kernel.
9845
9846 Returns True if MKL was present and handled request (MKL completion code
9847 is returned as separate output parameter).
9848
9849 D and E are pre-allocated arrays with length N (both of them!). On output,
9850 D constraints singular values, and E is destroyed.
9851
9852 SVDResult is modified if and only if MKL is present.
9853
9854 -- ALGLIB routine --
9855 20.10.2014
9856 Bochkanov Sergey
9857 *************************************************************************/
rmatrixbdsvdmkl(ae_vector * d,ae_vector * e,ae_int_t n,ae_bool isupper,ae_matrix * u,ae_int_t nru,ae_matrix * c,ae_int_t ncc,ae_matrix * vt,ae_int_t ncvt,ae_bool * svdresult,ae_state * _state)9858 ae_bool rmatrixbdsvdmkl(/* Real */ ae_vector* d,
9859 /* Real */ ae_vector* e,
9860 ae_int_t n,
9861 ae_bool isupper,
9862 /* Real */ ae_matrix* u,
9863 ae_int_t nru,
9864 /* Real */ ae_matrix* c,
9865 ae_int_t ncc,
9866 /* Real */ ae_matrix* vt,
9867 ae_int_t ncvt,
9868 ae_bool* svdresult,
9869 ae_state *_state)
9870 {
9871 #ifndef ALGLIB_INTERCEPTS_MKL
9872 ae_bool result;
9873
9874
9875 result = ae_false;
9876 return result;
9877 #else
9878 return _ialglib_i_rmatrixbdsvdmkl(d, e, n, isupper, u, nru, c, ncc, vt, ncvt, svdresult);
9879 #endif
9880 }
9881
9882
9883 /*************************************************************************
9884 MKL-based DHSEQR kernel.
9885
9886 Returns True if MKL was present and handled request.
9887
9888 WR and WI are pre-allocated arrays with length N.
9889 Z is pre-allocated array[N,N].
9890
9891 -- ALGLIB routine --
9892 20.10.2014
9893 Bochkanov Sergey
9894 *************************************************************************/
rmatrixinternalschurdecompositionmkl(ae_matrix * h,ae_int_t n,ae_int_t tneeded,ae_int_t zneeded,ae_vector * wr,ae_vector * wi,ae_matrix * z,ae_int_t * info,ae_state * _state)9895 ae_bool rmatrixinternalschurdecompositionmkl(/* Real */ ae_matrix* h,
9896 ae_int_t n,
9897 ae_int_t tneeded,
9898 ae_int_t zneeded,
9899 /* Real */ ae_vector* wr,
9900 /* Real */ ae_vector* wi,
9901 /* Real */ ae_matrix* z,
9902 ae_int_t* info,
9903 ae_state *_state)
9904 {
9905 #ifndef ALGLIB_INTERCEPTS_MKL
9906 ae_bool result;
9907
9908
9909 result = ae_false;
9910 return result;
9911 #else
9912 return _ialglib_i_rmatrixinternalschurdecompositionmkl(h, n, tneeded, zneeded, wr, wi, z, info);
9913 #endif
9914 }
9915
9916
9917 /*************************************************************************
9918 MKL-based DTREVC kernel.
9919
9920 Returns True if MKL was present and handled request.
9921
9922 NOTE: this function does NOT support HOWMNY=3!!!!
9923
9924 VL and VR are pre-allocated arrays with length N*N, if required. If particalar
9925 variables is not required, it can be dummy (empty) array.
9926
9927 -- ALGLIB routine --
9928 20.10.2014
9929 Bochkanov Sergey
9930 *************************************************************************/
rmatrixinternaltrevcmkl(ae_matrix * t,ae_int_t n,ae_int_t side,ae_int_t howmny,ae_matrix * vl,ae_matrix * vr,ae_int_t * m,ae_int_t * info,ae_state * _state)9931 ae_bool rmatrixinternaltrevcmkl(/* Real */ ae_matrix* t,
9932 ae_int_t n,
9933 ae_int_t side,
9934 ae_int_t howmny,
9935 /* Real */ ae_matrix* vl,
9936 /* Real */ ae_matrix* vr,
9937 ae_int_t* m,
9938 ae_int_t* info,
9939 ae_state *_state)
9940 {
9941 #ifndef ALGLIB_INTERCEPTS_MKL
9942 ae_bool result;
9943
9944
9945 result = ae_false;
9946 return result;
9947 #else
9948 return _ialglib_i_rmatrixinternaltrevcmkl(t, n, side, howmny, vl, vr, m, info);
9949 #endif
9950 }
9951
9952
9953 /*************************************************************************
9954 MKL-based kernel.
9955
9956 Returns True if MKL was present and handled request (MKL completion code
9957 is returned as separate output parameter).
9958
9959 D and E are pre-allocated arrays with length N (both of them!). On output,
9960 D constraints eigenvalues, and E is destroyed.
9961
9962 Z is preallocated array[N,N] for ZNeeded<>0; ignored for ZNeeded=0.
9963
9964 EVDResult is modified if and only if MKL is present.
9965
9966 -- ALGLIB routine --
9967 20.10.2014
9968 Bochkanov Sergey
9969 *************************************************************************/
smatrixtdevdmkl(ae_vector * d,ae_vector * e,ae_int_t n,ae_int_t zneeded,ae_matrix * z,ae_bool * evdresult,ae_state * _state)9970 ae_bool smatrixtdevdmkl(/* Real */ ae_vector* d,
9971 /* Real */ ae_vector* e,
9972 ae_int_t n,
9973 ae_int_t zneeded,
9974 /* Real */ ae_matrix* z,
9975 ae_bool* evdresult,
9976 ae_state *_state)
9977 {
9978 #ifndef ALGLIB_INTERCEPTS_MKL
9979 ae_bool result;
9980
9981
9982 result = ae_false;
9983 return result;
9984 #else
9985 return _ialglib_i_smatrixtdevdmkl(d, e, n, zneeded, z, evdresult);
9986 #endif
9987 }
9988
9989
9990 /*************************************************************************
9991 MKL-based kernel.
9992
9993 Returns True if MKL was present and handled request (MKL completion code
9994 is returned as separate output parameter).
9995
9996 D and E are pre-allocated arrays with length N (both of them!). On output,
9997 D constraints eigenvalues, and E is destroyed.
9998
9999 Z is preallocated array[N,N] for ZNeeded<>0; ignored for ZNeeded=0.
10000
10001 EVDResult is modified if and only if MKL is present.
10002
10003 -- ALGLIB routine --
10004 20.10.2014
10005 Bochkanov Sergey
10006 *************************************************************************/
sparsegemvcrsmkl(ae_int_t opa,ae_int_t arows,ae_int_t acols,double alpha,ae_vector * vals,ae_vector * cidx,ae_vector * ridx,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)10007 ae_bool sparsegemvcrsmkl(ae_int_t opa,
10008 ae_int_t arows,
10009 ae_int_t acols,
10010 double alpha,
10011 /* Real */ ae_vector* vals,
10012 /* Integer */ ae_vector* cidx,
10013 /* Integer */ ae_vector* ridx,
10014 /* Real */ ae_vector* x,
10015 ae_int_t ix,
10016 double beta,
10017 /* Real */ ae_vector* y,
10018 ae_int_t iy,
10019 ae_state *_state)
10020 {
10021 #ifndef ALGLIB_INTERCEPTS_MKL
10022 ae_bool result;
10023
10024
10025 result = ae_false;
10026 return result;
10027 #else
10028 return _ialglib_i_sparsegemvcrsmkl(opa, arows, acols, alpha, vals, cidx, ridx, x, ix, beta, y, iy);
10029 #endif
10030 }
10031
10032
10033 #endif
10034 #if defined(AE_COMPILE_SCODES) || !defined(AE_PARTIAL_BUILD)
10035
10036
getrdfserializationcode(ae_state * _state)10037 ae_int_t getrdfserializationcode(ae_state *_state)
10038 {
10039 ae_int_t result;
10040
10041
10042 result = 1;
10043 return result;
10044 }
10045
10046
getkdtreeserializationcode(ae_state * _state)10047 ae_int_t getkdtreeserializationcode(ae_state *_state)
10048 {
10049 ae_int_t result;
10050
10051
10052 result = 2;
10053 return result;
10054 }
10055
10056
getmlpserializationcode(ae_state * _state)10057 ae_int_t getmlpserializationcode(ae_state *_state)
10058 {
10059 ae_int_t result;
10060
10061
10062 result = 3;
10063 return result;
10064 }
10065
10066
getmlpeserializationcode(ae_state * _state)10067 ae_int_t getmlpeserializationcode(ae_state *_state)
10068 {
10069 ae_int_t result;
10070
10071
10072 result = 4;
10073 return result;
10074 }
10075
10076
getrbfserializationcode(ae_state * _state)10077 ae_int_t getrbfserializationcode(ae_state *_state)
10078 {
10079 ae_int_t result;
10080
10081
10082 result = 5;
10083 return result;
10084 }
10085
10086
getspline2dserializationcode(ae_state * _state)10087 ae_int_t getspline2dserializationcode(ae_state *_state)
10088 {
10089 ae_int_t result;
10090
10091
10092 result = 6;
10093 return result;
10094 }
10095
10096
getidwserializationcode(ae_state * _state)10097 ae_int_t getidwserializationcode(ae_state *_state)
10098 {
10099 ae_int_t result;
10100
10101
10102 result = 7;
10103 return result;
10104 }
10105
10106
getsparsematrixserializationcode(ae_state * _state)10107 ae_int_t getsparsematrixserializationcode(ae_state *_state)
10108 {
10109 ae_int_t result;
10110
10111
10112 result = 8;
10113 return result;
10114 }
10115
10116
getknnserializationcode(ae_state * _state)10117 ae_int_t getknnserializationcode(ae_state *_state)
10118 {
10119 ae_int_t result;
10120
10121
10122 result = 108;
10123 return result;
10124 }
10125
10126
getlptestserializationcode(ae_state * _state)10127 ae_int_t getlptestserializationcode(ae_state *_state)
10128 {
10129 ae_int_t result;
10130
10131
10132 result = 200;
10133 return result;
10134 }
10135
10136
10137 #endif
10138 #if defined(AE_COMPILE_TSORT) || !defined(AE_PARTIAL_BUILD)
10139
10140
10141 /*************************************************************************
10142 This function sorts array of real keys by ascending.
10143
10144 Its results are:
10145 * sorted array A
10146 * permutation tables P1, P2
10147
10148 Algorithm outputs permutation tables using two formats:
10149 * as usual permutation of [0..N-1]. If P1[i]=j, then sorted A[i] contains
10150 value which was moved there from J-th position.
10151 * as a sequence of pairwise permutations. Sorted A[] may be obtained by
10152 swaping A[i] and A[P2[i]] for all i from 0 to N-1.
10153
10154 INPUT PARAMETERS:
10155 A - unsorted array
10156 N - array size
10157
10158 OUPUT PARAMETERS:
10159 A - sorted array
10160 P1, P2 - permutation tables, array[N]
10161
10162 NOTES:
10163 this function assumes that A[] is finite; it doesn't checks that
10164 condition. All other conditions (size of input arrays, etc.) are not
10165 checked too.
10166
10167 -- ALGLIB --
10168 Copyright 14.05.2008 by Bochkanov Sergey
10169 *************************************************************************/
tagsort(ae_vector * a,ae_int_t n,ae_vector * p1,ae_vector * p2,ae_state * _state)10170 void tagsort(/* Real */ ae_vector* a,
10171 ae_int_t n,
10172 /* Integer */ ae_vector* p1,
10173 /* Integer */ ae_vector* p2,
10174 ae_state *_state)
10175 {
10176 ae_frame _frame_block;
10177 apbuffers buf;
10178
10179 ae_frame_make(_state, &_frame_block);
10180 memset(&buf, 0, sizeof(buf));
10181 ae_vector_clear(p1);
10182 ae_vector_clear(p2);
10183 _apbuffers_init(&buf, _state, ae_true);
10184
10185 tagsortbuf(a, n, p1, p2, &buf, _state);
10186 ae_frame_leave(_state);
10187 }
10188
10189
10190 /*************************************************************************
10191 Buffered variant of TagSort, which accepts preallocated output arrays as
10192 well as special structure for buffered allocations. If arrays are too
10193 short, they are reallocated. If they are large enough, no memory
10194 allocation is done.
10195
10196 It is intended to be used in the performance-critical parts of code, where
10197 additional allocations can lead to severe performance degradation
10198
10199 -- ALGLIB --
10200 Copyright 14.05.2008 by Bochkanov Sergey
10201 *************************************************************************/
tagsortbuf(ae_vector * a,ae_int_t n,ae_vector * p1,ae_vector * p2,apbuffers * buf,ae_state * _state)10202 void tagsortbuf(/* Real */ ae_vector* a,
10203 ae_int_t n,
10204 /* Integer */ ae_vector* p1,
10205 /* Integer */ ae_vector* p2,
10206 apbuffers* buf,
10207 ae_state *_state)
10208 {
10209 ae_int_t i;
10210 ae_int_t lv;
10211 ae_int_t lp;
10212 ae_int_t rv;
10213 ae_int_t rp;
10214
10215
10216
10217 /*
10218 * Special cases
10219 */
10220 if( n<=0 )
10221 {
10222 return;
10223 }
10224 if( n==1 )
10225 {
10226 ivectorsetlengthatleast(p1, 1, _state);
10227 ivectorsetlengthatleast(p2, 1, _state);
10228 p1->ptr.p_int[0] = 0;
10229 p2->ptr.p_int[0] = 0;
10230 return;
10231 }
10232
10233 /*
10234 * General case, N>1: prepare permutations table P1
10235 */
10236 ivectorsetlengthatleast(p1, n, _state);
10237 for(i=0; i<=n-1; i++)
10238 {
10239 p1->ptr.p_int[i] = i;
10240 }
10241
10242 /*
10243 * General case, N>1: sort, update P1
10244 */
10245 rvectorsetlengthatleast(&buf->ra0, n, _state);
10246 ivectorsetlengthatleast(&buf->ia0, n, _state);
10247 tagsortfasti(a, p1, &buf->ra0, &buf->ia0, n, _state);
10248
10249 /*
10250 * General case, N>1: fill permutations table P2
10251 *
10252 * To fill P2 we maintain two arrays:
10253 * * PV (Buf.IA0), Position(Value). PV[i] contains position of I-th key at the moment
10254 * * VP (Buf.IA1), Value(Position). VP[i] contains key which has position I at the moment
10255 *
10256 * At each step we making permutation of two items:
10257 * Left, which is given by position/value pair LP/LV
10258 * and Right, which is given by RP/RV
10259 * and updating PV[] and VP[] correspondingly.
10260 */
10261 ivectorsetlengthatleast(&buf->ia0, n, _state);
10262 ivectorsetlengthatleast(&buf->ia1, n, _state);
10263 ivectorsetlengthatleast(p2, n, _state);
10264 for(i=0; i<=n-1; i++)
10265 {
10266 buf->ia0.ptr.p_int[i] = i;
10267 buf->ia1.ptr.p_int[i] = i;
10268 }
10269 for(i=0; i<=n-1; i++)
10270 {
10271
10272 /*
10273 * calculate LP, LV, RP, RV
10274 */
10275 lp = i;
10276 lv = buf->ia1.ptr.p_int[lp];
10277 rv = p1->ptr.p_int[i];
10278 rp = buf->ia0.ptr.p_int[rv];
10279
10280 /*
10281 * Fill P2
10282 */
10283 p2->ptr.p_int[i] = rp;
10284
10285 /*
10286 * update PV and VP
10287 */
10288 buf->ia1.ptr.p_int[lp] = rv;
10289 buf->ia1.ptr.p_int[rp] = lv;
10290 buf->ia0.ptr.p_int[lv] = rp;
10291 buf->ia0.ptr.p_int[rv] = lp;
10292 }
10293 }
10294
10295
10296 /*************************************************************************
10297 Same as TagSort, but optimized for real keys and integer labels.
10298
10299 A is sorted, and same permutations are applied to B.
10300
10301 NOTES:
10302 1. this function assumes that A[] is finite; it doesn't checks that
10303 condition. All other conditions (size of input arrays, etc.) are not
10304 checked too.
10305 2. this function uses two buffers, BufA and BufB, each is N elements large.
10306 They may be preallocated (which will save some time) or not, in which
10307 case function will automatically allocate memory.
10308
10309 -- ALGLIB --
10310 Copyright 11.12.2008 by Bochkanov Sergey
10311 *************************************************************************/
tagsortfasti(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t n,ae_state * _state)10312 void tagsortfasti(/* Real */ ae_vector* a,
10313 /* Integer */ ae_vector* b,
10314 /* Real */ ae_vector* bufa,
10315 /* Integer */ ae_vector* bufb,
10316 ae_int_t n,
10317 ae_state *_state)
10318 {
10319 ae_int_t i;
10320 ae_int_t j;
10321 ae_bool isascending;
10322 ae_bool isdescending;
10323 double tmpr;
10324 ae_int_t tmpi;
10325
10326
10327
10328 /*
10329 * Special case
10330 */
10331 if( n<=1 )
10332 {
10333 return;
10334 }
10335
10336 /*
10337 * Test for already sorted set
10338 */
10339 isascending = ae_true;
10340 isdescending = ae_true;
10341 for(i=1; i<=n-1; i++)
10342 {
10343 isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
10344 isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
10345 }
10346 if( isascending )
10347 {
10348 return;
10349 }
10350 if( isdescending )
10351 {
10352 for(i=0; i<=n-1; i++)
10353 {
10354 j = n-1-i;
10355 if( j<=i )
10356 {
10357 break;
10358 }
10359 tmpr = a->ptr.p_double[i];
10360 a->ptr.p_double[i] = a->ptr.p_double[j];
10361 a->ptr.p_double[j] = tmpr;
10362 tmpi = b->ptr.p_int[i];
10363 b->ptr.p_int[i] = b->ptr.p_int[j];
10364 b->ptr.p_int[j] = tmpi;
10365 }
10366 return;
10367 }
10368
10369 /*
10370 * General case
10371 */
10372 if( bufa->cnt<n )
10373 {
10374 ae_vector_set_length(bufa, n, _state);
10375 }
10376 if( bufb->cnt<n )
10377 {
10378 ae_vector_set_length(bufb, n, _state);
10379 }
10380 tsort_tagsortfastirec(a, b, bufa, bufb, 0, n-1, _state);
10381 }
10382
10383
10384 /*************************************************************************
10385 Same as TagSort, but optimized for real keys and real labels.
10386
10387 A is sorted, and same permutations are applied to B.
10388
10389 NOTES:
10390 1. this function assumes that A[] is finite; it doesn't checks that
10391 condition. All other conditions (size of input arrays, etc.) are not
10392 checked too.
10393 2. this function uses two buffers, BufA and BufB, each is N elements large.
10394 They may be preallocated (which will save some time) or not, in which
10395 case function will automatically allocate memory.
10396
10397 -- ALGLIB --
10398 Copyright 11.12.2008 by Bochkanov Sergey
10399 *************************************************************************/
tagsortfastr(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t n,ae_state * _state)10400 void tagsortfastr(/* Real */ ae_vector* a,
10401 /* Real */ ae_vector* b,
10402 /* Real */ ae_vector* bufa,
10403 /* Real */ ae_vector* bufb,
10404 ae_int_t n,
10405 ae_state *_state)
10406 {
10407 ae_int_t i;
10408 ae_int_t j;
10409 ae_bool isascending;
10410 ae_bool isdescending;
10411 double tmpr;
10412
10413
10414
10415 /*
10416 * Special case
10417 */
10418 if( n<=1 )
10419 {
10420 return;
10421 }
10422
10423 /*
10424 * Test for already sorted set
10425 */
10426 isascending = ae_true;
10427 isdescending = ae_true;
10428 for(i=1; i<=n-1; i++)
10429 {
10430 isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
10431 isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
10432 }
10433 if( isascending )
10434 {
10435 return;
10436 }
10437 if( isdescending )
10438 {
10439 for(i=0; i<=n-1; i++)
10440 {
10441 j = n-1-i;
10442 if( j<=i )
10443 {
10444 break;
10445 }
10446 tmpr = a->ptr.p_double[i];
10447 a->ptr.p_double[i] = a->ptr.p_double[j];
10448 a->ptr.p_double[j] = tmpr;
10449 tmpr = b->ptr.p_double[i];
10450 b->ptr.p_double[i] = b->ptr.p_double[j];
10451 b->ptr.p_double[j] = tmpr;
10452 }
10453 return;
10454 }
10455
10456 /*
10457 * General case
10458 */
10459 if( bufa->cnt<n )
10460 {
10461 ae_vector_set_length(bufa, n, _state);
10462 }
10463 if( bufb->cnt<n )
10464 {
10465 ae_vector_set_length(bufb, n, _state);
10466 }
10467 tsort_tagsortfastrrec(a, b, bufa, bufb, 0, n-1, _state);
10468 }
10469
10470
10471 /*************************************************************************
10472 Same as TagSort, but optimized for real keys without labels.
10473
10474 A is sorted, and that's all.
10475
10476 NOTES:
10477 1. this function assumes that A[] is finite; it doesn't checks that
10478 condition. All other conditions (size of input arrays, etc.) are not
10479 checked too.
10480 2. this function uses buffer, BufA, which is N elements large. It may be
10481 preallocated (which will save some time) or not, in which case
10482 function will automatically allocate memory.
10483
10484 -- ALGLIB --
10485 Copyright 11.12.2008 by Bochkanov Sergey
10486 *************************************************************************/
tagsortfast(ae_vector * a,ae_vector * bufa,ae_int_t n,ae_state * _state)10487 void tagsortfast(/* Real */ ae_vector* a,
10488 /* Real */ ae_vector* bufa,
10489 ae_int_t n,
10490 ae_state *_state)
10491 {
10492 ae_int_t i;
10493 ae_int_t j;
10494 ae_bool isascending;
10495 ae_bool isdescending;
10496 double tmpr;
10497
10498
10499
10500 /*
10501 * Special case
10502 */
10503 if( n<=1 )
10504 {
10505 return;
10506 }
10507
10508 /*
10509 * Test for already sorted set
10510 */
10511 isascending = ae_true;
10512 isdescending = ae_true;
10513 for(i=1; i<=n-1; i++)
10514 {
10515 isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
10516 isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
10517 }
10518 if( isascending )
10519 {
10520 return;
10521 }
10522 if( isdescending )
10523 {
10524 for(i=0; i<=n-1; i++)
10525 {
10526 j = n-1-i;
10527 if( j<=i )
10528 {
10529 break;
10530 }
10531 tmpr = a->ptr.p_double[i];
10532 a->ptr.p_double[i] = a->ptr.p_double[j];
10533 a->ptr.p_double[j] = tmpr;
10534 }
10535 return;
10536 }
10537
10538 /*
10539 * General case
10540 */
10541 if( bufa->cnt<n )
10542 {
10543 ae_vector_set_length(bufa, n, _state);
10544 }
10545 tsort_tagsortfastrec(a, bufa, 0, n-1, _state);
10546 }
10547
10548
10549 /*************************************************************************
10550 Sorting function optimized for integer keys and real labels, can be used
10551 to sort middle of the array
10552
10553 A is sorted, and same permutations are applied to B.
10554
10555 NOTES:
10556 this function assumes that A[] is finite; it doesn't checks that
10557 condition. All other conditions (size of input arrays, etc.) are not
10558 checked too.
10559
10560 -- ALGLIB --
10561 Copyright 11.12.2008 by Bochkanov Sergey
10562 *************************************************************************/
tagsortmiddleir(ae_vector * a,ae_vector * b,ae_int_t offset,ae_int_t n,ae_state * _state)10563 void tagsortmiddleir(/* Integer */ ae_vector* a,
10564 /* Real */ ae_vector* b,
10565 ae_int_t offset,
10566 ae_int_t n,
10567 ae_state *_state)
10568 {
10569 ae_int_t i;
10570 ae_int_t k;
10571 ae_int_t t;
10572 ae_int_t tmp;
10573 double tmpr;
10574 ae_int_t p0;
10575 ae_int_t p1;
10576 ae_int_t at;
10577 ae_int_t ak;
10578 ae_int_t ak1;
10579 double bt;
10580
10581
10582
10583 /*
10584 * Special cases
10585 */
10586 if( n<=1 )
10587 {
10588 return;
10589 }
10590
10591 /*
10592 * General case, N>1: sort, update B
10593 */
10594 for(i=2; i<=n; i++)
10595 {
10596 t = i;
10597 while(t!=1)
10598 {
10599 k = t/2;
10600 p0 = offset+k-1;
10601 p1 = offset+t-1;
10602 ak = a->ptr.p_int[p0];
10603 at = a->ptr.p_int[p1];
10604 if( ak>=at )
10605 {
10606 break;
10607 }
10608 a->ptr.p_int[p0] = at;
10609 a->ptr.p_int[p1] = ak;
10610 tmpr = b->ptr.p_double[p0];
10611 b->ptr.p_double[p0] = b->ptr.p_double[p1];
10612 b->ptr.p_double[p1] = tmpr;
10613 t = k;
10614 }
10615 }
10616 for(i=n-1; i>=1; i--)
10617 {
10618 p0 = offset+0;
10619 p1 = offset+i;
10620 tmp = a->ptr.p_int[p1];
10621 a->ptr.p_int[p1] = a->ptr.p_int[p0];
10622 a->ptr.p_int[p0] = tmp;
10623 at = tmp;
10624 tmpr = b->ptr.p_double[p1];
10625 b->ptr.p_double[p1] = b->ptr.p_double[p0];
10626 b->ptr.p_double[p0] = tmpr;
10627 bt = tmpr;
10628 t = 0;
10629 for(;;)
10630 {
10631 k = 2*t+1;
10632 if( k+1>i )
10633 {
10634 break;
10635 }
10636 p0 = offset+t;
10637 p1 = offset+k;
10638 ak = a->ptr.p_int[p1];
10639 if( k+1<i )
10640 {
10641 ak1 = a->ptr.p_int[p1+1];
10642 if( ak1>ak )
10643 {
10644 ak = ak1;
10645 p1 = p1+1;
10646 k = k+1;
10647 }
10648 }
10649 if( at>=ak )
10650 {
10651 break;
10652 }
10653 a->ptr.p_int[p1] = at;
10654 a->ptr.p_int[p0] = ak;
10655 b->ptr.p_double[p0] = b->ptr.p_double[p1];
10656 b->ptr.p_double[p1] = bt;
10657 t = k;
10658 }
10659 }
10660 }
10661
10662
10663 /*************************************************************************
10664 Sorting function optimized for integer keys and real labels, can be used
10665 to sort middle of the array
10666
10667 A is sorted, and same permutations are applied to B.
10668
10669 NOTES:
10670 this function assumes that A[] is finite; it doesn't checks that
10671 condition. All other conditions (size of input arrays, etc.) are not
10672 checked too.
10673
10674 -- ALGLIB --
10675 Copyright 11.12.2008 by Bochkanov Sergey
10676 *************************************************************************/
tagsortmiddlei(ae_vector * a,ae_int_t offset,ae_int_t n,ae_state * _state)10677 void tagsortmiddlei(/* Integer */ ae_vector* a,
10678 ae_int_t offset,
10679 ae_int_t n,
10680 ae_state *_state)
10681 {
10682 ae_int_t i;
10683 ae_int_t k;
10684 ae_int_t t;
10685 ae_int_t tmp;
10686 ae_int_t p0;
10687 ae_int_t p1;
10688 ae_int_t at;
10689 ae_int_t ak;
10690 ae_int_t ak1;
10691
10692
10693
10694 /*
10695 * Special cases
10696 */
10697 if( n<=1 )
10698 {
10699 return;
10700 }
10701
10702 /*
10703 * General case, N>1: sort, update B
10704 */
10705 for(i=2; i<=n; i++)
10706 {
10707 t = i;
10708 while(t!=1)
10709 {
10710 k = t/2;
10711 p0 = offset+k-1;
10712 p1 = offset+t-1;
10713 ak = a->ptr.p_int[p0];
10714 at = a->ptr.p_int[p1];
10715 if( ak>=at )
10716 {
10717 break;
10718 }
10719 a->ptr.p_int[p0] = at;
10720 a->ptr.p_int[p1] = ak;
10721 t = k;
10722 }
10723 }
10724 for(i=n-1; i>=1; i--)
10725 {
10726 p0 = offset+0;
10727 p1 = offset+i;
10728 tmp = a->ptr.p_int[p1];
10729 a->ptr.p_int[p1] = a->ptr.p_int[p0];
10730 a->ptr.p_int[p0] = tmp;
10731 at = tmp;
10732 t = 0;
10733 for(;;)
10734 {
10735 k = 2*t+1;
10736 if( k+1>i )
10737 {
10738 break;
10739 }
10740 p0 = offset+t;
10741 p1 = offset+k;
10742 ak = a->ptr.p_int[p1];
10743 if( k+1<i )
10744 {
10745 ak1 = a->ptr.p_int[p1+1];
10746 if( ak1>ak )
10747 {
10748 ak = ak1;
10749 p1 = p1+1;
10750 k = k+1;
10751 }
10752 }
10753 if( at>=ak )
10754 {
10755 break;
10756 }
10757 a->ptr.p_int[p1] = at;
10758 a->ptr.p_int[p0] = ak;
10759 t = k;
10760 }
10761 }
10762 }
10763
10764
10765 /*************************************************************************
10766 Sorting function optimized for integer values (only keys, no labels), can
10767 be used to sort middle of the array
10768
10769 -- ALGLIB --
10770 Copyright 11.12.2008 by Bochkanov Sergey
10771 *************************************************************************/
sortmiddlei(ae_vector * a,ae_int_t offset,ae_int_t n,ae_state * _state)10772 void sortmiddlei(/* Integer */ ae_vector* a,
10773 ae_int_t offset,
10774 ae_int_t n,
10775 ae_state *_state)
10776 {
10777 ae_int_t i;
10778 ae_int_t k;
10779 ae_int_t t;
10780 ae_int_t tmp;
10781 ae_int_t p0;
10782 ae_int_t p1;
10783 ae_int_t at;
10784 ae_int_t ak;
10785 ae_int_t ak1;
10786
10787
10788
10789 /*
10790 * Special cases
10791 */
10792 if( n<=1 )
10793 {
10794 return;
10795 }
10796
10797 /*
10798 * General case, N>1: sort, update B
10799 */
10800 for(i=2; i<=n; i++)
10801 {
10802 t = i;
10803 while(t!=1)
10804 {
10805 k = t/2;
10806 p0 = offset+k-1;
10807 p1 = offset+t-1;
10808 ak = a->ptr.p_int[p0];
10809 at = a->ptr.p_int[p1];
10810 if( ak>=at )
10811 {
10812 break;
10813 }
10814 a->ptr.p_int[p0] = at;
10815 a->ptr.p_int[p1] = ak;
10816 t = k;
10817 }
10818 }
10819 for(i=n-1; i>=1; i--)
10820 {
10821 p0 = offset+0;
10822 p1 = offset+i;
10823 tmp = a->ptr.p_int[p1];
10824 a->ptr.p_int[p1] = a->ptr.p_int[p0];
10825 a->ptr.p_int[p0] = tmp;
10826 at = tmp;
10827 t = 0;
10828 for(;;)
10829 {
10830 k = 2*t+1;
10831 if( k+1>i )
10832 {
10833 break;
10834 }
10835 p0 = offset+t;
10836 p1 = offset+k;
10837 ak = a->ptr.p_int[p1];
10838 if( k+1<i )
10839 {
10840 ak1 = a->ptr.p_int[p1+1];
10841 if( ak1>ak )
10842 {
10843 ak = ak1;
10844 p1 = p1+1;
10845 k = k+1;
10846 }
10847 }
10848 if( at>=ak )
10849 {
10850 break;
10851 }
10852 a->ptr.p_int[p1] = at;
10853 a->ptr.p_int[p0] = ak;
10854 t = k;
10855 }
10856 }
10857 }
10858
10859
10860 /*************************************************************************
10861 Heap operations: adds element to the heap
10862
10863 PARAMETERS:
10864 A - heap itself, must be at least array[0..N]
10865 B - array of integer tags, which are updated according to
10866 permutations in the heap
10867 N - size of the heap (without new element).
10868 updated on output
10869 VA - value of the element being added
10870 VB - value of the tag
10871
10872 -- ALGLIB --
10873 Copyright 28.02.2010 by Bochkanov Sergey
10874 *************************************************************************/
tagheappushi(ae_vector * a,ae_vector * b,ae_int_t * n,double va,ae_int_t vb,ae_state * _state)10875 void tagheappushi(/* Real */ ae_vector* a,
10876 /* Integer */ ae_vector* b,
10877 ae_int_t* n,
10878 double va,
10879 ae_int_t vb,
10880 ae_state *_state)
10881 {
10882 ae_int_t j;
10883 ae_int_t k;
10884 double v;
10885
10886
10887 if( *n<0 )
10888 {
10889 return;
10890 }
10891
10892 /*
10893 * N=0 is a special case
10894 */
10895 if( *n==0 )
10896 {
10897 a->ptr.p_double[0] = va;
10898 b->ptr.p_int[0] = vb;
10899 *n = *n+1;
10900 return;
10901 }
10902
10903 /*
10904 * add current point to the heap
10905 * (add to the bottom, then move up)
10906 *
10907 * we don't write point to the heap
10908 * until its final position is determined
10909 * (it allow us to reduce number of array access operations)
10910 */
10911 j = *n;
10912 *n = *n+1;
10913 while(j>0)
10914 {
10915 k = (j-1)/2;
10916 v = a->ptr.p_double[k];
10917 if( v<va )
10918 {
10919
10920 /*
10921 * swap with higher element
10922 */
10923 a->ptr.p_double[j] = v;
10924 b->ptr.p_int[j] = b->ptr.p_int[k];
10925 j = k;
10926 }
10927 else
10928 {
10929
10930 /*
10931 * element in its place. terminate.
10932 */
10933 break;
10934 }
10935 }
10936 a->ptr.p_double[j] = va;
10937 b->ptr.p_int[j] = vb;
10938 }
10939
10940
10941 /*************************************************************************
10942 Heap operations: replaces top element with new element
10943 (which is moved down)
10944
10945 PARAMETERS:
10946 A - heap itself, must be at least array[0..N-1]
10947 B - array of integer tags, which are updated according to
10948 permutations in the heap
10949 N - size of the heap
10950 VA - value of the element which replaces top element
10951 VB - value of the tag
10952
10953 -- ALGLIB --
10954 Copyright 28.02.2010 by Bochkanov Sergey
10955 *************************************************************************/
tagheapreplacetopi(ae_vector * a,ae_vector * b,ae_int_t n,double va,ae_int_t vb,ae_state * _state)10956 void tagheapreplacetopi(/* Real */ ae_vector* a,
10957 /* Integer */ ae_vector* b,
10958 ae_int_t n,
10959 double va,
10960 ae_int_t vb,
10961 ae_state *_state)
10962 {
10963 ae_int_t j;
10964 ae_int_t k1;
10965 ae_int_t k2;
10966 double v;
10967 double v1;
10968 double v2;
10969
10970
10971 if( n<1 )
10972 {
10973 return;
10974 }
10975
10976 /*
10977 * N=1 is a special case
10978 */
10979 if( n==1 )
10980 {
10981 a->ptr.p_double[0] = va;
10982 b->ptr.p_int[0] = vb;
10983 return;
10984 }
10985
10986 /*
10987 * move down through heap:
10988 * * J - current element
10989 * * K1 - first child (always exists)
10990 * * K2 - second child (may not exists)
10991 *
10992 * we don't write point to the heap
10993 * until its final position is determined
10994 * (it allow us to reduce number of array access operations)
10995 */
10996 j = 0;
10997 k1 = 1;
10998 k2 = 2;
10999 while(k1<n)
11000 {
11001 if( k2>=n )
11002 {
11003
11004 /*
11005 * only one child.
11006 *
11007 * swap and terminate (because this child
11008 * have no siblings due to heap structure)
11009 */
11010 v = a->ptr.p_double[k1];
11011 if( v>va )
11012 {
11013 a->ptr.p_double[j] = v;
11014 b->ptr.p_int[j] = b->ptr.p_int[k1];
11015 j = k1;
11016 }
11017 break;
11018 }
11019 else
11020 {
11021
11022 /*
11023 * two childs
11024 */
11025 v1 = a->ptr.p_double[k1];
11026 v2 = a->ptr.p_double[k2];
11027 if( v1>v2 )
11028 {
11029 if( va<v1 )
11030 {
11031 a->ptr.p_double[j] = v1;
11032 b->ptr.p_int[j] = b->ptr.p_int[k1];
11033 j = k1;
11034 }
11035 else
11036 {
11037 break;
11038 }
11039 }
11040 else
11041 {
11042 if( va<v2 )
11043 {
11044 a->ptr.p_double[j] = v2;
11045 b->ptr.p_int[j] = b->ptr.p_int[k2];
11046 j = k2;
11047 }
11048 else
11049 {
11050 break;
11051 }
11052 }
11053 k1 = 2*j+1;
11054 k2 = 2*j+2;
11055 }
11056 }
11057 a->ptr.p_double[j] = va;
11058 b->ptr.p_int[j] = vb;
11059 }
11060
11061
11062 /*************************************************************************
11063 Heap operations: pops top element from the heap
11064
11065 PARAMETERS:
11066 A - heap itself, must be at least array[0..N-1]
11067 B - array of integer tags, which are updated according to
11068 permutations in the heap
11069 N - size of the heap, N>=1
11070
11071 On output top element is moved to A[N-1], B[N-1], heap is reordered, N is
11072 decreased by 1.
11073
11074 -- ALGLIB --
11075 Copyright 28.02.2010 by Bochkanov Sergey
11076 *************************************************************************/
tagheappopi(ae_vector * a,ae_vector * b,ae_int_t * n,ae_state * _state)11077 void tagheappopi(/* Real */ ae_vector* a,
11078 /* Integer */ ae_vector* b,
11079 ae_int_t* n,
11080 ae_state *_state)
11081 {
11082 double va;
11083 ae_int_t vb;
11084
11085
11086 if( *n<1 )
11087 {
11088 return;
11089 }
11090
11091 /*
11092 * N=1 is a special case
11093 */
11094 if( *n==1 )
11095 {
11096 *n = 0;
11097 return;
11098 }
11099
11100 /*
11101 * swap top element and last element,
11102 * then reorder heap
11103 */
11104 va = a->ptr.p_double[*n-1];
11105 vb = b->ptr.p_int[*n-1];
11106 a->ptr.p_double[*n-1] = a->ptr.p_double[0];
11107 b->ptr.p_int[*n-1] = b->ptr.p_int[0];
11108 *n = *n-1;
11109 tagheapreplacetopi(a, b, *n, va, vb, _state);
11110 }
11111
11112
11113 /*************************************************************************
11114 Search first element less than T in sorted array.
11115
11116 PARAMETERS:
11117 A - sorted array by ascending from 0 to N-1
11118 N - number of elements in array
11119 T - the desired element
11120
11121 RESULT:
11122 The very first element's index, which isn't less than T.
11123 In the case when there aren't such elements, returns N.
11124 *************************************************************************/
lowerbound(ae_vector * a,ae_int_t n,double t,ae_state * _state)11125 ae_int_t lowerbound(/* Real */ ae_vector* a,
11126 ae_int_t n,
11127 double t,
11128 ae_state *_state)
11129 {
11130 ae_int_t l;
11131 ae_int_t half;
11132 ae_int_t first;
11133 ae_int_t middle;
11134 ae_int_t result;
11135
11136
11137 l = n;
11138 first = 0;
11139 while(l>0)
11140 {
11141 half = l/2;
11142 middle = first+half;
11143 if( ae_fp_less(a->ptr.p_double[middle],t) )
11144 {
11145 first = middle+1;
11146 l = l-half-1;
11147 }
11148 else
11149 {
11150 l = half;
11151 }
11152 }
11153 result = first;
11154 return result;
11155 }
11156
11157
11158 /*************************************************************************
11159 Search first element more than T in sorted array.
11160
11161 PARAMETERS:
11162 A - sorted array by ascending from 0 to N-1
11163 N - number of elements in array
11164 T - the desired element
11165
11166 RESULT:
11167 The very first element's index, which more than T.
11168 In the case when there aren't such elements, returns N.
11169 *************************************************************************/
upperbound(ae_vector * a,ae_int_t n,double t,ae_state * _state)11170 ae_int_t upperbound(/* Real */ ae_vector* a,
11171 ae_int_t n,
11172 double t,
11173 ae_state *_state)
11174 {
11175 ae_int_t l;
11176 ae_int_t half;
11177 ae_int_t first;
11178 ae_int_t middle;
11179 ae_int_t result;
11180
11181
11182 l = n;
11183 first = 0;
11184 while(l>0)
11185 {
11186 half = l/2;
11187 middle = first+half;
11188 if( ae_fp_less(t,a->ptr.p_double[middle]) )
11189 {
11190 l = half;
11191 }
11192 else
11193 {
11194 first = middle+1;
11195 l = l-half-1;
11196 }
11197 }
11198 result = first;
11199 return result;
11200 }
11201
11202
11203 /*************************************************************************
11204 Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
11205 applies same permutations to B.
11206
11207 -- ALGLIB --
11208 Copyright 06.09.2010 by Bochkanov Sergey
11209 *************************************************************************/
tsort_tagsortfastirec(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t i1,ae_int_t i2,ae_state * _state)11210 static void tsort_tagsortfastirec(/* Real */ ae_vector* a,
11211 /* Integer */ ae_vector* b,
11212 /* Real */ ae_vector* bufa,
11213 /* Integer */ ae_vector* bufb,
11214 ae_int_t i1,
11215 ae_int_t i2,
11216 ae_state *_state)
11217 {
11218 ae_int_t i;
11219 ae_int_t j;
11220 ae_int_t k;
11221 ae_int_t cntless;
11222 ae_int_t cnteq;
11223 ae_int_t cntgreater;
11224 double tmpr;
11225 ae_int_t tmpi;
11226 double v0;
11227 double v1;
11228 double v2;
11229 double vp;
11230
11231
11232
11233 /*
11234 * Fast exit
11235 */
11236 if( i2<=i1 )
11237 {
11238 return;
11239 }
11240
11241 /*
11242 * Non-recursive sort for small arrays
11243 */
11244 if( i2-i1<=16 )
11245 {
11246 for(j=i1+1; j<=i2; j++)
11247 {
11248
11249 /*
11250 * Search elements [I1..J-1] for place to insert Jth element.
11251 *
11252 * This code stops immediately if we can leave A[J] at J-th position
11253 * (all elements have same value of A[J] larger than any of them)
11254 */
11255 tmpr = a->ptr.p_double[j];
11256 tmpi = j;
11257 for(k=j-1; k>=i1; k--)
11258 {
11259 if( a->ptr.p_double[k]<=tmpr )
11260 {
11261 break;
11262 }
11263 tmpi = k;
11264 }
11265 k = tmpi;
11266
11267 /*
11268 * Insert Jth element into Kth position
11269 */
11270 if( k!=j )
11271 {
11272 tmpr = a->ptr.p_double[j];
11273 tmpi = b->ptr.p_int[j];
11274 for(i=j-1; i>=k; i--)
11275 {
11276 a->ptr.p_double[i+1] = a->ptr.p_double[i];
11277 b->ptr.p_int[i+1] = b->ptr.p_int[i];
11278 }
11279 a->ptr.p_double[k] = tmpr;
11280 b->ptr.p_int[k] = tmpi;
11281 }
11282 }
11283 return;
11284 }
11285
11286 /*
11287 * Quicksort: choose pivot
11288 * Here we assume that I2-I1>=2
11289 */
11290 v0 = a->ptr.p_double[i1];
11291 v1 = a->ptr.p_double[i1+(i2-i1)/2];
11292 v2 = a->ptr.p_double[i2];
11293 if( v0>v1 )
11294 {
11295 tmpr = v1;
11296 v1 = v0;
11297 v0 = tmpr;
11298 }
11299 if( v1>v2 )
11300 {
11301 tmpr = v2;
11302 v2 = v1;
11303 v1 = tmpr;
11304 }
11305 if( v0>v1 )
11306 {
11307 tmpr = v1;
11308 v1 = v0;
11309 v0 = tmpr;
11310 }
11311 vp = v1;
11312
11313 /*
11314 * now pass through A/B and:
11315 * * move elements that are LESS than VP to the left of A/B
11316 * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
11317 * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
11318 * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
11319 * * move elements from the left of BufA/BufB to the end of A/B
11320 */
11321 cntless = 0;
11322 cnteq = 0;
11323 cntgreater = 0;
11324 for(i=i1; i<=i2; i++)
11325 {
11326 v0 = a->ptr.p_double[i];
11327 if( v0<vp )
11328 {
11329
11330 /*
11331 * LESS
11332 */
11333 k = i1+cntless;
11334 if( i!=k )
11335 {
11336 a->ptr.p_double[k] = v0;
11337 b->ptr.p_int[k] = b->ptr.p_int[i];
11338 }
11339 cntless = cntless+1;
11340 continue;
11341 }
11342 if( v0==vp )
11343 {
11344
11345 /*
11346 * EQUAL
11347 */
11348 k = i2-cnteq;
11349 bufa->ptr.p_double[k] = v0;
11350 bufb->ptr.p_int[k] = b->ptr.p_int[i];
11351 cnteq = cnteq+1;
11352 continue;
11353 }
11354
11355 /*
11356 * GREATER
11357 */
11358 k = i1+cntgreater;
11359 bufa->ptr.p_double[k] = v0;
11360 bufb->ptr.p_int[k] = b->ptr.p_int[i];
11361 cntgreater = cntgreater+1;
11362 }
11363 for(i=0; i<=cnteq-1; i++)
11364 {
11365 j = i1+cntless+cnteq-1-i;
11366 k = i2+i-(cnteq-1);
11367 a->ptr.p_double[j] = bufa->ptr.p_double[k];
11368 b->ptr.p_int[j] = bufb->ptr.p_int[k];
11369 }
11370 for(i=0; i<=cntgreater-1; i++)
11371 {
11372 j = i1+cntless+cnteq+i;
11373 k = i1+i;
11374 a->ptr.p_double[j] = bufa->ptr.p_double[k];
11375 b->ptr.p_int[j] = bufb->ptr.p_int[k];
11376 }
11377
11378 /*
11379 * Sort left and right parts of the array (ignoring middle part)
11380 */
11381 tsort_tagsortfastirec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
11382 tsort_tagsortfastirec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
11383 }
11384
11385
11386 /*************************************************************************
11387 Internal TagSortFastR: sorts A[I1...I2] (both bounds are included),
11388 applies same permutations to B.
11389
11390 -- ALGLIB --
11391 Copyright 06.09.2010 by Bochkanov Sergey
11392 *************************************************************************/
tsort_tagsortfastrrec(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t i1,ae_int_t i2,ae_state * _state)11393 static void tsort_tagsortfastrrec(/* Real */ ae_vector* a,
11394 /* Real */ ae_vector* b,
11395 /* Real */ ae_vector* bufa,
11396 /* Real */ ae_vector* bufb,
11397 ae_int_t i1,
11398 ae_int_t i2,
11399 ae_state *_state)
11400 {
11401 ae_int_t i;
11402 ae_int_t j;
11403 ae_int_t k;
11404 double tmpr;
11405 double tmpr2;
11406 ae_int_t tmpi;
11407 ae_int_t cntless;
11408 ae_int_t cnteq;
11409 ae_int_t cntgreater;
11410 double v0;
11411 double v1;
11412 double v2;
11413 double vp;
11414
11415
11416
11417 /*
11418 * Fast exit
11419 */
11420 if( i2<=i1 )
11421 {
11422 return;
11423 }
11424
11425 /*
11426 * Non-recursive sort for small arrays
11427 */
11428 if( i2-i1<=16 )
11429 {
11430 for(j=i1+1; j<=i2; j++)
11431 {
11432
11433 /*
11434 * Search elements [I1..J-1] for place to insert Jth element.
11435 *
11436 * This code stops immediatly if we can leave A[J] at J-th position
11437 * (all elements have same value of A[J] larger than any of them)
11438 */
11439 tmpr = a->ptr.p_double[j];
11440 tmpi = j;
11441 for(k=j-1; k>=i1; k--)
11442 {
11443 if( a->ptr.p_double[k]<=tmpr )
11444 {
11445 break;
11446 }
11447 tmpi = k;
11448 }
11449 k = tmpi;
11450
11451 /*
11452 * Insert Jth element into Kth position
11453 */
11454 if( k!=j )
11455 {
11456 tmpr = a->ptr.p_double[j];
11457 tmpr2 = b->ptr.p_double[j];
11458 for(i=j-1; i>=k; i--)
11459 {
11460 a->ptr.p_double[i+1] = a->ptr.p_double[i];
11461 b->ptr.p_double[i+1] = b->ptr.p_double[i];
11462 }
11463 a->ptr.p_double[k] = tmpr;
11464 b->ptr.p_double[k] = tmpr2;
11465 }
11466 }
11467 return;
11468 }
11469
11470 /*
11471 * Quicksort: choose pivot
11472 * Here we assume that I2-I1>=16
11473 */
11474 v0 = a->ptr.p_double[i1];
11475 v1 = a->ptr.p_double[i1+(i2-i1)/2];
11476 v2 = a->ptr.p_double[i2];
11477 if( v0>v1 )
11478 {
11479 tmpr = v1;
11480 v1 = v0;
11481 v0 = tmpr;
11482 }
11483 if( v1>v2 )
11484 {
11485 tmpr = v2;
11486 v2 = v1;
11487 v1 = tmpr;
11488 }
11489 if( v0>v1 )
11490 {
11491 tmpr = v1;
11492 v1 = v0;
11493 v0 = tmpr;
11494 }
11495 vp = v1;
11496
11497 /*
11498 * now pass through A/B and:
11499 * * move elements that are LESS than VP to the left of A/B
11500 * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
11501 * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
11502 * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
11503 * * move elements from the left of BufA/BufB to the end of A/B
11504 */
11505 cntless = 0;
11506 cnteq = 0;
11507 cntgreater = 0;
11508 for(i=i1; i<=i2; i++)
11509 {
11510 v0 = a->ptr.p_double[i];
11511 if( v0<vp )
11512 {
11513
11514 /*
11515 * LESS
11516 */
11517 k = i1+cntless;
11518 if( i!=k )
11519 {
11520 a->ptr.p_double[k] = v0;
11521 b->ptr.p_double[k] = b->ptr.p_double[i];
11522 }
11523 cntless = cntless+1;
11524 continue;
11525 }
11526 if( v0==vp )
11527 {
11528
11529 /*
11530 * EQUAL
11531 */
11532 k = i2-cnteq;
11533 bufa->ptr.p_double[k] = v0;
11534 bufb->ptr.p_double[k] = b->ptr.p_double[i];
11535 cnteq = cnteq+1;
11536 continue;
11537 }
11538
11539 /*
11540 * GREATER
11541 */
11542 k = i1+cntgreater;
11543 bufa->ptr.p_double[k] = v0;
11544 bufb->ptr.p_double[k] = b->ptr.p_double[i];
11545 cntgreater = cntgreater+1;
11546 }
11547 for(i=0; i<=cnteq-1; i++)
11548 {
11549 j = i1+cntless+cnteq-1-i;
11550 k = i2+i-(cnteq-1);
11551 a->ptr.p_double[j] = bufa->ptr.p_double[k];
11552 b->ptr.p_double[j] = bufb->ptr.p_double[k];
11553 }
11554 for(i=0; i<=cntgreater-1; i++)
11555 {
11556 j = i1+cntless+cnteq+i;
11557 k = i1+i;
11558 a->ptr.p_double[j] = bufa->ptr.p_double[k];
11559 b->ptr.p_double[j] = bufb->ptr.p_double[k];
11560 }
11561
11562 /*
11563 * Sort left and right parts of the array (ignoring middle part)
11564 */
11565 tsort_tagsortfastrrec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
11566 tsort_tagsortfastrrec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
11567 }
11568
11569
11570 /*************************************************************************
11571 Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
11572 applies same permutations to B.
11573
11574 -- ALGLIB --
11575 Copyright 06.09.2010 by Bochkanov Sergey
11576 *************************************************************************/
tsort_tagsortfastrec(ae_vector * a,ae_vector * bufa,ae_int_t i1,ae_int_t i2,ae_state * _state)11577 static void tsort_tagsortfastrec(/* Real */ ae_vector* a,
11578 /* Real */ ae_vector* bufa,
11579 ae_int_t i1,
11580 ae_int_t i2,
11581 ae_state *_state)
11582 {
11583 ae_int_t cntless;
11584 ae_int_t cnteq;
11585 ae_int_t cntgreater;
11586 ae_int_t i;
11587 ae_int_t j;
11588 ae_int_t k;
11589 double tmpr;
11590 ae_int_t tmpi;
11591 double v0;
11592 double v1;
11593 double v2;
11594 double vp;
11595
11596
11597
11598 /*
11599 * Fast exit
11600 */
11601 if( i2<=i1 )
11602 {
11603 return;
11604 }
11605
11606 /*
11607 * Non-recursive sort for small arrays
11608 */
11609 if( i2-i1<=16 )
11610 {
11611 for(j=i1+1; j<=i2; j++)
11612 {
11613
11614 /*
11615 * Search elements [I1..J-1] for place to insert Jth element.
11616 *
11617 * This code stops immediatly if we can leave A[J] at J-th position
11618 * (all elements have same value of A[J] larger than any of them)
11619 */
11620 tmpr = a->ptr.p_double[j];
11621 tmpi = j;
11622 for(k=j-1; k>=i1; k--)
11623 {
11624 if( a->ptr.p_double[k]<=tmpr )
11625 {
11626 break;
11627 }
11628 tmpi = k;
11629 }
11630 k = tmpi;
11631
11632 /*
11633 * Insert Jth element into Kth position
11634 */
11635 if( k!=j )
11636 {
11637 tmpr = a->ptr.p_double[j];
11638 for(i=j-1; i>=k; i--)
11639 {
11640 a->ptr.p_double[i+1] = a->ptr.p_double[i];
11641 }
11642 a->ptr.p_double[k] = tmpr;
11643 }
11644 }
11645 return;
11646 }
11647
11648 /*
11649 * Quicksort: choose pivot
11650 * Here we assume that I2-I1>=16
11651 */
11652 v0 = a->ptr.p_double[i1];
11653 v1 = a->ptr.p_double[i1+(i2-i1)/2];
11654 v2 = a->ptr.p_double[i2];
11655 if( v0>v1 )
11656 {
11657 tmpr = v1;
11658 v1 = v0;
11659 v0 = tmpr;
11660 }
11661 if( v1>v2 )
11662 {
11663 tmpr = v2;
11664 v2 = v1;
11665 v1 = tmpr;
11666 }
11667 if( v0>v1 )
11668 {
11669 tmpr = v1;
11670 v1 = v0;
11671 v0 = tmpr;
11672 }
11673 vp = v1;
11674
11675 /*
11676 * now pass through A/B and:
11677 * * move elements that are LESS than VP to the left of A/B
11678 * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
11679 * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
11680 * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
11681 * * move elements from the left of BufA/BufB to the end of A/B
11682 */
11683 cntless = 0;
11684 cnteq = 0;
11685 cntgreater = 0;
11686 for(i=i1; i<=i2; i++)
11687 {
11688 v0 = a->ptr.p_double[i];
11689 if( v0<vp )
11690 {
11691
11692 /*
11693 * LESS
11694 */
11695 k = i1+cntless;
11696 if( i!=k )
11697 {
11698 a->ptr.p_double[k] = v0;
11699 }
11700 cntless = cntless+1;
11701 continue;
11702 }
11703 if( v0==vp )
11704 {
11705
11706 /*
11707 * EQUAL
11708 */
11709 k = i2-cnteq;
11710 bufa->ptr.p_double[k] = v0;
11711 cnteq = cnteq+1;
11712 continue;
11713 }
11714
11715 /*
11716 * GREATER
11717 */
11718 k = i1+cntgreater;
11719 bufa->ptr.p_double[k] = v0;
11720 cntgreater = cntgreater+1;
11721 }
11722 for(i=0; i<=cnteq-1; i++)
11723 {
11724 j = i1+cntless+cnteq-1-i;
11725 k = i2+i-(cnteq-1);
11726 a->ptr.p_double[j] = bufa->ptr.p_double[k];
11727 }
11728 for(i=0; i<=cntgreater-1; i++)
11729 {
11730 j = i1+cntless+cnteq+i;
11731 k = i1+i;
11732 a->ptr.p_double[j] = bufa->ptr.p_double[k];
11733 }
11734
11735 /*
11736 * Sort left and right parts of the array (ignoring middle part)
11737 */
11738 tsort_tagsortfastrec(a, bufa, i1, i1+cntless-1, _state);
11739 tsort_tagsortfastrec(a, bufa, i1+cntless+cnteq, i2, _state);
11740 }
11741
11742
11743 #endif
11744 #if defined(AE_COMPILE_BLAS) || !defined(AE_PARTIAL_BUILD)
11745
11746
vectornorm2(ae_vector * x,ae_int_t i1,ae_int_t i2,ae_state * _state)11747 double vectornorm2(/* Real */ ae_vector* x,
11748 ae_int_t i1,
11749 ae_int_t i2,
11750 ae_state *_state)
11751 {
11752 ae_int_t n;
11753 ae_int_t ix;
11754 double absxi;
11755 double scl;
11756 double ssq;
11757 double result;
11758
11759
11760 n = i2-i1+1;
11761 if( n<1 )
11762 {
11763 result = (double)(0);
11764 return result;
11765 }
11766 if( n==1 )
11767 {
11768 result = ae_fabs(x->ptr.p_double[i1], _state);
11769 return result;
11770 }
11771 scl = (double)(0);
11772 ssq = (double)(1);
11773 for(ix=i1; ix<=i2; ix++)
11774 {
11775 if( ae_fp_neq(x->ptr.p_double[ix],(double)(0)) )
11776 {
11777 absxi = ae_fabs(x->ptr.p_double[ix], _state);
11778 if( ae_fp_less(scl,absxi) )
11779 {
11780 ssq = 1+ssq*ae_sqr(scl/absxi, _state);
11781 scl = absxi;
11782 }
11783 else
11784 {
11785 ssq = ssq+ae_sqr(absxi/scl, _state);
11786 }
11787 }
11788 }
11789 result = scl*ae_sqrt(ssq, _state);
11790 return result;
11791 }
11792
11793
vectoridxabsmax(ae_vector * x,ae_int_t i1,ae_int_t i2,ae_state * _state)11794 ae_int_t vectoridxabsmax(/* Real */ ae_vector* x,
11795 ae_int_t i1,
11796 ae_int_t i2,
11797 ae_state *_state)
11798 {
11799 ae_int_t i;
11800 ae_int_t result;
11801
11802
11803 result = i1;
11804 for(i=i1+1; i<=i2; i++)
11805 {
11806 if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[result], _state)) )
11807 {
11808 result = i;
11809 }
11810 }
11811 return result;
11812 }
11813
11814
columnidxabsmax(ae_matrix * x,ae_int_t i1,ae_int_t i2,ae_int_t j,ae_state * _state)11815 ae_int_t columnidxabsmax(/* Real */ ae_matrix* x,
11816 ae_int_t i1,
11817 ae_int_t i2,
11818 ae_int_t j,
11819 ae_state *_state)
11820 {
11821 ae_int_t i;
11822 ae_int_t result;
11823
11824
11825 result = i1;
11826 for(i=i1+1; i<=i2; i++)
11827 {
11828 if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[result][j], _state)) )
11829 {
11830 result = i;
11831 }
11832 }
11833 return result;
11834 }
11835
11836
rowidxabsmax(ae_matrix * x,ae_int_t j1,ae_int_t j2,ae_int_t i,ae_state * _state)11837 ae_int_t rowidxabsmax(/* Real */ ae_matrix* x,
11838 ae_int_t j1,
11839 ae_int_t j2,
11840 ae_int_t i,
11841 ae_state *_state)
11842 {
11843 ae_int_t j;
11844 ae_int_t result;
11845
11846
11847 result = j1;
11848 for(j=j1+1; j<=j2; j++)
11849 {
11850 if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[i][result], _state)) )
11851 {
11852 result = j;
11853 }
11854 }
11855 return result;
11856 }
11857
11858
upperhessenberg1norm(ae_matrix * a,ae_int_t i1,ae_int_t i2,ae_int_t j1,ae_int_t j2,ae_vector * work,ae_state * _state)11859 double upperhessenberg1norm(/* Real */ ae_matrix* a,
11860 ae_int_t i1,
11861 ae_int_t i2,
11862 ae_int_t j1,
11863 ae_int_t j2,
11864 /* Real */ ae_vector* work,
11865 ae_state *_state)
11866 {
11867 ae_int_t i;
11868 ae_int_t j;
11869 double result;
11870
11871
11872 ae_assert(i2-i1==j2-j1, "UpperHessenberg1Norm: I2-I1<>J2-J1!", _state);
11873 for(j=j1; j<=j2; j++)
11874 {
11875 work->ptr.p_double[j] = (double)(0);
11876 }
11877 for(i=i1; i<=i2; i++)
11878 {
11879 for(j=ae_maxint(j1, j1+i-i1-1, _state); j<=j2; j++)
11880 {
11881 work->ptr.p_double[j] = work->ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
11882 }
11883 }
11884 result = (double)(0);
11885 for(j=j1; j<=j2; j++)
11886 {
11887 result = ae_maxreal(result, work->ptr.p_double[j], _state);
11888 }
11889 return result;
11890 }
11891
11892
copymatrix(ae_matrix * a,ae_int_t is1,ae_int_t is2,ae_int_t js1,ae_int_t js2,ae_matrix * b,ae_int_t id1,ae_int_t id2,ae_int_t jd1,ae_int_t jd2,ae_state * _state)11893 void copymatrix(/* Real */ ae_matrix* a,
11894 ae_int_t is1,
11895 ae_int_t is2,
11896 ae_int_t js1,
11897 ae_int_t js2,
11898 /* Real */ ae_matrix* b,
11899 ae_int_t id1,
11900 ae_int_t id2,
11901 ae_int_t jd1,
11902 ae_int_t jd2,
11903 ae_state *_state)
11904 {
11905 ae_int_t isrc;
11906 ae_int_t idst;
11907
11908
11909 if( is1>is2||js1>js2 )
11910 {
11911 return;
11912 }
11913 ae_assert(is2-is1==id2-id1, "CopyMatrix: different sizes!", _state);
11914 ae_assert(js2-js1==jd2-jd1, "CopyMatrix: different sizes!", _state);
11915 for(isrc=is1; isrc<=is2; isrc++)
11916 {
11917 idst = isrc-is1+id1;
11918 ae_v_move(&b->ptr.pp_double[idst][jd1], 1, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(jd1,jd2));
11919 }
11920 }
11921
11922
inplacetranspose(ae_matrix * a,ae_int_t i1,ae_int_t i2,ae_int_t j1,ae_int_t j2,ae_vector * work,ae_state * _state)11923 void inplacetranspose(/* Real */ ae_matrix* a,
11924 ae_int_t i1,
11925 ae_int_t i2,
11926 ae_int_t j1,
11927 ae_int_t j2,
11928 /* Real */ ae_vector* work,
11929 ae_state *_state)
11930 {
11931 ae_int_t i;
11932 ae_int_t j;
11933 ae_int_t ips;
11934 ae_int_t jps;
11935 ae_int_t l;
11936
11937
11938 if( i1>i2||j1>j2 )
11939 {
11940 return;
11941 }
11942 ae_assert(i1-i2==j1-j2, "InplaceTranspose error: incorrect array size!", _state);
11943 for(i=i1; i<=i2-1; i++)
11944 {
11945 j = j1+i-i1;
11946 ips = i+1;
11947 jps = j1+ips-i1;
11948 l = i2-i;
11949 ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ips][j], a->stride, ae_v_len(1,l));
11950 ae_v_move(&a->ptr.pp_double[ips][j], a->stride, &a->ptr.pp_double[i][jps], 1, ae_v_len(ips,i2));
11951 ae_v_move(&a->ptr.pp_double[i][jps], 1, &work->ptr.p_double[1], 1, ae_v_len(jps,j2));
11952 }
11953 }
11954
11955
copyandtranspose(ae_matrix * a,ae_int_t is1,ae_int_t is2,ae_int_t js1,ae_int_t js2,ae_matrix * b,ae_int_t id1,ae_int_t id2,ae_int_t jd1,ae_int_t jd2,ae_state * _state)11956 void copyandtranspose(/* Real */ ae_matrix* a,
11957 ae_int_t is1,
11958 ae_int_t is2,
11959 ae_int_t js1,
11960 ae_int_t js2,
11961 /* Real */ ae_matrix* b,
11962 ae_int_t id1,
11963 ae_int_t id2,
11964 ae_int_t jd1,
11965 ae_int_t jd2,
11966 ae_state *_state)
11967 {
11968 ae_int_t isrc;
11969 ae_int_t jdst;
11970
11971
11972 if( is1>is2||js1>js2 )
11973 {
11974 return;
11975 }
11976 ae_assert(is2-is1==jd2-jd1, "CopyAndTranspose: different sizes!", _state);
11977 ae_assert(js2-js1==id2-id1, "CopyAndTranspose: different sizes!", _state);
11978 for(isrc=is1; isrc<=is2; isrc++)
11979 {
11980 jdst = isrc-is1+jd1;
11981 ae_v_move(&b->ptr.pp_double[id1][jdst], b->stride, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(id1,id2));
11982 }
11983 }
11984
11985
matrixvectormultiply(ae_matrix * a,ae_int_t i1,ae_int_t i2,ae_int_t j1,ae_int_t j2,ae_bool trans,ae_vector * x,ae_int_t ix1,ae_int_t ix2,double alpha,ae_vector * y,ae_int_t iy1,ae_int_t iy2,double beta,ae_state * _state)11986 void matrixvectormultiply(/* Real */ ae_matrix* a,
11987 ae_int_t i1,
11988 ae_int_t i2,
11989 ae_int_t j1,
11990 ae_int_t j2,
11991 ae_bool trans,
11992 /* Real */ ae_vector* x,
11993 ae_int_t ix1,
11994 ae_int_t ix2,
11995 double alpha,
11996 /* Real */ ae_vector* y,
11997 ae_int_t iy1,
11998 ae_int_t iy2,
11999 double beta,
12000 ae_state *_state)
12001 {
12002 ae_int_t i;
12003 double v;
12004
12005
12006 if( !trans )
12007 {
12008
12009 /*
12010 * y := alpha*A*x + beta*y;
12011 */
12012 if( i1>i2||j1>j2 )
12013 {
12014 return;
12015 }
12016 ae_assert(j2-j1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state);
12017 ae_assert(i2-i1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state);
12018
12019 /*
12020 * beta*y
12021 */
12022 if( ae_fp_eq(beta,(double)(0)) )
12023 {
12024 for(i=iy1; i<=iy2; i++)
12025 {
12026 y->ptr.p_double[i] = (double)(0);
12027 }
12028 }
12029 else
12030 {
12031 ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
12032 }
12033
12034 /*
12035 * alpha*A*x
12036 */
12037 for(i=i1; i<=i2; i++)
12038 {
12039 v = ae_v_dotproduct(&a->ptr.pp_double[i][j1], 1, &x->ptr.p_double[ix1], 1, ae_v_len(j1,j2));
12040 y->ptr.p_double[iy1+i-i1] = y->ptr.p_double[iy1+i-i1]+alpha*v;
12041 }
12042 }
12043 else
12044 {
12045
12046 /*
12047 * y := alpha*A'*x + beta*y;
12048 */
12049 if( i1>i2||j1>j2 )
12050 {
12051 return;
12052 }
12053 ae_assert(i2-i1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state);
12054 ae_assert(j2-j1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state);
12055
12056 /*
12057 * beta*y
12058 */
12059 if( ae_fp_eq(beta,(double)(0)) )
12060 {
12061 for(i=iy1; i<=iy2; i++)
12062 {
12063 y->ptr.p_double[i] = (double)(0);
12064 }
12065 }
12066 else
12067 {
12068 ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
12069 }
12070
12071 /*
12072 * alpha*A'*x
12073 */
12074 for(i=i1; i<=i2; i++)
12075 {
12076 v = alpha*x->ptr.p_double[ix1+i-i1];
12077 ae_v_addd(&y->ptr.p_double[iy1], 1, &a->ptr.pp_double[i][j1], 1, ae_v_len(iy1,iy2), v);
12078 }
12079 }
12080 }
12081
12082
pythag2(double x,double y,ae_state * _state)12083 double pythag2(double x, double y, ae_state *_state)
12084 {
12085 double w;
12086 double xabs;
12087 double yabs;
12088 double z;
12089 double result;
12090
12091
12092 xabs = ae_fabs(x, _state);
12093 yabs = ae_fabs(y, _state);
12094 w = ae_maxreal(xabs, yabs, _state);
12095 z = ae_minreal(xabs, yabs, _state);
12096 if( ae_fp_eq(z,(double)(0)) )
12097 {
12098 result = w;
12099 }
12100 else
12101 {
12102 result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
12103 }
12104 return result;
12105 }
12106
12107
matrixmatrixmultiply(ae_matrix * a,ae_int_t ai1,ae_int_t ai2,ae_int_t aj1,ae_int_t aj2,ae_bool transa,ae_matrix * b,ae_int_t bi1,ae_int_t bi2,ae_int_t bj1,ae_int_t bj2,ae_bool transb,double alpha,ae_matrix * c,ae_int_t ci1,ae_int_t ci2,ae_int_t cj1,ae_int_t cj2,double beta,ae_vector * work,ae_state * _state)12108 void matrixmatrixmultiply(/* Real */ ae_matrix* a,
12109 ae_int_t ai1,
12110 ae_int_t ai2,
12111 ae_int_t aj1,
12112 ae_int_t aj2,
12113 ae_bool transa,
12114 /* Real */ ae_matrix* b,
12115 ae_int_t bi1,
12116 ae_int_t bi2,
12117 ae_int_t bj1,
12118 ae_int_t bj2,
12119 ae_bool transb,
12120 double alpha,
12121 /* Real */ ae_matrix* c,
12122 ae_int_t ci1,
12123 ae_int_t ci2,
12124 ae_int_t cj1,
12125 ae_int_t cj2,
12126 double beta,
12127 /* Real */ ae_vector* work,
12128 ae_state *_state)
12129 {
12130 ae_int_t arows;
12131 ae_int_t acols;
12132 ae_int_t brows;
12133 ae_int_t bcols;
12134 ae_int_t crows;
12135 ae_int_t i;
12136 ae_int_t j;
12137 ae_int_t k;
12138 ae_int_t l;
12139 ae_int_t r;
12140 double v;
12141
12142
12143
12144 /*
12145 * Setup
12146 */
12147 if( !transa )
12148 {
12149 arows = ai2-ai1+1;
12150 acols = aj2-aj1+1;
12151 }
12152 else
12153 {
12154 arows = aj2-aj1+1;
12155 acols = ai2-ai1+1;
12156 }
12157 if( !transb )
12158 {
12159 brows = bi2-bi1+1;
12160 bcols = bj2-bj1+1;
12161 }
12162 else
12163 {
12164 brows = bj2-bj1+1;
12165 bcols = bi2-bi1+1;
12166 }
12167 ae_assert(acols==brows, "MatrixMatrixMultiply: incorrect matrix sizes!", _state);
12168 if( ((arows<=0||acols<=0)||brows<=0)||bcols<=0 )
12169 {
12170 return;
12171 }
12172 crows = arows;
12173
12174 /*
12175 * Test WORK
12176 */
12177 i = ae_maxint(arows, acols, _state);
12178 i = ae_maxint(brows, i, _state);
12179 i = ae_maxint(i, bcols, _state);
12180 work->ptr.p_double[1] = (double)(0);
12181 work->ptr.p_double[i] = (double)(0);
12182
12183 /*
12184 * Prepare C
12185 */
12186 if( ae_fp_eq(beta,(double)(0)) )
12187 {
12188 for(i=ci1; i<=ci2; i++)
12189 {
12190 for(j=cj1; j<=cj2; j++)
12191 {
12192 c->ptr.pp_double[i][j] = (double)(0);
12193 }
12194 }
12195 }
12196 else
12197 {
12198 for(i=ci1; i<=ci2; i++)
12199 {
12200 ae_v_muld(&c->ptr.pp_double[i][cj1], 1, ae_v_len(cj1,cj2), beta);
12201 }
12202 }
12203
12204 /*
12205 * A*B
12206 */
12207 if( !transa&&!transb )
12208 {
12209 for(l=ai1; l<=ai2; l++)
12210 {
12211 for(r=bi1; r<=bi2; r++)
12212 {
12213 v = alpha*a->ptr.pp_double[l][aj1+r-bi1];
12214 k = ci1+l-ai1;
12215 ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
12216 }
12217 }
12218 return;
12219 }
12220
12221 /*
12222 * A*B'
12223 */
12224 if( !transa&&transb )
12225 {
12226 if( arows*acols<brows*bcols )
12227 {
12228 for(r=bi1; r<=bi2; r++)
12229 {
12230 for(l=ai1; l<=ai2; l++)
12231 {
12232 v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
12233 c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
12234 }
12235 }
12236 return;
12237 }
12238 else
12239 {
12240 for(l=ai1; l<=ai2; l++)
12241 {
12242 for(r=bi1; r<=bi2; r++)
12243 {
12244 v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
12245 c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
12246 }
12247 }
12248 return;
12249 }
12250 }
12251
12252 /*
12253 * A'*B
12254 */
12255 if( transa&&!transb )
12256 {
12257 for(l=aj1; l<=aj2; l++)
12258 {
12259 for(r=bi1; r<=bi2; r++)
12260 {
12261 v = alpha*a->ptr.pp_double[ai1+r-bi1][l];
12262 k = ci1+l-aj1;
12263 ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
12264 }
12265 }
12266 return;
12267 }
12268
12269 /*
12270 * A'*B'
12271 */
12272 if( transa&&transb )
12273 {
12274 if( arows*acols<brows*bcols )
12275 {
12276 for(r=bi1; r<=bi2; r++)
12277 {
12278 k = cj1+r-bi1;
12279 for(i=1; i<=crows; i++)
12280 {
12281 work->ptr.p_double[i] = 0.0;
12282 }
12283 for(l=ai1; l<=ai2; l++)
12284 {
12285 v = alpha*b->ptr.pp_double[r][bj1+l-ai1];
12286 ae_v_addd(&work->ptr.p_double[1], 1, &a->ptr.pp_double[l][aj1], 1, ae_v_len(1,crows), v);
12287 }
12288 ae_v_add(&c->ptr.pp_double[ci1][k], c->stride, &work->ptr.p_double[1], 1, ae_v_len(ci1,ci2));
12289 }
12290 return;
12291 }
12292 else
12293 {
12294 for(l=aj1; l<=aj2; l++)
12295 {
12296 k = ai2-ai1+1;
12297 ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ai1][l], a->stride, ae_v_len(1,k));
12298 for(r=bi1; r<=bi2; r++)
12299 {
12300 v = ae_v_dotproduct(&work->ptr.p_double[1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(1,k));
12301 c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1]+alpha*v;
12302 }
12303 }
12304 return;
12305 }
12306 }
12307 }
12308
12309
12310 #endif
12311 #if defined(AE_COMPILE_ROTATIONS) || !defined(AE_PARTIAL_BUILD)
12312
12313
12314 /*************************************************************************
12315 Application of a sequence of elementary rotations to a matrix
12316
12317 The algorithm pre-multiplies the matrix by a sequence of rotation
12318 transformations which is given by arrays C and S. Depending on the value
12319 of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
12320 rows are rotated, or the rows N and N-1, N-2 and N-3 and so on, are rotated.
12321
12322 Not the whole matrix but only a part of it is transformed (rows from M1 to
12323 M2, columns from N1 to N2). Only the elements of this submatrix are changed.
12324
12325 Input parameters:
12326 IsForward - the sequence of the rotation application.
12327 M1,M2 - the range of rows to be transformed.
12328 N1, N2 - the range of columns to be transformed.
12329 C,S - transformation coefficients.
12330 Array whose index ranges within [1..M2-M1].
12331 A - processed matrix.
12332 WORK - working array whose index ranges within [N1..N2].
12333
12334 Output parameters:
12335 A - transformed matrix.
12336
12337 Utility subroutine.
12338 *************************************************************************/
applyrotationsfromtheleft(ae_bool isforward,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * c,ae_vector * s,ae_matrix * a,ae_vector * work,ae_state * _state)12339 void applyrotationsfromtheleft(ae_bool isforward,
12340 ae_int_t m1,
12341 ae_int_t m2,
12342 ae_int_t n1,
12343 ae_int_t n2,
12344 /* Real */ ae_vector* c,
12345 /* Real */ ae_vector* s,
12346 /* Real */ ae_matrix* a,
12347 /* Real */ ae_vector* work,
12348 ae_state *_state)
12349 {
12350 ae_int_t j;
12351 ae_int_t jp1;
12352 double ctemp;
12353 double stemp;
12354 double temp;
12355
12356
12357 if( m1>m2||n1>n2 )
12358 {
12359 return;
12360 }
12361
12362 /*
12363 * Form P * A
12364 */
12365 if( isforward )
12366 {
12367 if( n1!=n2 )
12368 {
12369
12370 /*
12371 * Common case: N1<>N2
12372 */
12373 for(j=m1; j<=m2-1; j++)
12374 {
12375 ctemp = c->ptr.p_double[j-m1+1];
12376 stemp = s->ptr.p_double[j-m1+1];
12377 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12378 {
12379 jp1 = j+1;
12380 ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
12381 ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
12382 ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
12383 ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
12384 ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
12385 }
12386 }
12387 }
12388 else
12389 {
12390
12391 /*
12392 * Special case: N1=N2
12393 */
12394 for(j=m1; j<=m2-1; j++)
12395 {
12396 ctemp = c->ptr.p_double[j-m1+1];
12397 stemp = s->ptr.p_double[j-m1+1];
12398 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12399 {
12400 temp = a->ptr.pp_double[j+1][n1];
12401 a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
12402 a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
12403 }
12404 }
12405 }
12406 }
12407 else
12408 {
12409 if( n1!=n2 )
12410 {
12411
12412 /*
12413 * Common case: N1<>N2
12414 */
12415 for(j=m2-1; j>=m1; j--)
12416 {
12417 ctemp = c->ptr.p_double[j-m1+1];
12418 stemp = s->ptr.p_double[j-m1+1];
12419 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12420 {
12421 jp1 = j+1;
12422 ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
12423 ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
12424 ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
12425 ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
12426 ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
12427 }
12428 }
12429 }
12430 else
12431 {
12432
12433 /*
12434 * Special case: N1=N2
12435 */
12436 for(j=m2-1; j>=m1; j--)
12437 {
12438 ctemp = c->ptr.p_double[j-m1+1];
12439 stemp = s->ptr.p_double[j-m1+1];
12440 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12441 {
12442 temp = a->ptr.pp_double[j+1][n1];
12443 a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
12444 a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
12445 }
12446 }
12447 }
12448 }
12449 }
12450
12451
12452 /*************************************************************************
12453 Application of a sequence of elementary rotations to a matrix
12454
12455 The algorithm post-multiplies the matrix by a sequence of rotation
12456 transformations which is given by arrays C and S. Depending on the value
12457 of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
12458 rows are rotated, or the rows N and N-1, N-2 and N-3 and so on are rotated.
12459
12460 Not the whole matrix but only a part of it is transformed (rows from M1
12461 to M2, columns from N1 to N2). Only the elements of this submatrix are changed.
12462
12463 Input parameters:
12464 IsForward - the sequence of the rotation application.
12465 M1,M2 - the range of rows to be transformed.
12466 N1, N2 - the range of columns to be transformed.
12467 C,S - transformation coefficients.
12468 Array whose index ranges within [1..N2-N1].
12469 A - processed matrix.
12470 WORK - working array whose index ranges within [M1..M2].
12471
12472 Output parameters:
12473 A - transformed matrix.
12474
12475 Utility subroutine.
12476 *************************************************************************/
applyrotationsfromtheright(ae_bool isforward,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * c,ae_vector * s,ae_matrix * a,ae_vector * work,ae_state * _state)12477 void applyrotationsfromtheright(ae_bool isforward,
12478 ae_int_t m1,
12479 ae_int_t m2,
12480 ae_int_t n1,
12481 ae_int_t n2,
12482 /* Real */ ae_vector* c,
12483 /* Real */ ae_vector* s,
12484 /* Real */ ae_matrix* a,
12485 /* Real */ ae_vector* work,
12486 ae_state *_state)
12487 {
12488 ae_int_t j;
12489 ae_int_t jp1;
12490 double ctemp;
12491 double stemp;
12492 double temp;
12493
12494
12495
12496 /*
12497 * Form A * P'
12498 */
12499 if( isforward )
12500 {
12501 if( m1!=m2 )
12502 {
12503
12504 /*
12505 * Common case: M1<>M2
12506 */
12507 for(j=n1; j<=n2-1; j++)
12508 {
12509 ctemp = c->ptr.p_double[j-n1+1];
12510 stemp = s->ptr.p_double[j-n1+1];
12511 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12512 {
12513 jp1 = j+1;
12514 ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
12515 ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
12516 ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
12517 ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
12518 ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
12519 }
12520 }
12521 }
12522 else
12523 {
12524
12525 /*
12526 * Special case: M1=M2
12527 */
12528 for(j=n1; j<=n2-1; j++)
12529 {
12530 ctemp = c->ptr.p_double[j-n1+1];
12531 stemp = s->ptr.p_double[j-n1+1];
12532 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12533 {
12534 temp = a->ptr.pp_double[m1][j+1];
12535 a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
12536 a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
12537 }
12538 }
12539 }
12540 }
12541 else
12542 {
12543 if( m1!=m2 )
12544 {
12545
12546 /*
12547 * Common case: M1<>M2
12548 */
12549 for(j=n2-1; j>=n1; j--)
12550 {
12551 ctemp = c->ptr.p_double[j-n1+1];
12552 stemp = s->ptr.p_double[j-n1+1];
12553 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12554 {
12555 jp1 = j+1;
12556 ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
12557 ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
12558 ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
12559 ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
12560 ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
12561 }
12562 }
12563 }
12564 else
12565 {
12566
12567 /*
12568 * Special case: M1=M2
12569 */
12570 for(j=n2-1; j>=n1; j--)
12571 {
12572 ctemp = c->ptr.p_double[j-n1+1];
12573 stemp = s->ptr.p_double[j-n1+1];
12574 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12575 {
12576 temp = a->ptr.pp_double[m1][j+1];
12577 a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
12578 a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
12579 }
12580 }
12581 }
12582 }
12583 }
12584
12585
12586 /*************************************************************************
12587 The subroutine generates the elementary rotation, so that:
12588
12589 [ CS SN ] . [ F ] = [ R ]
12590 [ -SN CS ] [ G ] [ 0 ]
12591
12592 CS**2 + SN**2 = 1
12593 *************************************************************************/
generaterotation(double f,double g,double * cs,double * sn,double * r,ae_state * _state)12594 void generaterotation(double f,
12595 double g,
12596 double* cs,
12597 double* sn,
12598 double* r,
12599 ae_state *_state)
12600 {
12601 double f1;
12602 double g1;
12603
12604 *cs = 0;
12605 *sn = 0;
12606 *r = 0;
12607
12608 if( ae_fp_eq(g,(double)(0)) )
12609 {
12610 *cs = (double)(1);
12611 *sn = (double)(0);
12612 *r = f;
12613 }
12614 else
12615 {
12616 if( ae_fp_eq(f,(double)(0)) )
12617 {
12618 *cs = (double)(0);
12619 *sn = (double)(1);
12620 *r = g;
12621 }
12622 else
12623 {
12624 f1 = f;
12625 g1 = g;
12626 if( ae_fp_greater(ae_fabs(f1, _state),ae_fabs(g1, _state)) )
12627 {
12628 *r = ae_fabs(f1, _state)*ae_sqrt(1+ae_sqr(g1/f1, _state), _state);
12629 }
12630 else
12631 {
12632 *r = ae_fabs(g1, _state)*ae_sqrt(1+ae_sqr(f1/g1, _state), _state);
12633 }
12634 *cs = f1/(*r);
12635 *sn = g1/(*r);
12636 if( ae_fp_greater(ae_fabs(f, _state),ae_fabs(g, _state))&&ae_fp_less(*cs,(double)(0)) )
12637 {
12638 *cs = -*cs;
12639 *sn = -*sn;
12640 *r = -*r;
12641 }
12642 }
12643 }
12644 }
12645
12646
12647 #endif
12648 #if defined(AE_COMPILE_BASICSTATOPS) || !defined(AE_PARTIAL_BUILD)
12649
12650
12651 /*************************************************************************
12652 Internal tied ranking subroutine.
12653
12654 INPUT PARAMETERS:
12655 X - array to rank
12656 N - array size
12657 IsCentered- whether ranks are centered or not:
12658 * True - ranks are centered in such way that their
12659 sum is zero
12660 * False - ranks are not centered
12661 Buf - temporary buffers
12662
12663 NOTE: when IsCentered is True and all X[] are equal, this function fills
12664 X by zeros (exact zeros are used, not sum which is only approximately
12665 equal to zero).
12666 *************************************************************************/
rankx(ae_vector * x,ae_int_t n,ae_bool iscentered,apbuffers * buf,ae_state * _state)12667 void rankx(/* Real */ ae_vector* x,
12668 ae_int_t n,
12669 ae_bool iscentered,
12670 apbuffers* buf,
12671 ae_state *_state)
12672 {
12673 ae_int_t i;
12674 ae_int_t j;
12675 ae_int_t k;
12676 double tmp;
12677 double voffs;
12678
12679
12680
12681 /*
12682 * Prepare
12683 */
12684 if( n<1 )
12685 {
12686 return;
12687 }
12688 if( n==1 )
12689 {
12690 x->ptr.p_double[0] = (double)(0);
12691 return;
12692 }
12693 if( buf->ra1.cnt<n )
12694 {
12695 ae_vector_set_length(&buf->ra1, n, _state);
12696 }
12697 if( buf->ia1.cnt<n )
12698 {
12699 ae_vector_set_length(&buf->ia1, n, _state);
12700 }
12701 for(i=0; i<=n-1; i++)
12702 {
12703 buf->ra1.ptr.p_double[i] = x->ptr.p_double[i];
12704 buf->ia1.ptr.p_int[i] = i;
12705 }
12706 tagsortfasti(&buf->ra1, &buf->ia1, &buf->ra2, &buf->ia2, n, _state);
12707
12708 /*
12709 * Special test for all values being equal
12710 */
12711 if( ae_fp_eq(buf->ra1.ptr.p_double[0],buf->ra1.ptr.p_double[n-1]) )
12712 {
12713 if( iscentered )
12714 {
12715 tmp = 0.0;
12716 }
12717 else
12718 {
12719 tmp = (double)(n-1)/(double)2;
12720 }
12721 for(i=0; i<=n-1; i++)
12722 {
12723 x->ptr.p_double[i] = tmp;
12724 }
12725 return;
12726 }
12727
12728 /*
12729 * compute tied ranks
12730 */
12731 i = 0;
12732 while(i<=n-1)
12733 {
12734 j = i+1;
12735 while(j<=n-1)
12736 {
12737 if( ae_fp_neq(buf->ra1.ptr.p_double[j],buf->ra1.ptr.p_double[i]) )
12738 {
12739 break;
12740 }
12741 j = j+1;
12742 }
12743 for(k=i; k<=j-1; k++)
12744 {
12745 buf->ra1.ptr.p_double[k] = (double)(i+j-1)/(double)2;
12746 }
12747 i = j;
12748 }
12749
12750 /*
12751 * back to x
12752 */
12753 if( iscentered )
12754 {
12755 voffs = (double)(n-1)/(double)2;
12756 }
12757 else
12758 {
12759 voffs = 0.0;
12760 }
12761 for(i=0; i<=n-1; i++)
12762 {
12763 x->ptr.p_double[buf->ia1.ptr.p_int[i]] = buf->ra1.ptr.p_double[i]-voffs;
12764 }
12765 }
12766
12767
12768 /*************************************************************************
12769 Internal untied ranking subroutine.
12770
12771 INPUT PARAMETERS:
12772 X - array to rank
12773 N - array size
12774 Buf - temporary buffers
12775
12776 Returns untied ranks (in case of a tie ranks are resolved arbitrarily).
12777 *************************************************************************/
rankxuntied(ae_vector * x,ae_int_t n,apbuffers * buf,ae_state * _state)12778 void rankxuntied(/* Real */ ae_vector* x,
12779 ae_int_t n,
12780 apbuffers* buf,
12781 ae_state *_state)
12782 {
12783 ae_int_t i;
12784
12785
12786
12787 /*
12788 * Prepare
12789 */
12790 if( n<1 )
12791 {
12792 return;
12793 }
12794 if( n==1 )
12795 {
12796 x->ptr.p_double[0] = (double)(0);
12797 return;
12798 }
12799 if( buf->ra1.cnt<n )
12800 {
12801 ae_vector_set_length(&buf->ra1, n, _state);
12802 }
12803 if( buf->ia1.cnt<n )
12804 {
12805 ae_vector_set_length(&buf->ia1, n, _state);
12806 }
12807 for(i=0; i<=n-1; i++)
12808 {
12809 buf->ra1.ptr.p_double[i] = x->ptr.p_double[i];
12810 buf->ia1.ptr.p_int[i] = i;
12811 }
12812 tagsortfasti(&buf->ra1, &buf->ia1, &buf->ra2, &buf->ia2, n, _state);
12813 for(i=0; i<=n-1; i++)
12814 {
12815 x->ptr.p_double[buf->ia1.ptr.p_int[i]] = (double)(i);
12816 }
12817 }
12818
12819
12820 #endif
12821 #if defined(AE_COMPILE_TRLINSOLVE) || !defined(AE_PARTIAL_BUILD)
12822
12823
12824 /*************************************************************************
12825 Utility subroutine performing the "safe" solution of system of linear
12826 equations with triangular coefficient matrices.
12827
12828 The subroutine uses scaling and solves the scaled system A*x=s*b (where s
12829 is a scalar value) instead of A*x=b, choosing s so that x can be
12830 represented by a floating-point number. The closer the system gets to a
12831 singular, the less s is. If the system is singular, s=0 and x contains the
12832 non-trivial solution of equation A*x=0.
12833
12834 The feature of an algorithm is that it could not cause an overflow or a
12835 division by zero regardless of the matrix used as the input.
12836
12837 The algorithm can solve systems of equations with upper/lower triangular
12838 matrices, with/without unit diagonal, and systems of type A*x=b or A'*x=b
12839 (where A' is a transposed matrix A).
12840
12841 Input parameters:
12842 A - system matrix. Array whose indexes range within [0..N-1, 0..N-1].
12843 N - size of matrix A.
12844 X - right-hand member of a system.
12845 Array whose index ranges within [0..N-1].
12846 IsUpper - matrix type. If it is True, the system matrix is the upper
12847 triangular and is located in the corresponding part of
12848 matrix A.
12849 Trans - problem type. If it is True, the problem to be solved is
12850 A'*x=b, otherwise it is A*x=b.
12851 Isunit - matrix type. If it is True, the system matrix has a unit
12852 diagonal (the elements on the main diagonal are not used
12853 in the calculation process), otherwise the matrix is considered
12854 to be a general triangular matrix.
12855
12856 Output parameters:
12857 X - solution. Array whose index ranges within [0..N-1].
12858 S - scaling factor.
12859
12860 -- LAPACK auxiliary routine (version 3.0) --
12861 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
12862 Courant Institute, Argonne National Lab, and Rice University
12863 June 30, 1992
12864 *************************************************************************/
rmatrixtrsafesolve(ae_matrix * a,ae_int_t n,ae_vector * x,double * s,ae_bool isupper,ae_bool istrans,ae_bool isunit,ae_state * _state)12865 void rmatrixtrsafesolve(/* Real */ ae_matrix* a,
12866 ae_int_t n,
12867 /* Real */ ae_vector* x,
12868 double* s,
12869 ae_bool isupper,
12870 ae_bool istrans,
12871 ae_bool isunit,
12872 ae_state *_state)
12873 {
12874 ae_frame _frame_block;
12875 ae_bool normin;
12876 ae_vector cnorm;
12877 ae_matrix a1;
12878 ae_vector x1;
12879 ae_int_t i;
12880
12881 ae_frame_make(_state, &_frame_block);
12882 memset(&cnorm, 0, sizeof(cnorm));
12883 memset(&a1, 0, sizeof(a1));
12884 memset(&x1, 0, sizeof(x1));
12885 *s = 0;
12886 ae_vector_init(&cnorm, 0, DT_REAL, _state, ae_true);
12887 ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
12888 ae_vector_init(&x1, 0, DT_REAL, _state, ae_true);
12889
12890
12891 /*
12892 * From 0-based to 1-based
12893 */
12894 normin = ae_false;
12895 ae_matrix_set_length(&a1, n+1, n+1, _state);
12896 ae_vector_set_length(&x1, n+1, _state);
12897 for(i=1; i<=n; i++)
12898 {
12899 ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
12900 }
12901 ae_v_move(&x1.ptr.p_double[1], 1, &x->ptr.p_double[0], 1, ae_v_len(1,n));
12902
12903 /*
12904 * Solve 1-based
12905 */
12906 safesolvetriangular(&a1, n, &x1, s, isupper, istrans, isunit, normin, &cnorm, _state);
12907
12908 /*
12909 * From 1-based to 0-based
12910 */
12911 ae_v_move(&x->ptr.p_double[0], 1, &x1.ptr.p_double[1], 1, ae_v_len(0,n-1));
12912 ae_frame_leave(_state);
12913 }
12914
12915
12916 /*************************************************************************
12917 Obsolete 1-based subroutine.
12918 See RMatrixTRSafeSolve for 0-based replacement.
12919 *************************************************************************/
safesolvetriangular(ae_matrix * a,ae_int_t n,ae_vector * x,double * s,ae_bool isupper,ae_bool istrans,ae_bool isunit,ae_bool normin,ae_vector * cnorm,ae_state * _state)12920 void safesolvetriangular(/* Real */ ae_matrix* a,
12921 ae_int_t n,
12922 /* Real */ ae_vector* x,
12923 double* s,
12924 ae_bool isupper,
12925 ae_bool istrans,
12926 ae_bool isunit,
12927 ae_bool normin,
12928 /* Real */ ae_vector* cnorm,
12929 ae_state *_state)
12930 {
12931 ae_int_t i;
12932 ae_int_t imax;
12933 ae_int_t j;
12934 ae_int_t jfirst;
12935 ae_int_t jinc;
12936 ae_int_t jlast;
12937 ae_int_t jm1;
12938 ae_int_t jp1;
12939 ae_int_t ip1;
12940 ae_int_t im1;
12941 ae_int_t k;
12942 ae_int_t flg;
12943 double v;
12944 double vd;
12945 double bignum;
12946 double grow;
12947 double rec;
12948 double smlnum;
12949 double sumj;
12950 double tjj;
12951 double tjjs;
12952 double tmax;
12953 double tscal;
12954 double uscal;
12955 double xbnd;
12956 double xj;
12957 double xmax;
12958 ae_bool notran;
12959 ae_bool upper;
12960 ae_bool nounit;
12961
12962 *s = 0;
12963
12964 upper = isupper;
12965 notran = !istrans;
12966 nounit = !isunit;
12967
12968 /*
12969 * these initializers are not really necessary,
12970 * but without them compiler complains about uninitialized locals
12971 */
12972 tjjs = (double)(0);
12973
12974 /*
12975 * Quick return if possible
12976 */
12977 if( n==0 )
12978 {
12979 return;
12980 }
12981
12982 /*
12983 * Determine machine dependent parameters to control overflow.
12984 */
12985 smlnum = ae_minrealnumber/(ae_machineepsilon*2);
12986 bignum = 1/smlnum;
12987 *s = (double)(1);
12988 if( !normin )
12989 {
12990 ae_vector_set_length(cnorm, n+1, _state);
12991
12992 /*
12993 * Compute the 1-norm of each column, not including the diagonal.
12994 */
12995 if( upper )
12996 {
12997
12998 /*
12999 * A is upper triangular.
13000 */
13001 for(j=1; j<=n; j++)
13002 {
13003 v = (double)(0);
13004 for(k=1; k<=j-1; k++)
13005 {
13006 v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
13007 }
13008 cnorm->ptr.p_double[j] = v;
13009 }
13010 }
13011 else
13012 {
13013
13014 /*
13015 * A is lower triangular.
13016 */
13017 for(j=1; j<=n-1; j++)
13018 {
13019 v = (double)(0);
13020 for(k=j+1; k<=n; k++)
13021 {
13022 v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
13023 }
13024 cnorm->ptr.p_double[j] = v;
13025 }
13026 cnorm->ptr.p_double[n] = (double)(0);
13027 }
13028 }
13029
13030 /*
13031 * Scale the column norms by TSCAL if the maximum element in CNORM is
13032 * greater than BIGNUM.
13033 */
13034 imax = 1;
13035 for(k=2; k<=n; k++)
13036 {
13037 if( ae_fp_greater(cnorm->ptr.p_double[k],cnorm->ptr.p_double[imax]) )
13038 {
13039 imax = k;
13040 }
13041 }
13042 tmax = cnorm->ptr.p_double[imax];
13043 if( ae_fp_less_eq(tmax,bignum) )
13044 {
13045 tscal = (double)(1);
13046 }
13047 else
13048 {
13049 tscal = 1/(smlnum*tmax);
13050 ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), tscal);
13051 }
13052
13053 /*
13054 * Compute a bound on the computed solution vector to see if the
13055 * Level 2 BLAS routine DTRSV can be used.
13056 */
13057 j = 1;
13058 for(k=2; k<=n; k++)
13059 {
13060 if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[j], _state)) )
13061 {
13062 j = k;
13063 }
13064 }
13065 xmax = ae_fabs(x->ptr.p_double[j], _state);
13066 xbnd = xmax;
13067 if( notran )
13068 {
13069
13070 /*
13071 * Compute the growth in A * x = b.
13072 */
13073 if( upper )
13074 {
13075 jfirst = n;
13076 jlast = 1;
13077 jinc = -1;
13078 }
13079 else
13080 {
13081 jfirst = 1;
13082 jlast = n;
13083 jinc = 1;
13084 }
13085 if( ae_fp_neq(tscal,(double)(1)) )
13086 {
13087 grow = (double)(0);
13088 }
13089 else
13090 {
13091 if( nounit )
13092 {
13093
13094 /*
13095 * A is non-unit triangular.
13096 *
13097 * Compute GROW = 1/G(j) and XBND = 1/M(j).
13098 * Initially, G(0) = max{x(i), i=1,...,n}.
13099 */
13100 grow = 1/ae_maxreal(xbnd, smlnum, _state);
13101 xbnd = grow;
13102 j = jfirst;
13103 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13104 {
13105
13106 /*
13107 * Exit the loop if the growth factor is too small.
13108 */
13109 if( ae_fp_less_eq(grow,smlnum) )
13110 {
13111 break;
13112 }
13113
13114 /*
13115 * M(j) = G(j-1) / abs(A(j,j))
13116 */
13117 tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
13118 xbnd = ae_minreal(xbnd, ae_minreal((double)(1), tjj, _state)*grow, _state);
13119 if( ae_fp_greater_eq(tjj+cnorm->ptr.p_double[j],smlnum) )
13120 {
13121
13122 /*
13123 * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
13124 */
13125 grow = grow*(tjj/(tjj+cnorm->ptr.p_double[j]));
13126 }
13127 else
13128 {
13129
13130 /*
13131 * G(j) could overflow, set GROW to 0.
13132 */
13133 grow = (double)(0);
13134 }
13135 if( j==jlast )
13136 {
13137 grow = xbnd;
13138 }
13139 j = j+jinc;
13140 }
13141 }
13142 else
13143 {
13144
13145 /*
13146 * A is unit triangular.
13147 *
13148 * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
13149 */
13150 grow = ae_minreal((double)(1), 1/ae_maxreal(xbnd, smlnum, _state), _state);
13151 j = jfirst;
13152 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13153 {
13154
13155 /*
13156 * Exit the loop if the growth factor is too small.
13157 */
13158 if( ae_fp_less_eq(grow,smlnum) )
13159 {
13160 break;
13161 }
13162
13163 /*
13164 * G(j) = G(j-1)*( 1 + CNORM(j) )
13165 */
13166 grow = grow*(1/(1+cnorm->ptr.p_double[j]));
13167 j = j+jinc;
13168 }
13169 }
13170 }
13171 }
13172 else
13173 {
13174
13175 /*
13176 * Compute the growth in A' * x = b.
13177 */
13178 if( upper )
13179 {
13180 jfirst = 1;
13181 jlast = n;
13182 jinc = 1;
13183 }
13184 else
13185 {
13186 jfirst = n;
13187 jlast = 1;
13188 jinc = -1;
13189 }
13190 if( ae_fp_neq(tscal,(double)(1)) )
13191 {
13192 grow = (double)(0);
13193 }
13194 else
13195 {
13196 if( nounit )
13197 {
13198
13199 /*
13200 * A is non-unit triangular.
13201 *
13202 * Compute GROW = 1/G(j) and XBND = 1/M(j).
13203 * Initially, M(0) = max{x(i), i=1,...,n}.
13204 */
13205 grow = 1/ae_maxreal(xbnd, smlnum, _state);
13206 xbnd = grow;
13207 j = jfirst;
13208 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13209 {
13210
13211 /*
13212 * Exit the loop if the growth factor is too small.
13213 */
13214 if( ae_fp_less_eq(grow,smlnum) )
13215 {
13216 break;
13217 }
13218
13219 /*
13220 * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
13221 */
13222 xj = 1+cnorm->ptr.p_double[j];
13223 grow = ae_minreal(grow, xbnd/xj, _state);
13224
13225 /*
13226 * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
13227 */
13228 tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
13229 if( ae_fp_greater(xj,tjj) )
13230 {
13231 xbnd = xbnd*(tjj/xj);
13232 }
13233 if( j==jlast )
13234 {
13235 grow = ae_minreal(grow, xbnd, _state);
13236 }
13237 j = j+jinc;
13238 }
13239 }
13240 else
13241 {
13242
13243 /*
13244 * A is unit triangular.
13245 *
13246 * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
13247 */
13248 grow = ae_minreal((double)(1), 1/ae_maxreal(xbnd, smlnum, _state), _state);
13249 j = jfirst;
13250 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13251 {
13252
13253 /*
13254 * Exit the loop if the growth factor is too small.
13255 */
13256 if( ae_fp_less_eq(grow,smlnum) )
13257 {
13258 break;
13259 }
13260
13261 /*
13262 * G(j) = ( 1 + CNORM(j) )*G(j-1)
13263 */
13264 xj = 1+cnorm->ptr.p_double[j];
13265 grow = grow/xj;
13266 j = j+jinc;
13267 }
13268 }
13269 }
13270 }
13271 if( ae_fp_greater(grow*tscal,smlnum) )
13272 {
13273
13274 /*
13275 * Use the Level 2 BLAS solve if the reciprocal of the bound on
13276 * elements of X is not too small.
13277 */
13278 if( (upper&¬ran)||(!upper&&!notran) )
13279 {
13280 if( nounit )
13281 {
13282 vd = a->ptr.pp_double[n][n];
13283 }
13284 else
13285 {
13286 vd = (double)(1);
13287 }
13288 x->ptr.p_double[n] = x->ptr.p_double[n]/vd;
13289 for(i=n-1; i>=1; i--)
13290 {
13291 ip1 = i+1;
13292 if( upper )
13293 {
13294 v = ae_v_dotproduct(&a->ptr.pp_double[i][ip1], 1, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
13295 }
13296 else
13297 {
13298 v = ae_v_dotproduct(&a->ptr.pp_double[ip1][i], a->stride, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
13299 }
13300 if( nounit )
13301 {
13302 vd = a->ptr.pp_double[i][i];
13303 }
13304 else
13305 {
13306 vd = (double)(1);
13307 }
13308 x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
13309 }
13310 }
13311 else
13312 {
13313 if( nounit )
13314 {
13315 vd = a->ptr.pp_double[1][1];
13316 }
13317 else
13318 {
13319 vd = (double)(1);
13320 }
13321 x->ptr.p_double[1] = x->ptr.p_double[1]/vd;
13322 for(i=2; i<=n; i++)
13323 {
13324 im1 = i-1;
13325 if( upper )
13326 {
13327 v = ae_v_dotproduct(&a->ptr.pp_double[1][i], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
13328 }
13329 else
13330 {
13331 v = ae_v_dotproduct(&a->ptr.pp_double[i][1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
13332 }
13333 if( nounit )
13334 {
13335 vd = a->ptr.pp_double[i][i];
13336 }
13337 else
13338 {
13339 vd = (double)(1);
13340 }
13341 x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
13342 }
13343 }
13344 }
13345 else
13346 {
13347
13348 /*
13349 * Use a Level 1 BLAS solve, scaling intermediate results.
13350 */
13351 if( ae_fp_greater(xmax,bignum) )
13352 {
13353
13354 /*
13355 * Scale X so that its components are less than or equal to
13356 * BIGNUM in absolute value.
13357 */
13358 *s = bignum/xmax;
13359 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), *s);
13360 xmax = bignum;
13361 }
13362 if( notran )
13363 {
13364
13365 /*
13366 * Solve A * x = b
13367 */
13368 j = jfirst;
13369 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13370 {
13371
13372 /*
13373 * Compute x(j) = b(j) / A(j,j), scaling x if necessary.
13374 */
13375 xj = ae_fabs(x->ptr.p_double[j], _state);
13376 flg = 0;
13377 if( nounit )
13378 {
13379 tjjs = a->ptr.pp_double[j][j]*tscal;
13380 }
13381 else
13382 {
13383 tjjs = tscal;
13384 if( ae_fp_eq(tscal,(double)(1)) )
13385 {
13386 flg = 100;
13387 }
13388 }
13389 if( flg!=100 )
13390 {
13391 tjj = ae_fabs(tjjs, _state);
13392 if( ae_fp_greater(tjj,smlnum) )
13393 {
13394
13395 /*
13396 * abs(A(j,j)) > SMLNUM:
13397 */
13398 if( ae_fp_less(tjj,(double)(1)) )
13399 {
13400 if( ae_fp_greater(xj,tjj*bignum) )
13401 {
13402
13403 /*
13404 * Scale x by 1/b(j).
13405 */
13406 rec = 1/xj;
13407 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13408 *s = *s*rec;
13409 xmax = xmax*rec;
13410 }
13411 }
13412 x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13413 xj = ae_fabs(x->ptr.p_double[j], _state);
13414 }
13415 else
13416 {
13417 if( ae_fp_greater(tjj,(double)(0)) )
13418 {
13419
13420 /*
13421 * 0 < abs(A(j,j)) <= SMLNUM:
13422 */
13423 if( ae_fp_greater(xj,tjj*bignum) )
13424 {
13425
13426 /*
13427 * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
13428 * to avoid overflow when dividing by A(j,j).
13429 */
13430 rec = tjj*bignum/xj;
13431 if( ae_fp_greater(cnorm->ptr.p_double[j],(double)(1)) )
13432 {
13433
13434 /*
13435 * Scale by 1/CNORM(j) to avoid overflow when
13436 * multiplying x(j) times column j.
13437 */
13438 rec = rec/cnorm->ptr.p_double[j];
13439 }
13440 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13441 *s = *s*rec;
13442 xmax = xmax*rec;
13443 }
13444 x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13445 xj = ae_fabs(x->ptr.p_double[j], _state);
13446 }
13447 else
13448 {
13449
13450 /*
13451 * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and
13452 * scale = 0, and compute a solution to A*x = 0.
13453 */
13454 for(i=1; i<=n; i++)
13455 {
13456 x->ptr.p_double[i] = (double)(0);
13457 }
13458 x->ptr.p_double[j] = (double)(1);
13459 xj = (double)(1);
13460 *s = (double)(0);
13461 xmax = (double)(0);
13462 }
13463 }
13464 }
13465
13466 /*
13467 * Scale x if necessary to avoid overflow when adding a
13468 * multiple of column j of A.
13469 */
13470 if( ae_fp_greater(xj,(double)(1)) )
13471 {
13472 rec = 1/xj;
13473 if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xmax)*rec) )
13474 {
13475
13476 /*
13477 * Scale x by 1/(2*abs(x(j))).
13478 */
13479 rec = rec*0.5;
13480 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13481 *s = *s*rec;
13482 }
13483 }
13484 else
13485 {
13486 if( ae_fp_greater(xj*cnorm->ptr.p_double[j],bignum-xmax) )
13487 {
13488
13489 /*
13490 * Scale x by 1/2.
13491 */
13492 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), 0.5);
13493 *s = *s*0.5;
13494 }
13495 }
13496 if( upper )
13497 {
13498 if( j>1 )
13499 {
13500
13501 /*
13502 * Compute the update
13503 * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
13504 */
13505 v = x->ptr.p_double[j]*tscal;
13506 jm1 = j-1;
13507 ae_v_subd(&x->ptr.p_double[1], 1, &a->ptr.pp_double[1][j], a->stride, ae_v_len(1,jm1), v);
13508 i = 1;
13509 for(k=2; k<=j-1; k++)
13510 {
13511 if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
13512 {
13513 i = k;
13514 }
13515 }
13516 xmax = ae_fabs(x->ptr.p_double[i], _state);
13517 }
13518 }
13519 else
13520 {
13521 if( j<n )
13522 {
13523
13524 /*
13525 * Compute the update
13526 * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
13527 */
13528 jp1 = j+1;
13529 v = x->ptr.p_double[j]*tscal;
13530 ae_v_subd(&x->ptr.p_double[jp1], 1, &a->ptr.pp_double[jp1][j], a->stride, ae_v_len(jp1,n), v);
13531 i = j+1;
13532 for(k=j+2; k<=n; k++)
13533 {
13534 if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
13535 {
13536 i = k;
13537 }
13538 }
13539 xmax = ae_fabs(x->ptr.p_double[i], _state);
13540 }
13541 }
13542 j = j+jinc;
13543 }
13544 }
13545 else
13546 {
13547
13548 /*
13549 * Solve A' * x = b
13550 */
13551 j = jfirst;
13552 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13553 {
13554
13555 /*
13556 * Compute x(j) = b(j) - sum A(k,j)*x(k).
13557 * k<>j
13558 */
13559 xj = ae_fabs(x->ptr.p_double[j], _state);
13560 uscal = tscal;
13561 rec = 1/ae_maxreal(xmax, (double)(1), _state);
13562 if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xj)*rec) )
13563 {
13564
13565 /*
13566 * If x(j) could overflow, scale x by 1/(2*XMAX).
13567 */
13568 rec = rec*0.5;
13569 if( nounit )
13570 {
13571 tjjs = a->ptr.pp_double[j][j]*tscal;
13572 }
13573 else
13574 {
13575 tjjs = tscal;
13576 }
13577 tjj = ae_fabs(tjjs, _state);
13578 if( ae_fp_greater(tjj,(double)(1)) )
13579 {
13580
13581 /*
13582 * Divide by A(j,j) when scaling x if A(j,j) > 1.
13583 */
13584 rec = ae_minreal((double)(1), rec*tjj, _state);
13585 uscal = uscal/tjjs;
13586 }
13587 if( ae_fp_less(rec,(double)(1)) )
13588 {
13589 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13590 *s = *s*rec;
13591 xmax = xmax*rec;
13592 }
13593 }
13594 sumj = (double)(0);
13595 if( ae_fp_eq(uscal,(double)(1)) )
13596 {
13597
13598 /*
13599 * If the scaling needed for A in the dot product is 1,
13600 * call DDOT to perform the dot product.
13601 */
13602 if( upper )
13603 {
13604 if( j>1 )
13605 {
13606 jm1 = j-1;
13607 sumj = ae_v_dotproduct(&a->ptr.pp_double[1][j], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,jm1));
13608 }
13609 else
13610 {
13611 sumj = (double)(0);
13612 }
13613 }
13614 else
13615 {
13616 if( j<n )
13617 {
13618 jp1 = j+1;
13619 sumj = ae_v_dotproduct(&a->ptr.pp_double[jp1][j], a->stride, &x->ptr.p_double[jp1], 1, ae_v_len(jp1,n));
13620 }
13621 }
13622 }
13623 else
13624 {
13625
13626 /*
13627 * Otherwise, use in-line code for the dot product.
13628 */
13629 if( upper )
13630 {
13631 for(i=1; i<=j-1; i++)
13632 {
13633 v = a->ptr.pp_double[i][j]*uscal;
13634 sumj = sumj+v*x->ptr.p_double[i];
13635 }
13636 }
13637 else
13638 {
13639 if( j<n )
13640 {
13641 for(i=j+1; i<=n; i++)
13642 {
13643 v = a->ptr.pp_double[i][j]*uscal;
13644 sumj = sumj+v*x->ptr.p_double[i];
13645 }
13646 }
13647 }
13648 }
13649 if( ae_fp_eq(uscal,tscal) )
13650 {
13651
13652 /*
13653 * Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
13654 * was not used to scale the dotproduct.
13655 */
13656 x->ptr.p_double[j] = x->ptr.p_double[j]-sumj;
13657 xj = ae_fabs(x->ptr.p_double[j], _state);
13658 flg = 0;
13659 if( nounit )
13660 {
13661 tjjs = a->ptr.pp_double[j][j]*tscal;
13662 }
13663 else
13664 {
13665 tjjs = tscal;
13666 if( ae_fp_eq(tscal,(double)(1)) )
13667 {
13668 flg = 150;
13669 }
13670 }
13671
13672 /*
13673 * Compute x(j) = x(j) / A(j,j), scaling if necessary.
13674 */
13675 if( flg!=150 )
13676 {
13677 tjj = ae_fabs(tjjs, _state);
13678 if( ae_fp_greater(tjj,smlnum) )
13679 {
13680
13681 /*
13682 * abs(A(j,j)) > SMLNUM:
13683 */
13684 if( ae_fp_less(tjj,(double)(1)) )
13685 {
13686 if( ae_fp_greater(xj,tjj*bignum) )
13687 {
13688
13689 /*
13690 * Scale X by 1/abs(x(j)).
13691 */
13692 rec = 1/xj;
13693 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13694 *s = *s*rec;
13695 xmax = xmax*rec;
13696 }
13697 }
13698 x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13699 }
13700 else
13701 {
13702 if( ae_fp_greater(tjj,(double)(0)) )
13703 {
13704
13705 /*
13706 * 0 < abs(A(j,j)) <= SMLNUM:
13707 */
13708 if( ae_fp_greater(xj,tjj*bignum) )
13709 {
13710
13711 /*
13712 * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
13713 */
13714 rec = tjj*bignum/xj;
13715 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13716 *s = *s*rec;
13717 xmax = xmax*rec;
13718 }
13719 x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13720 }
13721 else
13722 {
13723
13724 /*
13725 * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and
13726 * scale = 0, and compute a solution to A'*x = 0.
13727 */
13728 for(i=1; i<=n; i++)
13729 {
13730 x->ptr.p_double[i] = (double)(0);
13731 }
13732 x->ptr.p_double[j] = (double)(1);
13733 *s = (double)(0);
13734 xmax = (double)(0);
13735 }
13736 }
13737 }
13738 }
13739 else
13740 {
13741
13742 /*
13743 * Compute x(j) := x(j) / A(j,j) - sumj if the dot
13744 * product has already been divided by 1/A(j,j).
13745 */
13746 x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs-sumj;
13747 }
13748 xmax = ae_maxreal(xmax, ae_fabs(x->ptr.p_double[j], _state), _state);
13749 j = j+jinc;
13750 }
13751 }
13752 *s = *s/tscal;
13753 }
13754
13755 /*
13756 * Scale the column norms by 1/TSCAL for return.
13757 */
13758 if( ae_fp_neq(tscal,(double)(1)) )
13759 {
13760 v = 1/tscal;
13761 ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), v);
13762 }
13763 }
13764
13765
13766 #endif
13767 #if defined(AE_COMPILE_SAFESOLVE) || !defined(AE_PARTIAL_BUILD)
13768
13769
13770 /*************************************************************************
13771 Real implementation of CMatrixScaledTRSafeSolve
13772
13773 -- ALGLIB routine --
13774 21.01.2010
13775 Bochkanov Sergey
13776 *************************************************************************/
rmatrixscaledtrsafesolve(ae_matrix * a,double sa,ae_int_t n,ae_vector * x,ae_bool isupper,ae_int_t trans,ae_bool isunit,double maxgrowth,ae_state * _state)13777 ae_bool rmatrixscaledtrsafesolve(/* Real */ ae_matrix* a,
13778 double sa,
13779 ae_int_t n,
13780 /* Real */ ae_vector* x,
13781 ae_bool isupper,
13782 ae_int_t trans,
13783 ae_bool isunit,
13784 double maxgrowth,
13785 ae_state *_state)
13786 {
13787 ae_frame _frame_block;
13788 double lnmax;
13789 double nrmb;
13790 double nrmx;
13791 ae_int_t i;
13792 ae_complex alpha;
13793 ae_complex beta;
13794 double vr;
13795 ae_complex cx;
13796 ae_vector tmp;
13797 ae_bool result;
13798
13799 ae_frame_make(_state, &_frame_block);
13800 memset(&tmp, 0, sizeof(tmp));
13801 ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
13802
13803 ae_assert(n>0, "RMatrixTRSafeSolve: incorrect N!", _state);
13804 ae_assert(trans==0||trans==1, "RMatrixTRSafeSolve: incorrect Trans!", _state);
13805 result = ae_true;
13806 lnmax = ae_log(ae_maxrealnumber, _state);
13807
13808 /*
13809 * Quick return if possible
13810 */
13811 if( n<=0 )
13812 {
13813 ae_frame_leave(_state);
13814 return result;
13815 }
13816
13817 /*
13818 * Load norms: right part and X
13819 */
13820 nrmb = (double)(0);
13821 for(i=0; i<=n-1; i++)
13822 {
13823 nrmb = ae_maxreal(nrmb, ae_fabs(x->ptr.p_double[i], _state), _state);
13824 }
13825 nrmx = (double)(0);
13826
13827 /*
13828 * Solve
13829 */
13830 ae_vector_set_length(&tmp, n, _state);
13831 result = ae_true;
13832 if( isupper&&trans==0 )
13833 {
13834
13835 /*
13836 * U*x = b
13837 */
13838 for(i=n-1; i>=0; i--)
13839 {
13840
13841 /*
13842 * Task is reduced to alpha*x[i] = beta
13843 */
13844 if( isunit )
13845 {
13846 alpha = ae_complex_from_d(sa);
13847 }
13848 else
13849 {
13850 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13851 }
13852 if( i<n-1 )
13853 {
13854 ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
13855 vr = ae_v_dotproduct(&tmp.ptr.p_double[i+1], 1, &x->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
13856 beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
13857 }
13858 else
13859 {
13860 beta = ae_complex_from_d(x->ptr.p_double[i]);
13861 }
13862
13863 /*
13864 * solve alpha*x[i] = beta
13865 */
13866 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13867 if( !result )
13868 {
13869 ae_frame_leave(_state);
13870 return result;
13871 }
13872 x->ptr.p_double[i] = cx.x;
13873 }
13874 ae_frame_leave(_state);
13875 return result;
13876 }
13877 if( !isupper&&trans==0 )
13878 {
13879
13880 /*
13881 * L*x = b
13882 */
13883 for(i=0; i<=n-1; i++)
13884 {
13885
13886 /*
13887 * Task is reduced to alpha*x[i] = beta
13888 */
13889 if( isunit )
13890 {
13891 alpha = ae_complex_from_d(sa);
13892 }
13893 else
13894 {
13895 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13896 }
13897 if( i>0 )
13898 {
13899 ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
13900 vr = ae_v_dotproduct(&tmp.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,i-1));
13901 beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
13902 }
13903 else
13904 {
13905 beta = ae_complex_from_d(x->ptr.p_double[i]);
13906 }
13907
13908 /*
13909 * solve alpha*x[i] = beta
13910 */
13911 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13912 if( !result )
13913 {
13914 ae_frame_leave(_state);
13915 return result;
13916 }
13917 x->ptr.p_double[i] = cx.x;
13918 }
13919 ae_frame_leave(_state);
13920 return result;
13921 }
13922 if( isupper&&trans==1 )
13923 {
13924
13925 /*
13926 * U^T*x = b
13927 */
13928 for(i=0; i<=n-1; i++)
13929 {
13930
13931 /*
13932 * Task is reduced to alpha*x[i] = beta
13933 */
13934 if( isunit )
13935 {
13936 alpha = ae_complex_from_d(sa);
13937 }
13938 else
13939 {
13940 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13941 }
13942 beta = ae_complex_from_d(x->ptr.p_double[i]);
13943
13944 /*
13945 * solve alpha*x[i] = beta
13946 */
13947 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13948 if( !result )
13949 {
13950 ae_frame_leave(_state);
13951 return result;
13952 }
13953 x->ptr.p_double[i] = cx.x;
13954
13955 /*
13956 * update the rest of right part
13957 */
13958 if( i<n-1 )
13959 {
13960 vr = cx.x;
13961 ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
13962 ae_v_subd(&x->ptr.p_double[i+1], 1, &tmp.ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), vr);
13963 }
13964 }
13965 ae_frame_leave(_state);
13966 return result;
13967 }
13968 if( !isupper&&trans==1 )
13969 {
13970
13971 /*
13972 * L^T*x = b
13973 */
13974 for(i=n-1; i>=0; i--)
13975 {
13976
13977 /*
13978 * Task is reduced to alpha*x[i] = beta
13979 */
13980 if( isunit )
13981 {
13982 alpha = ae_complex_from_d(sa);
13983 }
13984 else
13985 {
13986 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13987 }
13988 beta = ae_complex_from_d(x->ptr.p_double[i]);
13989
13990 /*
13991 * solve alpha*x[i] = beta
13992 */
13993 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13994 if( !result )
13995 {
13996 ae_frame_leave(_state);
13997 return result;
13998 }
13999 x->ptr.p_double[i] = cx.x;
14000
14001 /*
14002 * update the rest of right part
14003 */
14004 if( i>0 )
14005 {
14006 vr = cx.x;
14007 ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
14008 ae_v_subd(&x->ptr.p_double[0], 1, &tmp.ptr.p_double[0], 1, ae_v_len(0,i-1), vr);
14009 }
14010 }
14011 ae_frame_leave(_state);
14012 return result;
14013 }
14014 result = ae_false;
14015 ae_frame_leave(_state);
14016 return result;
14017 }
14018
14019
14020 /*************************************************************************
14021 Internal subroutine for safe solution of
14022
14023 SA*op(A)=b
14024
14025 where A is NxN upper/lower triangular/unitriangular matrix, op(A) is
14026 either identity transform, transposition or Hermitian transposition, SA is
14027 a scaling factor such that max(|SA*A[i,j]|) is close to 1.0 in magnutude.
14028
14029 This subroutine limits relative growth of solution (in inf-norm) by
14030 MaxGrowth, returning False if growth exceeds MaxGrowth. Degenerate or
14031 near-degenerate matrices are handled correctly (False is returned) as long
14032 as MaxGrowth is significantly less than MaxRealNumber/norm(b).
14033
14034 -- ALGLIB routine --
14035 21.01.2010
14036 Bochkanov Sergey
14037 *************************************************************************/
cmatrixscaledtrsafesolve(ae_matrix * a,double sa,ae_int_t n,ae_vector * x,ae_bool isupper,ae_int_t trans,ae_bool isunit,double maxgrowth,ae_state * _state)14038 ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a,
14039 double sa,
14040 ae_int_t n,
14041 /* Complex */ ae_vector* x,
14042 ae_bool isupper,
14043 ae_int_t trans,
14044 ae_bool isunit,
14045 double maxgrowth,
14046 ae_state *_state)
14047 {
14048 ae_frame _frame_block;
14049 double lnmax;
14050 double nrmb;
14051 double nrmx;
14052 ae_int_t i;
14053 ae_complex alpha;
14054 ae_complex beta;
14055 ae_complex vc;
14056 ae_vector tmp;
14057 ae_bool result;
14058
14059 ae_frame_make(_state, &_frame_block);
14060 memset(&tmp, 0, sizeof(tmp));
14061 ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
14062
14063 ae_assert(n>0, "CMatrixTRSafeSolve: incorrect N!", _state);
14064 ae_assert((trans==0||trans==1)||trans==2, "CMatrixTRSafeSolve: incorrect Trans!", _state);
14065 result = ae_true;
14066 lnmax = ae_log(ae_maxrealnumber, _state);
14067
14068 /*
14069 * Quick return if possible
14070 */
14071 if( n<=0 )
14072 {
14073 ae_frame_leave(_state);
14074 return result;
14075 }
14076
14077 /*
14078 * Load norms: right part and X
14079 */
14080 nrmb = (double)(0);
14081 for(i=0; i<=n-1; i++)
14082 {
14083 nrmb = ae_maxreal(nrmb, ae_c_abs(x->ptr.p_complex[i], _state), _state);
14084 }
14085 nrmx = (double)(0);
14086
14087 /*
14088 * Solve
14089 */
14090 ae_vector_set_length(&tmp, n, _state);
14091 result = ae_true;
14092 if( isupper&&trans==0 )
14093 {
14094
14095 /*
14096 * U*x = b
14097 */
14098 for(i=n-1; i>=0; i--)
14099 {
14100
14101 /*
14102 * Task is reduced to alpha*x[i] = beta
14103 */
14104 if( isunit )
14105 {
14106 alpha = ae_complex_from_d(sa);
14107 }
14108 else
14109 {
14110 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14111 }
14112 if( i<n-1 )
14113 {
14114 ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
14115 vc = ae_v_cdotproduct(&tmp.ptr.p_complex[i+1], 1, "N", &x->ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1));
14116 beta = ae_c_sub(x->ptr.p_complex[i],vc);
14117 }
14118 else
14119 {
14120 beta = x->ptr.p_complex[i];
14121 }
14122
14123 /*
14124 * solve alpha*x[i] = beta
14125 */
14126 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14127 if( !result )
14128 {
14129 ae_frame_leave(_state);
14130 return result;
14131 }
14132 x->ptr.p_complex[i] = vc;
14133 }
14134 ae_frame_leave(_state);
14135 return result;
14136 }
14137 if( !isupper&&trans==0 )
14138 {
14139
14140 /*
14141 * L*x = b
14142 */
14143 for(i=0; i<=n-1; i++)
14144 {
14145
14146 /*
14147 * Task is reduced to alpha*x[i] = beta
14148 */
14149 if( isunit )
14150 {
14151 alpha = ae_complex_from_d(sa);
14152 }
14153 else
14154 {
14155 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14156 }
14157 if( i>0 )
14158 {
14159 ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
14160 vc = ae_v_cdotproduct(&tmp.ptr.p_complex[0], 1, "N", &x->ptr.p_complex[0], 1, "N", ae_v_len(0,i-1));
14161 beta = ae_c_sub(x->ptr.p_complex[i],vc);
14162 }
14163 else
14164 {
14165 beta = x->ptr.p_complex[i];
14166 }
14167
14168 /*
14169 * solve alpha*x[i] = beta
14170 */
14171 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14172 if( !result )
14173 {
14174 ae_frame_leave(_state);
14175 return result;
14176 }
14177 x->ptr.p_complex[i] = vc;
14178 }
14179 ae_frame_leave(_state);
14180 return result;
14181 }
14182 if( isupper&&trans==1 )
14183 {
14184
14185 /*
14186 * U^T*x = b
14187 */
14188 for(i=0; i<=n-1; i++)
14189 {
14190
14191 /*
14192 * Task is reduced to alpha*x[i] = beta
14193 */
14194 if( isunit )
14195 {
14196 alpha = ae_complex_from_d(sa);
14197 }
14198 else
14199 {
14200 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14201 }
14202 beta = x->ptr.p_complex[i];
14203
14204 /*
14205 * solve alpha*x[i] = beta
14206 */
14207 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14208 if( !result )
14209 {
14210 ae_frame_leave(_state);
14211 return result;
14212 }
14213 x->ptr.p_complex[i] = vc;
14214
14215 /*
14216 * update the rest of right part
14217 */
14218 if( i<n-1 )
14219 {
14220 ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
14221 ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
14222 }
14223 }
14224 ae_frame_leave(_state);
14225 return result;
14226 }
14227 if( !isupper&&trans==1 )
14228 {
14229
14230 /*
14231 * L^T*x = b
14232 */
14233 for(i=n-1; i>=0; i--)
14234 {
14235
14236 /*
14237 * Task is reduced to alpha*x[i] = beta
14238 */
14239 if( isunit )
14240 {
14241 alpha = ae_complex_from_d(sa);
14242 }
14243 else
14244 {
14245 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14246 }
14247 beta = x->ptr.p_complex[i];
14248
14249 /*
14250 * solve alpha*x[i] = beta
14251 */
14252 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14253 if( !result )
14254 {
14255 ae_frame_leave(_state);
14256 return result;
14257 }
14258 x->ptr.p_complex[i] = vc;
14259
14260 /*
14261 * update the rest of right part
14262 */
14263 if( i>0 )
14264 {
14265 ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
14266 ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
14267 }
14268 }
14269 ae_frame_leave(_state);
14270 return result;
14271 }
14272 if( isupper&&trans==2 )
14273 {
14274
14275 /*
14276 * U^H*x = b
14277 */
14278 for(i=0; i<=n-1; i++)
14279 {
14280
14281 /*
14282 * Task is reduced to alpha*x[i] = beta
14283 */
14284 if( isunit )
14285 {
14286 alpha = ae_complex_from_d(sa);
14287 }
14288 else
14289 {
14290 alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
14291 }
14292 beta = x->ptr.p_complex[i];
14293
14294 /*
14295 * solve alpha*x[i] = beta
14296 */
14297 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14298 if( !result )
14299 {
14300 ae_frame_leave(_state);
14301 return result;
14302 }
14303 x->ptr.p_complex[i] = vc;
14304
14305 /*
14306 * update the rest of right part
14307 */
14308 if( i<n-1 )
14309 {
14310 ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "Conj", ae_v_len(i+1,n-1), sa);
14311 ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
14312 }
14313 }
14314 ae_frame_leave(_state);
14315 return result;
14316 }
14317 if( !isupper&&trans==2 )
14318 {
14319
14320 /*
14321 * L^T*x = b
14322 */
14323 for(i=n-1; i>=0; i--)
14324 {
14325
14326 /*
14327 * Task is reduced to alpha*x[i] = beta
14328 */
14329 if( isunit )
14330 {
14331 alpha = ae_complex_from_d(sa);
14332 }
14333 else
14334 {
14335 alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
14336 }
14337 beta = x->ptr.p_complex[i];
14338
14339 /*
14340 * solve alpha*x[i] = beta
14341 */
14342 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14343 if( !result )
14344 {
14345 ae_frame_leave(_state);
14346 return result;
14347 }
14348 x->ptr.p_complex[i] = vc;
14349
14350 /*
14351 * update the rest of right part
14352 */
14353 if( i>0 )
14354 {
14355 ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i-1), sa);
14356 ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
14357 }
14358 }
14359 ae_frame_leave(_state);
14360 return result;
14361 }
14362 result = ae_false;
14363 ae_frame_leave(_state);
14364 return result;
14365 }
14366
14367
14368 /*************************************************************************
14369 complex basic solver-updater for reduced linear system
14370
14371 alpha*x[i] = beta
14372
14373 solves this equation and updates it in overlfow-safe manner (keeping track
14374 of relative growth of solution).
14375
14376 Parameters:
14377 Alpha - alpha
14378 Beta - beta
14379 LnMax - precomputed Ln(MaxRealNumber)
14380 BNorm - inf-norm of b (right part of original system)
14381 MaxGrowth- maximum growth of norm(x) relative to norm(b)
14382 XNorm - inf-norm of other components of X (which are already processed)
14383 it is updated by CBasicSolveAndUpdate.
14384 X - solution
14385
14386 -- ALGLIB routine --
14387 26.01.2009
14388 Bochkanov Sergey
14389 *************************************************************************/
safesolve_cbasicsolveandupdate(ae_complex alpha,ae_complex beta,double lnmax,double bnorm,double maxgrowth,double * xnorm,ae_complex * x,ae_state * _state)14390 static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
14391 ae_complex beta,
14392 double lnmax,
14393 double bnorm,
14394 double maxgrowth,
14395 double* xnorm,
14396 ae_complex* x,
14397 ae_state *_state)
14398 {
14399 double v;
14400 ae_bool result;
14401
14402 x->x = 0;
14403 x->y = 0;
14404
14405 result = ae_false;
14406 if( ae_c_eq_d(alpha,(double)(0)) )
14407 {
14408 return result;
14409 }
14410 if( ae_c_neq_d(beta,(double)(0)) )
14411 {
14412
14413 /*
14414 * alpha*x[i]=beta
14415 */
14416 v = ae_log(ae_c_abs(beta, _state), _state)-ae_log(ae_c_abs(alpha, _state), _state);
14417 if( ae_fp_greater(v,lnmax) )
14418 {
14419 return result;
14420 }
14421 *x = ae_c_div(beta,alpha);
14422 }
14423 else
14424 {
14425
14426 /*
14427 * alpha*x[i]=0
14428 */
14429 *x = ae_complex_from_i(0);
14430 }
14431
14432 /*
14433 * update NrmX, test growth limit
14434 */
14435 *xnorm = ae_maxreal(*xnorm, ae_c_abs(*x, _state), _state);
14436 if( ae_fp_greater(*xnorm,maxgrowth*bnorm) )
14437 {
14438 return result;
14439 }
14440 result = ae_true;
14441 return result;
14442 }
14443
14444
14445 #endif
14446 #if defined(AE_COMPILE_XBLAS) || !defined(AE_PARTIAL_BUILD)
14447
14448
14449 /*************************************************************************
14450 More precise dot-product. Absolute error of subroutine result is about
14451 1 ulp of max(MX,V), where:
14452 MX = max( |a[i]*b[i]| )
14453 V = |(a,b)|
14454
14455 INPUT PARAMETERS
14456 A - array[0..N-1], vector 1
14457 B - array[0..N-1], vector 2
14458 N - vectors length, N<2^29.
14459 Temp - array[0..N-1], pre-allocated temporary storage
14460
14461 OUTPUT PARAMETERS
14462 R - (A,B)
14463 RErr - estimate of error. This estimate accounts for both errors
14464 during calculation of (A,B) and errors introduced by
14465 rounding of A and B to fit in double (about 1 ulp).
14466
14467 -- ALGLIB --
14468 Copyright 24.08.2009 by Bochkanov Sergey
14469 *************************************************************************/
xdot(ae_vector * a,ae_vector * b,ae_int_t n,ae_vector * temp,double * r,double * rerr,ae_state * _state)14470 void xdot(/* Real */ ae_vector* a,
14471 /* Real */ ae_vector* b,
14472 ae_int_t n,
14473 /* Real */ ae_vector* temp,
14474 double* r,
14475 double* rerr,
14476 ae_state *_state)
14477 {
14478 ae_int_t i;
14479 double mx;
14480 double v;
14481
14482 *r = 0;
14483 *rerr = 0;
14484
14485
14486 /*
14487 * special cases:
14488 * * N=0
14489 */
14490 if( n==0 )
14491 {
14492 *r = (double)(0);
14493 *rerr = (double)(0);
14494 return;
14495 }
14496 mx = (double)(0);
14497 for(i=0; i<=n-1; i++)
14498 {
14499 v = a->ptr.p_double[i]*b->ptr.p_double[i];
14500 temp->ptr.p_double[i] = v;
14501 mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14502 }
14503 if( ae_fp_eq(mx,(double)(0)) )
14504 {
14505 *r = (double)(0);
14506 *rerr = (double)(0);
14507 return;
14508 }
14509 xblas_xsum(temp, mx, n, r, rerr, _state);
14510 }
14511
14512
14513 /*************************************************************************
14514 More precise complex dot-product. Absolute error of subroutine result is
14515 about 1 ulp of max(MX,V), where:
14516 MX = max( |a[i]*b[i]| )
14517 V = |(a,b)|
14518
14519 INPUT PARAMETERS
14520 A - array[0..N-1], vector 1
14521 B - array[0..N-1], vector 2
14522 N - vectors length, N<2^29.
14523 Temp - array[0..2*N-1], pre-allocated temporary storage
14524
14525 OUTPUT PARAMETERS
14526 R - (A,B)
14527 RErr - estimate of error. This estimate accounts for both errors
14528 during calculation of (A,B) and errors introduced by
14529 rounding of A and B to fit in double (about 1 ulp).
14530
14531 -- ALGLIB --
14532 Copyright 27.01.2010 by Bochkanov Sergey
14533 *************************************************************************/
xcdot(ae_vector * a,ae_vector * b,ae_int_t n,ae_vector * temp,ae_complex * r,double * rerr,ae_state * _state)14534 void xcdot(/* Complex */ ae_vector* a,
14535 /* Complex */ ae_vector* b,
14536 ae_int_t n,
14537 /* Real */ ae_vector* temp,
14538 ae_complex* r,
14539 double* rerr,
14540 ae_state *_state)
14541 {
14542 ae_int_t i;
14543 double mx;
14544 double v;
14545 double rerrx;
14546 double rerry;
14547
14548 r->x = 0;
14549 r->y = 0;
14550 *rerr = 0;
14551
14552
14553 /*
14554 * special cases:
14555 * * N=0
14556 */
14557 if( n==0 )
14558 {
14559 *r = ae_complex_from_i(0);
14560 *rerr = (double)(0);
14561 return;
14562 }
14563
14564 /*
14565 * calculate real part
14566 */
14567 mx = (double)(0);
14568 for(i=0; i<=n-1; i++)
14569 {
14570 v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].x;
14571 temp->ptr.p_double[2*i+0] = v;
14572 mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14573 v = -a->ptr.p_complex[i].y*b->ptr.p_complex[i].y;
14574 temp->ptr.p_double[2*i+1] = v;
14575 mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14576 }
14577 if( ae_fp_eq(mx,(double)(0)) )
14578 {
14579 r->x = (double)(0);
14580 rerrx = (double)(0);
14581 }
14582 else
14583 {
14584 xblas_xsum(temp, mx, 2*n, &r->x, &rerrx, _state);
14585 }
14586
14587 /*
14588 * calculate imaginary part
14589 */
14590 mx = (double)(0);
14591 for(i=0; i<=n-1; i++)
14592 {
14593 v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].y;
14594 temp->ptr.p_double[2*i+0] = v;
14595 mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14596 v = a->ptr.p_complex[i].y*b->ptr.p_complex[i].x;
14597 temp->ptr.p_double[2*i+1] = v;
14598 mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14599 }
14600 if( ae_fp_eq(mx,(double)(0)) )
14601 {
14602 r->y = (double)(0);
14603 rerry = (double)(0);
14604 }
14605 else
14606 {
14607 xblas_xsum(temp, mx, 2*n, &r->y, &rerry, _state);
14608 }
14609
14610 /*
14611 * total error
14612 */
14613 if( ae_fp_eq(rerrx,(double)(0))&&ae_fp_eq(rerry,(double)(0)) )
14614 {
14615 *rerr = (double)(0);
14616 }
14617 else
14618 {
14619 *rerr = ae_maxreal(rerrx, rerry, _state)*ae_sqrt(1+ae_sqr(ae_minreal(rerrx, rerry, _state)/ae_maxreal(rerrx, rerry, _state), _state), _state);
14620 }
14621 }
14622
14623
14624 /*************************************************************************
14625 Internal subroutine for extra-precise calculation of SUM(w[i]).
14626
14627 INPUT PARAMETERS:
14628 W - array[0..N-1], values to be added
14629 W is modified during calculations.
14630 MX - max(W[i])
14631 N - array size
14632
14633 OUTPUT PARAMETERS:
14634 R - SUM(w[i])
14635 RErr- error estimate for R
14636
14637 -- ALGLIB --
14638 Copyright 24.08.2009 by Bochkanov Sergey
14639 *************************************************************************/
xblas_xsum(ae_vector * w,double mx,ae_int_t n,double * r,double * rerr,ae_state * _state)14640 static void xblas_xsum(/* Real */ ae_vector* w,
14641 double mx,
14642 ae_int_t n,
14643 double* r,
14644 double* rerr,
14645 ae_state *_state)
14646 {
14647 ae_int_t i;
14648 ae_int_t k;
14649 ae_int_t ks;
14650 double v;
14651 double s;
14652 double ln2;
14653 double chunk;
14654 double invchunk;
14655 ae_bool allzeros;
14656
14657 *r = 0;
14658 *rerr = 0;
14659
14660
14661 /*
14662 * special cases:
14663 * * N=0
14664 * * N is too large to use integer arithmetics
14665 */
14666 if( n==0 )
14667 {
14668 *r = (double)(0);
14669 *rerr = (double)(0);
14670 return;
14671 }
14672 if( ae_fp_eq(mx,(double)(0)) )
14673 {
14674 *r = (double)(0);
14675 *rerr = (double)(0);
14676 return;
14677 }
14678 ae_assert(n<536870912, "XDot: N is too large!", _state);
14679
14680 /*
14681 * Prepare
14682 */
14683 ln2 = ae_log((double)(2), _state);
14684 *rerr = mx*ae_machineepsilon;
14685
14686 /*
14687 * 1. find S such that 0.5<=S*MX<1
14688 * 2. multiply W by S, so task is normalized in some sense
14689 * 3. S:=1/S so we can obtain original vector multiplying by S
14690 */
14691 k = ae_round(ae_log(mx, _state)/ln2, _state);
14692 s = xblas_xfastpow((double)(2), -k, _state);
14693 if( !ae_isfinite(s, _state) )
14694 {
14695
14696 /*
14697 * Overflow or underflow during evaluation of S; fallback low-precision code
14698 */
14699 *r = (double)(0);
14700 *rerr = mx*ae_machineepsilon;
14701 for(i=0; i<=n-1; i++)
14702 {
14703 *r = *r+w->ptr.p_double[i];
14704 }
14705 return;
14706 }
14707 while(ae_fp_greater_eq(s*mx,(double)(1)))
14708 {
14709 s = 0.5*s;
14710 }
14711 while(ae_fp_less(s*mx,0.5))
14712 {
14713 s = 2*s;
14714 }
14715 ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
14716 s = 1/s;
14717
14718 /*
14719 * find Chunk=2^M such that N*Chunk<2^29
14720 *
14721 * we have chosen upper limit (2^29) with enough space left
14722 * to tolerate possible problems with rounding and N's close
14723 * to the limit, so we don't want to be very strict here.
14724 */
14725 k = ae_trunc(ae_log((double)536870912/(double)n, _state)/ln2, _state);
14726 chunk = xblas_xfastpow((double)(2), k, _state);
14727 if( ae_fp_less(chunk,(double)(2)) )
14728 {
14729 chunk = (double)(2);
14730 }
14731 invchunk = 1/chunk;
14732
14733 /*
14734 * calculate result
14735 */
14736 *r = (double)(0);
14737 ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), chunk);
14738 for(;;)
14739 {
14740 s = s*invchunk;
14741 allzeros = ae_true;
14742 ks = 0;
14743 for(i=0; i<=n-1; i++)
14744 {
14745 v = w->ptr.p_double[i];
14746 k = ae_trunc(v, _state);
14747 if( ae_fp_neq(v,(double)(k)) )
14748 {
14749 allzeros = ae_false;
14750 }
14751 w->ptr.p_double[i] = chunk*(v-k);
14752 ks = ks+k;
14753 }
14754 *r = *r+s*ks;
14755 v = ae_fabs(*r, _state);
14756 if( allzeros||ae_fp_eq(s*n+mx,mx) )
14757 {
14758 break;
14759 }
14760 }
14761
14762 /*
14763 * correct error
14764 */
14765 *rerr = ae_maxreal(*rerr, ae_fabs(*r, _state)*ae_machineepsilon, _state);
14766 }
14767
14768
14769 /*************************************************************************
14770 Fast Pow
14771
14772 -- ALGLIB --
14773 Copyright 24.08.2009 by Bochkanov Sergey
14774 *************************************************************************/
xblas_xfastpow(double r,ae_int_t n,ae_state * _state)14775 static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state)
14776 {
14777 double result;
14778
14779
14780 result = (double)(0);
14781 if( n>0 )
14782 {
14783 if( n%2==0 )
14784 {
14785 result = ae_sqr(xblas_xfastpow(r, n/2, _state), _state);
14786 }
14787 else
14788 {
14789 result = r*xblas_xfastpow(r, n-1, _state);
14790 }
14791 return result;
14792 }
14793 if( n==0 )
14794 {
14795 result = (double)(1);
14796 }
14797 if( n<0 )
14798 {
14799 result = xblas_xfastpow(1/r, -n, _state);
14800 }
14801 return result;
14802 }
14803
14804
14805 #endif
14806 #if defined(AE_COMPILE_LINMIN) || !defined(AE_PARTIAL_BUILD)
14807
14808
14809 /*************************************************************************
14810 Normalizes direction/step pair: makes |D|=1, scales Stp.
14811 If |D|=0, it returns, leavind D/Stp unchanged.
14812
14813 -- ALGLIB --
14814 Copyright 01.04.2010 by Bochkanov Sergey
14815 *************************************************************************/
linminnormalized(ae_vector * d,double * stp,ae_int_t n,ae_state * _state)14816 void linminnormalized(/* Real */ ae_vector* d,
14817 double* stp,
14818 ae_int_t n,
14819 ae_state *_state)
14820 {
14821 double mx;
14822 double s;
14823 ae_int_t i;
14824
14825
14826
14827 /*
14828 * first, scale D to avoid underflow/overflow durng squaring
14829 */
14830 mx = (double)(0);
14831 for(i=0; i<=n-1; i++)
14832 {
14833 mx = ae_maxreal(mx, ae_fabs(d->ptr.p_double[i], _state), _state);
14834 }
14835 if( ae_fp_eq(mx,(double)(0)) )
14836 {
14837 return;
14838 }
14839 s = 1/mx;
14840 ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
14841 *stp = *stp/s;
14842
14843 /*
14844 * normalize D
14845 */
14846 s = ae_v_dotproduct(&d->ptr.p_double[0], 1, &d->ptr.p_double[0], 1, ae_v_len(0,n-1));
14847 s = 1/ae_sqrt(s, _state);
14848 ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
14849 *stp = *stp/s;
14850 }
14851
14852
14853 /*************************************************************************
14854 THE PURPOSE OF MCSRCH IS TO FIND A STEP WHICH SATISFIES A SUFFICIENT
14855 DECREASE CONDITION AND A CURVATURE CONDITION.
14856
14857 AT EACH STAGE THE SUBROUTINE UPDATES AN INTERVAL OF UNCERTAINTY WITH
14858 ENDPOINTS STX AND STY. THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
14859 SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION
14860
14861 F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).
14862
14863 IF A STEP IS OBTAINED FOR WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
14864 FUNCTION VALUE AND NONNEGATIVE DERIVATIVE, THEN THE INTERVAL OF
14865 UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).
14866
14867 THE ALGORITHM IS DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
14868 DECREASE CONDITION
14869
14870 F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),
14871
14872 AND THE CURVATURE CONDITION
14873
14874 ABS(GRADF(X+STP*S)'S)) .LE. GTOL*ABS(GRADF(X)'S).
14875
14876 IF FTOL IS LESS THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
14877 BELOW, THEN THERE IS ALWAYS A STEP WHICH SATISFIES BOTH CONDITIONS.
14878 IF NO STEP CAN BE FOUND WHICH SATISFIES BOTH CONDITIONS, THEN THE
14879 ALGORITHM USUALLY STOPS WHEN ROUNDING ERRORS PREVENT FURTHER PROGRESS.
14880 IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.
14881
14882
14883 :::::::::::::IMPORTANT NOTES:::::::::::::
14884
14885 NOTE 1:
14886
14887 This routine guarantees that it will stop at the last point where function
14888 value was calculated. It won't make several additional function evaluations
14889 after finding good point. So if you store function evaluations requested by
14890 this routine, you can be sure that last one is the point where we've stopped.
14891
14892 NOTE 2:
14893
14894 when 0<StpMax<StpMin, algorithm will terminate with INFO=5 and Stp=StpMax
14895
14896 NOTE 3:
14897
14898 this algorithm guarantees that, if MCINFO=1 or MCINFO=5, then:
14899 * F(final_point)<F(initial_point) - strict inequality
14900 * final_point<>initial_point - after rounding to machine precision
14901
14902 NOTE 4:
14903
14904 when non-descent direction is specified, algorithm stops with MCINFO=0,
14905 Stp=0 and initial point at X[].
14906 :::::::::::::::::::::::::::::::::::::::::
14907
14908
14909 PARAMETERS DESCRIPRION
14910
14911 STAGE IS ZERO ON FIRST CALL, ZERO ON FINAL EXIT
14912
14913 N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.
14914
14915 X IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
14916 THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.
14917
14918 F IS A VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
14919 IT CONTAINS THE VALUE OF F AT X + STP*S.
14920
14921 G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X.
14922 ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S.
14923
14924 S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION.
14925
14926 STP IS A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE
14927 OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE.
14928
14929 FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE
14930 SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE
14931 SATISFIED.
14932
14933 XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE
14934 WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL.
14935
14936 STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER AND
14937 UPPER BOUNDS FOR THE STEP.
14938
14939 MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE
14940 NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION.
14941
14942 INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS:
14943 INFO = 0 IMPROPER INPUT PARAMETERS.
14944
14945 INFO = 1 THE SUFFICIENT DECREASE CONDITION AND THE
14946 DIRECTIONAL DERIVATIVE CONDITION HOLD.
14947
14948 INFO = 2 RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY
14949 IS AT MOST XTOL.
14950
14951 INFO = 3 NUMBER OF CALLS TO FCN HAS REACHED MAXFEV.
14952
14953 INFO = 4 THE STEP IS AT THE LOWER BOUND STPMIN.
14954
14955 INFO = 5 THE STEP IS AT THE UPPER BOUND STPMAX.
14956
14957 INFO = 6 ROUNDING ERRORS PREVENT FURTHER PROGRESS.
14958 THERE MAY NOT BE A STEP WHICH SATISFIES THE
14959 SUFFICIENT DECREASE AND CURVATURE CONDITIONS.
14960 TOLERANCES MAY BE TOO SMALL.
14961
14962 NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN.
14963
14964 WA IS A WORK ARRAY OF LENGTH N.
14965
14966 ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983
14967 JORGE J. MORE', DAVID J. THUENTE
14968 *************************************************************************/
mcsrch(ae_int_t n,ae_vector * x,double * f,ae_vector * g,ae_vector * s,double * stp,double stpmax,double gtol,ae_int_t * info,ae_int_t * nfev,ae_vector * wa,linminstate * state,ae_int_t * stage,ae_state * _state)14969 void mcsrch(ae_int_t n,
14970 /* Real */ ae_vector* x,
14971 double* f,
14972 /* Real */ ae_vector* g,
14973 /* Real */ ae_vector* s,
14974 double* stp,
14975 double stpmax,
14976 double gtol,
14977 ae_int_t* info,
14978 ae_int_t* nfev,
14979 /* Real */ ae_vector* wa,
14980 linminstate* state,
14981 ae_int_t* stage,
14982 ae_state *_state)
14983 {
14984 ae_int_t i;
14985 double v;
14986 double p5;
14987 double p66;
14988 double zero;
14989
14990
14991
14992 /*
14993 * init
14994 */
14995 p5 = 0.5;
14996 p66 = 0.66;
14997 state->xtrapf = 4.0;
14998 zero = (double)(0);
14999 if( ae_fp_eq(stpmax,(double)(0)) )
15000 {
15001 stpmax = linmin_defstpmax;
15002 }
15003 if( ae_fp_less(*stp,linmin_stpmin) )
15004 {
15005 *stp = linmin_stpmin;
15006 }
15007 if( ae_fp_greater(*stp,stpmax) )
15008 {
15009 *stp = stpmax;
15010 }
15011
15012 /*
15013 * Main cycle
15014 */
15015 for(;;)
15016 {
15017 if( *stage==0 )
15018 {
15019
15020 /*
15021 * NEXT
15022 */
15023 *stage = 2;
15024 continue;
15025 }
15026 if( *stage==2 )
15027 {
15028 state->infoc = 1;
15029 *info = 0;
15030
15031 /*
15032 * CHECK THE INPUT PARAMETERS FOR ERRORS.
15033 */
15034 if( ae_fp_less(stpmax,linmin_stpmin)&&ae_fp_greater(stpmax,(double)(0)) )
15035 {
15036 *info = 5;
15037 *stp = stpmax;
15038 *stage = 0;
15039 return;
15040 }
15041 if( ((((((n<=0||ae_fp_less_eq(*stp,(double)(0)))||ae_fp_less(linmin_ftol,(double)(0)))||ae_fp_less(gtol,zero))||ae_fp_less(linmin_xtol,zero))||ae_fp_less(linmin_stpmin,zero))||ae_fp_less(stpmax,linmin_stpmin))||linmin_maxfev<=0 )
15042 {
15043 *stage = 0;
15044 return;
15045 }
15046
15047 /*
15048 * COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION
15049 * AND CHECK THAT S IS A DESCENT DIRECTION.
15050 */
15051 v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
15052 state->dginit = v;
15053 if( ae_fp_greater_eq(state->dginit,(double)(0)) )
15054 {
15055 *stage = 0;
15056 *stp = (double)(0);
15057 return;
15058 }
15059
15060 /*
15061 * INITIALIZE LOCAL VARIABLES.
15062 */
15063 state->brackt = ae_false;
15064 state->stage1 = ae_true;
15065 *nfev = 0;
15066 state->finit = *f;
15067 state->dgtest = linmin_ftol*state->dginit;
15068 state->width = stpmax-linmin_stpmin;
15069 state->width1 = state->width/p5;
15070 ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
15071
15072 /*
15073 * THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP,
15074 * FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP.
15075 * THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP,
15076 * FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF
15077 * THE INTERVAL OF UNCERTAINTY.
15078 * THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP,
15079 * FUNCTION, AND DERIVATIVE AT THE CURRENT STEP.
15080 */
15081 state->stx = (double)(0);
15082 state->fx = state->finit;
15083 state->dgx = state->dginit;
15084 state->sty = (double)(0);
15085 state->fy = state->finit;
15086 state->dgy = state->dginit;
15087
15088 /*
15089 * NEXT
15090 */
15091 *stage = 3;
15092 continue;
15093 }
15094 if( *stage==3 )
15095 {
15096
15097 /*
15098 * START OF ITERATION.
15099 *
15100 * SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND
15101 * TO THE PRESENT INTERVAL OF UNCERTAINTY.
15102 */
15103 if( state->brackt )
15104 {
15105 if( ae_fp_less(state->stx,state->sty) )
15106 {
15107 state->stmin = state->stx;
15108 state->stmax = state->sty;
15109 }
15110 else
15111 {
15112 state->stmin = state->sty;
15113 state->stmax = state->stx;
15114 }
15115 }
15116 else
15117 {
15118 state->stmin = state->stx;
15119 state->stmax = *stp+state->xtrapf*(*stp-state->stx);
15120 }
15121
15122 /*
15123 * FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN.
15124 */
15125 if( ae_fp_greater(*stp,stpmax) )
15126 {
15127 *stp = stpmax;
15128 }
15129 if( ae_fp_less(*stp,linmin_stpmin) )
15130 {
15131 *stp = linmin_stpmin;
15132 }
15133
15134 /*
15135 * IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET
15136 * STP BE THE LOWEST POINT OBTAINED SO FAR.
15137 */
15138 if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=linmin_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax)) )
15139 {
15140 *stp = state->stx;
15141 }
15142
15143 /*
15144 * EVALUATE THE FUNCTION AND GRADIENT AT STP
15145 * AND COMPUTE THE DIRECTIONAL DERIVATIVE.
15146 */
15147 ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1));
15148 ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp);
15149
15150 /*
15151 * NEXT
15152 */
15153 *stage = 4;
15154 return;
15155 }
15156 if( *stage==4 )
15157 {
15158 *info = 0;
15159 *nfev = *nfev+1;
15160 v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
15161 state->dg = v;
15162 state->ftest1 = state->finit+*stp*state->dgtest;
15163
15164 /*
15165 * TEST FOR CONVERGENCE.
15166 */
15167 if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 )
15168 {
15169 *info = 6;
15170 }
15171 if( ((ae_fp_eq(*stp,stpmax)&&ae_fp_less(*f,state->finit))&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) )
15172 {
15173 *info = 5;
15174 }
15175 if( ae_fp_eq(*stp,linmin_stpmin)&&((ae_fp_greater_eq(*f,state->finit)||ae_fp_greater(*f,state->ftest1))||ae_fp_greater_eq(state->dg,state->dgtest)) )
15176 {
15177 *info = 4;
15178 }
15179 if( *nfev>=linmin_maxfev )
15180 {
15181 *info = 3;
15182 }
15183 if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax) )
15184 {
15185 *info = 2;
15186 }
15187 if( (ae_fp_less(*f,state->finit)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(ae_fabs(state->dg, _state),-gtol*state->dginit) )
15188 {
15189 *info = 1;
15190 }
15191
15192 /*
15193 * CHECK FOR TERMINATION.
15194 */
15195 if( *info!=0 )
15196 {
15197
15198 /*
15199 * Check guarantees provided by the function for INFO=1 or INFO=5
15200 */
15201 if( *info==1||*info==5 )
15202 {
15203 v = 0.0;
15204 for(i=0; i<=n-1; i++)
15205 {
15206 v = v+(wa->ptr.p_double[i]-x->ptr.p_double[i])*(wa->ptr.p_double[i]-x->ptr.p_double[i]);
15207 }
15208 if( ae_fp_greater_eq(*f,state->finit)||ae_fp_eq(v,0.0) )
15209 {
15210 *info = 6;
15211 }
15212 }
15213 *stage = 0;
15214 return;
15215 }
15216
15217 /*
15218 * IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED
15219 * FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE.
15220 */
15221 if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(linmin_ftol, gtol, _state)*state->dginit) )
15222 {
15223 state->stage1 = ae_false;
15224 }
15225
15226 /*
15227 * A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF
15228 * WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED
15229 * FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE
15230 * DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN
15231 * OBTAINED BUT THE DECREASE IS NOT SUFFICIENT.
15232 */
15233 if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) )
15234 {
15235
15236 /*
15237 * DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES.
15238 */
15239 state->fm = *f-*stp*state->dgtest;
15240 state->fxm = state->fx-state->stx*state->dgtest;
15241 state->fym = state->fy-state->sty*state->dgtest;
15242 state->dgm = state->dg-state->dgtest;
15243 state->dgxm = state->dgx-state->dgtest;
15244 state->dgym = state->dgy-state->dgtest;
15245
15246 /*
15247 * CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
15248 * AND TO COMPUTE THE NEW STEP.
15249 */
15250 linmin_mcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
15251
15252 /*
15253 * RESET THE FUNCTION AND GRADIENT VALUES FOR F.
15254 */
15255 state->fx = state->fxm+state->stx*state->dgtest;
15256 state->fy = state->fym+state->sty*state->dgtest;
15257 state->dgx = state->dgxm+state->dgtest;
15258 state->dgy = state->dgym+state->dgtest;
15259 }
15260 else
15261 {
15262
15263 /*
15264 * CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
15265 * AND TO COMPUTE THE NEW STEP.
15266 */
15267 linmin_mcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
15268 }
15269
15270 /*
15271 * FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE
15272 * INTERVAL OF UNCERTAINTY.
15273 */
15274 if( state->brackt )
15275 {
15276 if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) )
15277 {
15278 *stp = state->stx+p5*(state->sty-state->stx);
15279 }
15280 state->width1 = state->width;
15281 state->width = ae_fabs(state->sty-state->stx, _state);
15282 }
15283
15284 /*
15285 * NEXT.
15286 */
15287 *stage = 3;
15288 continue;
15289 }
15290 }
15291 }
15292
15293
15294 /*************************************************************************
15295 These functions perform Armijo line search using at most FMAX function
15296 evaluations. It doesn't enforce some kind of " sufficient decrease"
15297 criterion - it just tries different Armijo steps and returns optimum found
15298 so far.
15299
15300 Optimization is done using F-rcomm interface:
15301 * ArmijoCreate initializes State structure
15302 (reusing previously allocated buffers)
15303 * ArmijoIteration is subsequently called
15304 * ArmijoResults returns results
15305
15306 INPUT PARAMETERS:
15307 N - problem size
15308 X - array[N], starting point
15309 F - F(X+S*STP)
15310 S - step direction, S>0
15311 STP - step length
15312 STPMAX - maximum value for STP or zero (if no limit is imposed)
15313 FMAX - maximum number of function evaluations
15314 State - optimization state
15315
15316 -- ALGLIB --
15317 Copyright 05.10.2010 by Bochkanov Sergey
15318 *************************************************************************/
armijocreate(ae_int_t n,ae_vector * x,double f,ae_vector * s,double stp,double stpmax,ae_int_t fmax,armijostate * state,ae_state * _state)15319 void armijocreate(ae_int_t n,
15320 /* Real */ ae_vector* x,
15321 double f,
15322 /* Real */ ae_vector* s,
15323 double stp,
15324 double stpmax,
15325 ae_int_t fmax,
15326 armijostate* state,
15327 ae_state *_state)
15328 {
15329
15330
15331 if( state->x.cnt<n )
15332 {
15333 ae_vector_set_length(&state->x, n, _state);
15334 }
15335 if( state->xbase.cnt<n )
15336 {
15337 ae_vector_set_length(&state->xbase, n, _state);
15338 }
15339 if( state->s.cnt<n )
15340 {
15341 ae_vector_set_length(&state->s, n, _state);
15342 }
15343 state->stpmax = stpmax;
15344 state->fmax = fmax;
15345 state->stplen = stp;
15346 state->fcur = f;
15347 state->n = n;
15348 ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
15349 ae_v_move(&state->s.ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
15350 ae_vector_set_length(&state->rstate.ia, 0+1, _state);
15351 ae_vector_set_length(&state->rstate.ra, 0+1, _state);
15352 state->rstate.stage = -1;
15353 }
15354
15355
15356 /*************************************************************************
15357 This is rcomm-based search function
15358
15359 -- ALGLIB --
15360 Copyright 05.10.2010 by Bochkanov Sergey
15361 *************************************************************************/
armijoiteration(armijostate * state,ae_state * _state)15362 ae_bool armijoiteration(armijostate* state, ae_state *_state)
15363 {
15364 double v;
15365 ae_int_t n;
15366 ae_bool result;
15367
15368
15369
15370 /*
15371 * Reverse communication preparations
15372 * I know it looks ugly, but it works the same way
15373 * anywhere from C++ to Python.
15374 *
15375 * This code initializes locals by:
15376 * * random values determined during code
15377 * generation - on first subroutine call
15378 * * values from previous call - on subsequent calls
15379 */
15380 if( state->rstate.stage>=0 )
15381 {
15382 n = state->rstate.ia.ptr.p_int[0];
15383 v = state->rstate.ra.ptr.p_double[0];
15384 }
15385 else
15386 {
15387 n = 359;
15388 v = -58;
15389 }
15390 if( state->rstate.stage==0 )
15391 {
15392 goto lbl_0;
15393 }
15394 if( state->rstate.stage==1 )
15395 {
15396 goto lbl_1;
15397 }
15398 if( state->rstate.stage==2 )
15399 {
15400 goto lbl_2;
15401 }
15402 if( state->rstate.stage==3 )
15403 {
15404 goto lbl_3;
15405 }
15406
15407 /*
15408 * Routine body
15409 */
15410 if( (ae_fp_less_eq(state->stplen,(double)(0))||ae_fp_less(state->stpmax,(double)(0)))||state->fmax<2 )
15411 {
15412 state->info = 0;
15413 result = ae_false;
15414 return result;
15415 }
15416 if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
15417 {
15418 state->info = 4;
15419 result = ae_false;
15420 return result;
15421 }
15422 n = state->n;
15423 state->nfev = 0;
15424
15425 /*
15426 * We always need F
15427 */
15428 state->needf = ae_true;
15429
15430 /*
15431 * Bound StpLen
15432 */
15433 if( ae_fp_greater(state->stplen,state->stpmax)&&ae_fp_neq(state->stpmax,(double)(0)) )
15434 {
15435 state->stplen = state->stpmax;
15436 }
15437
15438 /*
15439 * Increase length
15440 */
15441 v = state->stplen*linmin_armijofactor;
15442 if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,(double)(0)) )
15443 {
15444 v = state->stpmax;
15445 }
15446 ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15447 ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15448 state->rstate.stage = 0;
15449 goto lbl_rcomm;
15450 lbl_0:
15451 state->nfev = state->nfev+1;
15452 if( ae_fp_greater_eq(state->f,state->fcur) )
15453 {
15454 goto lbl_4;
15455 }
15456 state->stplen = v;
15457 state->fcur = state->f;
15458 lbl_6:
15459 if( ae_false )
15460 {
15461 goto lbl_7;
15462 }
15463
15464 /*
15465 * test stopping conditions
15466 */
15467 if( state->nfev>=state->fmax )
15468 {
15469 state->info = 3;
15470 result = ae_false;
15471 return result;
15472 }
15473 if( ae_fp_greater_eq(state->stplen,state->stpmax) )
15474 {
15475 state->info = 5;
15476 result = ae_false;
15477 return result;
15478 }
15479
15480 /*
15481 * evaluate F
15482 */
15483 v = state->stplen*linmin_armijofactor;
15484 if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,(double)(0)) )
15485 {
15486 v = state->stpmax;
15487 }
15488 ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15489 ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15490 state->rstate.stage = 1;
15491 goto lbl_rcomm;
15492 lbl_1:
15493 state->nfev = state->nfev+1;
15494
15495 /*
15496 * make decision
15497 */
15498 if( ae_fp_less(state->f,state->fcur) )
15499 {
15500 state->stplen = v;
15501 state->fcur = state->f;
15502 }
15503 else
15504 {
15505 state->info = 1;
15506 result = ae_false;
15507 return result;
15508 }
15509 goto lbl_6;
15510 lbl_7:
15511 lbl_4:
15512
15513 /*
15514 * Decrease length
15515 */
15516 v = state->stplen/linmin_armijofactor;
15517 ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15518 ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15519 state->rstate.stage = 2;
15520 goto lbl_rcomm;
15521 lbl_2:
15522 state->nfev = state->nfev+1;
15523 if( ae_fp_greater_eq(state->f,state->fcur) )
15524 {
15525 goto lbl_8;
15526 }
15527 state->stplen = state->stplen/linmin_armijofactor;
15528 state->fcur = state->f;
15529 lbl_10:
15530 if( ae_false )
15531 {
15532 goto lbl_11;
15533 }
15534
15535 /*
15536 * test stopping conditions
15537 */
15538 if( state->nfev>=state->fmax )
15539 {
15540 state->info = 3;
15541 result = ae_false;
15542 return result;
15543 }
15544 if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
15545 {
15546 state->info = 4;
15547 result = ae_false;
15548 return result;
15549 }
15550
15551 /*
15552 * evaluate F
15553 */
15554 v = state->stplen/linmin_armijofactor;
15555 ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15556 ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15557 state->rstate.stage = 3;
15558 goto lbl_rcomm;
15559 lbl_3:
15560 state->nfev = state->nfev+1;
15561
15562 /*
15563 * make decision
15564 */
15565 if( ae_fp_less(state->f,state->fcur) )
15566 {
15567 state->stplen = state->stplen/linmin_armijofactor;
15568 state->fcur = state->f;
15569 }
15570 else
15571 {
15572 state->info = 1;
15573 result = ae_false;
15574 return result;
15575 }
15576 goto lbl_10;
15577 lbl_11:
15578 lbl_8:
15579
15580 /*
15581 * Nothing to be done
15582 */
15583 state->info = 1;
15584 result = ae_false;
15585 return result;
15586
15587 /*
15588 * Saving state
15589 */
15590 lbl_rcomm:
15591 result = ae_true;
15592 state->rstate.ia.ptr.p_int[0] = n;
15593 state->rstate.ra.ptr.p_double[0] = v;
15594 return result;
15595 }
15596
15597
15598 /*************************************************************************
15599 Results of Armijo search
15600
15601 OUTPUT PARAMETERS:
15602 INFO - on output it is set to one of the return codes:
15603 * 0 improper input params
15604 * 1 optimum step is found with at most FMAX evaluations
15605 * 3 FMAX evaluations were used,
15606 X contains optimum found so far
15607 * 4 step is at lower bound STPMIN
15608 * 5 step is at upper bound
15609 STP - step length (in case of failure it is still returned)
15610 F - function value (in case of failure it is still returned)
15611
15612 -- ALGLIB --
15613 Copyright 05.10.2010 by Bochkanov Sergey
15614 *************************************************************************/
armijoresults(armijostate * state,ae_int_t * info,double * stp,double * f,ae_state * _state)15615 void armijoresults(armijostate* state,
15616 ae_int_t* info,
15617 double* stp,
15618 double* f,
15619 ae_state *_state)
15620 {
15621
15622
15623 *info = state->info;
15624 *stp = state->stplen;
15625 *f = state->fcur;
15626 }
15627
15628
linmin_mcstep(double * stx,double * fx,double * dx,double * sty,double * fy,double * dy,double * stp,double fp,double dp,ae_bool * brackt,double stmin,double stmax,ae_int_t * info,ae_state * _state)15629 static void linmin_mcstep(double* stx,
15630 double* fx,
15631 double* dx,
15632 double* sty,
15633 double* fy,
15634 double* dy,
15635 double* stp,
15636 double fp,
15637 double dp,
15638 ae_bool* brackt,
15639 double stmin,
15640 double stmax,
15641 ae_int_t* info,
15642 ae_state *_state)
15643 {
15644 ae_bool bound;
15645 double gamma;
15646 double p;
15647 double q;
15648 double r;
15649 double s;
15650 double sgnd;
15651 double stpc;
15652 double stpf;
15653 double stpq;
15654 double theta;
15655
15656
15657 *info = 0;
15658
15659 /*
15660 * CHECK THE INPUT PARAMETERS FOR ERRORS.
15661 */
15662 if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),(double)(0)))||ae_fp_less(stmax,stmin) )
15663 {
15664 return;
15665 }
15666
15667 /*
15668 * DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN.
15669 */
15670 sgnd = dp*(*dx/ae_fabs(*dx, _state));
15671
15672 /*
15673 * FIRST CASE. A HIGHER FUNCTION VALUE.
15674 * THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER
15675 * TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN,
15676 * ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN.
15677 */
15678 if( ae_fp_greater(fp,*fx) )
15679 {
15680 *info = 1;
15681 bound = ae_true;
15682 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
15683 s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
15684 gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
15685 if( ae_fp_less(*stp,*stx) )
15686 {
15687 gamma = -gamma;
15688 }
15689 p = gamma-(*dx)+theta;
15690 q = gamma-(*dx)+gamma+dp;
15691 r = p/q;
15692 stpc = *stx+r*(*stp-(*stx));
15693 stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
15694 if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) )
15695 {
15696 stpf = stpc;
15697 }
15698 else
15699 {
15700 stpf = stpc+(stpq-stpc)/2;
15701 }
15702 *brackt = ae_true;
15703 }
15704 else
15705 {
15706 if( ae_fp_less(sgnd,(double)(0)) )
15707 {
15708
15709 /*
15710 * SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF
15711 * OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC
15712 * STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP,
15713 * THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN.
15714 */
15715 *info = 2;
15716 bound = ae_false;
15717 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
15718 s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
15719 gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
15720 if( ae_fp_greater(*stp,*stx) )
15721 {
15722 gamma = -gamma;
15723 }
15724 p = gamma-dp+theta;
15725 q = gamma-dp+gamma+(*dx);
15726 r = p/q;
15727 stpc = *stp+r*(*stx-(*stp));
15728 stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
15729 if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) )
15730 {
15731 stpf = stpc;
15732 }
15733 else
15734 {
15735 stpf = stpq;
15736 }
15737 *brackt = ae_true;
15738 }
15739 else
15740 {
15741 if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) )
15742 {
15743
15744 /*
15745 * THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
15746 * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES.
15747 * THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY
15748 * IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC
15749 * IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE
15750 * EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO
15751 * COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE THE STEP
15752 * CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN.
15753 */
15754 *info = 3;
15755 bound = ae_true;
15756 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
15757 s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
15758
15759 /*
15760 * THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND
15761 * TO INFINITY IN THE DIRECTION OF THE STEP.
15762 */
15763 gamma = s*ae_sqrt(ae_maxreal((double)(0), ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state);
15764 if( ae_fp_greater(*stp,*stx) )
15765 {
15766 gamma = -gamma;
15767 }
15768 p = gamma-dp+theta;
15769 q = gamma+(*dx-dp)+gamma;
15770 r = p/q;
15771 if( ae_fp_less(r,(double)(0))&&ae_fp_neq(gamma,(double)(0)) )
15772 {
15773 stpc = *stp+r*(*stx-(*stp));
15774 }
15775 else
15776 {
15777 if( ae_fp_greater(*stp,*stx) )
15778 {
15779 stpc = stmax;
15780 }
15781 else
15782 {
15783 stpc = stmin;
15784 }
15785 }
15786 stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
15787 if( *brackt )
15788 {
15789 if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
15790 {
15791 stpf = stpc;
15792 }
15793 else
15794 {
15795 stpf = stpq;
15796 }
15797 }
15798 else
15799 {
15800 if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
15801 {
15802 stpf = stpc;
15803 }
15804 else
15805 {
15806 stpf = stpq;
15807 }
15808 }
15809 }
15810 else
15811 {
15812
15813 /*
15814 * FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
15815 * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES
15816 * NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP
15817 * IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN.
15818 */
15819 *info = 4;
15820 bound = ae_false;
15821 if( *brackt )
15822 {
15823 theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
15824 s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state);
15825 gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
15826 if( ae_fp_greater(*stp,*sty) )
15827 {
15828 gamma = -gamma;
15829 }
15830 p = gamma-dp+theta;
15831 q = gamma-dp+gamma+(*dy);
15832 r = p/q;
15833 stpc = *stp+r*(*sty-(*stp));
15834 stpf = stpc;
15835 }
15836 else
15837 {
15838 if( ae_fp_greater(*stp,*stx) )
15839 {
15840 stpf = stmax;
15841 }
15842 else
15843 {
15844 stpf = stmin;
15845 }
15846 }
15847 }
15848 }
15849 }
15850
15851 /*
15852 * UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT
15853 * DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE.
15854 */
15855 if( ae_fp_greater(fp,*fx) )
15856 {
15857 *sty = *stp;
15858 *fy = fp;
15859 *dy = dp;
15860 }
15861 else
15862 {
15863 if( ae_fp_less(sgnd,0.0) )
15864 {
15865 *sty = *stx;
15866 *fy = *fx;
15867 *dy = *dx;
15868 }
15869 *stx = *stp;
15870 *fx = fp;
15871 *dx = dp;
15872 }
15873
15874 /*
15875 * COMPUTE THE NEW STEP AND SAFEGUARD IT.
15876 */
15877 stpf = ae_minreal(stmax, stpf, _state);
15878 stpf = ae_maxreal(stmin, stpf, _state);
15879 *stp = stpf;
15880 if( *brackt&&bound )
15881 {
15882 if( ae_fp_greater(*sty,*stx) )
15883 {
15884 *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
15885 }
15886 else
15887 {
15888 *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
15889 }
15890 }
15891 }
15892
15893
_linminstate_init(void * _p,ae_state * _state,ae_bool make_automatic)15894 void _linminstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
15895 {
15896 linminstate *p = (linminstate*)_p;
15897 ae_touch_ptr((void*)p);
15898 }
15899
15900
_linminstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)15901 void _linminstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
15902 {
15903 linminstate *dst = (linminstate*)_dst;
15904 linminstate *src = (linminstate*)_src;
15905 dst->brackt = src->brackt;
15906 dst->stage1 = src->stage1;
15907 dst->infoc = src->infoc;
15908 dst->dg = src->dg;
15909 dst->dgm = src->dgm;
15910 dst->dginit = src->dginit;
15911 dst->dgtest = src->dgtest;
15912 dst->dgx = src->dgx;
15913 dst->dgxm = src->dgxm;
15914 dst->dgy = src->dgy;
15915 dst->dgym = src->dgym;
15916 dst->finit = src->finit;
15917 dst->ftest1 = src->ftest1;
15918 dst->fm = src->fm;
15919 dst->fx = src->fx;
15920 dst->fxm = src->fxm;
15921 dst->fy = src->fy;
15922 dst->fym = src->fym;
15923 dst->stx = src->stx;
15924 dst->sty = src->sty;
15925 dst->stmin = src->stmin;
15926 dst->stmax = src->stmax;
15927 dst->width = src->width;
15928 dst->width1 = src->width1;
15929 dst->xtrapf = src->xtrapf;
15930 }
15931
15932
_linminstate_clear(void * _p)15933 void _linminstate_clear(void* _p)
15934 {
15935 linminstate *p = (linminstate*)_p;
15936 ae_touch_ptr((void*)p);
15937 }
15938
15939
_linminstate_destroy(void * _p)15940 void _linminstate_destroy(void* _p)
15941 {
15942 linminstate *p = (linminstate*)_p;
15943 ae_touch_ptr((void*)p);
15944 }
15945
15946
_armijostate_init(void * _p,ae_state * _state,ae_bool make_automatic)15947 void _armijostate_init(void* _p, ae_state *_state, ae_bool make_automatic)
15948 {
15949 armijostate *p = (armijostate*)_p;
15950 ae_touch_ptr((void*)p);
15951 ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
15952 ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic);
15953 ae_vector_init(&p->s, 0, DT_REAL, _state, make_automatic);
15954 _rcommstate_init(&p->rstate, _state, make_automatic);
15955 }
15956
15957
_armijostate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)15958 void _armijostate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
15959 {
15960 armijostate *dst = (armijostate*)_dst;
15961 armijostate *src = (armijostate*)_src;
15962 dst->needf = src->needf;
15963 ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
15964 dst->f = src->f;
15965 dst->n = src->n;
15966 ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic);
15967 ae_vector_init_copy(&dst->s, &src->s, _state, make_automatic);
15968 dst->stplen = src->stplen;
15969 dst->fcur = src->fcur;
15970 dst->stpmax = src->stpmax;
15971 dst->fmax = src->fmax;
15972 dst->nfev = src->nfev;
15973 dst->info = src->info;
15974 _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
15975 }
15976
15977
_armijostate_clear(void * _p)15978 void _armijostate_clear(void* _p)
15979 {
15980 armijostate *p = (armijostate*)_p;
15981 ae_touch_ptr((void*)p);
15982 ae_vector_clear(&p->x);
15983 ae_vector_clear(&p->xbase);
15984 ae_vector_clear(&p->s);
15985 _rcommstate_clear(&p->rstate);
15986 }
15987
15988
_armijostate_destroy(void * _p)15989 void _armijostate_destroy(void* _p)
15990 {
15991 armijostate *p = (armijostate*)_p;
15992 ae_touch_ptr((void*)p);
15993 ae_vector_destroy(&p->x);
15994 ae_vector_destroy(&p->xbase);
15995 ae_vector_destroy(&p->s);
15996 _rcommstate_destroy(&p->rstate);
15997 }
15998
15999
16000 #endif
16001 #if defined(AE_COMPILE_NEARUNITYUNIT) || !defined(AE_PARTIAL_BUILD)
16002
16003
nulog1p(double x,ae_state * _state)16004 double nulog1p(double x, ae_state *_state)
16005 {
16006 double z;
16007 double lp;
16008 double lq;
16009 double result;
16010
16011
16012 z = 1.0+x;
16013 if( ae_fp_less(z,0.70710678118654752440)||ae_fp_greater(z,1.41421356237309504880) )
16014 {
16015 result = ae_log(z, _state);
16016 return result;
16017 }
16018 z = x*x;
16019 lp = 4.5270000862445199635215E-5;
16020 lp = lp*x+4.9854102823193375972212E-1;
16021 lp = lp*x+6.5787325942061044846969E0;
16022 lp = lp*x+2.9911919328553073277375E1;
16023 lp = lp*x+6.0949667980987787057556E1;
16024 lp = lp*x+5.7112963590585538103336E1;
16025 lp = lp*x+2.0039553499201281259648E1;
16026 lq = 1.0000000000000000000000E0;
16027 lq = lq*x+1.5062909083469192043167E1;
16028 lq = lq*x+8.3047565967967209469434E1;
16029 lq = lq*x+2.2176239823732856465394E2;
16030 lq = lq*x+3.0909872225312059774938E2;
16031 lq = lq*x+2.1642788614495947685003E2;
16032 lq = lq*x+6.0118660497603843919306E1;
16033 z = -0.5*z+x*(z*lp/lq);
16034 result = x+z;
16035 return result;
16036 }
16037
16038
nuexpm1(double x,ae_state * _state)16039 double nuexpm1(double x, ae_state *_state)
16040 {
16041 double r;
16042 double xx;
16043 double ep;
16044 double eq;
16045 double result;
16046
16047
16048 if( ae_fp_less(x,-0.5)||ae_fp_greater(x,0.5) )
16049 {
16050 result = ae_exp(x, _state)-1.0;
16051 return result;
16052 }
16053 xx = x*x;
16054 ep = 1.2617719307481059087798E-4;
16055 ep = ep*xx+3.0299440770744196129956E-2;
16056 ep = ep*xx+9.9999999999999999991025E-1;
16057 eq = 3.0019850513866445504159E-6;
16058 eq = eq*xx+2.5244834034968410419224E-3;
16059 eq = eq*xx+2.2726554820815502876593E-1;
16060 eq = eq*xx+2.0000000000000000000897E0;
16061 r = x*ep;
16062 r = r/(eq-r);
16063 result = r+r;
16064 return result;
16065 }
16066
16067
nucosm1(double x,ae_state * _state)16068 double nucosm1(double x, ae_state *_state)
16069 {
16070 double xx;
16071 double c;
16072 double result;
16073
16074
16075 if( ae_fp_less(x,-0.25*ae_pi)||ae_fp_greater(x,0.25*ae_pi) )
16076 {
16077 result = ae_cos(x, _state)-1;
16078 return result;
16079 }
16080 xx = x*x;
16081 c = 4.7377507964246204691685E-14;
16082 c = c*xx-1.1470284843425359765671E-11;
16083 c = c*xx+2.0876754287081521758361E-9;
16084 c = c*xx-2.7557319214999787979814E-7;
16085 c = c*xx+2.4801587301570552304991E-5;
16086 c = c*xx-1.3888888888888872993737E-3;
16087 c = c*xx+4.1666666666666666609054E-2;
16088 result = -0.5*xx+xx*xx*c;
16089 return result;
16090 }
16091
16092
16093 #endif
16094 #if defined(AE_COMPILE_NTHEORY) || !defined(AE_PARTIAL_BUILD)
16095
16096
findprimitiverootandinverse(ae_int_t n,ae_int_t * proot,ae_int_t * invproot,ae_state * _state)16097 void findprimitiverootandinverse(ae_int_t n,
16098 ae_int_t* proot,
16099 ae_int_t* invproot,
16100 ae_state *_state)
16101 {
16102 ae_int_t candroot;
16103 ae_int_t phin;
16104 ae_int_t q;
16105 ae_int_t f;
16106 ae_bool allnonone;
16107 ae_int_t x;
16108 ae_int_t lastx;
16109 ae_int_t y;
16110 ae_int_t lasty;
16111 ae_int_t a;
16112 ae_int_t b;
16113 ae_int_t t;
16114 ae_int_t n2;
16115
16116 *proot = 0;
16117 *invproot = 0;
16118
16119 ae_assert(n>=3, "FindPrimitiveRootAndInverse: N<3", _state);
16120 *proot = 0;
16121 *invproot = 0;
16122
16123 /*
16124 * check that N is prime
16125 */
16126 ae_assert(ntheory_isprime(n, _state), "FindPrimitiveRoot: N is not prime", _state);
16127
16128 /*
16129 * Because N is prime, Euler totient function is equal to N-1
16130 */
16131 phin = n-1;
16132
16133 /*
16134 * Test different values of PRoot - from 2 to N-1.
16135 * One of these values MUST be primitive root.
16136 *
16137 * For testing we use algorithm from Wiki (Primitive root modulo n):
16138 * * compute phi(N)
16139 * * determine the different prime factors of phi(N), say p1, ..., pk
16140 * * for every element m of Zn*, compute m^(phi(N)/pi) mod N for i=1..k
16141 * using a fast algorithm for modular exponentiation.
16142 * * a number m for which these k results are all different from 1 is a
16143 * primitive root.
16144 */
16145 for(candroot=2; candroot<=n-1; candroot++)
16146 {
16147
16148 /*
16149 * We have current candidate root in CandRoot.
16150 *
16151 * Scan different prime factors of PhiN. Here:
16152 * * F is a current candidate factor
16153 * * Q is a current quotient - amount which was left after dividing PhiN
16154 * by all previous factors
16155 *
16156 * For each factor, perform test mentioned above.
16157 */
16158 q = phin;
16159 f = 2;
16160 allnonone = ae_true;
16161 while(q>1)
16162 {
16163 if( q%f==0 )
16164 {
16165 t = ntheory_modexp(candroot, phin/f, n, _state);
16166 if( t==1 )
16167 {
16168 allnonone = ae_false;
16169 break;
16170 }
16171 while(q%f==0)
16172 {
16173 q = q/f;
16174 }
16175 }
16176 f = f+1;
16177 }
16178 if( allnonone )
16179 {
16180 *proot = candroot;
16181 break;
16182 }
16183 }
16184 ae_assert(*proot>=2, "FindPrimitiveRoot: internal error (root not found)", _state);
16185
16186 /*
16187 * Use extended Euclidean algorithm to find multiplicative inverse of primitive root
16188 */
16189 x = 0;
16190 lastx = 1;
16191 y = 1;
16192 lasty = 0;
16193 a = *proot;
16194 b = n;
16195 while(b!=0)
16196 {
16197 q = a/b;
16198 t = a%b;
16199 a = b;
16200 b = t;
16201 t = lastx-q*x;
16202 lastx = x;
16203 x = t;
16204 t = lasty-q*y;
16205 lasty = y;
16206 y = t;
16207 }
16208 while(lastx<0)
16209 {
16210 lastx = lastx+n;
16211 }
16212 *invproot = lastx;
16213
16214 /*
16215 * Check that it is safe to perform multiplication modulo N.
16216 * Check results for consistency.
16217 */
16218 n2 = (n-1)*(n-1);
16219 ae_assert(n2/(n-1)==n-1, "FindPrimitiveRoot: internal error", _state);
16220 ae_assert(*proot*(*invproot)/(*proot)==(*invproot), "FindPrimitiveRoot: internal error", _state);
16221 ae_assert(*proot*(*invproot)/(*invproot)==(*proot), "FindPrimitiveRoot: internal error", _state);
16222 ae_assert(*proot*(*invproot)%n==1, "FindPrimitiveRoot: internal error", _state);
16223 }
16224
16225
ntheory_isprime(ae_int_t n,ae_state * _state)16226 static ae_bool ntheory_isprime(ae_int_t n, ae_state *_state)
16227 {
16228 ae_int_t p;
16229 ae_bool result;
16230
16231
16232 result = ae_false;
16233 p = 2;
16234 while(p*p<=n)
16235 {
16236 if( n%p==0 )
16237 {
16238 return result;
16239 }
16240 p = p+1;
16241 }
16242 result = ae_true;
16243 return result;
16244 }
16245
16246
ntheory_modmul(ae_int_t a,ae_int_t b,ae_int_t n,ae_state * _state)16247 static ae_int_t ntheory_modmul(ae_int_t a,
16248 ae_int_t b,
16249 ae_int_t n,
16250 ae_state *_state)
16251 {
16252 ae_int_t t;
16253 double ra;
16254 double rb;
16255 ae_int_t result;
16256
16257
16258 ae_assert(a>=0&&a<n, "ModMul: A<0 or A>=N", _state);
16259 ae_assert(b>=0&&b<n, "ModMul: B<0 or B>=N", _state);
16260
16261 /*
16262 * Base cases
16263 */
16264 ra = (double)(a);
16265 rb = (double)(b);
16266 if( b==0||a==0 )
16267 {
16268 result = 0;
16269 return result;
16270 }
16271 if( b==1||a==1 )
16272 {
16273 result = a*b;
16274 return result;
16275 }
16276 if( ae_fp_eq(ra*rb,(double)(a*b)) )
16277 {
16278 result = a*b%n;
16279 return result;
16280 }
16281
16282 /*
16283 * Non-base cases
16284 */
16285 if( b%2==0 )
16286 {
16287
16288 /*
16289 * A*B = (A*(B/2)) * 2
16290 *
16291 * Product T=A*(B/2) is calculated recursively, product T*2 is
16292 * calculated as follows:
16293 * * result:=T-N
16294 * * result:=result+T
16295 * * if result<0 then result:=result+N
16296 *
16297 * In case integer result overflows, we generate exception
16298 */
16299 t = ntheory_modmul(a, b/2, n, _state);
16300 result = t-n;
16301 result = result+t;
16302 if( result<0 )
16303 {
16304 result = result+n;
16305 }
16306 }
16307 else
16308 {
16309
16310 /*
16311 * A*B = (A*(B div 2)) * 2 + A
16312 *
16313 * Product T=A*(B/2) is calculated recursively, product T*2 is
16314 * calculated as follows:
16315 * * result:=T-N
16316 * * result:=result+T
16317 * * if result<0 then result:=result+N
16318 *
16319 * In case integer result overflows, we generate exception
16320 */
16321 t = ntheory_modmul(a, b/2, n, _state);
16322 result = t-n;
16323 result = result+t;
16324 if( result<0 )
16325 {
16326 result = result+n;
16327 }
16328 result = result-n;
16329 result = result+a;
16330 if( result<0 )
16331 {
16332 result = result+n;
16333 }
16334 }
16335 return result;
16336 }
16337
16338
ntheory_modexp(ae_int_t a,ae_int_t b,ae_int_t n,ae_state * _state)16339 static ae_int_t ntheory_modexp(ae_int_t a,
16340 ae_int_t b,
16341 ae_int_t n,
16342 ae_state *_state)
16343 {
16344 ae_int_t t;
16345 ae_int_t result;
16346
16347
16348 ae_assert(a>=0&&a<n, "ModExp: A<0 or A>=N", _state);
16349 ae_assert(b>=0, "ModExp: B<0", _state);
16350
16351 /*
16352 * Base cases
16353 */
16354 if( b==0 )
16355 {
16356 result = 1;
16357 return result;
16358 }
16359 if( b==1 )
16360 {
16361 result = a;
16362 return result;
16363 }
16364
16365 /*
16366 * Non-base cases
16367 */
16368 if( b%2==0 )
16369 {
16370 t = ntheory_modmul(a, a, n, _state);
16371 result = ntheory_modexp(t, b/2, n, _state);
16372 }
16373 else
16374 {
16375 t = ntheory_modmul(a, a, n, _state);
16376 result = ntheory_modexp(t, b/2, n, _state);
16377 result = ntheory_modmul(result, a, n, _state);
16378 }
16379 return result;
16380 }
16381
16382
16383 #endif
16384 #if defined(AE_COMPILE_FTBASE) || !defined(AE_PARTIAL_BUILD)
16385
16386
16387 /*************************************************************************
16388 This subroutine generates FFT plan for K complex FFT's with length N each.
16389
16390 INPUT PARAMETERS:
16391 N - FFT length (in complex numbers), N>=1
16392 K - number of repetitions, K>=1
16393
16394 OUTPUT PARAMETERS:
16395 Plan - plan
16396
16397 -- ALGLIB --
16398 Copyright 05.04.2013 by Bochkanov Sergey
16399 *************************************************************************/
ftcomplexfftplan(ae_int_t n,ae_int_t k,fasttransformplan * plan,ae_state * _state)16400 void ftcomplexfftplan(ae_int_t n,
16401 ae_int_t k,
16402 fasttransformplan* plan,
16403 ae_state *_state)
16404 {
16405 ae_frame _frame_block;
16406 srealarray bluesteinbuf;
16407 ae_int_t rowptr;
16408 ae_int_t bluesteinsize;
16409 ae_int_t precrptr;
16410 ae_int_t preciptr;
16411 ae_int_t precrsize;
16412 ae_int_t precisize;
16413
16414 ae_frame_make(_state, &_frame_block);
16415 memset(&bluesteinbuf, 0, sizeof(bluesteinbuf));
16416 _fasttransformplan_clear(plan);
16417 _srealarray_init(&bluesteinbuf, _state, ae_true);
16418
16419
16420 /*
16421 * Initial check for parameters
16422 */
16423 ae_assert(n>0, "FTComplexFFTPlan: N<=0", _state);
16424 ae_assert(k>0, "FTComplexFFTPlan: K<=0", _state);
16425
16426 /*
16427 * Determine required sizes of precomputed real and integer
16428 * buffers. This stage of code is highly dependent on internals
16429 * of FTComplexFFTPlanRec() and must be kept synchronized with
16430 * possible changes in internals of plan generation function.
16431 *
16432 * Buffer size is determined as follows:
16433 * * N is factorized
16434 * * we factor out anything which is less or equal to MaxRadix
16435 * * prime factor F>RaderThreshold requires 4*FTBaseFindSmooth(2*F-1)
16436 * real entries to store precomputed Quantities for Bluestein's
16437 * transformation
16438 * * prime factor F<=RaderThreshold does NOT require
16439 * precomputed storage
16440 */
16441 precrsize = 0;
16442 precisize = 0;
16443 ftbase_ftdeterminespacerequirements(n, &precrsize, &precisize, _state);
16444 if( precrsize>0 )
16445 {
16446 ae_vector_set_length(&plan->precr, precrsize, _state);
16447 }
16448 if( precisize>0 )
16449 {
16450 ae_vector_set_length(&plan->preci, precisize, _state);
16451 }
16452
16453 /*
16454 * Generate plan
16455 */
16456 rowptr = 0;
16457 precrptr = 0;
16458 preciptr = 0;
16459 bluesteinsize = 1;
16460 ae_vector_set_length(&plan->buffer, 2*n*k, _state);
16461 ftbase_ftcomplexfftplanrec(n, k, ae_true, ae_true, &rowptr, &bluesteinsize, &precrptr, &preciptr, plan, _state);
16462 ae_vector_set_length(&bluesteinbuf.val, bluesteinsize, _state);
16463 ae_shared_pool_set_seed(&plan->bluesteinpool, &bluesteinbuf, sizeof(bluesteinbuf), _srealarray_init, _srealarray_init_copy, _srealarray_destroy, _state);
16464
16465 /*
16466 * Check that actual amount of precomputed space used by transformation
16467 * plan is EXACTLY equal to amount of space allocated by us.
16468 */
16469 ae_assert(precrptr==precrsize, "FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
16470 ae_assert(preciptr==precisize, "FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
16471 ae_frame_leave(_state);
16472 }
16473
16474
16475 /*************************************************************************
16476 This subroutine applies transformation plan to input/output array A.
16477
16478 INPUT PARAMETERS:
16479 Plan - transformation plan
16480 A - array, must be large enough for plan to work
16481 OffsA - offset of the subarray to process
16482 RepCnt - repetition count (transformation is repeatedly applied
16483 to subsequent subarrays)
16484
16485 OUTPUT PARAMETERS:
16486 Plan - plan (temporary buffers can be modified, plan itself
16487 is unchanged and can be reused)
16488 A - transformed array
16489
16490 -- ALGLIB --
16491 Copyright 05.04.2013 by Bochkanov Sergey
16492 *************************************************************************/
ftapplyplan(fasttransformplan * plan,ae_vector * a,ae_int_t offsa,ae_int_t repcnt,ae_state * _state)16493 void ftapplyplan(fasttransformplan* plan,
16494 /* Real */ ae_vector* a,
16495 ae_int_t offsa,
16496 ae_int_t repcnt,
16497 ae_state *_state)
16498 {
16499 ae_int_t plansize;
16500 ae_int_t i;
16501
16502
16503 plansize = plan->entries.ptr.pp_int[0][ftbase_coloperandscnt]*plan->entries.ptr.pp_int[0][ftbase_coloperandsize]*plan->entries.ptr.pp_int[0][ftbase_colmicrovectorsize];
16504 for(i=0; i<=repcnt-1; i++)
16505 {
16506 ftbase_ftapplysubplan(plan, 0, a, offsa+plansize*i, 0, &plan->buffer, 1, _state);
16507 }
16508 }
16509
16510
16511 /*************************************************************************
16512 Returns good factorization N=N1*N2.
16513
16514 Usually N1<=N2 (but not always - small N's may be exception).
16515 if N1<>1 then N2<>1.
16516
16517 Factorization is chosen depending on task type and codelets we have.
16518
16519 -- ALGLIB --
16520 Copyright 01.05.2009 by Bochkanov Sergey
16521 *************************************************************************/
ftbasefactorize(ae_int_t n,ae_int_t tasktype,ae_int_t * n1,ae_int_t * n2,ae_state * _state)16522 void ftbasefactorize(ae_int_t n,
16523 ae_int_t tasktype,
16524 ae_int_t* n1,
16525 ae_int_t* n2,
16526 ae_state *_state)
16527 {
16528 ae_int_t j;
16529
16530 *n1 = 0;
16531 *n2 = 0;
16532
16533 *n1 = 0;
16534 *n2 = 0;
16535
16536 /*
16537 * try to find good codelet
16538 */
16539 if( *n1*(*n2)!=n )
16540 {
16541 for(j=ftbase_ftbasecodeletrecommended; j>=2; j--)
16542 {
16543 if( n%j==0 )
16544 {
16545 *n1 = j;
16546 *n2 = n/j;
16547 break;
16548 }
16549 }
16550 }
16551
16552 /*
16553 * try to factorize N
16554 */
16555 if( *n1*(*n2)!=n )
16556 {
16557 for(j=ftbase_ftbasecodeletrecommended+1; j<=n-1; j++)
16558 {
16559 if( n%j==0 )
16560 {
16561 *n1 = j;
16562 *n2 = n/j;
16563 break;
16564 }
16565 }
16566 }
16567
16568 /*
16569 * looks like N is prime :(
16570 */
16571 if( *n1*(*n2)!=n )
16572 {
16573 *n1 = 1;
16574 *n2 = n;
16575 }
16576
16577 /*
16578 * normalize
16579 */
16580 if( *n2==1&&*n1!=1 )
16581 {
16582 *n2 = *n1;
16583 *n1 = 1;
16584 }
16585 }
16586
16587
16588 /*************************************************************************
16589 Is number smooth?
16590
16591 -- ALGLIB --
16592 Copyright 01.05.2009 by Bochkanov Sergey
16593 *************************************************************************/
ftbaseissmooth(ae_int_t n,ae_state * _state)16594 ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state)
16595 {
16596 ae_int_t i;
16597 ae_bool result;
16598
16599
16600 for(i=2; i<=ftbase_ftbasemaxsmoothfactor; i++)
16601 {
16602 while(n%i==0)
16603 {
16604 n = n/i;
16605 }
16606 }
16607 result = n==1;
16608 return result;
16609 }
16610
16611
16612 /*************************************************************************
16613 Returns smallest smooth (divisible only by 2, 3, 5) number that is greater
16614 than or equal to max(N,2)
16615
16616 -- ALGLIB --
16617 Copyright 01.05.2009 by Bochkanov Sergey
16618 *************************************************************************/
ftbasefindsmooth(ae_int_t n,ae_state * _state)16619 ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state)
16620 {
16621 ae_int_t best;
16622 ae_int_t result;
16623
16624
16625 best = 2;
16626 while(best<n)
16627 {
16628 best = 2*best;
16629 }
16630 ftbase_ftbasefindsmoothrec(n, 1, 2, &best, _state);
16631 result = best;
16632 return result;
16633 }
16634
16635
16636 /*************************************************************************
16637 Returns smallest smooth (divisible only by 2, 3, 5) even number that is
16638 greater than or equal to max(N,2)
16639
16640 -- ALGLIB --
16641 Copyright 01.05.2009 by Bochkanov Sergey
16642 *************************************************************************/
ftbasefindsmootheven(ae_int_t n,ae_state * _state)16643 ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state)
16644 {
16645 ae_int_t best;
16646 ae_int_t result;
16647
16648
16649 best = 2;
16650 while(best<n)
16651 {
16652 best = 2*best;
16653 }
16654 ftbase_ftbasefindsmoothrec(n, 2, 2, &best, _state);
16655 result = best;
16656 return result;
16657 }
16658
16659
16660 /*************************************************************************
16661 Returns estimate of FLOP count for the FFT.
16662
16663 It is only an estimate based on operations count for the PERFECT FFT
16664 and relative inefficiency of the algorithm actually used.
16665
16666 N should be power of 2, estimates are badly wrong for non-power-of-2 N's.
16667
16668 -- ALGLIB --
16669 Copyright 01.05.2009 by Bochkanov Sergey
16670 *************************************************************************/
ftbasegetflopestimate(ae_int_t n,ae_state * _state)16671 double ftbasegetflopestimate(ae_int_t n, ae_state *_state)
16672 {
16673 double result;
16674
16675
16676 result = ftbase_ftbaseinefficiencyfactor*(4*n*ae_log((double)(n), _state)/ae_log((double)(2), _state)-6*n+8);
16677 return result;
16678 }
16679
16680
16681 /*************************************************************************
16682 This function returns EXACT estimate of the space requirements for N-point
16683 FFT. Internals of this function are highly dependent on details of different
16684 FFTs employed by this unit, so every time algorithm is changed this function
16685 has to be rewritten.
16686
16687 INPUT PARAMETERS:
16688 N - transform length
16689 PrecRSize - must be set to zero
16690 PrecISize - must be set to zero
16691
16692 OUTPUT PARAMETERS:
16693 PrecRSize - number of real temporaries required for transformation
16694 PrecISize - number of integer temporaries required for transformation
16695
16696
16697 -- ALGLIB --
16698 Copyright 05.04.2013 by Bochkanov Sergey
16699 *************************************************************************/
ftbase_ftdeterminespacerequirements(ae_int_t n,ae_int_t * precrsize,ae_int_t * precisize,ae_state * _state)16700 static void ftbase_ftdeterminespacerequirements(ae_int_t n,
16701 ae_int_t* precrsize,
16702 ae_int_t* precisize,
16703 ae_state *_state)
16704 {
16705 ae_int_t ncur;
16706 ae_int_t f;
16707 ae_int_t i;
16708
16709
16710
16711 /*
16712 * Determine required sizes of precomputed real and integer
16713 * buffers. This stage of code is highly dependent on internals
16714 * of FTComplexFFTPlanRec() and must be kept synchronized with
16715 * possible changes in internals of plan generation function.
16716 *
16717 * Buffer size is determined as follows:
16718 * * N is factorized
16719 * * we factor out anything which is less or equal to MaxRadix
16720 * * prime factor F>RaderThreshold requires 4*FTBaseFindSmooth(2*F-1)
16721 * real entries to store precomputed Quantities for Bluestein's
16722 * transformation
16723 * * prime factor F<=RaderThreshold requires 2*(F-1)+ESTIMATE(F-1)
16724 * precomputed storage
16725 */
16726 ncur = n;
16727 for(i=2; i<=ftbase_maxradix; i++)
16728 {
16729 while(ncur%i==0)
16730 {
16731 ncur = ncur/i;
16732 }
16733 }
16734 f = 2;
16735 while(f<=ncur)
16736 {
16737 while(ncur%f==0)
16738 {
16739 if( f>ftbase_raderthreshold )
16740 {
16741 *precrsize = *precrsize+4*ftbasefindsmooth(2*f-1, _state);
16742 }
16743 else
16744 {
16745 *precrsize = *precrsize+2*(f-1);
16746 ftbase_ftdeterminespacerequirements(f-1, precrsize, precisize, _state);
16747 }
16748 ncur = ncur/f;
16749 }
16750 f = f+1;
16751 }
16752 }
16753
16754
16755 /*************************************************************************
16756 Recurrent function called by FTComplexFFTPlan() and other functions. It
16757 recursively builds transformation plan
16758
16759 INPUT PARAMETERS:
16760 N - FFT length (in complex numbers), N>=1
16761 K - number of repetitions, K>=1
16762 ChildPlan - if True, plan generator inserts OpStart/opEnd in the
16763 plan header/footer.
16764 TopmostPlan - if True, plan generator assumes that it is topmost plan:
16765 * it may use global buffer for transpositions
16766 and there is no other plan which executes in parallel
16767 RowPtr - index which points to past-the-last entry generated so far
16768 BluesteinSize- amount of storage (in real numbers) required for Bluestein buffer
16769 PrecRPtr - pointer to unused part of precomputed real buffer (Plan.PrecR):
16770 * when this function stores some data to precomputed buffer,
16771 it advances pointer.
16772 * it is responsibility of the function to assert that
16773 Plan.PrecR has enough space to store data before actually
16774 writing to buffer.
16775 * it is responsibility of the caller to allocate enough
16776 space before calling this function
16777 PrecIPtr - pointer to unused part of precomputed integer buffer (Plan.PrecI):
16778 * when this function stores some data to precomputed buffer,
16779 it advances pointer.
16780 * it is responsibility of the function to assert that
16781 Plan.PrecR has enough space to store data before actually
16782 writing to buffer.
16783 * it is responsibility of the caller to allocate enough
16784 space before calling this function
16785 Plan - plan (generated so far)
16786
16787 OUTPUT PARAMETERS:
16788 RowPtr - updated pointer (advanced by number of entries generated
16789 by function)
16790 BluesteinSize- updated amount
16791 (may be increased, but may never be decreased)
16792
16793 NOTE: in case TopmostPlan is True, ChildPlan is also must be True.
16794
16795 -- ALGLIB --
16796 Copyright 05.04.2013 by Bochkanov Sergey
16797 *************************************************************************/
ftbase_ftcomplexfftplanrec(ae_int_t n,ae_int_t k,ae_bool childplan,ae_bool topmostplan,ae_int_t * rowptr,ae_int_t * bluesteinsize,ae_int_t * precrptr,ae_int_t * preciptr,fasttransformplan * plan,ae_state * _state)16798 static void ftbase_ftcomplexfftplanrec(ae_int_t n,
16799 ae_int_t k,
16800 ae_bool childplan,
16801 ae_bool topmostplan,
16802 ae_int_t* rowptr,
16803 ae_int_t* bluesteinsize,
16804 ae_int_t* precrptr,
16805 ae_int_t* preciptr,
16806 fasttransformplan* plan,
16807 ae_state *_state)
16808 {
16809 ae_frame _frame_block;
16810 srealarray localbuf;
16811 ae_int_t m;
16812 ae_int_t n1;
16813 ae_int_t n2;
16814 ae_int_t gq;
16815 ae_int_t giq;
16816 ae_int_t row0;
16817 ae_int_t row1;
16818 ae_int_t row2;
16819 ae_int_t row3;
16820
16821 ae_frame_make(_state, &_frame_block);
16822 memset(&localbuf, 0, sizeof(localbuf));
16823 _srealarray_init(&localbuf, _state, ae_true);
16824
16825 ae_assert(n>0, "FTComplexFFTPlan: N<=0", _state);
16826 ae_assert(k>0, "FTComplexFFTPlan: K<=0", _state);
16827 ae_assert(!topmostplan||childplan, "FTComplexFFTPlan: ChildPlan is inconsistent with TopmostPlan", _state);
16828
16829 /*
16830 * Try to generate "topmost" plan
16831 */
16832 if( topmostplan&&n>ftbase_recursivethreshold )
16833 {
16834 ftbase_ftfactorize(n, ae_false, &n1, &n2, _state);
16835 if( n1*n2==0 )
16836 {
16837
16838 /*
16839 * Handle prime-factor FFT with Bluestein's FFT.
16840 * Determine size of Bluestein's buffer.
16841 */
16842 m = ftbasefindsmooth(2*n-1, _state);
16843 *bluesteinsize = ae_maxint(2*m, *bluesteinsize, _state);
16844
16845 /*
16846 * Generate plan
16847 */
16848 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16849 ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
16850 row0 = *rowptr;
16851 ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
16852 ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_true, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16853 row1 = *rowptr;
16854 plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16855 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16856
16857 /*
16858 * Fill precomputed buffer
16859 */
16860 ftbase_ftprecomputebluesteinsfft(n, m, &plan->precr, *precrptr, _state);
16861
16862 /*
16863 * Update pointer to the precomputed area
16864 */
16865 *precrptr = *precrptr+4*m;
16866 }
16867 else
16868 {
16869
16870 /*
16871 * Handle composite FFT with recursive Cooley-Tukey which
16872 * uses global buffer instead of local one.
16873 */
16874 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16875 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
16876 row0 = *rowptr;
16877 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
16878 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
16879 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
16880 row2 = *rowptr;
16881 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
16882 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
16883 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16884 row1 = *rowptr;
16885 ftbase_ftcomplexfftplanrec(n1, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16886 plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16887 row3 = *rowptr;
16888 ftbase_ftcomplexfftplanrec(n2, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16889 plan->entries.ptr.pp_int[row2][ftbase_colparam0] = row3-row2;
16890 }
16891 ae_frame_leave(_state);
16892 return;
16893 }
16894
16895 /*
16896 * Prepare "non-topmost" plan:
16897 * * calculate factorization
16898 * * use local (shared) buffer
16899 * * update buffer size - ANY plan will need at least
16900 * 2*N temporaries, additional requirements can be
16901 * applied later
16902 */
16903 ftbase_ftfactorize(n, ae_false, &n1, &n2, _state);
16904
16905 /*
16906 * Handle FFT's with N1*N2=0: either small-N or prime-factor
16907 */
16908 if( n1*n2==0 )
16909 {
16910 if( n<=ftbase_maxradix )
16911 {
16912
16913 /*
16914 * Small-N FFT
16915 */
16916 if( childplan )
16917 {
16918 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16919 }
16920 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodeletfft, k, n, 2, 0, _state);
16921 if( childplan )
16922 {
16923 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16924 }
16925 ae_frame_leave(_state);
16926 return;
16927 }
16928 if( n<=ftbase_raderthreshold )
16929 {
16930
16931 /*
16932 * Handle prime-factor FFT's with Rader's FFT
16933 */
16934 m = n-1;
16935 if( childplan )
16936 {
16937 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16938 }
16939 findprimitiverootandinverse(n, &gq, &giq, _state);
16940 ftbase_ftpushentry4(plan, rowptr, ftbase_opradersfft, k, n, 2, 2, gq, giq, *precrptr, _state);
16941 ftbase_ftprecomputeradersfft(n, gq, giq, &plan->precr, *precrptr, _state);
16942 *precrptr = *precrptr+2*(n-1);
16943 row0 = *rowptr;
16944 ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
16945 ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16946 row1 = *rowptr;
16947 plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16948 if( childplan )
16949 {
16950 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16951 }
16952 }
16953 else
16954 {
16955
16956 /*
16957 * Handle prime-factor FFT's with Bluestein's FFT
16958 */
16959 m = ftbasefindsmooth(2*n-1, _state);
16960 *bluesteinsize = ae_maxint(2*m, *bluesteinsize, _state);
16961 if( childplan )
16962 {
16963 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16964 }
16965 ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
16966 ftbase_ftprecomputebluesteinsfft(n, m, &plan->precr, *precrptr, _state);
16967 *precrptr = *precrptr+4*m;
16968 row0 = *rowptr;
16969 ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
16970 ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16971 row1 = *rowptr;
16972 plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16973 if( childplan )
16974 {
16975 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16976 }
16977 }
16978 ae_frame_leave(_state);
16979 return;
16980 }
16981
16982 /*
16983 * Handle Cooley-Tukey FFT with small N1
16984 */
16985 if( n1<=ftbase_maxradix )
16986 {
16987
16988 /*
16989 * Specialized transformation for small N1:
16990 * * N2 short inplace FFT's, each N1-point, with integrated twiddle factors
16991 * * N1 long FFT's
16992 * * final transposition
16993 */
16994 if( childplan )
16995 {
16996 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16997 }
16998 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodelettwfft, k, n1, 2*n2, 0, _state);
16999 ftbase_ftcomplexfftplanrec(n2, k*n1, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17000 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17001 if( childplan )
17002 {
17003 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
17004 }
17005 ae_frame_leave(_state);
17006 return;
17007 }
17008
17009 /*
17010 * Handle general Cooley-Tukey FFT, either "flat" or "recursive"
17011 */
17012 if( n<=ftbase_recursivethreshold )
17013 {
17014
17015 /*
17016 * General code for large N1/N2, "flat" version without explicit recurrence
17017 * (nested subplans are inserted directly into the body of the plan)
17018 */
17019 if( childplan )
17020 {
17021 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
17022 }
17023 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17024 ftbase_ftcomplexfftplanrec(n1, k*n2, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17025 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
17026 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
17027 ftbase_ftcomplexfftplanrec(n2, k*n1, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17028 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17029 if( childplan )
17030 {
17031 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
17032 }
17033 }
17034 else
17035 {
17036
17037 /*
17038 * General code for large N1/N2, "recursive" version - nested subplans
17039 * are separated from the plan body.
17040 *
17041 * Generate parent plan.
17042 */
17043 if( childplan )
17044 {
17045 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
17046 }
17047 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17048 row0 = *rowptr;
17049 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
17050 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
17051 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
17052 row2 = *rowptr;
17053 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
17054 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17055 if( childplan )
17056 {
17057 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
17058 }
17059
17060 /*
17061 * Generate child subplans, insert refence to parent plans
17062 */
17063 row1 = *rowptr;
17064 ftbase_ftcomplexfftplanrec(n1, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17065 plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
17066 row3 = *rowptr;
17067 ftbase_ftcomplexfftplanrec(n2, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17068 plan->entries.ptr.pp_int[row2][ftbase_colparam0] = row3-row2;
17069 }
17070 ae_frame_leave(_state);
17071 }
17072
17073
17074 /*************************************************************************
17075 This function pushes one more entry to the plan. It resizes Entries matrix
17076 if needed.
17077
17078 INPUT PARAMETERS:
17079 Plan - plan (generated so far)
17080 RowPtr - index which points to past-the-last entry generated so far
17081 EType - entry type
17082 EOpCnt - operands count
17083 EOpSize - operand size
17084 EMcvSize - microvector size
17085 EParam0 - parameter 0
17086
17087 OUTPUT PARAMETERS:
17088 Plan - updated plan
17089 RowPtr - updated pointer
17090
17091 NOTE: Param1 is set to -1.
17092
17093 -- ALGLIB --
17094 Copyright 05.04.2013 by Bochkanov Sergey
17095 *************************************************************************/
ftbase_ftpushentry(fasttransformplan * plan,ae_int_t * rowptr,ae_int_t etype,ae_int_t eopcnt,ae_int_t eopsize,ae_int_t emcvsize,ae_int_t eparam0,ae_state * _state)17096 static void ftbase_ftpushentry(fasttransformplan* plan,
17097 ae_int_t* rowptr,
17098 ae_int_t etype,
17099 ae_int_t eopcnt,
17100 ae_int_t eopsize,
17101 ae_int_t emcvsize,
17102 ae_int_t eparam0,
17103 ae_state *_state)
17104 {
17105
17106
17107 ftbase_ftpushentry2(plan, rowptr, etype, eopcnt, eopsize, emcvsize, eparam0, -1, _state);
17108 }
17109
17110
17111 /*************************************************************************
17112 Same as FTPushEntry(), but sets Param0 AND Param1.
17113 This function pushes one more entry to the plan. It resized Entries matrix
17114 if needed.
17115
17116 INPUT PARAMETERS:
17117 Plan - plan (generated so far)
17118 RowPtr - index which points to past-the-last entry generated so far
17119 EType - entry type
17120 EOpCnt - operands count
17121 EOpSize - operand size
17122 EMcvSize - microvector size
17123 EParam0 - parameter 0
17124 EParam1 - parameter 1
17125
17126 OUTPUT PARAMETERS:
17127 Plan - updated plan
17128 RowPtr - updated pointer
17129
17130 -- ALGLIB --
17131 Copyright 05.04.2013 by Bochkanov Sergey
17132 *************************************************************************/
ftbase_ftpushentry2(fasttransformplan * plan,ae_int_t * rowptr,ae_int_t etype,ae_int_t eopcnt,ae_int_t eopsize,ae_int_t emcvsize,ae_int_t eparam0,ae_int_t eparam1,ae_state * _state)17133 static void ftbase_ftpushentry2(fasttransformplan* plan,
17134 ae_int_t* rowptr,
17135 ae_int_t etype,
17136 ae_int_t eopcnt,
17137 ae_int_t eopsize,
17138 ae_int_t emcvsize,
17139 ae_int_t eparam0,
17140 ae_int_t eparam1,
17141 ae_state *_state)
17142 {
17143
17144
17145 if( *rowptr>=plan->entries.rows )
17146 {
17147 imatrixresize(&plan->entries, ae_maxint(2*plan->entries.rows, 1, _state), ftbase_colscnt, _state);
17148 }
17149 plan->entries.ptr.pp_int[*rowptr][ftbase_coltype] = etype;
17150 plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandscnt] = eopcnt;
17151 plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandsize] = eopsize;
17152 plan->entries.ptr.pp_int[*rowptr][ftbase_colmicrovectorsize] = emcvsize;
17153 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam0] = eparam0;
17154 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam1] = eparam1;
17155 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam2] = 0;
17156 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam3] = 0;
17157 *rowptr = *rowptr+1;
17158 }
17159
17160
17161 /*************************************************************************
17162 Same as FTPushEntry(), but sets Param0, Param1, Param2 and Param3.
17163 This function pushes one more entry to the plan. It resized Entries matrix
17164 if needed.
17165
17166 INPUT PARAMETERS:
17167 Plan - plan (generated so far)
17168 RowPtr - index which points to past-the-last entry generated so far
17169 EType - entry type
17170 EOpCnt - operands count
17171 EOpSize - operand size
17172 EMcvSize - microvector size
17173 EParam0 - parameter 0
17174 EParam1 - parameter 1
17175 EParam2 - parameter 2
17176 EParam3 - parameter 3
17177
17178 OUTPUT PARAMETERS:
17179 Plan - updated plan
17180 RowPtr - updated pointer
17181
17182 -- ALGLIB --
17183 Copyright 05.04.2013 by Bochkanov Sergey
17184 *************************************************************************/
ftbase_ftpushentry4(fasttransformplan * plan,ae_int_t * rowptr,ae_int_t etype,ae_int_t eopcnt,ae_int_t eopsize,ae_int_t emcvsize,ae_int_t eparam0,ae_int_t eparam1,ae_int_t eparam2,ae_int_t eparam3,ae_state * _state)17185 static void ftbase_ftpushentry4(fasttransformplan* plan,
17186 ae_int_t* rowptr,
17187 ae_int_t etype,
17188 ae_int_t eopcnt,
17189 ae_int_t eopsize,
17190 ae_int_t emcvsize,
17191 ae_int_t eparam0,
17192 ae_int_t eparam1,
17193 ae_int_t eparam2,
17194 ae_int_t eparam3,
17195 ae_state *_state)
17196 {
17197
17198
17199 if( *rowptr>=plan->entries.rows )
17200 {
17201 imatrixresize(&plan->entries, ae_maxint(2*plan->entries.rows, 1, _state), ftbase_colscnt, _state);
17202 }
17203 plan->entries.ptr.pp_int[*rowptr][ftbase_coltype] = etype;
17204 plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandscnt] = eopcnt;
17205 plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandsize] = eopsize;
17206 plan->entries.ptr.pp_int[*rowptr][ftbase_colmicrovectorsize] = emcvsize;
17207 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam0] = eparam0;
17208 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam1] = eparam1;
17209 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam2] = eparam2;
17210 plan->entries.ptr.pp_int[*rowptr][ftbase_colparam3] = eparam3;
17211 *rowptr = *rowptr+1;
17212 }
17213
17214
17215 /*************************************************************************
17216 This subroutine applies subplan to input/output array A.
17217
17218 INPUT PARAMETERS:
17219 Plan - transformation plan
17220 SubPlan - subplan index
17221 A - array, must be large enough for plan to work
17222 ABase - base offset in array A, this value points to start of
17223 subarray whose length is equal to length of the plan
17224 AOffset - offset with respect to ABase, 0<=AOffset<PlanLength.
17225 This is an offset within large PlanLength-subarray of
17226 the chunk to process.
17227 Buf - temporary buffer whose length is equal to plan length
17228 (without taking into account RepCnt) or larger.
17229 OffsBuf - offset in the buffer array
17230 RepCnt - repetition count (transformation is repeatedly applied
17231 to subsequent subarrays)
17232
17233 OUTPUT PARAMETERS:
17234 Plan - plan (temporary buffers can be modified, plan itself
17235 is unchanged and can be reused)
17236 A - transformed array
17237
17238 -- ALGLIB --
17239 Copyright 05.04.2013 by Bochkanov Sergey
17240 *************************************************************************/
ftbase_ftapplysubplan(fasttransformplan * plan,ae_int_t subplan,ae_vector * a,ae_int_t abase,ae_int_t aoffset,ae_vector * buf,ae_int_t repcnt,ae_state * _state)17241 static void ftbase_ftapplysubplan(fasttransformplan* plan,
17242 ae_int_t subplan,
17243 /* Real */ ae_vector* a,
17244 ae_int_t abase,
17245 ae_int_t aoffset,
17246 /* Real */ ae_vector* buf,
17247 ae_int_t repcnt,
17248 ae_state *_state)
17249 {
17250 ae_frame _frame_block;
17251 ae_int_t rowidx;
17252 ae_int_t i;
17253 ae_int_t n1;
17254 ae_int_t n2;
17255 ae_int_t operation;
17256 ae_int_t operandscnt;
17257 ae_int_t operandsize;
17258 ae_int_t microvectorsize;
17259 ae_int_t param0;
17260 ae_int_t param1;
17261 ae_int_t parentsize;
17262 ae_int_t childsize;
17263 ae_int_t chunksize;
17264 ae_int_t lastchunksize;
17265 srealarray *bufa;
17266 ae_smart_ptr _bufa;
17267 srealarray *bufb;
17268 ae_smart_ptr _bufb;
17269 srealarray *bufc;
17270 ae_smart_ptr _bufc;
17271 srealarray *bufd;
17272 ae_smart_ptr _bufd;
17273
17274 ae_frame_make(_state, &_frame_block);
17275 memset(&_bufa, 0, sizeof(_bufa));
17276 memset(&_bufb, 0, sizeof(_bufb));
17277 memset(&_bufc, 0, sizeof(_bufc));
17278 memset(&_bufd, 0, sizeof(_bufd));
17279 ae_smart_ptr_init(&_bufa, (void**)&bufa, _state, ae_true);
17280 ae_smart_ptr_init(&_bufb, (void**)&bufb, _state, ae_true);
17281 ae_smart_ptr_init(&_bufc, (void**)&bufc, _state, ae_true);
17282 ae_smart_ptr_init(&_bufd, (void**)&bufd, _state, ae_true);
17283
17284 ae_assert(plan->entries.ptr.pp_int[subplan][ftbase_coltype]==ftbase_opstart, "FTApplySubPlan: incorrect subplan header", _state);
17285 rowidx = subplan+1;
17286 while(plan->entries.ptr.pp_int[rowidx][ftbase_coltype]!=ftbase_opend)
17287 {
17288 operation = plan->entries.ptr.pp_int[rowidx][ftbase_coltype];
17289 operandscnt = repcnt*plan->entries.ptr.pp_int[rowidx][ftbase_coloperandscnt];
17290 operandsize = plan->entries.ptr.pp_int[rowidx][ftbase_coloperandsize];
17291 microvectorsize = plan->entries.ptr.pp_int[rowidx][ftbase_colmicrovectorsize];
17292 param0 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17293 param1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam1];
17294 touchint(¶m1, _state);
17295
17296 /*
17297 * Process "jump" operation
17298 */
17299 if( operation==ftbase_opjmp )
17300 {
17301 rowidx = rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17302 continue;
17303 }
17304
17305 /*
17306 * Process "parallel call" operation:
17307 * * we perform initial check for consistency between parent and child plans
17308 * * we call FTSplitAndApplyParallelPlan(), which splits parallel plan into
17309 * several parallel tasks
17310 */
17311 if( operation==ftbase_opparallelcall )
17312 {
17313 parentsize = operandsize*microvectorsize;
17314 childsize = plan->entries.ptr.pp_int[rowidx+param0][ftbase_coloperandscnt]*plan->entries.ptr.pp_int[rowidx+param0][ftbase_coloperandsize]*plan->entries.ptr.pp_int[rowidx+param0][ftbase_colmicrovectorsize];
17315 ae_assert(plan->entries.ptr.pp_int[rowidx+param0][ftbase_coltype]==ftbase_opstart, "FTApplySubPlan: incorrect child subplan header", _state);
17316 ae_assert(parentsize==childsize, "FTApplySubPlan: incorrect child subplan header", _state);
17317 chunksize = ae_maxint(ftbase_recursivethreshold/childsize, 1, _state);
17318 lastchunksize = operandscnt%chunksize;
17319 if( lastchunksize==0 )
17320 {
17321 lastchunksize = chunksize;
17322 }
17323 i = 0;
17324 while(i<operandscnt)
17325 {
17326 chunksize = ae_minint(chunksize, operandscnt-i, _state);
17327 ftbase_ftapplysubplan(plan, rowidx+param0, a, abase, aoffset+i*childsize, buf, chunksize, _state);
17328 i = i+chunksize;
17329 }
17330 rowidx = rowidx+1;
17331 continue;
17332 }
17333
17334 /*
17335 * Process "reference complex FFT" operation
17336 */
17337 if( operation==ftbase_opcomplexreffft )
17338 {
17339 ftbase_ftapplycomplexreffft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, buf, _state);
17340 rowidx = rowidx+1;
17341 continue;
17342 }
17343
17344 /*
17345 * Process "codelet FFT" operation
17346 */
17347 if( operation==ftbase_opcomplexcodeletfft )
17348 {
17349 ftbase_ftapplycomplexcodeletfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
17350 rowidx = rowidx+1;
17351 continue;
17352 }
17353
17354 /*
17355 * Process "integrated codelet FFT" operation
17356 */
17357 if( operation==ftbase_opcomplexcodelettwfft )
17358 {
17359 ftbase_ftapplycomplexcodelettwfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
17360 rowidx = rowidx+1;
17361 continue;
17362 }
17363
17364 /*
17365 * Process Bluestein's FFT operation
17366 */
17367 if( operation==ftbase_opbluesteinsfft )
17368 {
17369 ae_assert(microvectorsize==2, "FTApplySubPlan: microvectorsize!=2 for Bluesteins FFT", _state);
17370 ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufa, _state);
17371 ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufb, _state);
17372 ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufc, _state);
17373 ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufd, _state);
17374 ftbase_ftbluesteinsfft(plan, a, abase, aoffset, operandscnt, operandsize, plan->entries.ptr.pp_int[rowidx][ftbase_colparam0], plan->entries.ptr.pp_int[rowidx][ftbase_colparam2], rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam1], &bufa->val, &bufb->val, &bufc->val, &bufd->val, _state);
17375 ae_shared_pool_recycle(&plan->bluesteinpool, &_bufa, _state);
17376 ae_shared_pool_recycle(&plan->bluesteinpool, &_bufb, _state);
17377 ae_shared_pool_recycle(&plan->bluesteinpool, &_bufc, _state);
17378 ae_shared_pool_recycle(&plan->bluesteinpool, &_bufd, _state);
17379 rowidx = rowidx+1;
17380 continue;
17381 }
17382
17383 /*
17384 * Process Rader's FFT
17385 */
17386 if( operation==ftbase_opradersfft )
17387 {
17388 ftbase_ftradersfft(plan, a, abase, aoffset, operandscnt, operandsize, rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam0], plan->entries.ptr.pp_int[rowidx][ftbase_colparam1], plan->entries.ptr.pp_int[rowidx][ftbase_colparam2], plan->entries.ptr.pp_int[rowidx][ftbase_colparam3], buf, _state);
17389 rowidx = rowidx+1;
17390 continue;
17391 }
17392
17393 /*
17394 * Process "complex twiddle factors" operation
17395 */
17396 if( operation==ftbase_opcomplexfftfactors )
17397 {
17398 ae_assert(microvectorsize==2, "FTApplySubPlan: MicrovectorSize<>1", _state);
17399 n1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17400 n2 = operandsize/n1;
17401 for(i=0; i<=operandscnt-1; i++)
17402 {
17403 ftbase_ffttwcalc(a, abase+aoffset+i*operandsize*2, n1, n2, _state);
17404 }
17405 rowidx = rowidx+1;
17406 continue;
17407 }
17408
17409 /*
17410 * Process "complex transposition" operation
17411 */
17412 if( operation==ftbase_opcomplextranspose )
17413 {
17414 ae_assert(microvectorsize==2, "FTApplySubPlan: MicrovectorSize<>1", _state);
17415 n1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17416 n2 = operandsize/n1;
17417 for(i=0; i<=operandscnt-1; i++)
17418 {
17419 ftbase_internalcomplexlintranspose(a, n1, n2, abase+aoffset+i*operandsize*2, buf, _state);
17420 }
17421 rowidx = rowidx+1;
17422 continue;
17423 }
17424
17425 /*
17426 * Error
17427 */
17428 ae_assert(ae_false, "FTApplySubPlan: unexpected plan type", _state);
17429 }
17430 ae_frame_leave(_state);
17431 }
17432
17433
17434 /*************************************************************************
17435 This subroutine applies complex reference FFT to input/output array A.
17436
17437 VERY SLOW OPERATION, do not use it in real life plans :)
17438
17439 INPUT PARAMETERS:
17440 A - array, must be large enough for plan to work
17441 Offs - offset of the subarray to process
17442 OperandsCnt - operands count (see description of FastTransformPlan)
17443 OperandSize - operand size (see description of FastTransformPlan)
17444 MicrovectorSize-microvector size (see description of FastTransformPlan)
17445 Buf - temporary array, must be at least OperandsCnt*OperandSize*MicrovectorSize
17446
17447 OUTPUT PARAMETERS:
17448 A - transformed array
17449
17450 -- ALGLIB --
17451 Copyright 05.04.2013 by Bochkanov Sergey
17452 *************************************************************************/
ftbase_ftapplycomplexreffft(ae_vector * a,ae_int_t offs,ae_int_t operandscnt,ae_int_t operandsize,ae_int_t microvectorsize,ae_vector * buf,ae_state * _state)17453 static void ftbase_ftapplycomplexreffft(/* Real */ ae_vector* a,
17454 ae_int_t offs,
17455 ae_int_t operandscnt,
17456 ae_int_t operandsize,
17457 ae_int_t microvectorsize,
17458 /* Real */ ae_vector* buf,
17459 ae_state *_state)
17460 {
17461 ae_int_t opidx;
17462 ae_int_t i;
17463 ae_int_t k;
17464 double hre;
17465 double him;
17466 double c;
17467 double s;
17468 double re;
17469 double im;
17470 ae_int_t n;
17471
17472
17473 ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1", _state);
17474 ae_assert(operandsize>=1, "FTApplyComplexRefFFT: OperandSize<1", _state);
17475 ae_assert(microvectorsize==2, "FTApplyComplexRefFFT: MicrovectorSize<>2", _state);
17476 n = operandsize;
17477 for(opidx=0; opidx<=operandscnt-1; opidx++)
17478 {
17479 for(i=0; i<=n-1; i++)
17480 {
17481 hre = (double)(0);
17482 him = (double)(0);
17483 for(k=0; k<=n-1; k++)
17484 {
17485 re = a->ptr.p_double[offs+opidx*operandsize*2+2*k+0];
17486 im = a->ptr.p_double[offs+opidx*operandsize*2+2*k+1];
17487 c = ae_cos(-2*ae_pi*k*i/n, _state);
17488 s = ae_sin(-2*ae_pi*k*i/n, _state);
17489 hre = hre+c*re-s*im;
17490 him = him+c*im+s*re;
17491 }
17492 buf->ptr.p_double[2*i+0] = hre;
17493 buf->ptr.p_double[2*i+1] = him;
17494 }
17495 for(i=0; i<=operandsize*2-1; i++)
17496 {
17497 a->ptr.p_double[offs+opidx*operandsize*2+i] = buf->ptr.p_double[i];
17498 }
17499 }
17500 }
17501
17502
17503 /*************************************************************************
17504 This subroutine applies complex codelet FFT to input/output array A.
17505
17506 INPUT PARAMETERS:
17507 A - array, must be large enough for plan to work
17508 Offs - offset of the subarray to process
17509 OperandsCnt - operands count (see description of FastTransformPlan)
17510 OperandSize - operand size (see description of FastTransformPlan)
17511 MicrovectorSize-microvector size, must be 2
17512
17513 OUTPUT PARAMETERS:
17514 A - transformed array
17515
17516 -- ALGLIB --
17517 Copyright 05.04.2013 by Bochkanov Sergey
17518 *************************************************************************/
ftbase_ftapplycomplexcodeletfft(ae_vector * a,ae_int_t offs,ae_int_t operandscnt,ae_int_t operandsize,ae_int_t microvectorsize,ae_state * _state)17519 static void ftbase_ftapplycomplexcodeletfft(/* Real */ ae_vector* a,
17520 ae_int_t offs,
17521 ae_int_t operandscnt,
17522 ae_int_t operandsize,
17523 ae_int_t microvectorsize,
17524 ae_state *_state)
17525 {
17526 ae_int_t opidx;
17527 ae_int_t n;
17528 ae_int_t aoffset;
17529 double a0x;
17530 double a0y;
17531 double a1x;
17532 double a1y;
17533 double a2x;
17534 double a2y;
17535 double a3x;
17536 double a3y;
17537 double a4x;
17538 double a4y;
17539 double a5x;
17540 double a5y;
17541 double v0;
17542 double v1;
17543 double v2;
17544 double v3;
17545 double t1x;
17546 double t1y;
17547 double t2x;
17548 double t2y;
17549 double t3x;
17550 double t3y;
17551 double t4x;
17552 double t4y;
17553 double t5x;
17554 double t5y;
17555 double m1x;
17556 double m1y;
17557 double m2x;
17558 double m2y;
17559 double m3x;
17560 double m3y;
17561 double m4x;
17562 double m4y;
17563 double m5x;
17564 double m5y;
17565 double s1x;
17566 double s1y;
17567 double s2x;
17568 double s2y;
17569 double s3x;
17570 double s3y;
17571 double s4x;
17572 double s4y;
17573 double s5x;
17574 double s5y;
17575 double c1;
17576 double c2;
17577 double c3;
17578 double c4;
17579 double c5;
17580 double v;
17581
17582
17583 ae_assert(operandscnt>=1, "FTApplyComplexCodeletFFT: OperandsCnt<1", _state);
17584 ae_assert(operandsize>=1, "FTApplyComplexCodeletFFT: OperandSize<1", _state);
17585 ae_assert(microvectorsize==2, "FTApplyComplexCodeletFFT: MicrovectorSize<>2", _state);
17586 n = operandsize;
17587
17588 /*
17589 * Hard-coded transforms for different N's
17590 */
17591 ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletFFT: N>MaxRadix", _state);
17592 if( n==2 )
17593 {
17594 for(opidx=0; opidx<=operandscnt-1; opidx++)
17595 {
17596 aoffset = offs+opidx*operandsize*2;
17597 a0x = a->ptr.p_double[aoffset+0];
17598 a0y = a->ptr.p_double[aoffset+1];
17599 a1x = a->ptr.p_double[aoffset+2];
17600 a1y = a->ptr.p_double[aoffset+3];
17601 v0 = a0x+a1x;
17602 v1 = a0y+a1y;
17603 v2 = a0x-a1x;
17604 v3 = a0y-a1y;
17605 a->ptr.p_double[aoffset+0] = v0;
17606 a->ptr.p_double[aoffset+1] = v1;
17607 a->ptr.p_double[aoffset+2] = v2;
17608 a->ptr.p_double[aoffset+3] = v3;
17609 }
17610 return;
17611 }
17612 if( n==3 )
17613 {
17614 c1 = ae_cos(2*ae_pi/3, _state)-1;
17615 c2 = ae_sin(2*ae_pi/3, _state);
17616 for(opidx=0; opidx<=operandscnt-1; opidx++)
17617 {
17618 aoffset = offs+opidx*operandsize*2;
17619 a0x = a->ptr.p_double[aoffset+0];
17620 a0y = a->ptr.p_double[aoffset+1];
17621 a1x = a->ptr.p_double[aoffset+2];
17622 a1y = a->ptr.p_double[aoffset+3];
17623 a2x = a->ptr.p_double[aoffset+4];
17624 a2y = a->ptr.p_double[aoffset+5];
17625 t1x = a1x+a2x;
17626 t1y = a1y+a2y;
17627 a0x = a0x+t1x;
17628 a0y = a0y+t1y;
17629 m1x = c1*t1x;
17630 m1y = c1*t1y;
17631 m2x = c2*(a1y-a2y);
17632 m2y = c2*(a2x-a1x);
17633 s1x = a0x+m1x;
17634 s1y = a0y+m1y;
17635 a1x = s1x+m2x;
17636 a1y = s1y+m2y;
17637 a2x = s1x-m2x;
17638 a2y = s1y-m2y;
17639 a->ptr.p_double[aoffset+0] = a0x;
17640 a->ptr.p_double[aoffset+1] = a0y;
17641 a->ptr.p_double[aoffset+2] = a1x;
17642 a->ptr.p_double[aoffset+3] = a1y;
17643 a->ptr.p_double[aoffset+4] = a2x;
17644 a->ptr.p_double[aoffset+5] = a2y;
17645 }
17646 return;
17647 }
17648 if( n==4 )
17649 {
17650 for(opidx=0; opidx<=operandscnt-1; opidx++)
17651 {
17652 aoffset = offs+opidx*operandsize*2;
17653 a0x = a->ptr.p_double[aoffset+0];
17654 a0y = a->ptr.p_double[aoffset+1];
17655 a1x = a->ptr.p_double[aoffset+2];
17656 a1y = a->ptr.p_double[aoffset+3];
17657 a2x = a->ptr.p_double[aoffset+4];
17658 a2y = a->ptr.p_double[aoffset+5];
17659 a3x = a->ptr.p_double[aoffset+6];
17660 a3y = a->ptr.p_double[aoffset+7];
17661 t1x = a0x+a2x;
17662 t1y = a0y+a2y;
17663 t2x = a1x+a3x;
17664 t2y = a1y+a3y;
17665 m2x = a0x-a2x;
17666 m2y = a0y-a2y;
17667 m3x = a1y-a3y;
17668 m3y = a3x-a1x;
17669 a->ptr.p_double[aoffset+0] = t1x+t2x;
17670 a->ptr.p_double[aoffset+1] = t1y+t2y;
17671 a->ptr.p_double[aoffset+4] = t1x-t2x;
17672 a->ptr.p_double[aoffset+5] = t1y-t2y;
17673 a->ptr.p_double[aoffset+2] = m2x+m3x;
17674 a->ptr.p_double[aoffset+3] = m2y+m3y;
17675 a->ptr.p_double[aoffset+6] = m2x-m3x;
17676 a->ptr.p_double[aoffset+7] = m2y-m3y;
17677 }
17678 return;
17679 }
17680 if( n==5 )
17681 {
17682 v = 2*ae_pi/5;
17683 c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
17684 c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
17685 c3 = -ae_sin(v, _state);
17686 c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state));
17687 c5 = ae_sin(v, _state)-ae_sin(2*v, _state);
17688 for(opidx=0; opidx<=operandscnt-1; opidx++)
17689 {
17690 aoffset = offs+opidx*operandsize*2;
17691 t1x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+8];
17692 t1y = a->ptr.p_double[aoffset+3]+a->ptr.p_double[aoffset+9];
17693 t2x = a->ptr.p_double[aoffset+4]+a->ptr.p_double[aoffset+6];
17694 t2y = a->ptr.p_double[aoffset+5]+a->ptr.p_double[aoffset+7];
17695 t3x = a->ptr.p_double[aoffset+2]-a->ptr.p_double[aoffset+8];
17696 t3y = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+9];
17697 t4x = a->ptr.p_double[aoffset+6]-a->ptr.p_double[aoffset+4];
17698 t4y = a->ptr.p_double[aoffset+7]-a->ptr.p_double[aoffset+5];
17699 t5x = t1x+t2x;
17700 t5y = t1y+t2y;
17701 a->ptr.p_double[aoffset+0] = a->ptr.p_double[aoffset+0]+t5x;
17702 a->ptr.p_double[aoffset+1] = a->ptr.p_double[aoffset+1]+t5y;
17703 m1x = c1*t5x;
17704 m1y = c1*t5y;
17705 m2x = c2*(t1x-t2x);
17706 m2y = c2*(t1y-t2y);
17707 m3x = -c3*(t3y+t4y);
17708 m3y = c3*(t3x+t4x);
17709 m4x = -c4*t4y;
17710 m4y = c4*t4x;
17711 m5x = -c5*t3y;
17712 m5y = c5*t3x;
17713 s3x = m3x-m4x;
17714 s3y = m3y-m4y;
17715 s5x = m3x+m5x;
17716 s5y = m3y+m5y;
17717 s1x = a->ptr.p_double[aoffset+0]+m1x;
17718 s1y = a->ptr.p_double[aoffset+1]+m1y;
17719 s2x = s1x+m2x;
17720 s2y = s1y+m2y;
17721 s4x = s1x-m2x;
17722 s4y = s1y-m2y;
17723 a->ptr.p_double[aoffset+2] = s2x+s3x;
17724 a->ptr.p_double[aoffset+3] = s2y+s3y;
17725 a->ptr.p_double[aoffset+4] = s4x+s5x;
17726 a->ptr.p_double[aoffset+5] = s4y+s5y;
17727 a->ptr.p_double[aoffset+6] = s4x-s5x;
17728 a->ptr.p_double[aoffset+7] = s4y-s5y;
17729 a->ptr.p_double[aoffset+8] = s2x-s3x;
17730 a->ptr.p_double[aoffset+9] = s2y-s3y;
17731 }
17732 return;
17733 }
17734 if( n==6 )
17735 {
17736 c1 = ae_cos(2*ae_pi/3, _state)-1;
17737 c2 = ae_sin(2*ae_pi/3, _state);
17738 c3 = ae_cos(-ae_pi/3, _state);
17739 c4 = ae_sin(-ae_pi/3, _state);
17740 for(opidx=0; opidx<=operandscnt-1; opidx++)
17741 {
17742 aoffset = offs+opidx*operandsize*2;
17743 a0x = a->ptr.p_double[aoffset+0];
17744 a0y = a->ptr.p_double[aoffset+1];
17745 a1x = a->ptr.p_double[aoffset+2];
17746 a1y = a->ptr.p_double[aoffset+3];
17747 a2x = a->ptr.p_double[aoffset+4];
17748 a2y = a->ptr.p_double[aoffset+5];
17749 a3x = a->ptr.p_double[aoffset+6];
17750 a3y = a->ptr.p_double[aoffset+7];
17751 a4x = a->ptr.p_double[aoffset+8];
17752 a4y = a->ptr.p_double[aoffset+9];
17753 a5x = a->ptr.p_double[aoffset+10];
17754 a5y = a->ptr.p_double[aoffset+11];
17755 v0 = a0x;
17756 v1 = a0y;
17757 a0x = a0x+a3x;
17758 a0y = a0y+a3y;
17759 a3x = v0-a3x;
17760 a3y = v1-a3y;
17761 v0 = a1x;
17762 v1 = a1y;
17763 a1x = a1x+a4x;
17764 a1y = a1y+a4y;
17765 a4x = v0-a4x;
17766 a4y = v1-a4y;
17767 v0 = a2x;
17768 v1 = a2y;
17769 a2x = a2x+a5x;
17770 a2y = a2y+a5y;
17771 a5x = v0-a5x;
17772 a5y = v1-a5y;
17773 t4x = a4x*c3-a4y*c4;
17774 t4y = a4x*c4+a4y*c3;
17775 a4x = t4x;
17776 a4y = t4y;
17777 t5x = -a5x*c3-a5y*c4;
17778 t5y = a5x*c4-a5y*c3;
17779 a5x = t5x;
17780 a5y = t5y;
17781 t1x = a1x+a2x;
17782 t1y = a1y+a2y;
17783 a0x = a0x+t1x;
17784 a0y = a0y+t1y;
17785 m1x = c1*t1x;
17786 m1y = c1*t1y;
17787 m2x = c2*(a1y-a2y);
17788 m2y = c2*(a2x-a1x);
17789 s1x = a0x+m1x;
17790 s1y = a0y+m1y;
17791 a1x = s1x+m2x;
17792 a1y = s1y+m2y;
17793 a2x = s1x-m2x;
17794 a2y = s1y-m2y;
17795 t1x = a4x+a5x;
17796 t1y = a4y+a5y;
17797 a3x = a3x+t1x;
17798 a3y = a3y+t1y;
17799 m1x = c1*t1x;
17800 m1y = c1*t1y;
17801 m2x = c2*(a4y-a5y);
17802 m2y = c2*(a5x-a4x);
17803 s1x = a3x+m1x;
17804 s1y = a3y+m1y;
17805 a4x = s1x+m2x;
17806 a4y = s1y+m2y;
17807 a5x = s1x-m2x;
17808 a5y = s1y-m2y;
17809 a->ptr.p_double[aoffset+0] = a0x;
17810 a->ptr.p_double[aoffset+1] = a0y;
17811 a->ptr.p_double[aoffset+2] = a3x;
17812 a->ptr.p_double[aoffset+3] = a3y;
17813 a->ptr.p_double[aoffset+4] = a1x;
17814 a->ptr.p_double[aoffset+5] = a1y;
17815 a->ptr.p_double[aoffset+6] = a4x;
17816 a->ptr.p_double[aoffset+7] = a4y;
17817 a->ptr.p_double[aoffset+8] = a2x;
17818 a->ptr.p_double[aoffset+9] = a2y;
17819 a->ptr.p_double[aoffset+10] = a5x;
17820 a->ptr.p_double[aoffset+11] = a5y;
17821 }
17822 return;
17823 }
17824 }
17825
17826
17827 /*************************************************************************
17828 This subroutine applies complex "integrated" codelet FFT to input/output
17829 array A. "Integrated" codelet differs from "normal" one in following ways:
17830 * it can work with MicrovectorSize>1
17831 * hence, it can be used in Cooley-Tukey FFT without transpositions
17832 * it performs inlined multiplication by twiddle factors of Cooley-Tukey
17833 FFT with N2=MicrovectorSize/2.
17834
17835 INPUT PARAMETERS:
17836 A - array, must be large enough for plan to work
17837 Offs - offset of the subarray to process
17838 OperandsCnt - operands count (see description of FastTransformPlan)
17839 OperandSize - operand size (see description of FastTransformPlan)
17840 MicrovectorSize-microvector size, must be 1
17841
17842 OUTPUT PARAMETERS:
17843 A - transformed array
17844
17845 -- ALGLIB --
17846 Copyright 05.04.2013 by Bochkanov Sergey
17847 *************************************************************************/
ftbase_ftapplycomplexcodelettwfft(ae_vector * a,ae_int_t offs,ae_int_t operandscnt,ae_int_t operandsize,ae_int_t microvectorsize,ae_state * _state)17848 static void ftbase_ftapplycomplexcodelettwfft(/* Real */ ae_vector* a,
17849 ae_int_t offs,
17850 ae_int_t operandscnt,
17851 ae_int_t operandsize,
17852 ae_int_t microvectorsize,
17853 ae_state *_state)
17854 {
17855 ae_int_t opidx;
17856 ae_int_t mvidx;
17857 ae_int_t n;
17858 ae_int_t m;
17859 ae_int_t aoffset0;
17860 ae_int_t aoffset2;
17861 ae_int_t aoffset4;
17862 ae_int_t aoffset6;
17863 ae_int_t aoffset8;
17864 ae_int_t aoffset10;
17865 double a0x;
17866 double a0y;
17867 double a1x;
17868 double a1y;
17869 double a2x;
17870 double a2y;
17871 double a3x;
17872 double a3y;
17873 double a4x;
17874 double a4y;
17875 double a5x;
17876 double a5y;
17877 double v0;
17878 double v1;
17879 double v2;
17880 double v3;
17881 double q0x;
17882 double q0y;
17883 double t1x;
17884 double t1y;
17885 double t2x;
17886 double t2y;
17887 double t3x;
17888 double t3y;
17889 double t4x;
17890 double t4y;
17891 double t5x;
17892 double t5y;
17893 double m1x;
17894 double m1y;
17895 double m2x;
17896 double m2y;
17897 double m3x;
17898 double m3y;
17899 double m4x;
17900 double m4y;
17901 double m5x;
17902 double m5y;
17903 double s1x;
17904 double s1y;
17905 double s2x;
17906 double s2y;
17907 double s3x;
17908 double s3y;
17909 double s4x;
17910 double s4y;
17911 double s5x;
17912 double s5y;
17913 double c1;
17914 double c2;
17915 double c3;
17916 double c4;
17917 double c5;
17918 double v;
17919 double tw0;
17920 double tw1;
17921 double twx;
17922 double twxm1;
17923 double twy;
17924 double tw2x;
17925 double tw2y;
17926 double tw3x;
17927 double tw3y;
17928 double tw4x;
17929 double tw4y;
17930 double tw5x;
17931 double tw5y;
17932
17933
17934 ae_assert(operandscnt>=1, "FTApplyComplexCodeletFFT: OperandsCnt<1", _state);
17935 ae_assert(operandsize>=1, "FTApplyComplexCodeletFFT: OperandSize<1", _state);
17936 ae_assert(microvectorsize>=1, "FTApplyComplexCodeletFFT: MicrovectorSize<>1", _state);
17937 ae_assert(microvectorsize%2==0, "FTApplyComplexCodeletFFT: MicrovectorSize is not even", _state);
17938 n = operandsize;
17939 m = microvectorsize/2;
17940
17941 /*
17942 * Hard-coded transforms for different N's
17943 */
17944 ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletTwFFT: N>MaxRadix", _state);
17945 if( n==2 )
17946 {
17947 v = -2*ae_pi/(n*m);
17948 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
17949 tw1 = ae_sin(v, _state);
17950 for(opidx=0; opidx<=operandscnt-1; opidx++)
17951 {
17952 aoffset0 = offs+opidx*operandsize*microvectorsize;
17953 aoffset2 = aoffset0+microvectorsize;
17954 twxm1 = 0.0;
17955 twy = 0.0;
17956 for(mvidx=0; mvidx<=m-1; mvidx++)
17957 {
17958 a0x = a->ptr.p_double[aoffset0];
17959 a0y = a->ptr.p_double[aoffset0+1];
17960 a1x = a->ptr.p_double[aoffset2];
17961 a1y = a->ptr.p_double[aoffset2+1];
17962 v0 = a0x+a1x;
17963 v1 = a0y+a1y;
17964 v2 = a0x-a1x;
17965 v3 = a0y-a1y;
17966 a->ptr.p_double[aoffset0] = v0;
17967 a->ptr.p_double[aoffset0+1] = v1;
17968 a->ptr.p_double[aoffset2] = v2*(1+twxm1)-v3*twy;
17969 a->ptr.p_double[aoffset2+1] = v3*(1+twxm1)+v2*twy;
17970 aoffset0 = aoffset0+2;
17971 aoffset2 = aoffset2+2;
17972 if( (mvidx+1)%ftbase_updatetw==0 )
17973 {
17974 v = -2*ae_pi*(mvidx+1)/(n*m);
17975 twxm1 = ae_sin(0.5*v, _state);
17976 twxm1 = -2*twxm1*twxm1;
17977 twy = ae_sin(v, _state);
17978 }
17979 else
17980 {
17981 v = twxm1+tw0+twxm1*tw0-twy*tw1;
17982 twy = twy+tw1+twxm1*tw1+twy*tw0;
17983 twxm1 = v;
17984 }
17985 }
17986 }
17987 return;
17988 }
17989 if( n==3 )
17990 {
17991 v = -2*ae_pi/(n*m);
17992 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
17993 tw1 = ae_sin(v, _state);
17994 c1 = ae_cos(2*ae_pi/3, _state)-1;
17995 c2 = ae_sin(2*ae_pi/3, _state);
17996 for(opidx=0; opidx<=operandscnt-1; opidx++)
17997 {
17998 aoffset0 = offs+opidx*operandsize*microvectorsize;
17999 aoffset2 = aoffset0+microvectorsize;
18000 aoffset4 = aoffset2+microvectorsize;
18001 twx = 1.0;
18002 twxm1 = 0.0;
18003 twy = 0.0;
18004 for(mvidx=0; mvidx<=m-1; mvidx++)
18005 {
18006 a0x = a->ptr.p_double[aoffset0];
18007 a0y = a->ptr.p_double[aoffset0+1];
18008 a1x = a->ptr.p_double[aoffset2];
18009 a1y = a->ptr.p_double[aoffset2+1];
18010 a2x = a->ptr.p_double[aoffset4];
18011 a2y = a->ptr.p_double[aoffset4+1];
18012 t1x = a1x+a2x;
18013 t1y = a1y+a2y;
18014 a0x = a0x+t1x;
18015 a0y = a0y+t1y;
18016 m1x = c1*t1x;
18017 m1y = c1*t1y;
18018 m2x = c2*(a1y-a2y);
18019 m2y = c2*(a2x-a1x);
18020 s1x = a0x+m1x;
18021 s1y = a0y+m1y;
18022 a1x = s1x+m2x;
18023 a1y = s1y+m2y;
18024 a2x = s1x-m2x;
18025 a2y = s1y-m2y;
18026 tw2x = twx*twx-twy*twy;
18027 tw2y = 2*twx*twy;
18028 a->ptr.p_double[aoffset0] = a0x;
18029 a->ptr.p_double[aoffset0+1] = a0y;
18030 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
18031 a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy;
18032 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
18033 a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y;
18034 aoffset0 = aoffset0+2;
18035 aoffset2 = aoffset2+2;
18036 aoffset4 = aoffset4+2;
18037 if( (mvidx+1)%ftbase_updatetw==0 )
18038 {
18039 v = -2*ae_pi*(mvidx+1)/(n*m);
18040 twxm1 = ae_sin(0.5*v, _state);
18041 twxm1 = -2*twxm1*twxm1;
18042 twy = ae_sin(v, _state);
18043 twx = twxm1+1;
18044 }
18045 else
18046 {
18047 v = twxm1+tw0+twxm1*tw0-twy*tw1;
18048 twy = twy+tw1+twxm1*tw1+twy*tw0;
18049 twxm1 = v;
18050 twx = v+1;
18051 }
18052 }
18053 }
18054 return;
18055 }
18056 if( n==4 )
18057 {
18058 v = -2*ae_pi/(n*m);
18059 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18060 tw1 = ae_sin(v, _state);
18061 for(opidx=0; opidx<=operandscnt-1; opidx++)
18062 {
18063 aoffset0 = offs+opidx*operandsize*microvectorsize;
18064 aoffset2 = aoffset0+microvectorsize;
18065 aoffset4 = aoffset2+microvectorsize;
18066 aoffset6 = aoffset4+microvectorsize;
18067 twx = 1.0;
18068 twxm1 = 0.0;
18069 twy = 0.0;
18070 for(mvidx=0; mvidx<=m-1; mvidx++)
18071 {
18072 a0x = a->ptr.p_double[aoffset0];
18073 a0y = a->ptr.p_double[aoffset0+1];
18074 a1x = a->ptr.p_double[aoffset2];
18075 a1y = a->ptr.p_double[aoffset2+1];
18076 a2x = a->ptr.p_double[aoffset4];
18077 a2y = a->ptr.p_double[aoffset4+1];
18078 a3x = a->ptr.p_double[aoffset6];
18079 a3y = a->ptr.p_double[aoffset6+1];
18080 t1x = a0x+a2x;
18081 t1y = a0y+a2y;
18082 t2x = a1x+a3x;
18083 t2y = a1y+a3y;
18084 m2x = a0x-a2x;
18085 m2y = a0y-a2y;
18086 m3x = a1y-a3y;
18087 m3y = a3x-a1x;
18088 tw2x = twx*twx-twy*twy;
18089 tw2y = 2*twx*twy;
18090 tw3x = twx*tw2x-twy*tw2y;
18091 tw3y = twx*tw2y+twy*tw2x;
18092 a1x = m2x+m3x;
18093 a1y = m2y+m3y;
18094 a2x = t1x-t2x;
18095 a2y = t1y-t2y;
18096 a3x = m2x-m3x;
18097 a3y = m2y-m3y;
18098 a->ptr.p_double[aoffset0] = t1x+t2x;
18099 a->ptr.p_double[aoffset0+1] = t1y+t2y;
18100 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
18101 a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy;
18102 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
18103 a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y;
18104 a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y;
18105 a->ptr.p_double[aoffset6+1] = a3y*tw3x+a3x*tw3y;
18106 aoffset0 = aoffset0+2;
18107 aoffset2 = aoffset2+2;
18108 aoffset4 = aoffset4+2;
18109 aoffset6 = aoffset6+2;
18110 if( (mvidx+1)%ftbase_updatetw==0 )
18111 {
18112 v = -2*ae_pi*(mvidx+1)/(n*m);
18113 twxm1 = ae_sin(0.5*v, _state);
18114 twxm1 = -2*twxm1*twxm1;
18115 twy = ae_sin(v, _state);
18116 twx = twxm1+1;
18117 }
18118 else
18119 {
18120 v = twxm1+tw0+twxm1*tw0-twy*tw1;
18121 twy = twy+tw1+twxm1*tw1+twy*tw0;
18122 twxm1 = v;
18123 twx = v+1;
18124 }
18125 }
18126 }
18127 return;
18128 }
18129 if( n==5 )
18130 {
18131 v = -2*ae_pi/(n*m);
18132 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18133 tw1 = ae_sin(v, _state);
18134 v = 2*ae_pi/5;
18135 c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
18136 c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
18137 c3 = -ae_sin(v, _state);
18138 c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state));
18139 c5 = ae_sin(v, _state)-ae_sin(2*v, _state);
18140 for(opidx=0; opidx<=operandscnt-1; opidx++)
18141 {
18142 aoffset0 = offs+opidx*operandsize*microvectorsize;
18143 aoffset2 = aoffset0+microvectorsize;
18144 aoffset4 = aoffset2+microvectorsize;
18145 aoffset6 = aoffset4+microvectorsize;
18146 aoffset8 = aoffset6+microvectorsize;
18147 twx = 1.0;
18148 twxm1 = 0.0;
18149 twy = 0.0;
18150 for(mvidx=0; mvidx<=m-1; mvidx++)
18151 {
18152 a0x = a->ptr.p_double[aoffset0];
18153 a0y = a->ptr.p_double[aoffset0+1];
18154 a1x = a->ptr.p_double[aoffset2];
18155 a1y = a->ptr.p_double[aoffset2+1];
18156 a2x = a->ptr.p_double[aoffset4];
18157 a2y = a->ptr.p_double[aoffset4+1];
18158 a3x = a->ptr.p_double[aoffset6];
18159 a3y = a->ptr.p_double[aoffset6+1];
18160 a4x = a->ptr.p_double[aoffset8];
18161 a4y = a->ptr.p_double[aoffset8+1];
18162 t1x = a1x+a4x;
18163 t1y = a1y+a4y;
18164 t2x = a2x+a3x;
18165 t2y = a2y+a3y;
18166 t3x = a1x-a4x;
18167 t3y = a1y-a4y;
18168 t4x = a3x-a2x;
18169 t4y = a3y-a2y;
18170 t5x = t1x+t2x;
18171 t5y = t1y+t2y;
18172 q0x = a0x+t5x;
18173 q0y = a0y+t5y;
18174 m1x = c1*t5x;
18175 m1y = c1*t5y;
18176 m2x = c2*(t1x-t2x);
18177 m2y = c2*(t1y-t2y);
18178 m3x = -c3*(t3y+t4y);
18179 m3y = c3*(t3x+t4x);
18180 m4x = -c4*t4y;
18181 m4y = c4*t4x;
18182 m5x = -c5*t3y;
18183 m5y = c5*t3x;
18184 s3x = m3x-m4x;
18185 s3y = m3y-m4y;
18186 s5x = m3x+m5x;
18187 s5y = m3y+m5y;
18188 s1x = q0x+m1x;
18189 s1y = q0y+m1y;
18190 s2x = s1x+m2x;
18191 s2y = s1y+m2y;
18192 s4x = s1x-m2x;
18193 s4y = s1y-m2y;
18194 tw2x = twx*twx-twy*twy;
18195 tw2y = 2*twx*twy;
18196 tw3x = twx*tw2x-twy*tw2y;
18197 tw3y = twx*tw2y+twy*tw2x;
18198 tw4x = tw2x*tw2x-tw2y*tw2y;
18199 tw4y = tw2x*tw2y+tw2y*tw2x;
18200 a1x = s2x+s3x;
18201 a1y = s2y+s3y;
18202 a2x = s4x+s5x;
18203 a2y = s4y+s5y;
18204 a3x = s4x-s5x;
18205 a3y = s4y-s5y;
18206 a4x = s2x-s3x;
18207 a4y = s2y-s3y;
18208 a->ptr.p_double[aoffset0] = q0x;
18209 a->ptr.p_double[aoffset0+1] = q0y;
18210 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
18211 a->ptr.p_double[aoffset2+1] = a1x*twy+a1y*twx;
18212 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
18213 a->ptr.p_double[aoffset4+1] = a2x*tw2y+a2y*tw2x;
18214 a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y;
18215 a->ptr.p_double[aoffset6+1] = a3x*tw3y+a3y*tw3x;
18216 a->ptr.p_double[aoffset8] = a4x*tw4x-a4y*tw4y;
18217 a->ptr.p_double[aoffset8+1] = a4x*tw4y+a4y*tw4x;
18218 aoffset0 = aoffset0+2;
18219 aoffset2 = aoffset2+2;
18220 aoffset4 = aoffset4+2;
18221 aoffset6 = aoffset6+2;
18222 aoffset8 = aoffset8+2;
18223 if( (mvidx+1)%ftbase_updatetw==0 )
18224 {
18225 v = -2*ae_pi*(mvidx+1)/(n*m);
18226 twxm1 = ae_sin(0.5*v, _state);
18227 twxm1 = -2*twxm1*twxm1;
18228 twy = ae_sin(v, _state);
18229 twx = twxm1+1;
18230 }
18231 else
18232 {
18233 v = twxm1+tw0+twxm1*tw0-twy*tw1;
18234 twy = twy+tw1+twxm1*tw1+twy*tw0;
18235 twxm1 = v;
18236 twx = v+1;
18237 }
18238 }
18239 }
18240 return;
18241 }
18242 if( n==6 )
18243 {
18244 c1 = ae_cos(2*ae_pi/3, _state)-1;
18245 c2 = ae_sin(2*ae_pi/3, _state);
18246 c3 = ae_cos(-ae_pi/3, _state);
18247 c4 = ae_sin(-ae_pi/3, _state);
18248 v = -2*ae_pi/(n*m);
18249 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18250 tw1 = ae_sin(v, _state);
18251 for(opidx=0; opidx<=operandscnt-1; opidx++)
18252 {
18253 aoffset0 = offs+opidx*operandsize*microvectorsize;
18254 aoffset2 = aoffset0+microvectorsize;
18255 aoffset4 = aoffset2+microvectorsize;
18256 aoffset6 = aoffset4+microvectorsize;
18257 aoffset8 = aoffset6+microvectorsize;
18258 aoffset10 = aoffset8+microvectorsize;
18259 twx = 1.0;
18260 twxm1 = 0.0;
18261 twy = 0.0;
18262 for(mvidx=0; mvidx<=m-1; mvidx++)
18263 {
18264 a0x = a->ptr.p_double[aoffset0+0];
18265 a0y = a->ptr.p_double[aoffset0+1];
18266 a1x = a->ptr.p_double[aoffset2+0];
18267 a1y = a->ptr.p_double[aoffset2+1];
18268 a2x = a->ptr.p_double[aoffset4+0];
18269 a2y = a->ptr.p_double[aoffset4+1];
18270 a3x = a->ptr.p_double[aoffset6+0];
18271 a3y = a->ptr.p_double[aoffset6+1];
18272 a4x = a->ptr.p_double[aoffset8+0];
18273 a4y = a->ptr.p_double[aoffset8+1];
18274 a5x = a->ptr.p_double[aoffset10+0];
18275 a5y = a->ptr.p_double[aoffset10+1];
18276 v0 = a0x;
18277 v1 = a0y;
18278 a0x = a0x+a3x;
18279 a0y = a0y+a3y;
18280 a3x = v0-a3x;
18281 a3y = v1-a3y;
18282 v0 = a1x;
18283 v1 = a1y;
18284 a1x = a1x+a4x;
18285 a1y = a1y+a4y;
18286 a4x = v0-a4x;
18287 a4y = v1-a4y;
18288 v0 = a2x;
18289 v1 = a2y;
18290 a2x = a2x+a5x;
18291 a2y = a2y+a5y;
18292 a5x = v0-a5x;
18293 a5y = v1-a5y;
18294 t4x = a4x*c3-a4y*c4;
18295 t4y = a4x*c4+a4y*c3;
18296 a4x = t4x;
18297 a4y = t4y;
18298 t5x = -a5x*c3-a5y*c4;
18299 t5y = a5x*c4-a5y*c3;
18300 a5x = t5x;
18301 a5y = t5y;
18302 t1x = a1x+a2x;
18303 t1y = a1y+a2y;
18304 a0x = a0x+t1x;
18305 a0y = a0y+t1y;
18306 m1x = c1*t1x;
18307 m1y = c1*t1y;
18308 m2x = c2*(a1y-a2y);
18309 m2y = c2*(a2x-a1x);
18310 s1x = a0x+m1x;
18311 s1y = a0y+m1y;
18312 a1x = s1x+m2x;
18313 a1y = s1y+m2y;
18314 a2x = s1x-m2x;
18315 a2y = s1y-m2y;
18316 t1x = a4x+a5x;
18317 t1y = a4y+a5y;
18318 a3x = a3x+t1x;
18319 a3y = a3y+t1y;
18320 m1x = c1*t1x;
18321 m1y = c1*t1y;
18322 m2x = c2*(a4y-a5y);
18323 m2y = c2*(a5x-a4x);
18324 s1x = a3x+m1x;
18325 s1y = a3y+m1y;
18326 a4x = s1x+m2x;
18327 a4y = s1y+m2y;
18328 a5x = s1x-m2x;
18329 a5y = s1y-m2y;
18330 tw2x = twx*twx-twy*twy;
18331 tw2y = 2*twx*twy;
18332 tw3x = twx*tw2x-twy*tw2y;
18333 tw3y = twx*tw2y+twy*tw2x;
18334 tw4x = tw2x*tw2x-tw2y*tw2y;
18335 tw4y = 2*tw2x*tw2y;
18336 tw5x = tw3x*tw2x-tw3y*tw2y;
18337 tw5y = tw3x*tw2y+tw3y*tw2x;
18338 a->ptr.p_double[aoffset0+0] = a0x;
18339 a->ptr.p_double[aoffset0+1] = a0y;
18340 a->ptr.p_double[aoffset2+0] = a3x*twx-a3y*twy;
18341 a->ptr.p_double[aoffset2+1] = a3y*twx+a3x*twy;
18342 a->ptr.p_double[aoffset4+0] = a1x*tw2x-a1y*tw2y;
18343 a->ptr.p_double[aoffset4+1] = a1y*tw2x+a1x*tw2y;
18344 a->ptr.p_double[aoffset6+0] = a4x*tw3x-a4y*tw3y;
18345 a->ptr.p_double[aoffset6+1] = a4y*tw3x+a4x*tw3y;
18346 a->ptr.p_double[aoffset8+0] = a2x*tw4x-a2y*tw4y;
18347 a->ptr.p_double[aoffset8+1] = a2y*tw4x+a2x*tw4y;
18348 a->ptr.p_double[aoffset10+0] = a5x*tw5x-a5y*tw5y;
18349 a->ptr.p_double[aoffset10+1] = a5y*tw5x+a5x*tw5y;
18350 aoffset0 = aoffset0+2;
18351 aoffset2 = aoffset2+2;
18352 aoffset4 = aoffset4+2;
18353 aoffset6 = aoffset6+2;
18354 aoffset8 = aoffset8+2;
18355 aoffset10 = aoffset10+2;
18356 if( (mvidx+1)%ftbase_updatetw==0 )
18357 {
18358 v = -2*ae_pi*(mvidx+1)/(n*m);
18359 twxm1 = ae_sin(0.5*v, _state);
18360 twxm1 = -2*twxm1*twxm1;
18361 twy = ae_sin(v, _state);
18362 twx = twxm1+1;
18363 }
18364 else
18365 {
18366 v = twxm1+tw0+twxm1*tw0-twy*tw1;
18367 twy = twy+tw1+twxm1*tw1+twy*tw0;
18368 twxm1 = v;
18369 twx = v+1;
18370 }
18371 }
18372 }
18373 return;
18374 }
18375 }
18376
18377
18378 /*************************************************************************
18379 This subroutine precomputes data for complex Bluestein's FFT and writes
18380 them to array PrecR[] at specified offset. It is responsibility of the
18381 caller to make sure that PrecR[] is large enough.
18382
18383 INPUT PARAMETERS:
18384 N - original size of the transform
18385 M - size of the "padded" Bluestein's transform
18386 PrecR - preallocated array
18387 Offs - offset
18388
18389 OUTPUT PARAMETERS:
18390 PrecR - data at Offs:Offs+4*M-1 are modified:
18391 * PrecR[Offs:Offs+2*M-1] stores Z[k]=exp(i*pi*k^2/N)
18392 * PrecR[Offs+2*M:Offs+4*M-1] stores FFT of the Z
18393 Other parts of PrecR are unchanged.
18394
18395 NOTE: this function performs internal M-point FFT. It allocates temporary
18396 plan which is destroyed after leaving this function.
18397
18398 -- ALGLIB --
18399 Copyright 08.05.2013 by Bochkanov Sergey
18400 *************************************************************************/
ftbase_ftprecomputebluesteinsfft(ae_int_t n,ae_int_t m,ae_vector * precr,ae_int_t offs,ae_state * _state)18401 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n,
18402 ae_int_t m,
18403 /* Real */ ae_vector* precr,
18404 ae_int_t offs,
18405 ae_state *_state)
18406 {
18407 ae_frame _frame_block;
18408 ae_int_t i;
18409 double bx;
18410 double by;
18411 fasttransformplan plan;
18412
18413 ae_frame_make(_state, &_frame_block);
18414 memset(&plan, 0, sizeof(plan));
18415 _fasttransformplan_init(&plan, _state, ae_true);
18416
18417
18418 /*
18419 * Fill first half of PrecR with b[k] = exp(i*pi*k^2/N)
18420 */
18421 for(i=0; i<=2*m-1; i++)
18422 {
18423 precr->ptr.p_double[offs+i] = (double)(0);
18424 }
18425 for(i=0; i<=n-1; i++)
18426 {
18427 bx = ae_cos(ae_pi/n*i*i, _state);
18428 by = ae_sin(ae_pi/n*i*i, _state);
18429 precr->ptr.p_double[offs+2*i+0] = bx;
18430 precr->ptr.p_double[offs+2*i+1] = by;
18431 precr->ptr.p_double[offs+2*((m-i)%m)+0] = bx;
18432 precr->ptr.p_double[offs+2*((m-i)%m)+1] = by;
18433 }
18434
18435 /*
18436 * Precomputed FFT
18437 */
18438 ftcomplexfftplan(m, 1, &plan, _state);
18439 for(i=0; i<=2*m-1; i++)
18440 {
18441 precr->ptr.p_double[offs+2*m+i] = precr->ptr.p_double[offs+i];
18442 }
18443 ftbase_ftapplysubplan(&plan, 0, precr, offs+2*m, 0, &plan.buffer, 1, _state);
18444 ae_frame_leave(_state);
18445 }
18446
18447
18448 /*************************************************************************
18449 This subroutine applies complex Bluestein's FFT to input/output array A.
18450
18451 INPUT PARAMETERS:
18452 Plan - transformation plan
18453 A - array, must be large enough for plan to work
18454 ABase - base offset in array A, this value points to start of
18455 subarray whose length is equal to length of the plan
18456 AOffset - offset with respect to ABase, 0<=AOffset<PlanLength.
18457 This is an offset within large PlanLength-subarray of
18458 the chunk to process.
18459 OperandsCnt - number of repeated operands (length N each)
18460 N - original data length (measured in complex numbers)
18461 M - padded data length (measured in complex numbers)
18462 PrecOffs - offset of the precomputed data for the plan
18463 SubPlan - position of the length-M FFT subplan which is used by
18464 transformation
18465 BufA - temporary buffer, at least 2*M elements
18466 BufB - temporary buffer, at least 2*M elements
18467 BufC - temporary buffer, at least 2*M elements
18468 BufD - temporary buffer, at least 2*M elements
18469
18470 OUTPUT PARAMETERS:
18471 A - transformed array
18472
18473 -- ALGLIB --
18474 Copyright 05.04.2013 by Bochkanov Sergey
18475 *************************************************************************/
ftbase_ftbluesteinsfft(fasttransformplan * plan,ae_vector * a,ae_int_t abase,ae_int_t aoffset,ae_int_t operandscnt,ae_int_t n,ae_int_t m,ae_int_t precoffs,ae_int_t subplan,ae_vector * bufa,ae_vector * bufb,ae_vector * bufc,ae_vector * bufd,ae_state * _state)18476 static void ftbase_ftbluesteinsfft(fasttransformplan* plan,
18477 /* Real */ ae_vector* a,
18478 ae_int_t abase,
18479 ae_int_t aoffset,
18480 ae_int_t operandscnt,
18481 ae_int_t n,
18482 ae_int_t m,
18483 ae_int_t precoffs,
18484 ae_int_t subplan,
18485 /* Real */ ae_vector* bufa,
18486 /* Real */ ae_vector* bufb,
18487 /* Real */ ae_vector* bufc,
18488 /* Real */ ae_vector* bufd,
18489 ae_state *_state)
18490 {
18491 ae_int_t op;
18492 ae_int_t i;
18493 double x;
18494 double y;
18495 double bx;
18496 double by;
18497 double ax;
18498 double ay;
18499 double rx;
18500 double ry;
18501 ae_int_t p0;
18502 ae_int_t p1;
18503 ae_int_t p2;
18504
18505
18506 for(op=0; op<=operandscnt-1; op++)
18507 {
18508
18509 /*
18510 * Multiply A by conj(Z), store to buffer.
18511 * Pad A by zeros.
18512 *
18513 * NOTE: Z[k]=exp(i*pi*k^2/N)
18514 */
18515 p0 = abase+aoffset+op*2*n;
18516 p1 = precoffs;
18517 for(i=0; i<=n-1; i++)
18518 {
18519 x = a->ptr.p_double[p0+0];
18520 y = a->ptr.p_double[p0+1];
18521 bx = plan->precr.ptr.p_double[p1+0];
18522 by = -plan->precr.ptr.p_double[p1+1];
18523 bufa->ptr.p_double[2*i+0] = x*bx-y*by;
18524 bufa->ptr.p_double[2*i+1] = x*by+y*bx;
18525 p0 = p0+2;
18526 p1 = p1+2;
18527 }
18528 for(i=2*n; i<=2*m-1; i++)
18529 {
18530 bufa->ptr.p_double[i] = (double)(0);
18531 }
18532
18533 /*
18534 * Perform convolution of A and Z (using precomputed
18535 * FFT of Z stored in Plan structure).
18536 */
18537 ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state);
18538 p0 = 0;
18539 p1 = precoffs+2*m;
18540 for(i=0; i<=m-1; i++)
18541 {
18542 ax = bufa->ptr.p_double[p0+0];
18543 ay = bufa->ptr.p_double[p0+1];
18544 bx = plan->precr.ptr.p_double[p1+0];
18545 by = plan->precr.ptr.p_double[p1+1];
18546 bufa->ptr.p_double[p0+0] = ax*bx-ay*by;
18547 bufa->ptr.p_double[p0+1] = -(ax*by+ay*bx);
18548 p0 = p0+2;
18549 p1 = p1+2;
18550 }
18551 ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state);
18552
18553 /*
18554 * Post processing:
18555 * A:=conj(Z)*conj(A)/M
18556 * Here conj(A)/M corresponds to last stage of inverse DFT,
18557 * and conj(Z) comes from Bluestein's FFT algorithm.
18558 */
18559 p0 = precoffs;
18560 p1 = 0;
18561 p2 = abase+aoffset+op*2*n;
18562 for(i=0; i<=n-1; i++)
18563 {
18564 bx = plan->precr.ptr.p_double[p0+0];
18565 by = plan->precr.ptr.p_double[p0+1];
18566 rx = bufa->ptr.p_double[p1+0]/m;
18567 ry = -bufa->ptr.p_double[p1+1]/m;
18568 a->ptr.p_double[p2+0] = rx*bx-ry*(-by);
18569 a->ptr.p_double[p2+1] = rx*(-by)+ry*bx;
18570 p0 = p0+2;
18571 p1 = p1+2;
18572 p2 = p2+2;
18573 }
18574 }
18575 }
18576
18577
18578 /*************************************************************************
18579 This subroutine precomputes data for complex Rader's FFT and writes them
18580 to array PrecR[] at specified offset. It is responsibility of the caller
18581 to make sure that PrecR[] is large enough.
18582
18583 INPUT PARAMETERS:
18584 N - original size of the transform (before reduction to N-1)
18585 RQ - primitive root modulo N
18586 RIQ - inverse of primitive root modulo N
18587 PrecR - preallocated array
18588 Offs - offset
18589
18590 OUTPUT PARAMETERS:
18591 PrecR - data at Offs:Offs+2*(N-1)-1 store FFT of Rader's factors,
18592 other parts of PrecR are unchanged.
18593
18594 NOTE: this function performs internal (N-1)-point FFT. It allocates temporary
18595 plan which is destroyed after leaving this function.
18596
18597 -- ALGLIB --
18598 Copyright 08.05.2013 by Bochkanov Sergey
18599 *************************************************************************/
ftbase_ftprecomputeradersfft(ae_int_t n,ae_int_t rq,ae_int_t riq,ae_vector * precr,ae_int_t offs,ae_state * _state)18600 static void ftbase_ftprecomputeradersfft(ae_int_t n,
18601 ae_int_t rq,
18602 ae_int_t riq,
18603 /* Real */ ae_vector* precr,
18604 ae_int_t offs,
18605 ae_state *_state)
18606 {
18607 ae_frame _frame_block;
18608 ae_int_t q;
18609 fasttransformplan plan;
18610 ae_int_t kiq;
18611 double v;
18612
18613 ae_frame_make(_state, &_frame_block);
18614 memset(&plan, 0, sizeof(plan));
18615 _fasttransformplan_init(&plan, _state, ae_true);
18616
18617
18618 /*
18619 * Fill PrecR with Rader factors, perform FFT
18620 */
18621 kiq = 1;
18622 for(q=0; q<=n-2; q++)
18623 {
18624 v = -2*ae_pi*kiq/n;
18625 precr->ptr.p_double[offs+2*q+0] = ae_cos(v, _state);
18626 precr->ptr.p_double[offs+2*q+1] = ae_sin(v, _state);
18627 kiq = kiq*riq%n;
18628 }
18629 ftcomplexfftplan(n-1, 1, &plan, _state);
18630 ftbase_ftapplysubplan(&plan, 0, precr, offs, 0, &plan.buffer, 1, _state);
18631 ae_frame_leave(_state);
18632 }
18633
18634
18635 /*************************************************************************
18636 This subroutine applies complex Rader's FFT to input/output array A.
18637
18638 INPUT PARAMETERS:
18639 A - array, must be large enough for plan to work
18640 ABase - base offset in array A, this value points to start of
18641 subarray whose length is equal to length of the plan
18642 AOffset - offset with respect to ABase, 0<=AOffset<PlanLength.
18643 This is an offset within large PlanLength-subarray of
18644 the chunk to process.
18645 OperandsCnt - number of repeated operands (length N each)
18646 N - original data length (measured in complex numbers)
18647 SubPlan - position of the (N-1)-point FFT subplan which is used
18648 by transformation
18649 RQ - primitive root modulo N
18650 RIQ - inverse of primitive root modulo N
18651 PrecOffs - offset of the precomputed data for the plan
18652 Buf - temporary array
18653
18654 OUTPUT PARAMETERS:
18655 A - transformed array
18656
18657 -- ALGLIB --
18658 Copyright 05.04.2013 by Bochkanov Sergey
18659 *************************************************************************/
ftbase_ftradersfft(fasttransformplan * plan,ae_vector * a,ae_int_t abase,ae_int_t aoffset,ae_int_t operandscnt,ae_int_t n,ae_int_t subplan,ae_int_t rq,ae_int_t riq,ae_int_t precoffs,ae_vector * buf,ae_state * _state)18660 static void ftbase_ftradersfft(fasttransformplan* plan,
18661 /* Real */ ae_vector* a,
18662 ae_int_t abase,
18663 ae_int_t aoffset,
18664 ae_int_t operandscnt,
18665 ae_int_t n,
18666 ae_int_t subplan,
18667 ae_int_t rq,
18668 ae_int_t riq,
18669 ae_int_t precoffs,
18670 /* Real */ ae_vector* buf,
18671 ae_state *_state)
18672 {
18673 ae_int_t opidx;
18674 ae_int_t i;
18675 ae_int_t q;
18676 ae_int_t kq;
18677 ae_int_t kiq;
18678 double x0;
18679 double y0;
18680 ae_int_t p0;
18681 ae_int_t p1;
18682 double ax;
18683 double ay;
18684 double bx;
18685 double by;
18686 double rx;
18687 double ry;
18688
18689
18690 ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1", _state);
18691
18692 /*
18693 * Process operands
18694 */
18695 for(opidx=0; opidx<=operandscnt-1; opidx++)
18696 {
18697
18698 /*
18699 * fill QA
18700 */
18701 kq = 1;
18702 p0 = abase+aoffset+opidx*n*2;
18703 p1 = aoffset+opidx*n*2;
18704 rx = a->ptr.p_double[p0+0];
18705 ry = a->ptr.p_double[p0+1];
18706 x0 = rx;
18707 y0 = ry;
18708 for(q=0; q<=n-2; q++)
18709 {
18710 ax = a->ptr.p_double[p0+2*kq+0];
18711 ay = a->ptr.p_double[p0+2*kq+1];
18712 buf->ptr.p_double[p1+0] = ax;
18713 buf->ptr.p_double[p1+1] = ay;
18714 rx = rx+ax;
18715 ry = ry+ay;
18716 kq = kq*rq%n;
18717 p1 = p1+2;
18718 }
18719 p0 = abase+aoffset+opidx*n*2;
18720 p1 = aoffset+opidx*n*2;
18721 for(q=0; q<=n-2; q++)
18722 {
18723 a->ptr.p_double[p0] = buf->ptr.p_double[p1];
18724 a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1];
18725 p0 = p0+2;
18726 p1 = p1+2;
18727 }
18728
18729 /*
18730 * Convolution
18731 */
18732 ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state);
18733 p0 = abase+aoffset+opidx*n*2;
18734 p1 = precoffs;
18735 for(i=0; i<=n-2; i++)
18736 {
18737 ax = a->ptr.p_double[p0+0];
18738 ay = a->ptr.p_double[p0+1];
18739 bx = plan->precr.ptr.p_double[p1+0];
18740 by = plan->precr.ptr.p_double[p1+1];
18741 a->ptr.p_double[p0+0] = ax*bx-ay*by;
18742 a->ptr.p_double[p0+1] = -(ax*by+ay*bx);
18743 p0 = p0+2;
18744 p1 = p1+2;
18745 }
18746 ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state);
18747 p0 = abase+aoffset+opidx*n*2;
18748 for(i=0; i<=n-2; i++)
18749 {
18750 a->ptr.p_double[p0+0] = a->ptr.p_double[p0+0]/(n-1);
18751 a->ptr.p_double[p0+1] = -a->ptr.p_double[p0+1]/(n-1);
18752 p0 = p0+2;
18753 }
18754
18755 /*
18756 * Result
18757 */
18758 buf->ptr.p_double[aoffset+opidx*n*2+0] = rx;
18759 buf->ptr.p_double[aoffset+opidx*n*2+1] = ry;
18760 kiq = 1;
18761 p0 = aoffset+opidx*n*2;
18762 p1 = abase+aoffset+opidx*n*2;
18763 for(q=0; q<=n-2; q++)
18764 {
18765 buf->ptr.p_double[p0+2*kiq+0] = x0+a->ptr.p_double[p1+0];
18766 buf->ptr.p_double[p0+2*kiq+1] = y0+a->ptr.p_double[p1+1];
18767 kiq = kiq*riq%n;
18768 p1 = p1+2;
18769 }
18770 p0 = abase+aoffset+opidx*n*2;
18771 p1 = aoffset+opidx*n*2;
18772 for(q=0; q<=n-1; q++)
18773 {
18774 a->ptr.p_double[p0] = buf->ptr.p_double[p1];
18775 a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1];
18776 p0 = p0+2;
18777 p1 = p1+2;
18778 }
18779 }
18780 }
18781
18782
18783 /*************************************************************************
18784 Factorizes task size N into product of two smaller sizes N1 and N2
18785
18786 INPUT PARAMETERS:
18787 N - task size, N>0
18788 IsRoot - whether taks is root task (first one in a sequence)
18789
18790 OUTPUT PARAMETERS:
18791 N1, N2 - such numbers that:
18792 * for prime N: N1=N2=0
18793 * for composite N<=MaxRadix: N1=N2=0
18794 * for composite N>MaxRadix: 1<=N1<=N2, N1*N2=N
18795
18796 -- ALGLIB --
18797 Copyright 08.04.2013 by Bochkanov Sergey
18798 *************************************************************************/
ftbase_ftfactorize(ae_int_t n,ae_bool isroot,ae_int_t * n1,ae_int_t * n2,ae_state * _state)18799 static void ftbase_ftfactorize(ae_int_t n,
18800 ae_bool isroot,
18801 ae_int_t* n1,
18802 ae_int_t* n2,
18803 ae_state *_state)
18804 {
18805 ae_int_t j;
18806 ae_int_t k;
18807
18808 *n1 = 0;
18809 *n2 = 0;
18810
18811 ae_assert(n>0, "FTFactorize: N<=0", _state);
18812 *n1 = 0;
18813 *n2 = 0;
18814
18815 /*
18816 * Small N
18817 */
18818 if( n<=ftbase_maxradix )
18819 {
18820 return;
18821 }
18822
18823 /*
18824 * Large N, recursive split
18825 */
18826 if( n>ftbase_recursivethreshold )
18827 {
18828 k = ae_iceil(ae_sqrt((double)(n), _state), _state)+1;
18829 ae_assert(k*k>=n, "FTFactorize: internal error during recursive factorization", _state);
18830 for(j=k; j>=2; j--)
18831 {
18832 if( n%j==0 )
18833 {
18834 *n1 = ae_minint(n/j, j, _state);
18835 *n2 = ae_maxint(n/j, j, _state);
18836 return;
18837 }
18838 }
18839 }
18840
18841 /*
18842 * N>MaxRadix, try to find good codelet
18843 */
18844 for(j=ftbase_maxradix; j>=2; j--)
18845 {
18846 if( n%j==0 )
18847 {
18848 *n1 = j;
18849 *n2 = n/j;
18850 break;
18851 }
18852 }
18853
18854 /*
18855 * In case no good codelet was found,
18856 * try to factorize N into product of ANY primes.
18857 */
18858 if( *n1*(*n2)!=n )
18859 {
18860 for(j=2; j<=n-1; j++)
18861 {
18862 if( n%j==0 )
18863 {
18864 *n1 = j;
18865 *n2 = n/j;
18866 break;
18867 }
18868 if( j*j>n )
18869 {
18870 break;
18871 }
18872 }
18873 }
18874
18875 /*
18876 * normalize
18877 */
18878 if( *n1>(*n2) )
18879 {
18880 j = *n1;
18881 *n1 = *n2;
18882 *n2 = j;
18883 }
18884 }
18885
18886
18887 /*************************************************************************
18888 Returns optimistic estimate of the FFT cost, in UNITs (1 UNIT = 100 KFLOPs)
18889
18890 INPUT PARAMETERS:
18891 N - task size, N>0
18892
18893 RESULU:
18894 cost in UNITs, rounded down to nearest integer
18895
18896 NOTE: If FFT cost is less than 1 UNIT, it will return 0 as result.
18897
18898 -- ALGLIB --
18899 Copyright 08.04.2013 by Bochkanov Sergey
18900 *************************************************************************/
ftbase_ftoptimisticestimate(ae_int_t n,ae_state * _state)18901 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state)
18902 {
18903 ae_int_t result;
18904
18905
18906 ae_assert(n>0, "FTOptimisticEstimate: N<=0", _state);
18907 result = ae_ifloor(1.0E-5*5*n*ae_log((double)(n), _state)/ae_log((double)(2), _state), _state);
18908 return result;
18909 }
18910
18911
18912 /*************************************************************************
18913 Twiddle factors calculation
18914
18915 -- ALGLIB --
18916 Copyright 01.05.2009 by Bochkanov Sergey
18917 *************************************************************************/
ftbase_ffttwcalc(ae_vector * a,ae_int_t aoffset,ae_int_t n1,ae_int_t n2,ae_state * _state)18918 static void ftbase_ffttwcalc(/* Real */ ae_vector* a,
18919 ae_int_t aoffset,
18920 ae_int_t n1,
18921 ae_int_t n2,
18922 ae_state *_state)
18923 {
18924 ae_int_t i;
18925 ae_int_t j2;
18926 ae_int_t n;
18927 ae_int_t halfn1;
18928 ae_int_t offs;
18929 double x;
18930 double y;
18931 double twxm1;
18932 double twy;
18933 double twbasexm1;
18934 double twbasey;
18935 double twrowxm1;
18936 double twrowy;
18937 double tmpx;
18938 double tmpy;
18939 double v;
18940 ae_int_t updatetw2;
18941
18942
18943
18944 /*
18945 * Multiplication by twiddle factors for complex Cooley-Tukey FFT
18946 * with N factorized as N1*N2.
18947 *
18948 * Naive solution to this problem is given below:
18949 *
18950 * > for K:=1 to N2-1 do
18951 * > for J:=1 to N1-1 do
18952 * > begin
18953 * > Idx:=K*N1+J;
18954 * > X:=A[AOffset+2*Idx+0];
18955 * > Y:=A[AOffset+2*Idx+1];
18956 * > TwX:=Cos(-2*Pi()*K*J/(N1*N2));
18957 * > TwY:=Sin(-2*Pi()*K*J/(N1*N2));
18958 * > A[AOffset+2*Idx+0]:=X*TwX-Y*TwY;
18959 * > A[AOffset+2*Idx+1]:=X*TwY+Y*TwX;
18960 * > end;
18961 *
18962 * However, there are exist more efficient solutions.
18963 *
18964 * Each pass of the inner cycle corresponds to multiplication of one
18965 * entry of A by W[k,j]=exp(-I*2*pi*k*j/N). This factor can be rewritten
18966 * as exp(-I*2*pi*k/N)^j. So we can replace costly exponentiation by
18967 * repeated multiplication: W[k,j+1]=W[k,j]*exp(-I*2*pi*k/N), with
18968 * second factor being computed once in the beginning of the iteration.
18969 *
18970 * Also, exp(-I*2*pi*k/N) can be represented as exp(-I*2*pi/N)^k, i.e.
18971 * we have W[K+1,1]=W[K,1]*W[1,1].
18972 *
18973 * In our loop we use following variables:
18974 * * [TwBaseXM1,TwBaseY] = [cos(2*pi/N)-1, sin(2*pi/N)]
18975 * * [TwRowXM1, TwRowY] = [cos(2*pi*I/N)-1, sin(2*pi*I/N)]
18976 * * [TwXM1, TwY] = [cos(2*pi*I*J/N)-1, sin(2*pi*I*J/N)]
18977 *
18978 * Meaning of the variables:
18979 * * [TwXM1,TwY] is current twiddle factor W[I,J]
18980 * * [TwRowXM1, TwRowY] is W[I,1]
18981 * * [TwBaseXM1,TwBaseY] is W[1,1]
18982 *
18983 * During inner loop we multiply current twiddle factor by W[I,1],
18984 * during outer loop we update W[I,1].
18985 *
18986 */
18987 ae_assert(ftbase_updatetw>=2, "FFTTwCalc: internal error - UpdateTw<2", _state);
18988 updatetw2 = ftbase_updatetw/2;
18989 halfn1 = n1/2;
18990 n = n1*n2;
18991 v = -2*ae_pi/n;
18992 twbasexm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18993 twbasey = ae_sin(v, _state);
18994 twrowxm1 = (double)(0);
18995 twrowy = (double)(0);
18996 offs = aoffset;
18997 for(i=0; i<=n2-1; i++)
18998 {
18999
19000 /*
19001 * Initialize twiddle factor for current row
19002 */
19003 twxm1 = (double)(0);
19004 twy = (double)(0);
19005
19006 /*
19007 * N1-point block is separated into 2-point chunks and residual 1-point chunk
19008 * (in case N1 is odd). Unrolled loop is several times faster.
19009 */
19010 for(j2=0; j2<=halfn1-1; j2++)
19011 {
19012
19013 /*
19014 * Processing:
19015 * * process first element in a chunk.
19016 * * update twiddle factor (unconditional update)
19017 * * process second element
19018 * * conditional update of the twiddle factor
19019 */
19020 x = a->ptr.p_double[offs+0];
19021 y = a->ptr.p_double[offs+1];
19022 tmpx = x*(1+twxm1)-y*twy;
19023 tmpy = x*twy+y*(1+twxm1);
19024 a->ptr.p_double[offs+0] = tmpx;
19025 a->ptr.p_double[offs+1] = tmpy;
19026 tmpx = (1+twxm1)*twrowxm1-twy*twrowy;
19027 twy = twy+(1+twxm1)*twrowy+twy*twrowxm1;
19028 twxm1 = twxm1+tmpx;
19029 x = a->ptr.p_double[offs+2];
19030 y = a->ptr.p_double[offs+3];
19031 tmpx = x*(1+twxm1)-y*twy;
19032 tmpy = x*twy+y*(1+twxm1);
19033 a->ptr.p_double[offs+2] = tmpx;
19034 a->ptr.p_double[offs+3] = tmpy;
19035 offs = offs+4;
19036 if( (j2+1)%updatetw2==0&&j2<halfn1-1 )
19037 {
19038
19039 /*
19040 * Recalculate twiddle factor
19041 */
19042 v = -2*ae_pi*i*2*(j2+1)/n;
19043 twxm1 = ae_sin(0.5*v, _state);
19044 twxm1 = -2*twxm1*twxm1;
19045 twy = ae_sin(v, _state);
19046 }
19047 else
19048 {
19049
19050 /*
19051 * Update twiddle factor
19052 */
19053 tmpx = (1+twxm1)*twrowxm1-twy*twrowy;
19054 twy = twy+(1+twxm1)*twrowy+twy*twrowxm1;
19055 twxm1 = twxm1+tmpx;
19056 }
19057 }
19058 if( n1%2==1 )
19059 {
19060
19061 /*
19062 * Handle residual chunk
19063 */
19064 x = a->ptr.p_double[offs+0];
19065 y = a->ptr.p_double[offs+1];
19066 tmpx = x*(1+twxm1)-y*twy;
19067 tmpy = x*twy+y*(1+twxm1);
19068 a->ptr.p_double[offs+0] = tmpx;
19069 a->ptr.p_double[offs+1] = tmpy;
19070 offs = offs+2;
19071 }
19072
19073 /*
19074 * update TwRow: TwRow(new) = TwRow(old)*TwBase
19075 */
19076 if( i<n2-1 )
19077 {
19078 if( (i+1)%ftbase_updatetw==0 )
19079 {
19080 v = -2*ae_pi*(i+1)/n;
19081 twrowxm1 = ae_sin(0.5*v, _state);
19082 twrowxm1 = -2*twrowxm1*twrowxm1;
19083 twrowy = ae_sin(v, _state);
19084 }
19085 else
19086 {
19087 tmpx = twbasexm1+twrowxm1*twbasexm1-twrowy*twbasey;
19088 tmpy = twbasey+twrowxm1*twbasey+twrowy*twbasexm1;
19089 twrowxm1 = twrowxm1+tmpx;
19090 twrowy = twrowy+tmpy;
19091 }
19092 }
19093 }
19094 }
19095
19096
19097 /*************************************************************************
19098 Linear transpose: transpose complex matrix stored in 1-dimensional array
19099
19100 -- ALGLIB --
19101 Copyright 01.05.2009 by Bochkanov Sergey
19102 *************************************************************************/
ftbase_internalcomplexlintranspose(ae_vector * a,ae_int_t m,ae_int_t n,ae_int_t astart,ae_vector * buf,ae_state * _state)19103 static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a,
19104 ae_int_t m,
19105 ae_int_t n,
19106 ae_int_t astart,
19107 /* Real */ ae_vector* buf,
19108 ae_state *_state)
19109 {
19110
19111
19112 ftbase_ffticltrec(a, astart, n, buf, 0, m, m, n, _state);
19113 ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+2*m*n-1));
19114 }
19115
19116
19117 /*************************************************************************
19118 Recurrent subroutine for a InternalComplexLinTranspose
19119
19120 Write A^T to B, where:
19121 * A is m*n complex matrix stored in array A as pairs of real/image values,
19122 beginning from AStart position, with AStride stride
19123 * B is n*m complex matrix stored in array B as pairs of real/image values,
19124 beginning from BStart position, with BStride stride
19125 stride is measured in complex numbers, i.e. in real/image pairs.
19126
19127 -- ALGLIB --
19128 Copyright 01.05.2009 by Bochkanov Sergey
19129 *************************************************************************/
ftbase_ffticltrec(ae_vector * a,ae_int_t astart,ae_int_t astride,ae_vector * b,ae_int_t bstart,ae_int_t bstride,ae_int_t m,ae_int_t n,ae_state * _state)19130 static void ftbase_ffticltrec(/* Real */ ae_vector* a,
19131 ae_int_t astart,
19132 ae_int_t astride,
19133 /* Real */ ae_vector* b,
19134 ae_int_t bstart,
19135 ae_int_t bstride,
19136 ae_int_t m,
19137 ae_int_t n,
19138 ae_state *_state)
19139 {
19140 ae_int_t i;
19141 ae_int_t j;
19142 ae_int_t idx1;
19143 ae_int_t idx2;
19144 ae_int_t m2;
19145 ae_int_t m1;
19146 ae_int_t n1;
19147
19148
19149 if( m==0||n==0 )
19150 {
19151 return;
19152 }
19153 if( ae_maxint(m, n, _state)<=8 )
19154 {
19155 m2 = 2*bstride;
19156 for(i=0; i<=m-1; i++)
19157 {
19158 idx1 = bstart+2*i;
19159 idx2 = astart+2*i*astride;
19160 for(j=0; j<=n-1; j++)
19161 {
19162 b->ptr.p_double[idx1+0] = a->ptr.p_double[idx2+0];
19163 b->ptr.p_double[idx1+1] = a->ptr.p_double[idx2+1];
19164 idx1 = idx1+m2;
19165 idx2 = idx2+2;
19166 }
19167 }
19168 return;
19169 }
19170 if( n>m )
19171 {
19172
19173 /*
19174 * New partition:
19175 *
19176 * "A^T -> B" becomes "(A1 A2)^T -> ( B1 )
19177 * ( B2 )
19178 */
19179 n1 = n/2;
19180 if( n-n1>=8&&n1%8!=0 )
19181 {
19182 n1 = n1+(8-n1%8);
19183 }
19184 ae_assert(n-n1>0, "Assertion failed", _state);
19185 ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
19186 ftbase_ffticltrec(a, astart+2*n1, astride, b, bstart+2*n1*bstride, bstride, m, n-n1, _state);
19187 }
19188 else
19189 {
19190
19191 /*
19192 * New partition:
19193 *
19194 * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 )
19195 * ( A2 )
19196 */
19197 m1 = m/2;
19198 if( m-m1>=8&&m1%8!=0 )
19199 {
19200 m1 = m1+(8-m1%8);
19201 }
19202 ae_assert(m-m1>0, "Assertion failed", _state);
19203 ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
19204 ftbase_ffticltrec(a, astart+2*m1*astride, astride, b, bstart+2*m1, bstride, m-m1, n, _state);
19205 }
19206 }
19207
19208
19209 /*************************************************************************
19210 recurrent subroutine for FFTFindSmoothRec
19211
19212 -- ALGLIB --
19213 Copyright 01.05.2009 by Bochkanov Sergey
19214 *************************************************************************/
ftbase_ftbasefindsmoothrec(ae_int_t n,ae_int_t seed,ae_int_t leastfactor,ae_int_t * best,ae_state * _state)19215 static void ftbase_ftbasefindsmoothrec(ae_int_t n,
19216 ae_int_t seed,
19217 ae_int_t leastfactor,
19218 ae_int_t* best,
19219 ae_state *_state)
19220 {
19221
19222
19223 ae_assert(ftbase_ftbasemaxsmoothfactor<=5, "FTBaseFindSmoothRec: internal error!", _state);
19224 if( seed>=n )
19225 {
19226 *best = ae_minint(*best, seed, _state);
19227 return;
19228 }
19229 if( leastfactor<=2 )
19230 {
19231 ftbase_ftbasefindsmoothrec(n, seed*2, 2, best, _state);
19232 }
19233 if( leastfactor<=3 )
19234 {
19235 ftbase_ftbasefindsmoothrec(n, seed*3, 3, best, _state);
19236 }
19237 if( leastfactor<=5 )
19238 {
19239 ftbase_ftbasefindsmoothrec(n, seed*5, 5, best, _state);
19240 }
19241 }
19242
19243
_fasttransformplan_init(void * _p,ae_state * _state,ae_bool make_automatic)19244 void _fasttransformplan_init(void* _p, ae_state *_state, ae_bool make_automatic)
19245 {
19246 fasttransformplan *p = (fasttransformplan*)_p;
19247 ae_touch_ptr((void*)p);
19248 ae_matrix_init(&p->entries, 0, 0, DT_INT, _state, make_automatic);
19249 ae_vector_init(&p->buffer, 0, DT_REAL, _state, make_automatic);
19250 ae_vector_init(&p->precr, 0, DT_REAL, _state, make_automatic);
19251 ae_vector_init(&p->preci, 0, DT_REAL, _state, make_automatic);
19252 ae_shared_pool_init(&p->bluesteinpool, _state, make_automatic);
19253 }
19254
19255
_fasttransformplan_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)19256 void _fasttransformplan_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
19257 {
19258 fasttransformplan *dst = (fasttransformplan*)_dst;
19259 fasttransformplan *src = (fasttransformplan*)_src;
19260 ae_matrix_init_copy(&dst->entries, &src->entries, _state, make_automatic);
19261 ae_vector_init_copy(&dst->buffer, &src->buffer, _state, make_automatic);
19262 ae_vector_init_copy(&dst->precr, &src->precr, _state, make_automatic);
19263 ae_vector_init_copy(&dst->preci, &src->preci, _state, make_automatic);
19264 ae_shared_pool_init_copy(&dst->bluesteinpool, &src->bluesteinpool, _state, make_automatic);
19265 }
19266
19267
_fasttransformplan_clear(void * _p)19268 void _fasttransformplan_clear(void* _p)
19269 {
19270 fasttransformplan *p = (fasttransformplan*)_p;
19271 ae_touch_ptr((void*)p);
19272 ae_matrix_clear(&p->entries);
19273 ae_vector_clear(&p->buffer);
19274 ae_vector_clear(&p->precr);
19275 ae_vector_clear(&p->preci);
19276 ae_shared_pool_clear(&p->bluesteinpool);
19277 }
19278
19279
_fasttransformplan_destroy(void * _p)19280 void _fasttransformplan_destroy(void* _p)
19281 {
19282 fasttransformplan *p = (fasttransformplan*)_p;
19283 ae_touch_ptr((void*)p);
19284 ae_matrix_destroy(&p->entries);
19285 ae_vector_destroy(&p->buffer);
19286 ae_vector_destroy(&p->precr);
19287 ae_vector_destroy(&p->preci);
19288 ae_shared_pool_destroy(&p->bluesteinpool);
19289 }
19290
19291
19292 #endif
19293 #if defined(AE_COMPILE_HPCCORES) || !defined(AE_PARTIAL_BUILD)
19294
19295
19296 /*************************************************************************
19297 Prepares HPC compuations of chunked gradient with HPCChunkedGradient().
19298 You have to call this function before calling HPCChunkedGradient() for
19299 a new set of weights. You have to call it only once, see example below:
19300
19301 HOW TO PROCESS DATASET WITH THIS FUNCTION:
19302 Grad:=0
19303 HPCPrepareChunkedGradient(Weights, WCount, NTotal, NOut, Buf)
19304 foreach chunk-of-dataset do
19305 HPCChunkedGradient(...)
19306 HPCFinalizeChunkedGradient(Buf, Grad)
19307
19308 *************************************************************************/
hpcpreparechunkedgradient(ae_vector * weights,ae_int_t wcount,ae_int_t ntotal,ae_int_t nin,ae_int_t nout,mlpbuffers * buf,ae_state * _state)19309 void hpcpreparechunkedgradient(/* Real */ ae_vector* weights,
19310 ae_int_t wcount,
19311 ae_int_t ntotal,
19312 ae_int_t nin,
19313 ae_int_t nout,
19314 mlpbuffers* buf,
19315 ae_state *_state)
19316 {
19317 ae_int_t i;
19318 ae_int_t batch4size;
19319 ae_int_t chunksize;
19320
19321
19322 chunksize = 4;
19323 batch4size = 3*chunksize*ntotal+chunksize*(2*nout+1);
19324 if( buf->xy.rows<chunksize||buf->xy.cols<nin+nout )
19325 {
19326 ae_matrix_set_length(&buf->xy, chunksize, nin+nout, _state);
19327 }
19328 if( buf->xy2.rows<chunksize||buf->xy2.cols<nin+nout )
19329 {
19330 ae_matrix_set_length(&buf->xy2, chunksize, nin+nout, _state);
19331 }
19332 if( buf->xyrow.cnt<nin+nout )
19333 {
19334 ae_vector_set_length(&buf->xyrow, nin+nout, _state);
19335 }
19336 if( buf->x.cnt<nin )
19337 {
19338 ae_vector_set_length(&buf->x, nin, _state);
19339 }
19340 if( buf->y.cnt<nout )
19341 {
19342 ae_vector_set_length(&buf->y, nout, _state);
19343 }
19344 if( buf->desiredy.cnt<nout )
19345 {
19346 ae_vector_set_length(&buf->desiredy, nout, _state);
19347 }
19348 if( buf->batch4buf.cnt<batch4size )
19349 {
19350 ae_vector_set_length(&buf->batch4buf, batch4size, _state);
19351 }
19352 if( buf->hpcbuf.cnt<wcount )
19353 {
19354 ae_vector_set_length(&buf->hpcbuf, wcount, _state);
19355 }
19356 if( buf->g.cnt<wcount )
19357 {
19358 ae_vector_set_length(&buf->g, wcount, _state);
19359 }
19360 if( !hpccores_hpcpreparechunkedgradientx(weights, wcount, &buf->hpcbuf, _state) )
19361 {
19362 for(i=0; i<=wcount-1; i++)
19363 {
19364 buf->hpcbuf.ptr.p_double[i] = 0.0;
19365 }
19366 }
19367 buf->wcount = wcount;
19368 buf->ntotal = ntotal;
19369 buf->nin = nin;
19370 buf->nout = nout;
19371 buf->chunksize = chunksize;
19372 }
19373
19374
19375 /*************************************************************************
19376 Finalizes HPC compuations of chunked gradient with HPCChunkedGradient().
19377 You have to call this function after calling HPCChunkedGradient() for
19378 a new set of weights. You have to call it only once, see example below:
19379
19380 HOW TO PROCESS DATASET WITH THIS FUNCTION:
19381 Grad:=0
19382 HPCPrepareChunkedGradient(Weights, WCount, NTotal, NOut, Buf)
19383 foreach chunk-of-dataset do
19384 HPCChunkedGradient(...)
19385 HPCFinalizeChunkedGradient(Buf, Grad)
19386
19387 *************************************************************************/
hpcfinalizechunkedgradient(mlpbuffers * buf,ae_vector * grad,ae_state * _state)19388 void hpcfinalizechunkedgradient(mlpbuffers* buf,
19389 /* Real */ ae_vector* grad,
19390 ae_state *_state)
19391 {
19392 ae_int_t i;
19393
19394
19395 if( !hpccores_hpcfinalizechunkedgradientx(&buf->hpcbuf, buf->wcount, grad, _state) )
19396 {
19397 for(i=0; i<=buf->wcount-1; i++)
19398 {
19399 grad->ptr.p_double[i] = grad->ptr.p_double[i]+buf->hpcbuf.ptr.p_double[i];
19400 }
19401 }
19402 }
19403
19404
19405 /*************************************************************************
19406 Fast kernel for chunked gradient.
19407
19408 *************************************************************************/
hpcchunkedgradient(ae_vector * weights,ae_vector * structinfo,ae_vector * columnmeans,ae_vector * columnsigmas,ae_matrix * xy,ae_int_t cstart,ae_int_t csize,ae_vector * batch4buf,ae_vector * hpcbuf,double * e,ae_bool naturalerrorfunc,ae_state * _state)19409 ae_bool hpcchunkedgradient(/* Real */ ae_vector* weights,
19410 /* Integer */ ae_vector* structinfo,
19411 /* Real */ ae_vector* columnmeans,
19412 /* Real */ ae_vector* columnsigmas,
19413 /* Real */ ae_matrix* xy,
19414 ae_int_t cstart,
19415 ae_int_t csize,
19416 /* Real */ ae_vector* batch4buf,
19417 /* Real */ ae_vector* hpcbuf,
19418 double* e,
19419 ae_bool naturalerrorfunc,
19420 ae_state *_state)
19421 {
19422 #ifndef ALGLIB_INTERCEPTS_SSE2
19423 ae_bool result;
19424
19425
19426 result = ae_false;
19427 return result;
19428 #else
19429 return _ialglib_i_hpcchunkedgradient(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, e, naturalerrorfunc);
19430 #endif
19431 }
19432
19433
19434 /*************************************************************************
19435 Fast kernel for chunked processing.
19436
19437 *************************************************************************/
hpcchunkedprocess(ae_vector * weights,ae_vector * structinfo,ae_vector * columnmeans,ae_vector * columnsigmas,ae_matrix * xy,ae_int_t cstart,ae_int_t csize,ae_vector * batch4buf,ae_vector * hpcbuf,ae_state * _state)19438 ae_bool hpcchunkedprocess(/* Real */ ae_vector* weights,
19439 /* Integer */ ae_vector* structinfo,
19440 /* Real */ ae_vector* columnmeans,
19441 /* Real */ ae_vector* columnsigmas,
19442 /* Real */ ae_matrix* xy,
19443 ae_int_t cstart,
19444 ae_int_t csize,
19445 /* Real */ ae_vector* batch4buf,
19446 /* Real */ ae_vector* hpcbuf,
19447 ae_state *_state)
19448 {
19449 #ifndef ALGLIB_INTERCEPTS_SSE2
19450 ae_bool result;
19451
19452
19453 result = ae_false;
19454 return result;
19455 #else
19456 return _ialglib_i_hpcchunkedprocess(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf);
19457 #endif
19458 }
19459
19460
19461 /*************************************************************************
19462 Stub function.
19463
19464 -- ALGLIB routine --
19465 14.06.2013
19466 Bochkanov Sergey
19467 *************************************************************************/
hpccores_hpcpreparechunkedgradientx(ae_vector * weights,ae_int_t wcount,ae_vector * hpcbuf,ae_state * _state)19468 static ae_bool hpccores_hpcpreparechunkedgradientx(/* Real */ ae_vector* weights,
19469 ae_int_t wcount,
19470 /* Real */ ae_vector* hpcbuf,
19471 ae_state *_state)
19472 {
19473 #ifndef ALGLIB_INTERCEPTS_SSE2
19474 ae_bool result;
19475
19476
19477 result = ae_false;
19478 return result;
19479 #else
19480 return _ialglib_i_hpcpreparechunkedgradientx(weights, wcount, hpcbuf);
19481 #endif
19482 }
19483
19484
19485 /*************************************************************************
19486 Stub function.
19487
19488 -- ALGLIB routine --
19489 14.06.2013
19490 Bochkanov Sergey
19491 *************************************************************************/
hpccores_hpcfinalizechunkedgradientx(ae_vector * buf,ae_int_t wcount,ae_vector * grad,ae_state * _state)19492 static ae_bool hpccores_hpcfinalizechunkedgradientx(/* Real */ ae_vector* buf,
19493 ae_int_t wcount,
19494 /* Real */ ae_vector* grad,
19495 ae_state *_state)
19496 {
19497 #ifndef ALGLIB_INTERCEPTS_SSE2
19498 ae_bool result;
19499
19500
19501 result = ae_false;
19502 return result;
19503 #else
19504 return _ialglib_i_hpcfinalizechunkedgradientx(buf, wcount, grad);
19505 #endif
19506 }
19507
19508
_mlpbuffers_init(void * _p,ae_state * _state,ae_bool make_automatic)19509 void _mlpbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
19510 {
19511 mlpbuffers *p = (mlpbuffers*)_p;
19512 ae_touch_ptr((void*)p);
19513 ae_vector_init(&p->batch4buf, 0, DT_REAL, _state, make_automatic);
19514 ae_vector_init(&p->hpcbuf, 0, DT_REAL, _state, make_automatic);
19515 ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
19516 ae_matrix_init(&p->xy2, 0, 0, DT_REAL, _state, make_automatic);
19517 ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic);
19518 ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
19519 ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
19520 ae_vector_init(&p->desiredy, 0, DT_REAL, _state, make_automatic);
19521 ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic);
19522 ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
19523 }
19524
19525
_mlpbuffers_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)19526 void _mlpbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
19527 {
19528 mlpbuffers *dst = (mlpbuffers*)_dst;
19529 mlpbuffers *src = (mlpbuffers*)_src;
19530 dst->chunksize = src->chunksize;
19531 dst->ntotal = src->ntotal;
19532 dst->nin = src->nin;
19533 dst->nout = src->nout;
19534 dst->wcount = src->wcount;
19535 ae_vector_init_copy(&dst->batch4buf, &src->batch4buf, _state, make_automatic);
19536 ae_vector_init_copy(&dst->hpcbuf, &src->hpcbuf, _state, make_automatic);
19537 ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
19538 ae_matrix_init_copy(&dst->xy2, &src->xy2, _state, make_automatic);
19539 ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic);
19540 ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
19541 ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
19542 ae_vector_init_copy(&dst->desiredy, &src->desiredy, _state, make_automatic);
19543 dst->e = src->e;
19544 ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic);
19545 ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
19546 }
19547
19548
_mlpbuffers_clear(void * _p)19549 void _mlpbuffers_clear(void* _p)
19550 {
19551 mlpbuffers *p = (mlpbuffers*)_p;
19552 ae_touch_ptr((void*)p);
19553 ae_vector_clear(&p->batch4buf);
19554 ae_vector_clear(&p->hpcbuf);
19555 ae_matrix_clear(&p->xy);
19556 ae_matrix_clear(&p->xy2);
19557 ae_vector_clear(&p->xyrow);
19558 ae_vector_clear(&p->x);
19559 ae_vector_clear(&p->y);
19560 ae_vector_clear(&p->desiredy);
19561 ae_vector_clear(&p->g);
19562 ae_vector_clear(&p->tmp0);
19563 }
19564
19565
_mlpbuffers_destroy(void * _p)19566 void _mlpbuffers_destroy(void* _p)
19567 {
19568 mlpbuffers *p = (mlpbuffers*)_p;
19569 ae_touch_ptr((void*)p);
19570 ae_vector_destroy(&p->batch4buf);
19571 ae_vector_destroy(&p->hpcbuf);
19572 ae_matrix_destroy(&p->xy);
19573 ae_matrix_destroy(&p->xy2);
19574 ae_vector_destroy(&p->xyrow);
19575 ae_vector_destroy(&p->x);
19576 ae_vector_destroy(&p->y);
19577 ae_vector_destroy(&p->desiredy);
19578 ae_vector_destroy(&p->g);
19579 ae_vector_destroy(&p->tmp0);
19580 }
19581
19582
19583 #endif
19584 #if defined(AE_COMPILE_ALGLIBBASICS) || !defined(AE_PARTIAL_BUILD)
19585
19586
19587 #endif
19588
19589 }
19590
19591