1 /*************************************************************************
2 ALGLIB 3.18.0 (source code generated 2021-10-25)
3 Copyright (c) Sergey Bochkanov (ALGLIB project).
4 
5 >>> SOURCE LICENSE >>>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation (www.fsf.org); either version 2 of the
9 License, or (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 A copy of the GNU General Public License is available at
17 http://www.fsf.org/licensing/licenses
18 >>> END OF LICENSE >>>
19 *************************************************************************/
20 #ifdef _MSC_VER
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 #include "stdafx.h"
24 #include "alglibinternal.h"
25 
26 // disable some irrelevant warnings
27 #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
28 #pragma warning(disable:4100)
29 #pragma warning(disable:4127)
30 #pragma warning(disable:4611)
31 #pragma warning(disable:4702)
32 #pragma warning(disable:4996)
33 #endif
34 
35 /////////////////////////////////////////////////////////////////////////
36 //
37 // THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
38 //
39 /////////////////////////////////////////////////////////////////////////
40 namespace alglib
41 {
42 
43 
44 }
45 
46 /////////////////////////////////////////////////////////////////////////
47 //
48 // THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
49 //
50 /////////////////////////////////////////////////////////////////////////
51 namespace alglib_impl
52 {
53 #if defined(AE_COMPILE_APSERV) || !defined(AE_PARTIAL_BUILD)
54 
55 
56 #endif
57 #if defined(AE_COMPILE_ABLASF) || !defined(AE_PARTIAL_BUILD)
58 #ifdef ALGLIB_NO_FAST_KERNELS
59 static ae_bool ablasf_rgemm32basecase(ae_int_t m,
60      ae_int_t n,
61      ae_int_t k,
62      double alpha,
63      /* Real    */ ae_matrix* a,
64      ae_int_t ia,
65      ae_int_t ja,
66      ae_int_t optypea,
67      /* Real    */ ae_matrix* b,
68      ae_int_t ib,
69      ae_int_t jb,
70      ae_int_t optypeb,
71      double beta,
72      /* Real    */ ae_matrix* c,
73      ae_int_t ic,
74      ae_int_t jc,
75      ae_state *_state);
76 #endif
77 
78 
79 #endif
80 #if defined(AE_COMPILE_HBLAS) || !defined(AE_PARTIAL_BUILD)
81 
82 
83 #endif
84 #if defined(AE_COMPILE_CREFLECTIONS) || !defined(AE_PARTIAL_BUILD)
85 
86 
87 #endif
88 #if defined(AE_COMPILE_SBLAS) || !defined(AE_PARTIAL_BUILD)
89 
90 
91 #endif
92 #if defined(AE_COMPILE_ABLASMKL) || !defined(AE_PARTIAL_BUILD)
93 
94 
95 #endif
96 #if defined(AE_COMPILE_SCODES) || !defined(AE_PARTIAL_BUILD)
97 
98 
99 #endif
100 #if defined(AE_COMPILE_TSORT) || !defined(AE_PARTIAL_BUILD)
101 static void tsort_tagsortfastirec(/* Real    */ ae_vector* a,
102      /* Integer */ ae_vector* b,
103      /* Real    */ ae_vector* bufa,
104      /* Integer */ ae_vector* bufb,
105      ae_int_t i1,
106      ae_int_t i2,
107      ae_state *_state);
108 static void tsort_tagsortfastrrec(/* Real    */ ae_vector* a,
109      /* Real    */ ae_vector* b,
110      /* Real    */ ae_vector* bufa,
111      /* Real    */ ae_vector* bufb,
112      ae_int_t i1,
113      ae_int_t i2,
114      ae_state *_state);
115 static void tsort_tagsortfastrec(/* Real    */ ae_vector* a,
116      /* Real    */ ae_vector* bufa,
117      ae_int_t i1,
118      ae_int_t i2,
119      ae_state *_state);
120 
121 
122 #endif
123 #if defined(AE_COMPILE_BLAS) || !defined(AE_PARTIAL_BUILD)
124 
125 
126 #endif
127 #if defined(AE_COMPILE_ROTATIONS) || !defined(AE_PARTIAL_BUILD)
128 
129 
130 #endif
131 #if defined(AE_COMPILE_BASICSTATOPS) || !defined(AE_PARTIAL_BUILD)
132 
133 
134 #endif
135 #if defined(AE_COMPILE_TRLINSOLVE) || !defined(AE_PARTIAL_BUILD)
136 
137 
138 #endif
139 #if defined(AE_COMPILE_SAFESOLVE) || !defined(AE_PARTIAL_BUILD)
140 static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
141      ae_complex beta,
142      double lnmax,
143      double bnorm,
144      double maxgrowth,
145      double* xnorm,
146      ae_complex* x,
147      ae_state *_state);
148 
149 
150 #endif
151 #if defined(AE_COMPILE_XBLAS) || !defined(AE_PARTIAL_BUILD)
152 static void xblas_xsum(/* Real    */ ae_vector* w,
153      double mx,
154      ae_int_t n,
155      double* r,
156      double* rerr,
157      ae_state *_state);
158 static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state);
159 
160 
161 #endif
162 #if defined(AE_COMPILE_LINMIN) || !defined(AE_PARTIAL_BUILD)
163 static double linmin_ftol = 0.001;
164 static double linmin_xtol = 100*ae_machineepsilon;
165 static ae_int_t linmin_maxfev = 20;
166 static double linmin_stpmin = 1.0E-50;
167 static double linmin_defstpmax = 1.0E+50;
168 static double linmin_armijofactor = 1.3;
169 static void linmin_mcstep(double* stx,
170      double* fx,
171      double* dx,
172      double* sty,
173      double* fy,
174      double* dy,
175      double* stp,
176      double fp,
177      double dp,
178      ae_bool* brackt,
179      double stmin,
180      double stmax,
181      ae_int_t* info,
182      ae_state *_state);
183 
184 
185 #endif
186 #if defined(AE_COMPILE_NEARUNITYUNIT) || !defined(AE_PARTIAL_BUILD)
187 
188 
189 #endif
190 #if defined(AE_COMPILE_NTHEORY) || !defined(AE_PARTIAL_BUILD)
191 static ae_bool ntheory_isprime(ae_int_t n, ae_state *_state);
192 static ae_int_t ntheory_modmul(ae_int_t a,
193      ae_int_t b,
194      ae_int_t n,
195      ae_state *_state);
196 static ae_int_t ntheory_modexp(ae_int_t a,
197      ae_int_t b,
198      ae_int_t n,
199      ae_state *_state);
200 
201 
202 #endif
203 #if defined(AE_COMPILE_FTBASE) || !defined(AE_PARTIAL_BUILD)
204 static ae_int_t ftbase_coltype = 0;
205 static ae_int_t ftbase_coloperandscnt = 1;
206 static ae_int_t ftbase_coloperandsize = 2;
207 static ae_int_t ftbase_colmicrovectorsize = 3;
208 static ae_int_t ftbase_colparam0 = 4;
209 static ae_int_t ftbase_colparam1 = 5;
210 static ae_int_t ftbase_colparam2 = 6;
211 static ae_int_t ftbase_colparam3 = 7;
212 static ae_int_t ftbase_colscnt = 8;
213 static ae_int_t ftbase_opend = 0;
214 static ae_int_t ftbase_opcomplexreffft = 1;
215 static ae_int_t ftbase_opbluesteinsfft = 2;
216 static ae_int_t ftbase_opcomplexcodeletfft = 3;
217 static ae_int_t ftbase_opcomplexcodelettwfft = 4;
218 static ae_int_t ftbase_opradersfft = 5;
219 static ae_int_t ftbase_opcomplextranspose = -1;
220 static ae_int_t ftbase_opcomplexfftfactors = -2;
221 static ae_int_t ftbase_opstart = -3;
222 static ae_int_t ftbase_opjmp = -4;
223 static ae_int_t ftbase_opparallelcall = -5;
224 static ae_int_t ftbase_maxradix = 6;
225 static ae_int_t ftbase_updatetw = 16;
226 static ae_int_t ftbase_recursivethreshold = 1024;
227 static ae_int_t ftbase_raderthreshold = 19;
228 static ae_int_t ftbase_ftbasecodeletrecommended = 5;
229 static double ftbase_ftbaseinefficiencyfactor = 1.3;
230 static ae_int_t ftbase_ftbasemaxsmoothfactor = 5;
231 static void ftbase_ftdeterminespacerequirements(ae_int_t n,
232      ae_int_t* precrsize,
233      ae_int_t* precisize,
234      ae_state *_state);
235 static void ftbase_ftcomplexfftplanrec(ae_int_t n,
236      ae_int_t k,
237      ae_bool childplan,
238      ae_bool topmostplan,
239      ae_int_t* rowptr,
240      ae_int_t* bluesteinsize,
241      ae_int_t* precrptr,
242      ae_int_t* preciptr,
243      fasttransformplan* plan,
244      ae_state *_state);
245 static void ftbase_ftpushentry(fasttransformplan* plan,
246      ae_int_t* rowptr,
247      ae_int_t etype,
248      ae_int_t eopcnt,
249      ae_int_t eopsize,
250      ae_int_t emcvsize,
251      ae_int_t eparam0,
252      ae_state *_state);
253 static void ftbase_ftpushentry2(fasttransformplan* plan,
254      ae_int_t* rowptr,
255      ae_int_t etype,
256      ae_int_t eopcnt,
257      ae_int_t eopsize,
258      ae_int_t emcvsize,
259      ae_int_t eparam0,
260      ae_int_t eparam1,
261      ae_state *_state);
262 static void ftbase_ftpushentry4(fasttransformplan* plan,
263      ae_int_t* rowptr,
264      ae_int_t etype,
265      ae_int_t eopcnt,
266      ae_int_t eopsize,
267      ae_int_t emcvsize,
268      ae_int_t eparam0,
269      ae_int_t eparam1,
270      ae_int_t eparam2,
271      ae_int_t eparam3,
272      ae_state *_state);
273 static void ftbase_ftapplysubplan(fasttransformplan* plan,
274      ae_int_t subplan,
275      /* Real    */ ae_vector* a,
276      ae_int_t abase,
277      ae_int_t aoffset,
278      /* Real    */ ae_vector* buf,
279      ae_int_t repcnt,
280      ae_state *_state);
281 static void ftbase_ftapplycomplexreffft(/* Real    */ ae_vector* a,
282      ae_int_t offs,
283      ae_int_t operandscnt,
284      ae_int_t operandsize,
285      ae_int_t microvectorsize,
286      /* Real    */ ae_vector* buf,
287      ae_state *_state);
288 static void ftbase_ftapplycomplexcodeletfft(/* Real    */ ae_vector* a,
289      ae_int_t offs,
290      ae_int_t operandscnt,
291      ae_int_t operandsize,
292      ae_int_t microvectorsize,
293      ae_state *_state);
294 static void ftbase_ftapplycomplexcodelettwfft(/* Real    */ ae_vector* a,
295      ae_int_t offs,
296      ae_int_t operandscnt,
297      ae_int_t operandsize,
298      ae_int_t microvectorsize,
299      ae_state *_state);
300 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n,
301      ae_int_t m,
302      /* Real    */ ae_vector* precr,
303      ae_int_t offs,
304      ae_state *_state);
305 static void ftbase_ftbluesteinsfft(fasttransformplan* plan,
306      /* Real    */ ae_vector* a,
307      ae_int_t abase,
308      ae_int_t aoffset,
309      ae_int_t operandscnt,
310      ae_int_t n,
311      ae_int_t m,
312      ae_int_t precoffs,
313      ae_int_t subplan,
314      /* Real    */ ae_vector* bufa,
315      /* Real    */ ae_vector* bufb,
316      /* Real    */ ae_vector* bufc,
317      /* Real    */ ae_vector* bufd,
318      ae_state *_state);
319 static void ftbase_ftprecomputeradersfft(ae_int_t n,
320      ae_int_t rq,
321      ae_int_t riq,
322      /* Real    */ ae_vector* precr,
323      ae_int_t offs,
324      ae_state *_state);
325 static void ftbase_ftradersfft(fasttransformplan* plan,
326      /* Real    */ ae_vector* a,
327      ae_int_t abase,
328      ae_int_t aoffset,
329      ae_int_t operandscnt,
330      ae_int_t n,
331      ae_int_t subplan,
332      ae_int_t rq,
333      ae_int_t riq,
334      ae_int_t precoffs,
335      /* Real    */ ae_vector* buf,
336      ae_state *_state);
337 static void ftbase_ftfactorize(ae_int_t n,
338      ae_bool isroot,
339      ae_int_t* n1,
340      ae_int_t* n2,
341      ae_state *_state);
342 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state);
343 static void ftbase_ffttwcalc(/* Real    */ ae_vector* a,
344      ae_int_t aoffset,
345      ae_int_t n1,
346      ae_int_t n2,
347      ae_state *_state);
348 static void ftbase_internalcomplexlintranspose(/* Real    */ ae_vector* a,
349      ae_int_t m,
350      ae_int_t n,
351      ae_int_t astart,
352      /* Real    */ ae_vector* buf,
353      ae_state *_state);
354 static void ftbase_ffticltrec(/* Real    */ ae_vector* a,
355      ae_int_t astart,
356      ae_int_t astride,
357      /* Real    */ ae_vector* b,
358      ae_int_t bstart,
359      ae_int_t bstride,
360      ae_int_t m,
361      ae_int_t n,
362      ae_state *_state);
363 static void ftbase_ftbasefindsmoothrec(ae_int_t n,
364      ae_int_t seed,
365      ae_int_t leastfactor,
366      ae_int_t* best,
367      ae_state *_state);
368 
369 
370 #endif
371 #if defined(AE_COMPILE_HPCCORES) || !defined(AE_PARTIAL_BUILD)
372 static ae_bool hpccores_hpcpreparechunkedgradientx(/* Real    */ ae_vector* weights,
373      ae_int_t wcount,
374      /* Real    */ ae_vector* hpcbuf,
375      ae_state *_state);
376 static ae_bool hpccores_hpcfinalizechunkedgradientx(/* Real    */ ae_vector* buf,
377      ae_int_t wcount,
378      /* Real    */ ae_vector* grad,
379      ae_state *_state);
380 
381 
382 #endif
383 #if defined(AE_COMPILE_ALGLIBBASICS) || !defined(AE_PARTIAL_BUILD)
384 
385 
386 #endif
387 
388 #if defined(AE_COMPILE_APSERV) || !defined(AE_PARTIAL_BUILD)
389 
390 
391 /*************************************************************************
392 Internally calls SetErrorFlag() with condition:
393 
394     Abs(Val-RefVal)>Tol*Max(Abs(RefVal),S)
395 
396 This function is used to test relative error in Val against  RefVal,  with
397 relative error being replaced by absolute when scale  of  RefVal  is  less
398 than S.
399 
400 This function returns value of COND.
401 *************************************************************************/
seterrorflagdiff(ae_bool * flag,double val,double refval,double tol,double s,ae_state * _state)402 void seterrorflagdiff(ae_bool* flag,
403      double val,
404      double refval,
405      double tol,
406      double s,
407      ae_state *_state)
408 {
409 
410 
411     ae_set_error_flag(flag, ae_fp_greater(ae_fabs(val-refval, _state),tol*ae_maxreal(ae_fabs(refval, _state), s, _state)), __FILE__, __LINE__, "apserv.ap:162");
412 }
413 
414 
415 /*************************************************************************
416 The function always returns False.
417 It may be used sometimes to prevent spurious warnings.
418 
419   -- ALGLIB --
420      Copyright 17.09.2012 by Bochkanov Sergey
421 *************************************************************************/
alwaysfalse(ae_state * _state)422 ae_bool alwaysfalse(ae_state *_state)
423 {
424     ae_bool result;
425 
426 
427     result = ae_false;
428     return result;
429 }
430 
431 
432 /*************************************************************************
433 The function "touches" integer - it is used  to  avoid  compiler  messages
434 about unused variables (in rare cases when we do NOT want to remove  these
435 variables).
436 
437   -- ALGLIB --
438      Copyright 17.09.2012 by Bochkanov Sergey
439 *************************************************************************/
touchint(ae_int_t * a,ae_state * _state)440 void touchint(ae_int_t* a, ae_state *_state)
441 {
442 
443 
444 }
445 
446 
447 /*************************************************************************
448 The function "touches" real   -  it is used  to  avoid  compiler  messages
449 about unused variables (in rare cases when we do NOT want to remove  these
450 variables).
451 
452   -- ALGLIB --
453      Copyright 17.09.2012 by Bochkanov Sergey
454 *************************************************************************/
touchreal(double * a,ae_state * _state)455 void touchreal(double* a, ae_state *_state)
456 {
457 
458 
459 }
460 
461 
462 /*************************************************************************
463 The function performs zero-coalescing on real value.
464 
465 NOTE: no check is performed for B<>0
466 
467   -- ALGLIB --
468      Copyright 18.05.2015 by Bochkanov Sergey
469 *************************************************************************/
coalesce(double a,double b,ae_state * _state)470 double coalesce(double a, double b, ae_state *_state)
471 {
472     double result;
473 
474 
475     result = a;
476     if( ae_fp_eq(a,0.0) )
477     {
478         result = b;
479     }
480     return result;
481 }
482 
483 
484 /*************************************************************************
485 The function performs zero-coalescing on integer value.
486 
487 NOTE: no check is performed for B<>0
488 
489   -- ALGLIB --
490      Copyright 18.05.2015 by Bochkanov Sergey
491 *************************************************************************/
coalescei(ae_int_t a,ae_int_t b,ae_state * _state)492 ae_int_t coalescei(ae_int_t a, ae_int_t b, ae_state *_state)
493 {
494     ae_int_t result;
495 
496 
497     result = a;
498     if( a==0 )
499     {
500         result = b;
501     }
502     return result;
503 }
504 
505 
506 /*************************************************************************
507 The function convert integer value to real value.
508 
509   -- ALGLIB --
510      Copyright 17.09.2012 by Bochkanov Sergey
511 *************************************************************************/
inttoreal(ae_int_t a,ae_state * _state)512 double inttoreal(ae_int_t a, ae_state *_state)
513 {
514     double result;
515 
516 
517     result = (double)(a);
518     return result;
519 }
520 
521 
522 /*************************************************************************
523 The function calculates binary logarithm.
524 
525 NOTE: it costs twice as much as Ln(x)
526 
527   -- ALGLIB --
528      Copyright 17.09.2012 by Bochkanov Sergey
529 *************************************************************************/
logbase2(double x,ae_state * _state)530 double logbase2(double x, ae_state *_state)
531 {
532     double result;
533 
534 
535     result = ae_log(x, _state)/ae_log((double)(2), _state);
536     return result;
537 }
538 
539 
540 /*************************************************************************
541 This function compares two numbers for approximate equality, with tolerance
542 to errors as large as tol.
543 
544 
545   -- ALGLIB --
546      Copyright 02.12.2009 by Bochkanov Sergey
547 *************************************************************************/
approxequal(double a,double b,double tol,ae_state * _state)548 ae_bool approxequal(double a, double b, double tol, ae_state *_state)
549 {
550     ae_bool result;
551 
552 
553     result = ae_fp_less_eq(ae_fabs(a-b, _state),tol);
554     return result;
555 }
556 
557 
558 /*************************************************************************
559 This function compares two numbers for approximate equality, with tolerance
560 to errors as large as max(|a|,|b|)*tol.
561 
562 
563   -- ALGLIB --
564      Copyright 02.12.2009 by Bochkanov Sergey
565 *************************************************************************/
approxequalrel(double a,double b,double tol,ae_state * _state)566 ae_bool approxequalrel(double a, double b, double tol, ae_state *_state)
567 {
568     ae_bool result;
569 
570 
571     result = ae_fp_less_eq(ae_fabs(a-b, _state),ae_maxreal(ae_fabs(a, _state), ae_fabs(b, _state), _state)*tol);
572     return result;
573 }
574 
575 
576 /*************************************************************************
577 This  function  generates  1-dimensional  general  interpolation task with
578 moderate Lipshitz constant (close to 1.0)
579 
580 If N=1 then suborutine generates only one point at the middle of [A,B]
581 
582   -- ALGLIB --
583      Copyright 02.12.2009 by Bochkanov Sergey
584 *************************************************************************/
taskgenint1d(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)585 void taskgenint1d(double a,
586      double b,
587      ae_int_t n,
588      /* Real    */ ae_vector* x,
589      /* Real    */ ae_vector* y,
590      ae_state *_state)
591 {
592     ae_int_t i;
593     double h;
594 
595     ae_vector_clear(x);
596     ae_vector_clear(y);
597 
598     ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
599     ae_vector_set_length(x, n, _state);
600     ae_vector_set_length(y, n, _state);
601     if( n>1 )
602     {
603         x->ptr.p_double[0] = a;
604         y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
605         h = (b-a)/(n-1);
606         for(i=1; i<=n-1; i++)
607         {
608             if( i!=n-1 )
609             {
610                 x->ptr.p_double[i] = a+(i+0.2*(2*ae_randomreal(_state)-1))*h;
611             }
612             else
613             {
614                 x->ptr.p_double[i] = b;
615             }
616             y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
617         }
618     }
619     else
620     {
621         x->ptr.p_double[0] = 0.5*(a+b);
622         y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
623     }
624 }
625 
626 
627 /*************************************************************************
628 This function generates  1-dimensional equidistant interpolation task with
629 moderate Lipshitz constant (close to 1.0)
630 
631 If N=1 then suborutine generates only one point at the middle of [A,B]
632 
633   -- ALGLIB --
634      Copyright 02.12.2009 by Bochkanov Sergey
635 *************************************************************************/
taskgenint1dequidist(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)636 void taskgenint1dequidist(double a,
637      double b,
638      ae_int_t n,
639      /* Real    */ ae_vector* x,
640      /* Real    */ ae_vector* y,
641      ae_state *_state)
642 {
643     ae_int_t i;
644     double h;
645 
646     ae_vector_clear(x);
647     ae_vector_clear(y);
648 
649     ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
650     ae_vector_set_length(x, n, _state);
651     ae_vector_set_length(y, n, _state);
652     if( n>1 )
653     {
654         x->ptr.p_double[0] = a;
655         y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
656         h = (b-a)/(n-1);
657         for(i=1; i<=n-1; i++)
658         {
659             x->ptr.p_double[i] = a+i*h;
660             y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*h;
661         }
662     }
663     else
664     {
665         x->ptr.p_double[0] = 0.5*(a+b);
666         y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
667     }
668 }
669 
670 
671 /*************************************************************************
672 This function generates  1-dimensional Chebyshev-1 interpolation task with
673 moderate Lipshitz constant (close to 1.0)
674 
675 If N=1 then suborutine generates only one point at the middle of [A,B]
676 
677   -- ALGLIB --
678      Copyright 02.12.2009 by Bochkanov Sergey
679 *************************************************************************/
taskgenint1dcheb1(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)680 void taskgenint1dcheb1(double a,
681      double b,
682      ae_int_t n,
683      /* Real    */ ae_vector* x,
684      /* Real    */ ae_vector* y,
685      ae_state *_state)
686 {
687     ae_int_t i;
688 
689     ae_vector_clear(x);
690     ae_vector_clear(y);
691 
692     ae_assert(n>=1, "TaskGenInterpolation1DCheb1: N<1!", _state);
693     ae_vector_set_length(x, n, _state);
694     ae_vector_set_length(y, n, _state);
695     if( n>1 )
696     {
697         for(i=0; i<=n-1; i++)
698         {
699             x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*(2*i+1)/(2*n), _state);
700             if( i==0 )
701             {
702                 y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
703             }
704             else
705             {
706                 y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
707             }
708         }
709     }
710     else
711     {
712         x->ptr.p_double[0] = 0.5*(a+b);
713         y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
714     }
715 }
716 
717 
718 /*************************************************************************
719 This function generates  1-dimensional Chebyshev-2 interpolation task with
720 moderate Lipshitz constant (close to 1.0)
721 
722 If N=1 then suborutine generates only one point at the middle of [A,B]
723 
724   -- ALGLIB --
725      Copyright 02.12.2009 by Bochkanov Sergey
726 *************************************************************************/
taskgenint1dcheb2(double a,double b,ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)727 void taskgenint1dcheb2(double a,
728      double b,
729      ae_int_t n,
730      /* Real    */ ae_vector* x,
731      /* Real    */ ae_vector* y,
732      ae_state *_state)
733 {
734     ae_int_t i;
735 
736     ae_vector_clear(x);
737     ae_vector_clear(y);
738 
739     ae_assert(n>=1, "TaskGenInterpolation1DCheb2: N<1!", _state);
740     ae_vector_set_length(x, n, _state);
741     ae_vector_set_length(y, n, _state);
742     if( n>1 )
743     {
744         for(i=0; i<=n-1; i++)
745         {
746             x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*i/(n-1), _state);
747             if( i==0 )
748             {
749                 y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
750             }
751             else
752             {
753                 y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
754             }
755         }
756     }
757     else
758     {
759         x->ptr.p_double[0] = 0.5*(a+b);
760         y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
761     }
762 }
763 
764 
765 /*************************************************************************
766 This function checks that all values from X[] are distinct. It does more
767 than just usual floating point comparison:
768 * first, it calculates max(X) and min(X)
769 * second, it maps X[] from [min,max] to [1,2]
770 * only at this stage actual comparison is done
771 
772 The meaning of such check is to ensure that all values are "distinct enough"
773 and will not cause interpolation subroutine to fail.
774 
775 NOTE:
776     X[] must be sorted by ascending (subroutine ASSERT's it)
777 
778   -- ALGLIB --
779      Copyright 02.12.2009 by Bochkanov Sergey
780 *************************************************************************/
aredistinct(ae_vector * x,ae_int_t n,ae_state * _state)781 ae_bool aredistinct(/* Real    */ ae_vector* x,
782      ae_int_t n,
783      ae_state *_state)
784 {
785     double a;
786     double b;
787     ae_int_t i;
788     ae_bool nonsorted;
789     ae_bool result;
790 
791 
792     ae_assert(n>=1, "APSERVAreDistinct: internal error (N<1)", _state);
793     if( n==1 )
794     {
795 
796         /*
797          * everything is alright, it is up to caller to decide whether it
798          * can interpolate something with just one point
799          */
800         result = ae_true;
801         return result;
802     }
803     a = x->ptr.p_double[0];
804     b = x->ptr.p_double[0];
805     nonsorted = ae_false;
806     for(i=1; i<=n-1; i++)
807     {
808         a = ae_minreal(a, x->ptr.p_double[i], _state);
809         b = ae_maxreal(b, x->ptr.p_double[i], _state);
810         nonsorted = nonsorted||ae_fp_greater_eq(x->ptr.p_double[i-1],x->ptr.p_double[i]);
811     }
812     ae_assert(!nonsorted, "APSERVAreDistinct: internal error (not sorted)", _state);
813     for(i=1; i<=n-1; i++)
814     {
815         if( ae_fp_eq((x->ptr.p_double[i]-a)/(b-a)+1,(x->ptr.p_double[i-1]-a)/(b-a)+1) )
816         {
817             result = ae_false;
818             return result;
819         }
820     }
821     result = ae_true;
822     return result;
823 }
824 
825 
826 /*************************************************************************
827 This function checks that two boolean values are the same (both  are  True
828 or both are False).
829 
830   -- ALGLIB --
831      Copyright 02.12.2009 by Bochkanov Sergey
832 *************************************************************************/
aresameboolean(ae_bool v1,ae_bool v2,ae_state * _state)833 ae_bool aresameboolean(ae_bool v1, ae_bool v2, ae_state *_state)
834 {
835     ae_bool result;
836 
837 
838     result = (v1&&v2)||(!v1&&!v2);
839     return result;
840 }
841 
842 
843 /*************************************************************************
844 Resizes X and fills by zeros
845 
846   -- ALGLIB --
847      Copyright 20.03.2009 by Bochkanov Sergey
848 *************************************************************************/
setlengthzero(ae_vector * x,ae_int_t n,ae_state * _state)849 void setlengthzero(/* Real    */ ae_vector* x,
850      ae_int_t n,
851      ae_state *_state)
852 {
853     ae_int_t i;
854 
855 
856     ae_assert(n>=0, "SetLengthZero: N<0", _state);
857     ae_vector_set_length(x, n, _state);
858     for(i=0; i<=n-1; i++)
859     {
860         x->ptr.p_double[i] = (double)(0);
861     }
862 }
863 
864 
865 /*************************************************************************
866 If Length(X)<N, resizes X
867 
868   -- ALGLIB --
869      Copyright 20.03.2009 by Bochkanov Sergey
870 *************************************************************************/
bvectorsetlengthatleast(ae_vector * x,ae_int_t n,ae_state * _state)871 void bvectorsetlengthatleast(/* Boolean */ ae_vector* x,
872      ae_int_t n,
873      ae_state *_state)
874 {
875 
876 
877     if( x->cnt<n )
878     {
879         ae_vector_set_length(x, n, _state);
880     }
881 }
882 
883 
884 /*************************************************************************
885 If Length(X)<N, resizes X
886 
887   -- ALGLIB --
888      Copyright 20.03.2009 by Bochkanov Sergey
889 *************************************************************************/
ivectorsetlengthatleast(ae_vector * x,ae_int_t n,ae_state * _state)890 void ivectorsetlengthatleast(/* Integer */ ae_vector* x,
891      ae_int_t n,
892      ae_state *_state)
893 {
894 
895 
896     if( x->cnt<n )
897     {
898         ae_vector_set_length(x, n, _state);
899     }
900 }
901 
902 
903 /*************************************************************************
904 If Length(X)<N, resizes X
905 
906   -- ALGLIB --
907      Copyright 20.03.2009 by Bochkanov Sergey
908 *************************************************************************/
rvectorsetlengthatleast(ae_vector * x,ae_int_t n,ae_state * _state)909 void rvectorsetlengthatleast(/* Real    */ ae_vector* x,
910      ae_int_t n,
911      ae_state *_state)
912 {
913 
914 
915     if( x->cnt<n )
916     {
917         ae_vector_set_length(x, n, _state);
918     }
919 }
920 
921 
922 /*************************************************************************
923 If Cols(X)<N or Rows(X)<M, resizes X
924 
925   -- ALGLIB --
926      Copyright 20.03.2009 by Bochkanov Sergey
927 *************************************************************************/
rmatrixsetlengthatleast(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)928 void rmatrixsetlengthatleast(/* Real    */ ae_matrix* x,
929      ae_int_t m,
930      ae_int_t n,
931      ae_state *_state)
932 {
933 
934 
935     if( m>0&&n>0 )
936     {
937         if( x->rows<m||x->cols<n )
938         {
939             ae_matrix_set_length(x, m, n, _state);
940         }
941     }
942 }
943 
944 
945 /*************************************************************************
946 If Cols(X)<N or Rows(X)<M, resizes X
947 
948   -- ALGLIB --
949      Copyright 20.03.2009 by Bochkanov Sergey
950 *************************************************************************/
bmatrixsetlengthatleast(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)951 void bmatrixsetlengthatleast(/* Boolean */ ae_matrix* x,
952      ae_int_t m,
953      ae_int_t n,
954      ae_state *_state)
955 {
956 
957 
958     if( m>0&&n>0 )
959     {
960         if( x->rows<m||x->cols<n )
961         {
962             ae_matrix_set_length(x, m, n, _state);
963         }
964     }
965 }
966 
967 
968 /*************************************************************************
969 Grows X, i.e. changes its size in such a way that:
970 a) contents is preserved
971 b) new size is at least N
972 c) new size can be larger than N, so subsequent grow() calls can return
973    without reallocation
974 
975   -- ALGLIB --
976      Copyright 20.03.2009 by Bochkanov Sergey
977 *************************************************************************/
bvectorgrowto(ae_vector * x,ae_int_t n,ae_state * _state)978 void bvectorgrowto(/* Boolean */ ae_vector* x,
979      ae_int_t n,
980      ae_state *_state)
981 {
982     ae_frame _frame_block;
983     ae_vector oldx;
984     ae_int_t i;
985     ae_int_t n2;
986 
987     ae_frame_make(_state, &_frame_block);
988     memset(&oldx, 0, sizeof(oldx));
989     ae_vector_init(&oldx, 0, DT_BOOL, _state, ae_true);
990 
991 
992     /*
993      * Enough place
994      */
995     if( x->cnt>=n )
996     {
997         ae_frame_leave(_state);
998         return;
999     }
1000 
1001     /*
1002      * Choose new size
1003      */
1004     n = ae_maxint(n, ae_round(1.8*x->cnt+1, _state), _state);
1005 
1006     /*
1007      * Grow
1008      */
1009     n2 = x->cnt;
1010     ae_swap_vectors(x, &oldx);
1011     ae_vector_set_length(x, n, _state);
1012     for(i=0; i<=n-1; i++)
1013     {
1014         if( i<n2 )
1015         {
1016             x->ptr.p_bool[i] = oldx.ptr.p_bool[i];
1017         }
1018         else
1019         {
1020             x->ptr.p_bool[i] = ae_false;
1021         }
1022     }
1023     ae_frame_leave(_state);
1024 }
1025 
1026 
1027 /*************************************************************************
1028 Grows X, i.e. changes its size in such a way that:
1029 a) contents is preserved
1030 b) new size is at least N
1031 c) new size can be larger than N, so subsequent grow() calls can return
1032    without reallocation
1033 
1034   -- ALGLIB --
1035      Copyright 20.03.2009 by Bochkanov Sergey
1036 *************************************************************************/
ivectorgrowto(ae_vector * x,ae_int_t n,ae_state * _state)1037 void ivectorgrowto(/* Integer */ ae_vector* x,
1038      ae_int_t n,
1039      ae_state *_state)
1040 {
1041     ae_frame _frame_block;
1042     ae_vector oldx;
1043     ae_int_t i;
1044     ae_int_t n2;
1045 
1046     ae_frame_make(_state, &_frame_block);
1047     memset(&oldx, 0, sizeof(oldx));
1048     ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
1049 
1050 
1051     /*
1052      * Enough place
1053      */
1054     if( x->cnt>=n )
1055     {
1056         ae_frame_leave(_state);
1057         return;
1058     }
1059 
1060     /*
1061      * Choose new size
1062      */
1063     n = ae_maxint(n, ae_round(1.8*x->cnt+1, _state), _state);
1064 
1065     /*
1066      * Grow
1067      */
1068     n2 = x->cnt;
1069     ae_swap_vectors(x, &oldx);
1070     ae_vector_set_length(x, n, _state);
1071     for(i=0; i<=n-1; i++)
1072     {
1073         if( i<n2 )
1074         {
1075             x->ptr.p_int[i] = oldx.ptr.p_int[i];
1076         }
1077         else
1078         {
1079             x->ptr.p_int[i] = 0;
1080         }
1081     }
1082     ae_frame_leave(_state);
1083 }
1084 
1085 
1086 /*************************************************************************
1087 Grows X, i.e. appends rows in such a way that:
1088 a) contents is preserved
1089 b) new row count is at least N
1090 c) new row count can be larger than N, so subsequent grow() calls can return
1091    without reallocation
1092 d) new matrix has at least MinCols columns (if less than specified amount
1093    of columns is present, new columns are added with undefined contents);
1094    MinCols can be 0 or negative value = ignored
1095 
1096   -- ALGLIB --
1097      Copyright 20.03.2009 by Bochkanov Sergey
1098 *************************************************************************/
rmatrixgrowrowsto(ae_matrix * a,ae_int_t n,ae_int_t mincols,ae_state * _state)1099 void rmatrixgrowrowsto(/* Real    */ ae_matrix* a,
1100      ae_int_t n,
1101      ae_int_t mincols,
1102      ae_state *_state)
1103 {
1104     ae_frame _frame_block;
1105     ae_matrix olda;
1106     ae_int_t i;
1107     ae_int_t j;
1108     ae_int_t n2;
1109     ae_int_t m;
1110 
1111     ae_frame_make(_state, &_frame_block);
1112     memset(&olda, 0, sizeof(olda));
1113     ae_matrix_init(&olda, 0, 0, DT_REAL, _state, ae_true);
1114 
1115 
1116     /*
1117      * Enough place?
1118      */
1119     if( a->rows>=n&&a->cols>=mincols )
1120     {
1121         ae_frame_leave(_state);
1122         return;
1123     }
1124 
1125     /*
1126      * Sizes and metrics
1127      */
1128     if( a->rows<n )
1129     {
1130         n = ae_maxint(n, ae_round(1.8*a->rows+1, _state), _state);
1131     }
1132     n2 = ae_minint(a->rows, n, _state);
1133     m = a->cols;
1134 
1135     /*
1136      * Grow
1137      */
1138     ae_swap_matrices(a, &olda);
1139     ae_matrix_set_length(a, n, ae_maxint(m, mincols, _state), _state);
1140     for(i=0; i<=n2-1; i++)
1141     {
1142         for(j=0; j<=m-1; j++)
1143         {
1144             a->ptr.pp_double[i][j] = olda.ptr.pp_double[i][j];
1145         }
1146     }
1147     ae_frame_leave(_state);
1148 }
1149 
1150 
1151 /*************************************************************************
1152 Grows X, i.e. appends cols in such a way that:
1153 a) contents is preserved
1154 b) new col count is at least N
1155 c) new col count can be larger than N, so subsequent grow() calls can return
1156    without reallocation
1157 d) new matrix has at least MinRows row (if less than specified amount
1158    of rows is present, new rows are added with undefined contents);
1159    MinRows can be 0 or negative value = ignored
1160 
1161   -- ALGLIB --
1162      Copyright 20.03.2009 by Bochkanov Sergey
1163 *************************************************************************/
rmatrixgrowcolsto(ae_matrix * a,ae_int_t n,ae_int_t minrows,ae_state * _state)1164 void rmatrixgrowcolsto(/* Real    */ ae_matrix* a,
1165      ae_int_t n,
1166      ae_int_t minrows,
1167      ae_state *_state)
1168 {
1169     ae_frame _frame_block;
1170     ae_matrix olda;
1171     ae_int_t i;
1172     ae_int_t j;
1173     ae_int_t n2;
1174     ae_int_t m;
1175 
1176     ae_frame_make(_state, &_frame_block);
1177     memset(&olda, 0, sizeof(olda));
1178     ae_matrix_init(&olda, 0, 0, DT_REAL, _state, ae_true);
1179 
1180 
1181     /*
1182      * Enough place?
1183      */
1184     if( a->cols>=n&&a->rows>=minrows )
1185     {
1186         ae_frame_leave(_state);
1187         return;
1188     }
1189 
1190     /*
1191      * Sizes and metrics
1192      */
1193     if( a->cols<n )
1194     {
1195         n = ae_maxint(n, ae_round(1.8*a->cols+1, _state), _state);
1196     }
1197     n2 = ae_minint(a->cols, n, _state);
1198     m = a->rows;
1199 
1200     /*
1201      * Grow
1202      */
1203     ae_swap_matrices(a, &olda);
1204     ae_matrix_set_length(a, ae_maxint(m, minrows, _state), n, _state);
1205     for(i=0; i<=m-1; i++)
1206     {
1207         for(j=0; j<=n2-1; j++)
1208         {
1209             a->ptr.pp_double[i][j] = olda.ptr.pp_double[i][j];
1210         }
1211     }
1212     ae_frame_leave(_state);
1213 }
1214 
1215 
1216 /*************************************************************************
1217 Grows X, i.e. changes its size in such a way that:
1218 a) contents is preserved
1219 b) new size is at least N
1220 c) new size can be larger than N, so subsequent grow() calls can return
1221    without reallocation
1222 
1223   -- ALGLIB --
1224      Copyright 20.03.2009 by Bochkanov Sergey
1225 *************************************************************************/
rvectorgrowto(ae_vector * x,ae_int_t n,ae_state * _state)1226 void rvectorgrowto(/* Real    */ ae_vector* x,
1227      ae_int_t n,
1228      ae_state *_state)
1229 {
1230     ae_frame _frame_block;
1231     ae_vector oldx;
1232     ae_int_t i;
1233     ae_int_t n2;
1234 
1235     ae_frame_make(_state, &_frame_block);
1236     memset(&oldx, 0, sizeof(oldx));
1237     ae_vector_init(&oldx, 0, DT_REAL, _state, ae_true);
1238 
1239 
1240     /*
1241      * Enough place
1242      */
1243     if( x->cnt>=n )
1244     {
1245         ae_frame_leave(_state);
1246         return;
1247     }
1248 
1249     /*
1250      * Choose new size
1251      */
1252     n = ae_maxint(n, ae_round(1.8*x->cnt+1, _state), _state);
1253 
1254     /*
1255      * Grow
1256      */
1257     n2 = x->cnt;
1258     ae_swap_vectors(x, &oldx);
1259     ae_vector_set_length(x, n, _state);
1260     for(i=0; i<=n-1; i++)
1261     {
1262         if( i<n2 )
1263         {
1264             x->ptr.p_double[i] = oldx.ptr.p_double[i];
1265         }
1266         else
1267         {
1268             x->ptr.p_double[i] = (double)(0);
1269         }
1270     }
1271     ae_frame_leave(_state);
1272 }
1273 
1274 
1275 /*************************************************************************
1276 Resizes X and:
1277 * preserves old contents of X
1278 * fills new elements by zeros
1279 
1280   -- ALGLIB --
1281      Copyright 20.03.2009 by Bochkanov Sergey
1282 *************************************************************************/
ivectorresize(ae_vector * x,ae_int_t n,ae_state * _state)1283 void ivectorresize(/* Integer */ ae_vector* x,
1284      ae_int_t n,
1285      ae_state *_state)
1286 {
1287     ae_frame _frame_block;
1288     ae_vector oldx;
1289     ae_int_t i;
1290     ae_int_t n2;
1291 
1292     ae_frame_make(_state, &_frame_block);
1293     memset(&oldx, 0, sizeof(oldx));
1294     ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
1295 
1296     n2 = x->cnt;
1297     ae_swap_vectors(x, &oldx);
1298     ae_vector_set_length(x, n, _state);
1299     for(i=0; i<=n-1; i++)
1300     {
1301         if( i<n2 )
1302         {
1303             x->ptr.p_int[i] = oldx.ptr.p_int[i];
1304         }
1305         else
1306         {
1307             x->ptr.p_int[i] = 0;
1308         }
1309     }
1310     ae_frame_leave(_state);
1311 }
1312 
1313 
1314 /*************************************************************************
1315 Resizes X and:
1316 * preserves old contents of X
1317 * fills new elements by zeros
1318 
1319   -- ALGLIB --
1320      Copyright 20.03.2009 by Bochkanov Sergey
1321 *************************************************************************/
rvectorresize(ae_vector * x,ae_int_t n,ae_state * _state)1322 void rvectorresize(/* Real    */ ae_vector* x,
1323      ae_int_t n,
1324      ae_state *_state)
1325 {
1326     ae_frame _frame_block;
1327     ae_vector oldx;
1328     ae_int_t i;
1329     ae_int_t n2;
1330 
1331     ae_frame_make(_state, &_frame_block);
1332     memset(&oldx, 0, sizeof(oldx));
1333     ae_vector_init(&oldx, 0, DT_REAL, _state, ae_true);
1334 
1335     n2 = x->cnt;
1336     ae_swap_vectors(x, &oldx);
1337     ae_vector_set_length(x, n, _state);
1338     for(i=0; i<=n-1; i++)
1339     {
1340         if( i<n2 )
1341         {
1342             x->ptr.p_double[i] = oldx.ptr.p_double[i];
1343         }
1344         else
1345         {
1346             x->ptr.p_double[i] = (double)(0);
1347         }
1348     }
1349     ae_frame_leave(_state);
1350 }
1351 
1352 
1353 /*************************************************************************
1354 Resizes X and:
1355 * preserves old contents of X
1356 * fills new elements by zeros
1357 
1358   -- ALGLIB --
1359      Copyright 20.03.2009 by Bochkanov Sergey
1360 *************************************************************************/
rmatrixresize(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1361 void rmatrixresize(/* Real    */ ae_matrix* x,
1362      ae_int_t m,
1363      ae_int_t n,
1364      ae_state *_state)
1365 {
1366     ae_frame _frame_block;
1367     ae_matrix oldx;
1368     ae_int_t i;
1369     ae_int_t j;
1370     ae_int_t m2;
1371     ae_int_t n2;
1372 
1373     ae_frame_make(_state, &_frame_block);
1374     memset(&oldx, 0, sizeof(oldx));
1375     ae_matrix_init(&oldx, 0, 0, DT_REAL, _state, ae_true);
1376 
1377     m2 = x->rows;
1378     n2 = x->cols;
1379     ae_swap_matrices(x, &oldx);
1380     ae_matrix_set_length(x, m, n, _state);
1381     for(i=0; i<=m-1; i++)
1382     {
1383         for(j=0; j<=n-1; j++)
1384         {
1385             if( i<m2&&j<n2 )
1386             {
1387                 x->ptr.pp_double[i][j] = oldx.ptr.pp_double[i][j];
1388             }
1389             else
1390             {
1391                 x->ptr.pp_double[i][j] = 0.0;
1392             }
1393         }
1394     }
1395     ae_frame_leave(_state);
1396 }
1397 
1398 
1399 /*************************************************************************
1400 Resizes X and:
1401 * preserves old contents of X
1402 * fills new elements by zeros
1403 
1404   -- ALGLIB --
1405      Copyright 20.03.2009 by Bochkanov Sergey
1406 *************************************************************************/
imatrixresize(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1407 void imatrixresize(/* Integer */ ae_matrix* x,
1408      ae_int_t m,
1409      ae_int_t n,
1410      ae_state *_state)
1411 {
1412     ae_frame _frame_block;
1413     ae_matrix oldx;
1414     ae_int_t i;
1415     ae_int_t j;
1416     ae_int_t m2;
1417     ae_int_t n2;
1418 
1419     ae_frame_make(_state, &_frame_block);
1420     memset(&oldx, 0, sizeof(oldx));
1421     ae_matrix_init(&oldx, 0, 0, DT_INT, _state, ae_true);
1422 
1423     m2 = x->rows;
1424     n2 = x->cols;
1425     ae_swap_matrices(x, &oldx);
1426     ae_matrix_set_length(x, m, n, _state);
1427     for(i=0; i<=m-1; i++)
1428     {
1429         for(j=0; j<=n-1; j++)
1430         {
1431             if( i<m2&&j<n2 )
1432             {
1433                 x->ptr.pp_int[i][j] = oldx.ptr.pp_int[i][j];
1434             }
1435             else
1436             {
1437                 x->ptr.pp_int[i][j] = 0;
1438             }
1439         }
1440     }
1441     ae_frame_leave(_state);
1442 }
1443 
1444 
1445 /*************************************************************************
1446 appends element to X
1447 
1448   -- ALGLIB --
1449      Copyright 20.03.2009 by Bochkanov Sergey
1450 *************************************************************************/
ivectorappend(ae_vector * x,ae_int_t v,ae_state * _state)1451 void ivectorappend(/* Integer */ ae_vector* x,
1452      ae_int_t v,
1453      ae_state *_state)
1454 {
1455     ae_frame _frame_block;
1456     ae_vector oldx;
1457     ae_int_t i;
1458     ae_int_t n;
1459 
1460     ae_frame_make(_state, &_frame_block);
1461     memset(&oldx, 0, sizeof(oldx));
1462     ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
1463 
1464     n = x->cnt;
1465     ae_swap_vectors(x, &oldx);
1466     ae_vector_set_length(x, n+1, _state);
1467     for(i=0; i<=n-1; i++)
1468     {
1469         x->ptr.p_int[i] = oldx.ptr.p_int[i];
1470     }
1471     x->ptr.p_int[n] = v;
1472     ae_frame_leave(_state);
1473 }
1474 
1475 
1476 /*************************************************************************
1477 This function checks that length(X) is at least N and first N values  from
1478 X[] are finite
1479 
1480   -- ALGLIB --
1481      Copyright 18.06.2010 by Bochkanov Sergey
1482 *************************************************************************/
isfinitevector(ae_vector * x,ae_int_t n,ae_state * _state)1483 ae_bool isfinitevector(/* Real    */ ae_vector* x,
1484      ae_int_t n,
1485      ae_state *_state)
1486 {
1487     ae_int_t i;
1488     double v;
1489     ae_bool result;
1490 
1491 
1492     ae_assert(n>=0, "APSERVIsFiniteVector: internal error (N<0)", _state);
1493     if( n==0 )
1494     {
1495         result = ae_true;
1496         return result;
1497     }
1498     if( x->cnt<n )
1499     {
1500         result = ae_false;
1501         return result;
1502     }
1503     v = (double)(0);
1504     for(i=0; i<=n-1; i++)
1505     {
1506         v = 0.01*v+x->ptr.p_double[i];
1507     }
1508     result = ae_isfinite(v, _state);
1509     return result;
1510 }
1511 
1512 
1513 /*************************************************************************
1514 This function checks that first N values from X[] are finite
1515 
1516   -- ALGLIB --
1517      Copyright 18.06.2010 by Bochkanov Sergey
1518 *************************************************************************/
isfinitecvector(ae_vector * z,ae_int_t n,ae_state * _state)1519 ae_bool isfinitecvector(/* Complex */ ae_vector* z,
1520      ae_int_t n,
1521      ae_state *_state)
1522 {
1523     ae_int_t i;
1524     ae_bool result;
1525 
1526 
1527     ae_assert(n>=0, "APSERVIsFiniteCVector: internal error (N<0)", _state);
1528     for(i=0; i<=n-1; i++)
1529     {
1530         if( !ae_isfinite(z->ptr.p_complex[i].x, _state)||!ae_isfinite(z->ptr.p_complex[i].y, _state) )
1531         {
1532             result = ae_false;
1533             return result;
1534         }
1535     }
1536     result = ae_true;
1537     return result;
1538 }
1539 
1540 
1541 /*************************************************************************
1542 This function checks that size of X is at least MxN and values from
1543 X[0..M-1,0..N-1] are finite.
1544 
1545   -- ALGLIB --
1546      Copyright 18.06.2010 by Bochkanov Sergey
1547 *************************************************************************/
apservisfinitematrix(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1548 ae_bool apservisfinitematrix(/* Real    */ ae_matrix* x,
1549      ae_int_t m,
1550      ae_int_t n,
1551      ae_state *_state)
1552 {
1553     ae_int_t i;
1554     ae_int_t j;
1555     ae_bool result;
1556 
1557 
1558     ae_assert(n>=0, "APSERVIsFiniteMatrix: internal error (N<0)", _state);
1559     ae_assert(m>=0, "APSERVIsFiniteMatrix: internal error (M<0)", _state);
1560     if( m==0||n==0 )
1561     {
1562         result = ae_true;
1563         return result;
1564     }
1565     if( x->rows<m||x->cols<n )
1566     {
1567         result = ae_false;
1568         return result;
1569     }
1570     for(i=0; i<=m-1; i++)
1571     {
1572         for(j=0; j<=n-1; j++)
1573         {
1574             if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
1575             {
1576                 result = ae_false;
1577                 return result;
1578             }
1579         }
1580     }
1581     result = ae_true;
1582     return result;
1583 }
1584 
1585 
1586 /*************************************************************************
1587 This function checks that all values from X[0..M-1,0..N-1] are finite
1588 
1589   -- ALGLIB --
1590      Copyright 18.06.2010 by Bochkanov Sergey
1591 *************************************************************************/
apservisfinitecmatrix(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1592 ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x,
1593      ae_int_t m,
1594      ae_int_t n,
1595      ae_state *_state)
1596 {
1597     ae_int_t i;
1598     ae_int_t j;
1599     ae_bool result;
1600 
1601 
1602     ae_assert(n>=0, "APSERVIsFiniteCMatrix: internal error (N<0)", _state);
1603     ae_assert(m>=0, "APSERVIsFiniteCMatrix: internal error (M<0)", _state);
1604     for(i=0; i<=m-1; i++)
1605     {
1606         for(j=0; j<=n-1; j++)
1607         {
1608             if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
1609             {
1610                 result = ae_false;
1611                 return result;
1612             }
1613         }
1614     }
1615     result = ae_true;
1616     return result;
1617 }
1618 
1619 
1620 /*************************************************************************
1621 This function checks that size of X is at least NxN and all values from
1622 upper/lower triangle of X[0..N-1,0..N-1] are finite
1623 
1624   -- ALGLIB --
1625      Copyright 18.06.2010 by Bochkanov Sergey
1626 *************************************************************************/
isfinitertrmatrix(ae_matrix * x,ae_int_t n,ae_bool isupper,ae_state * _state)1627 ae_bool isfinitertrmatrix(/* Real    */ ae_matrix* x,
1628      ae_int_t n,
1629      ae_bool isupper,
1630      ae_state *_state)
1631 {
1632     ae_int_t i;
1633     ae_int_t j1;
1634     ae_int_t j2;
1635     ae_int_t j;
1636     ae_bool result;
1637 
1638 
1639     ae_assert(n>=0, "APSERVIsFiniteRTRMatrix: internal error (N<0)", _state);
1640     if( n==0 )
1641     {
1642         result = ae_true;
1643         return result;
1644     }
1645     if( x->rows<n||x->cols<n )
1646     {
1647         result = ae_false;
1648         return result;
1649     }
1650     for(i=0; i<=n-1; i++)
1651     {
1652         if( isupper )
1653         {
1654             j1 = i;
1655             j2 = n-1;
1656         }
1657         else
1658         {
1659             j1 = 0;
1660             j2 = i;
1661         }
1662         for(j=j1; j<=j2; j++)
1663         {
1664             if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
1665             {
1666                 result = ae_false;
1667                 return result;
1668             }
1669         }
1670     }
1671     result = ae_true;
1672     return result;
1673 }
1674 
1675 
1676 /*************************************************************************
1677 This function checks that all values from upper/lower triangle of
1678 X[0..N-1,0..N-1] are finite
1679 
1680   -- ALGLIB --
1681      Copyright 18.06.2010 by Bochkanov Sergey
1682 *************************************************************************/
apservisfinitectrmatrix(ae_matrix * x,ae_int_t n,ae_bool isupper,ae_state * _state)1683 ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x,
1684      ae_int_t n,
1685      ae_bool isupper,
1686      ae_state *_state)
1687 {
1688     ae_int_t i;
1689     ae_int_t j1;
1690     ae_int_t j2;
1691     ae_int_t j;
1692     ae_bool result;
1693 
1694 
1695     ae_assert(n>=0, "APSERVIsFiniteCTRMatrix: internal error (N<0)", _state);
1696     for(i=0; i<=n-1; i++)
1697     {
1698         if( isupper )
1699         {
1700             j1 = i;
1701             j2 = n-1;
1702         }
1703         else
1704         {
1705             j1 = 0;
1706             j2 = i;
1707         }
1708         for(j=j1; j<=j2; j++)
1709         {
1710             if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
1711             {
1712                 result = ae_false;
1713                 return result;
1714             }
1715         }
1716     }
1717     result = ae_true;
1718     return result;
1719 }
1720 
1721 
1722 /*************************************************************************
1723 This function checks that all values from X[0..M-1,0..N-1] are  finite  or
1724 NaN's.
1725 
1726   -- ALGLIB --
1727      Copyright 18.06.2010 by Bochkanov Sergey
1728 *************************************************************************/
apservisfiniteornanmatrix(ae_matrix * x,ae_int_t m,ae_int_t n,ae_state * _state)1729 ae_bool apservisfiniteornanmatrix(/* Real    */ ae_matrix* x,
1730      ae_int_t m,
1731      ae_int_t n,
1732      ae_state *_state)
1733 {
1734     ae_int_t i;
1735     ae_int_t j;
1736     ae_bool result;
1737 
1738 
1739     ae_assert(n>=0, "APSERVIsFiniteOrNaNMatrix: internal error (N<0)", _state);
1740     ae_assert(m>=0, "APSERVIsFiniteOrNaNMatrix: internal error (M<0)", _state);
1741     for(i=0; i<=m-1; i++)
1742     {
1743         for(j=0; j<=n-1; j++)
1744         {
1745             if( !(ae_isfinite(x->ptr.pp_double[i][j], _state)||ae_isnan(x->ptr.pp_double[i][j], _state)) )
1746             {
1747                 result = ae_false;
1748                 return result;
1749             }
1750         }
1751     }
1752     result = ae_true;
1753     return result;
1754 }
1755 
1756 
1757 /*************************************************************************
1758 Safe sqrt(x^2+y^2)
1759 
1760   -- ALGLIB --
1761      Copyright by Bochkanov Sergey
1762 *************************************************************************/
safepythag2(double x,double y,ae_state * _state)1763 double safepythag2(double x, double y, ae_state *_state)
1764 {
1765     double w;
1766     double xabs;
1767     double yabs;
1768     double z;
1769     double result;
1770 
1771 
1772     xabs = ae_fabs(x, _state);
1773     yabs = ae_fabs(y, _state);
1774     w = ae_maxreal(xabs, yabs, _state);
1775     z = ae_minreal(xabs, yabs, _state);
1776     if( ae_fp_eq(z,(double)(0)) )
1777     {
1778         result = w;
1779     }
1780     else
1781     {
1782         result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
1783     }
1784     return result;
1785 }
1786 
1787 
1788 /*************************************************************************
1789 Safe sqrt(x^2+y^2)
1790 
1791   -- ALGLIB --
1792      Copyright by Bochkanov Sergey
1793 *************************************************************************/
safepythag3(double x,double y,double z,ae_state * _state)1794 double safepythag3(double x, double y, double z, ae_state *_state)
1795 {
1796     double w;
1797     double result;
1798 
1799 
1800     w = ae_maxreal(ae_fabs(x, _state), ae_maxreal(ae_fabs(y, _state), ae_fabs(z, _state), _state), _state);
1801     if( ae_fp_eq(w,(double)(0)) )
1802     {
1803         result = (double)(0);
1804         return result;
1805     }
1806     x = x/w;
1807     y = y/w;
1808     z = z/w;
1809     result = w*ae_sqrt(ae_sqr(x, _state)+ae_sqr(y, _state)+ae_sqr(z, _state), _state);
1810     return result;
1811 }
1812 
1813 
1814 /*************************************************************************
1815 Safe division.
1816 
1817 This function attempts to calculate R=X/Y without overflow.
1818 
1819 It returns:
1820 * +1, if abs(X/Y)>=MaxRealNumber or undefined - overflow-like situation
1821       (no overlfow is generated, R is either NAN, PosINF, NegINF)
1822 *  0, if MinRealNumber<abs(X/Y)<MaxRealNumber or X=0, Y<>0
1823       (R contains result, may be zero)
1824 * -1, if 0<abs(X/Y)<MinRealNumber - underflow-like situation
1825       (R contains zero; it corresponds to underflow)
1826 
1827 No overflow is generated in any case.
1828 
1829   -- ALGLIB --
1830      Copyright by Bochkanov Sergey
1831 *************************************************************************/
saferdiv(double x,double y,double * r,ae_state * _state)1832 ae_int_t saferdiv(double x, double y, double* r, ae_state *_state)
1833 {
1834     ae_int_t result;
1835 
1836     *r = 0;
1837 
1838 
1839     /*
1840      * Two special cases:
1841      * * Y=0
1842      * * X=0 and Y<>0
1843      */
1844     if( ae_fp_eq(y,(double)(0)) )
1845     {
1846         result = 1;
1847         if( ae_fp_eq(x,(double)(0)) )
1848         {
1849             *r = _state->v_nan;
1850         }
1851         if( ae_fp_greater(x,(double)(0)) )
1852         {
1853             *r = _state->v_posinf;
1854         }
1855         if( ae_fp_less(x,(double)(0)) )
1856         {
1857             *r = _state->v_neginf;
1858         }
1859         return result;
1860     }
1861     if( ae_fp_eq(x,(double)(0)) )
1862     {
1863         *r = (double)(0);
1864         result = 0;
1865         return result;
1866     }
1867 
1868     /*
1869      * make Y>0
1870      */
1871     if( ae_fp_less(y,(double)(0)) )
1872     {
1873         x = -x;
1874         y = -y;
1875     }
1876 
1877     /*
1878      *
1879      */
1880     if( ae_fp_greater_eq(y,(double)(1)) )
1881     {
1882         *r = x/y;
1883         if( ae_fp_less_eq(ae_fabs(*r, _state),ae_minrealnumber) )
1884         {
1885             result = -1;
1886             *r = (double)(0);
1887         }
1888         else
1889         {
1890             result = 0;
1891         }
1892     }
1893     else
1894     {
1895         if( ae_fp_greater_eq(ae_fabs(x, _state),ae_maxrealnumber*y) )
1896         {
1897             if( ae_fp_greater(x,(double)(0)) )
1898             {
1899                 *r = _state->v_posinf;
1900             }
1901             else
1902             {
1903                 *r = _state->v_neginf;
1904             }
1905             result = 1;
1906         }
1907         else
1908         {
1909             *r = x/y;
1910             result = 0;
1911         }
1912     }
1913     return result;
1914 }
1915 
1916 
1917 /*************************************************************************
1918 This function calculates "safe" min(X/Y,V) for positive finite X, Y, V.
1919 No overflow is generated in any case.
1920 
1921   -- ALGLIB --
1922      Copyright by Bochkanov Sergey
1923 *************************************************************************/
safeminposrv(double x,double y,double v,ae_state * _state)1924 double safeminposrv(double x, double y, double v, ae_state *_state)
1925 {
1926     double r;
1927     double result;
1928 
1929 
1930     if( ae_fp_greater_eq(y,(double)(1)) )
1931     {
1932 
1933         /*
1934          * Y>=1, we can safely divide by Y
1935          */
1936         r = x/y;
1937         result = v;
1938         if( ae_fp_greater(v,r) )
1939         {
1940             result = r;
1941         }
1942         else
1943         {
1944             result = v;
1945         }
1946     }
1947     else
1948     {
1949 
1950         /*
1951          * Y<1, we can safely multiply by Y
1952          */
1953         if( ae_fp_less(x,v*y) )
1954         {
1955             result = x/y;
1956         }
1957         else
1958         {
1959             result = v;
1960         }
1961     }
1962     return result;
1963 }
1964 
1965 
1966 /*************************************************************************
1967 This function makes periodic mapping of X to [A,B].
1968 
1969 It accepts X, A, B (A>B). It returns T which lies in  [A,B] and integer K,
1970 such that X = T + K*(B-A).
1971 
1972 NOTES:
1973 * K is represented as real value, although actually it is integer
1974 * T is guaranteed to be in [A,B]
1975 * T replaces X
1976 
1977   -- ALGLIB --
1978      Copyright by Bochkanov Sergey
1979 *************************************************************************/
apperiodicmap(double * x,double a,double b,double * k,ae_state * _state)1980 void apperiodicmap(double* x,
1981      double a,
1982      double b,
1983      double* k,
1984      ae_state *_state)
1985 {
1986 
1987     *k = 0;
1988 
1989     ae_assert(ae_fp_less(a,b), "APPeriodicMap: internal error!", _state);
1990     *k = (double)(ae_ifloor((*x-a)/(b-a), _state));
1991     *x = *x-*k*(b-a);
1992     while(ae_fp_less(*x,a))
1993     {
1994         *x = *x+(b-a);
1995         *k = *k-1;
1996     }
1997     while(ae_fp_greater(*x,b))
1998     {
1999         *x = *x-(b-a);
2000         *k = *k+1;
2001     }
2002     *x = ae_maxreal(*x, a, _state);
2003     *x = ae_minreal(*x, b, _state);
2004 }
2005 
2006 
2007 /*************************************************************************
2008 Returns random normal number using low-quality system-provided generator
2009 
2010   -- ALGLIB --
2011      Copyright 20.03.2009 by Bochkanov Sergey
2012 *************************************************************************/
randomnormal(ae_state * _state)2013 double randomnormal(ae_state *_state)
2014 {
2015     double u;
2016     double v;
2017     double s;
2018     double result;
2019 
2020 
2021     for(;;)
2022     {
2023         u = 2*ae_randomreal(_state)-1;
2024         v = 2*ae_randomreal(_state)-1;
2025         s = ae_sqr(u, _state)+ae_sqr(v, _state);
2026         if( ae_fp_greater(s,(double)(0))&&ae_fp_less(s,(double)(1)) )
2027         {
2028 
2029             /*
2030              * two Sqrt's instead of one to
2031              * avoid overflow when S is too small
2032              */
2033             s = ae_sqrt(-2*ae_log(s, _state), _state)/ae_sqrt(s, _state);
2034             result = u*s;
2035             break;
2036         }
2037     }
2038     return result;
2039 }
2040 
2041 
2042 /*************************************************************************
2043 Generates random unit vector using low-quality system-provided generator.
2044 Reallocates array if its size is too short.
2045 
2046   -- ALGLIB --
2047      Copyright 20.03.2009 by Bochkanov Sergey
2048 *************************************************************************/
randomunit(ae_int_t n,ae_vector * x,ae_state * _state)2049 void randomunit(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
2050 {
2051     ae_int_t i;
2052     double v;
2053     double vv;
2054 
2055 
2056     ae_assert(n>0, "RandomUnit: N<=0", _state);
2057     if( x->cnt<n )
2058     {
2059         ae_vector_set_length(x, n, _state);
2060     }
2061     do
2062     {
2063         v = 0.0;
2064         for(i=0; i<=n-1; i++)
2065         {
2066             vv = randomnormal(_state);
2067             x->ptr.p_double[i] = vv;
2068             v = v+vv*vv;
2069         }
2070     }
2071     while(ae_fp_less_eq(v,(double)(0)));
2072     v = 1/ae_sqrt(v, _state);
2073     for(i=0; i<=n-1; i++)
2074     {
2075         x->ptr.p_double[i] = x->ptr.p_double[i]*v;
2076     }
2077 }
2078 
2079 
2080 /*************************************************************************
2081 This function is used to swap two integer values
2082 *************************************************************************/
swapi(ae_int_t * v0,ae_int_t * v1,ae_state * _state)2083 void swapi(ae_int_t* v0, ae_int_t* v1, ae_state *_state)
2084 {
2085     ae_int_t v;
2086 
2087 
2088     v = *v0;
2089     *v0 = *v1;
2090     *v1 = v;
2091 }
2092 
2093 
2094 /*************************************************************************
2095 This function is used to swap two real values
2096 *************************************************************************/
swapr(double * v0,double * v1,ae_state * _state)2097 void swapr(double* v0, double* v1, ae_state *_state)
2098 {
2099     double v;
2100 
2101 
2102     v = *v0;
2103     *v0 = *v1;
2104     *v1 = v;
2105 }
2106 
2107 
2108 /*************************************************************************
2109 This function is used to swap two rows of the matrix; if NCols<0, automatically
2110 determined from the matrix size.
2111 *************************************************************************/
swaprows(ae_matrix * a,ae_int_t i0,ae_int_t i1,ae_int_t ncols,ae_state * _state)2112 void swaprows(/* Real    */ ae_matrix* a,
2113      ae_int_t i0,
2114      ae_int_t i1,
2115      ae_int_t ncols,
2116      ae_state *_state)
2117 {
2118     ae_int_t j;
2119     double v;
2120 
2121 
2122     if( i0==i1 )
2123     {
2124         return;
2125     }
2126     if( ncols<0 )
2127     {
2128         ncols = a->cols;
2129     }
2130     for(j=0; j<=ncols-1; j++)
2131     {
2132         v = a->ptr.pp_double[i0][j];
2133         a->ptr.pp_double[i0][j] = a->ptr.pp_double[i1][j];
2134         a->ptr.pp_double[i1][j] = v;
2135     }
2136 }
2137 
2138 
2139 /*************************************************************************
2140 This function is used to swap two cols of the matrix; if NRows<0, automatically
2141 determined from the matrix size.
2142 *************************************************************************/
swapcols(ae_matrix * a,ae_int_t j0,ae_int_t j1,ae_int_t nrows,ae_state * _state)2143 void swapcols(/* Real    */ ae_matrix* a,
2144      ae_int_t j0,
2145      ae_int_t j1,
2146      ae_int_t nrows,
2147      ae_state *_state)
2148 {
2149     ae_int_t i;
2150     double v;
2151 
2152 
2153     if( j0==j1 )
2154     {
2155         return;
2156     }
2157     if( nrows<0 )
2158     {
2159         nrows = a->rows;
2160     }
2161     for(i=0; i<=nrows-1; i++)
2162     {
2163         v = a->ptr.pp_double[i][j0];
2164         a->ptr.pp_double[i][j0] = a->ptr.pp_double[i][j1];
2165         a->ptr.pp_double[i][j1] = v;
2166     }
2167 }
2168 
2169 
2170 /*************************************************************************
2171 This function is used to swap two "entries" in 1-dimensional array composed
2172 from D-element entries
2173 *************************************************************************/
swapentries(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_int_t entrywidth,ae_state * _state)2174 void swapentries(/* Real    */ ae_vector* a,
2175      ae_int_t i0,
2176      ae_int_t i1,
2177      ae_int_t entrywidth,
2178      ae_state *_state)
2179 {
2180     ae_int_t offs0;
2181     ae_int_t offs1;
2182     ae_int_t j;
2183     double v;
2184 
2185 
2186     if( i0==i1 )
2187     {
2188         return;
2189     }
2190     offs0 = i0*entrywidth;
2191     offs1 = i1*entrywidth;
2192     for(j=0; j<=entrywidth-1; j++)
2193     {
2194         v = a->ptr.p_double[offs0+j];
2195         a->ptr.p_double[offs0+j] = a->ptr.p_double[offs1+j];
2196         a->ptr.p_double[offs1+j] = v;
2197     }
2198 }
2199 
2200 
2201 /*************************************************************************
2202 This function is used to swap two elements of the vector
2203 *************************************************************************/
swapelements(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)2204 void swapelements(/* Real    */ ae_vector* a,
2205      ae_int_t i0,
2206      ae_int_t i1,
2207      ae_state *_state)
2208 {
2209     double v;
2210 
2211 
2212     if( i0==i1 )
2213     {
2214         return;
2215     }
2216     v = a->ptr.p_double[i0];
2217     a->ptr.p_double[i0] = a->ptr.p_double[i1];
2218     a->ptr.p_double[i1] = v;
2219 }
2220 
2221 
2222 /*************************************************************************
2223 This function is used to swap two elements of the vector
2224 *************************************************************************/
swapelementsi(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)2225 void swapelementsi(/* Integer */ ae_vector* a,
2226      ae_int_t i0,
2227      ae_int_t i1,
2228      ae_state *_state)
2229 {
2230     ae_int_t v;
2231 
2232 
2233     if( i0==i1 )
2234     {
2235         return;
2236     }
2237     v = a->ptr.p_int[i0];
2238     a->ptr.p_int[i0] = a->ptr.p_int[i1];
2239     a->ptr.p_int[i1] = v;
2240 }
2241 
2242 
2243 /*************************************************************************
2244 This function is used to return maximum of three real values
2245 *************************************************************************/
maxreal3(double v0,double v1,double v2,ae_state * _state)2246 double maxreal3(double v0, double v1, double v2, ae_state *_state)
2247 {
2248     double result;
2249 
2250 
2251     result = v0;
2252     if( ae_fp_less(result,v1) )
2253     {
2254         result = v1;
2255     }
2256     if( ae_fp_less(result,v2) )
2257     {
2258         result = v2;
2259     }
2260     return result;
2261 }
2262 
2263 
2264 /*************************************************************************
2265 This function is used to increment value of integer variable
2266 *************************************************************************/
inc(ae_int_t * v,ae_state * _state)2267 void inc(ae_int_t* v, ae_state *_state)
2268 {
2269 
2270 
2271     *v = *v+1;
2272 }
2273 
2274 
2275 /*************************************************************************
2276 This function is used to decrement value of integer variable
2277 *************************************************************************/
dec(ae_int_t * v,ae_state * _state)2278 void dec(ae_int_t* v, ae_state *_state)
2279 {
2280 
2281 
2282     *v = *v-1;
2283 }
2284 
2285 
2286 /*************************************************************************
2287 This function is used to increment value of integer variable; name of  the
2288 function suggests that increment is done in multithreaded setting  in  the
2289 thread-unsafe manner (optional progress reports which do not need guaranteed
2290 correctness)
2291 *************************************************************************/
threadunsafeinc(ae_int_t * v,ae_state * _state)2292 void threadunsafeinc(ae_int_t* v, ae_state *_state)
2293 {
2294 
2295 
2296     *v = *v+1;
2297 }
2298 
2299 
2300 /*************************************************************************
2301 This function is used to increment value of integer variable; name of  the
2302 function suggests that increment is done in multithreaded setting  in  the
2303 thread-unsafe manner (optional progress reports which do not need guaranteed
2304 correctness)
2305 *************************************************************************/
threadunsafeincby(ae_int_t * v,ae_int_t k,ae_state * _state)2306 void threadunsafeincby(ae_int_t* v, ae_int_t k, ae_state *_state)
2307 {
2308 
2309 
2310     *v = *v+k;
2311 }
2312 
2313 
2314 /*************************************************************************
2315 This function performs two operations:
2316 1. decrements value of integer variable, if it is positive
2317 2. explicitly sets variable to zero if it is non-positive
2318 It is used by some algorithms to decrease value of internal counters.
2319 *************************************************************************/
countdown(ae_int_t * v,ae_state * _state)2320 void countdown(ae_int_t* v, ae_state *_state)
2321 {
2322 
2323 
2324     if( *v>0 )
2325     {
2326         *v = *v-1;
2327     }
2328     else
2329     {
2330         *v = 0;
2331     }
2332 }
2333 
2334 
2335 /*************************************************************************
2336 This function returns +1 or -1 depending on sign of X.
2337 x=0 results in +1 being returned.
2338 *************************************************************************/
possign(double x,ae_state * _state)2339 double possign(double x, ae_state *_state)
2340 {
2341     double result;
2342 
2343 
2344     if( ae_fp_greater_eq(x,(double)(0)) )
2345     {
2346         result = (double)(1);
2347     }
2348     else
2349     {
2350         result = (double)(-1);
2351     }
2352     return result;
2353 }
2354 
2355 
2356 /*************************************************************************
2357 This function returns product of two real numbers. It is convenient when
2358 you have to perform typecast-and-product of two INTEGERS.
2359 *************************************************************************/
rmul2(double v0,double v1,ae_state * _state)2360 double rmul2(double v0, double v1, ae_state *_state)
2361 {
2362     double result;
2363 
2364 
2365     result = v0*v1;
2366     return result;
2367 }
2368 
2369 
2370 /*************************************************************************
2371 This function returns product of three real numbers. It is convenient when
2372 you have to perform typecast-and-product of two INTEGERS.
2373 *************************************************************************/
rmul3(double v0,double v1,double v2,ae_state * _state)2374 double rmul3(double v0, double v1, double v2, ae_state *_state)
2375 {
2376     double result;
2377 
2378 
2379     result = v0*v1*v2;
2380     return result;
2381 }
2382 
2383 
2384 /*************************************************************************
2385 This function returns (A div B) rounded up; it expects that A>0, B>0, but
2386 does not check it.
2387 *************************************************************************/
idivup(ae_int_t a,ae_int_t b,ae_state * _state)2388 ae_int_t idivup(ae_int_t a, ae_int_t b, ae_state *_state)
2389 {
2390     ae_int_t result;
2391 
2392 
2393     result = a/b;
2394     if( a%b>0 )
2395     {
2396         result = result+1;
2397     }
2398     return result;
2399 }
2400 
2401 
2402 /*************************************************************************
2403 This function returns min(i0,i1)
2404 *************************************************************************/
imin2(ae_int_t i0,ae_int_t i1,ae_state * _state)2405 ae_int_t imin2(ae_int_t i0, ae_int_t i1, ae_state *_state)
2406 {
2407     ae_int_t result;
2408 
2409 
2410     result = i0;
2411     if( i1<result )
2412     {
2413         result = i1;
2414     }
2415     return result;
2416 }
2417 
2418 
2419 /*************************************************************************
2420 This function returns min(i0,i1,i2)
2421 *************************************************************************/
imin3(ae_int_t i0,ae_int_t i1,ae_int_t i2,ae_state * _state)2422 ae_int_t imin3(ae_int_t i0, ae_int_t i1, ae_int_t i2, ae_state *_state)
2423 {
2424     ae_int_t result;
2425 
2426 
2427     result = i0;
2428     if( i1<result )
2429     {
2430         result = i1;
2431     }
2432     if( i2<result )
2433     {
2434         result = i2;
2435     }
2436     return result;
2437 }
2438 
2439 
2440 /*************************************************************************
2441 This function returns max(i0,i1)
2442 *************************************************************************/
imax2(ae_int_t i0,ae_int_t i1,ae_state * _state)2443 ae_int_t imax2(ae_int_t i0, ae_int_t i1, ae_state *_state)
2444 {
2445     ae_int_t result;
2446 
2447 
2448     result = i0;
2449     if( i1>result )
2450     {
2451         result = i1;
2452     }
2453     return result;
2454 }
2455 
2456 
2457 /*************************************************************************
2458 This function returns max(i0,i1,i2)
2459 *************************************************************************/
imax3(ae_int_t i0,ae_int_t i1,ae_int_t i2,ae_state * _state)2460 ae_int_t imax3(ae_int_t i0, ae_int_t i1, ae_int_t i2, ae_state *_state)
2461 {
2462     ae_int_t result;
2463 
2464 
2465     result = i0;
2466     if( i1>result )
2467     {
2468         result = i1;
2469     }
2470     if( i2>result )
2471     {
2472         result = i2;
2473     }
2474     return result;
2475 }
2476 
2477 
2478 /*************************************************************************
2479 This function returns max(r0,r1,r2)
2480 *************************************************************************/
rmax3(double r0,double r1,double r2,ae_state * _state)2481 double rmax3(double r0, double r1, double r2, ae_state *_state)
2482 {
2483     double result;
2484 
2485 
2486     result = r0;
2487     if( ae_fp_greater(r1,result) )
2488     {
2489         result = r1;
2490     }
2491     if( ae_fp_greater(r2,result) )
2492     {
2493         result = r2;
2494     }
2495     return result;
2496 }
2497 
2498 
2499 /*************************************************************************
2500 This function returns max(|r0|,|r1|,|r2|)
2501 *************************************************************************/
rmaxabs3(double r0,double r1,double r2,ae_state * _state)2502 double rmaxabs3(double r0, double r1, double r2, ae_state *_state)
2503 {
2504     double result;
2505 
2506 
2507     r0 = ae_fabs(r0, _state);
2508     r1 = ae_fabs(r1, _state);
2509     r2 = ae_fabs(r2, _state);
2510     result = r0;
2511     if( ae_fp_greater(r1,result) )
2512     {
2513         result = r1;
2514     }
2515     if( ae_fp_greater(r2,result) )
2516     {
2517         result = r2;
2518     }
2519     return result;
2520 }
2521 
2522 
2523 /*************************************************************************
2524 'bounds' value: maps X to [B1,B2]
2525 
2526   -- ALGLIB --
2527      Copyright 20.03.2009 by Bochkanov Sergey
2528 *************************************************************************/
boundval(double x,double b1,double b2,ae_state * _state)2529 double boundval(double x, double b1, double b2, ae_state *_state)
2530 {
2531     double result;
2532 
2533 
2534     if( ae_fp_less_eq(x,b1) )
2535     {
2536         result = b1;
2537         return result;
2538     }
2539     if( ae_fp_greater_eq(x,b2) )
2540     {
2541         result = b2;
2542         return result;
2543     }
2544     result = x;
2545     return result;
2546 }
2547 
2548 
2549 /*************************************************************************
2550 'bounds' value: maps X to [B1,B2]
2551 
2552   -- ALGLIB --
2553      Copyright 20.03.2009 by Bochkanov Sergey
2554 *************************************************************************/
iboundval(ae_int_t x,ae_int_t b1,ae_int_t b2,ae_state * _state)2555 ae_int_t iboundval(ae_int_t x, ae_int_t b1, ae_int_t b2, ae_state *_state)
2556 {
2557     ae_int_t result;
2558 
2559 
2560     if( x<=b1 )
2561     {
2562         result = b1;
2563         return result;
2564     }
2565     if( x>=b2 )
2566     {
2567         result = b2;
2568         return result;
2569     }
2570     result = x;
2571     return result;
2572 }
2573 
2574 
2575 /*************************************************************************
2576 'bounds' value: maps X to [B1,B2]
2577 
2578   -- ALGLIB --
2579      Copyright 20.03.2009 by Bochkanov Sergey
2580 *************************************************************************/
rboundval(double x,double b1,double b2,ae_state * _state)2581 double rboundval(double x, double b1, double b2, ae_state *_state)
2582 {
2583     double result;
2584 
2585 
2586     if( ae_fp_less_eq(x,b1) )
2587     {
2588         result = b1;
2589         return result;
2590     }
2591     if( ae_fp_greater_eq(x,b2) )
2592     {
2593         result = b2;
2594         return result;
2595     }
2596     result = x;
2597     return result;
2598 }
2599 
2600 
2601 /*************************************************************************
2602 Returns number of non-zeros
2603 *************************************************************************/
countnz1(ae_vector * v,ae_int_t n,ae_state * _state)2604 ae_int_t countnz1(/* Real    */ ae_vector* v,
2605      ae_int_t n,
2606      ae_state *_state)
2607 {
2608     ae_int_t i;
2609     ae_int_t result;
2610 
2611 
2612     result = 0;
2613     for(i=0; i<=n-1; i++)
2614     {
2615         if( !(v->ptr.p_double[i]==0) )
2616         {
2617             result = result+1;
2618         }
2619     }
2620     return result;
2621 }
2622 
2623 
2624 /*************************************************************************
2625 Returns number of non-zeros
2626 *************************************************************************/
countnz2(ae_matrix * v,ae_int_t m,ae_int_t n,ae_state * _state)2627 ae_int_t countnz2(/* Real    */ ae_matrix* v,
2628      ae_int_t m,
2629      ae_int_t n,
2630      ae_state *_state)
2631 {
2632     ae_int_t i;
2633     ae_int_t j;
2634     ae_int_t result;
2635 
2636 
2637     result = 0;
2638     for(i=0; i<=m-1; i++)
2639     {
2640         for(j=0; j<=n-1; j++)
2641         {
2642             if( !(v->ptr.pp_double[i][j]==0) )
2643             {
2644                 result = result+1;
2645             }
2646         }
2647     }
2648     return result;
2649 }
2650 
2651 
2652 /*************************************************************************
2653 Allocation of serializer: complex value
2654 *************************************************************************/
alloccomplex(ae_serializer * s,ae_complex v,ae_state * _state)2655 void alloccomplex(ae_serializer* s, ae_complex v, ae_state *_state)
2656 {
2657 
2658 
2659     ae_serializer_alloc_entry(s);
2660     ae_serializer_alloc_entry(s);
2661 }
2662 
2663 
2664 /*************************************************************************
2665 Serialization: complex value
2666 *************************************************************************/
serializecomplex(ae_serializer * s,ae_complex v,ae_state * _state)2667 void serializecomplex(ae_serializer* s, ae_complex v, ae_state *_state)
2668 {
2669 
2670 
2671     ae_serializer_serialize_double(s, v.x, _state);
2672     ae_serializer_serialize_double(s, v.y, _state);
2673 }
2674 
2675 
2676 /*************************************************************************
2677 Unserialization: complex value
2678 *************************************************************************/
unserializecomplex(ae_serializer * s,ae_state * _state)2679 ae_complex unserializecomplex(ae_serializer* s, ae_state *_state)
2680 {
2681     ae_complex result;
2682 
2683 
2684     ae_serializer_unserialize_double(s, &result.x, _state);
2685     ae_serializer_unserialize_double(s, &result.y, _state);
2686     return result;
2687 }
2688 
2689 
2690 /*************************************************************************
2691 Allocation of serializer: real array
2692 *************************************************************************/
allocrealarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2693 void allocrealarray(ae_serializer* s,
2694      /* Real    */ ae_vector* v,
2695      ae_int_t n,
2696      ae_state *_state)
2697 {
2698     ae_int_t i;
2699 
2700 
2701     if( n<0 )
2702     {
2703         n = v->cnt;
2704     }
2705     ae_serializer_alloc_entry(s);
2706     for(i=0; i<=n-1; i++)
2707     {
2708         ae_serializer_alloc_entry(s);
2709     }
2710 }
2711 
2712 
2713 /*************************************************************************
2714 Serialization: complex value
2715 *************************************************************************/
serializerealarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2716 void serializerealarray(ae_serializer* s,
2717      /* Real    */ ae_vector* v,
2718      ae_int_t n,
2719      ae_state *_state)
2720 {
2721     ae_int_t i;
2722 
2723 
2724     if( n<0 )
2725     {
2726         n = v->cnt;
2727     }
2728     ae_serializer_serialize_int(s, n, _state);
2729     for(i=0; i<=n-1; i++)
2730     {
2731         ae_serializer_serialize_double(s, v->ptr.p_double[i], _state);
2732     }
2733 }
2734 
2735 
2736 /*************************************************************************
2737 Unserialization: complex value
2738 *************************************************************************/
unserializerealarray(ae_serializer * s,ae_vector * v,ae_state * _state)2739 void unserializerealarray(ae_serializer* s,
2740      /* Real    */ ae_vector* v,
2741      ae_state *_state)
2742 {
2743     ae_int_t n;
2744     ae_int_t i;
2745     double t;
2746 
2747     ae_vector_clear(v);
2748 
2749     ae_serializer_unserialize_int(s, &n, _state);
2750     if( n==0 )
2751     {
2752         return;
2753     }
2754     ae_vector_set_length(v, n, _state);
2755     for(i=0; i<=n-1; i++)
2756     {
2757         ae_serializer_unserialize_double(s, &t, _state);
2758         v->ptr.p_double[i] = t;
2759     }
2760 }
2761 
2762 
2763 /*************************************************************************
2764 Allocation of serializer: Integer array
2765 *************************************************************************/
allocintegerarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2766 void allocintegerarray(ae_serializer* s,
2767      /* Integer */ ae_vector* v,
2768      ae_int_t n,
2769      ae_state *_state)
2770 {
2771     ae_int_t i;
2772 
2773 
2774     if( n<0 )
2775     {
2776         n = v->cnt;
2777     }
2778     ae_serializer_alloc_entry(s);
2779     for(i=0; i<=n-1; i++)
2780     {
2781         ae_serializer_alloc_entry(s);
2782     }
2783 }
2784 
2785 
2786 /*************************************************************************
2787 Serialization: Integer array
2788 *************************************************************************/
serializeintegerarray(ae_serializer * s,ae_vector * v,ae_int_t n,ae_state * _state)2789 void serializeintegerarray(ae_serializer* s,
2790      /* Integer */ ae_vector* v,
2791      ae_int_t n,
2792      ae_state *_state)
2793 {
2794     ae_int_t i;
2795 
2796 
2797     if( n<0 )
2798     {
2799         n = v->cnt;
2800     }
2801     ae_serializer_serialize_int(s, n, _state);
2802     for(i=0; i<=n-1; i++)
2803     {
2804         ae_serializer_serialize_int(s, v->ptr.p_int[i], _state);
2805     }
2806 }
2807 
2808 
2809 /*************************************************************************
2810 Unserialization: complex value
2811 *************************************************************************/
unserializeintegerarray(ae_serializer * s,ae_vector * v,ae_state * _state)2812 void unserializeintegerarray(ae_serializer* s,
2813      /* Integer */ ae_vector* v,
2814      ae_state *_state)
2815 {
2816     ae_int_t n;
2817     ae_int_t i;
2818     ae_int_t t;
2819 
2820     ae_vector_clear(v);
2821 
2822     ae_serializer_unserialize_int(s, &n, _state);
2823     if( n==0 )
2824     {
2825         return;
2826     }
2827     ae_vector_set_length(v, n, _state);
2828     for(i=0; i<=n-1; i++)
2829     {
2830         ae_serializer_unserialize_int(s, &t, _state);
2831         v->ptr.p_int[i] = t;
2832     }
2833 }
2834 
2835 
2836 /*************************************************************************
2837 Allocation of serializer: real matrix
2838 *************************************************************************/
allocrealmatrix(ae_serializer * s,ae_matrix * v,ae_int_t n0,ae_int_t n1,ae_state * _state)2839 void allocrealmatrix(ae_serializer* s,
2840      /* Real    */ ae_matrix* v,
2841      ae_int_t n0,
2842      ae_int_t n1,
2843      ae_state *_state)
2844 {
2845     ae_int_t i;
2846     ae_int_t j;
2847 
2848 
2849     if( n0<0 )
2850     {
2851         n0 = v->rows;
2852     }
2853     if( n1<0 )
2854     {
2855         n1 = v->cols;
2856     }
2857     ae_serializer_alloc_entry(s);
2858     ae_serializer_alloc_entry(s);
2859     for(i=0; i<=n0-1; i++)
2860     {
2861         for(j=0; j<=n1-1; j++)
2862         {
2863             ae_serializer_alloc_entry(s);
2864         }
2865     }
2866 }
2867 
2868 
2869 /*************************************************************************
2870 Serialization: complex value
2871 *************************************************************************/
serializerealmatrix(ae_serializer * s,ae_matrix * v,ae_int_t n0,ae_int_t n1,ae_state * _state)2872 void serializerealmatrix(ae_serializer* s,
2873      /* Real    */ ae_matrix* v,
2874      ae_int_t n0,
2875      ae_int_t n1,
2876      ae_state *_state)
2877 {
2878     ae_int_t i;
2879     ae_int_t j;
2880 
2881 
2882     if( n0<0 )
2883     {
2884         n0 = v->rows;
2885     }
2886     if( n1<0 )
2887     {
2888         n1 = v->cols;
2889     }
2890     ae_serializer_serialize_int(s, n0, _state);
2891     ae_serializer_serialize_int(s, n1, _state);
2892     for(i=0; i<=n0-1; i++)
2893     {
2894         for(j=0; j<=n1-1; j++)
2895         {
2896             ae_serializer_serialize_double(s, v->ptr.pp_double[i][j], _state);
2897         }
2898     }
2899 }
2900 
2901 
2902 /*************************************************************************
2903 Unserialization: complex value
2904 *************************************************************************/
unserializerealmatrix(ae_serializer * s,ae_matrix * v,ae_state * _state)2905 void unserializerealmatrix(ae_serializer* s,
2906      /* Real    */ ae_matrix* v,
2907      ae_state *_state)
2908 {
2909     ae_int_t i;
2910     ae_int_t j;
2911     ae_int_t n0;
2912     ae_int_t n1;
2913     double t;
2914 
2915     ae_matrix_clear(v);
2916 
2917     ae_serializer_unserialize_int(s, &n0, _state);
2918     ae_serializer_unserialize_int(s, &n1, _state);
2919     if( n0==0||n1==0 )
2920     {
2921         return;
2922     }
2923     ae_matrix_set_length(v, n0, n1, _state);
2924     for(i=0; i<=n0-1; i++)
2925     {
2926         for(j=0; j<=n1-1; j++)
2927         {
2928             ae_serializer_unserialize_double(s, &t, _state);
2929             v->ptr.pp_double[i][j] = t;
2930         }
2931     }
2932 }
2933 
2934 
2935 /*************************************************************************
2936 Copy boolean array
2937 *************************************************************************/
copybooleanarray(ae_vector * src,ae_vector * dst,ae_state * _state)2938 void copybooleanarray(/* Boolean */ ae_vector* src,
2939      /* Boolean */ ae_vector* dst,
2940      ae_state *_state)
2941 {
2942     ae_int_t i;
2943 
2944     ae_vector_clear(dst);
2945 
2946     if( src->cnt>0 )
2947     {
2948         ae_vector_set_length(dst, src->cnt, _state);
2949         for(i=0; i<=src->cnt-1; i++)
2950         {
2951             dst->ptr.p_bool[i] = src->ptr.p_bool[i];
2952         }
2953     }
2954 }
2955 
2956 
2957 /*************************************************************************
2958 Copy integer array
2959 *************************************************************************/
copyintegerarray(ae_vector * src,ae_vector * dst,ae_state * _state)2960 void copyintegerarray(/* Integer */ ae_vector* src,
2961      /* Integer */ ae_vector* dst,
2962      ae_state *_state)
2963 {
2964     ae_int_t i;
2965 
2966     ae_vector_clear(dst);
2967 
2968     if( src->cnt>0 )
2969     {
2970         ae_vector_set_length(dst, src->cnt, _state);
2971         for(i=0; i<=src->cnt-1; i++)
2972         {
2973             dst->ptr.p_int[i] = src->ptr.p_int[i];
2974         }
2975     }
2976 }
2977 
2978 
2979 /*************************************************************************
2980 Copy real array
2981 *************************************************************************/
copyrealarray(ae_vector * src,ae_vector * dst,ae_state * _state)2982 void copyrealarray(/* Real    */ ae_vector* src,
2983      /* Real    */ ae_vector* dst,
2984      ae_state *_state)
2985 {
2986     ae_int_t i;
2987 
2988     ae_vector_clear(dst);
2989 
2990     if( src->cnt>0 )
2991     {
2992         ae_vector_set_length(dst, src->cnt, _state);
2993         for(i=0; i<=src->cnt-1; i++)
2994         {
2995             dst->ptr.p_double[i] = src->ptr.p_double[i];
2996         }
2997     }
2998 }
2999 
3000 
3001 /*************************************************************************
3002 Copy real matrix
3003 *************************************************************************/
copyrealmatrix(ae_matrix * src,ae_matrix * dst,ae_state * _state)3004 void copyrealmatrix(/* Real    */ ae_matrix* src,
3005      /* Real    */ ae_matrix* dst,
3006      ae_state *_state)
3007 {
3008     ae_int_t i;
3009     ae_int_t j;
3010 
3011     ae_matrix_clear(dst);
3012 
3013     if( src->rows>0&&src->cols>0 )
3014     {
3015         ae_matrix_set_length(dst, src->rows, src->cols, _state);
3016         for(i=0; i<=src->rows-1; i++)
3017         {
3018             for(j=0; j<=src->cols-1; j++)
3019             {
3020                 dst->ptr.pp_double[i][j] = src->ptr.pp_double[i][j];
3021             }
3022         }
3023     }
3024 }
3025 
3026 
3027 /*************************************************************************
3028 Clears integer array
3029 *************************************************************************/
unsetintegerarray(ae_vector * a,ae_state * _state)3030 void unsetintegerarray(/* Integer */ ae_vector* a, ae_state *_state)
3031 {
3032 
3033     ae_vector_clear(a);
3034 
3035 }
3036 
3037 
3038 /*************************************************************************
3039 Clears real array
3040 *************************************************************************/
unsetrealarray(ae_vector * a,ae_state * _state)3041 void unsetrealarray(/* Real    */ ae_vector* a, ae_state *_state)
3042 {
3043 
3044     ae_vector_clear(a);
3045 
3046 }
3047 
3048 
3049 /*************************************************************************
3050 Clears real matrix
3051 *************************************************************************/
unsetrealmatrix(ae_matrix * a,ae_state * _state)3052 void unsetrealmatrix(/* Real    */ ae_matrix* a, ae_state *_state)
3053 {
3054 
3055     ae_matrix_clear(a);
3056 
3057 }
3058 
3059 
3060 /*************************************************************************
3061 This function is used in parallel functions for recurrent division of large
3062 task into two smaller tasks.
3063 
3064 It has following properties:
3065 * it works only for TaskSize>=2 and TaskSize>TileSize (assertion is thrown otherwise)
3066 * Task0+Task1=TaskSize, Task0>0, Task1>0
3067 * Task0 and Task1 are close to each other
3068 * Task0>=Task1
3069 * Task0 is always divisible by TileSize
3070 
3071   -- ALGLIB --
3072      Copyright 07.04.2013 by Bochkanov Sergey
3073 *************************************************************************/
tiledsplit(ae_int_t tasksize,ae_int_t tilesize,ae_int_t * task0,ae_int_t * task1,ae_state * _state)3074 void tiledsplit(ae_int_t tasksize,
3075      ae_int_t tilesize,
3076      ae_int_t* task0,
3077      ae_int_t* task1,
3078      ae_state *_state)
3079 {
3080     ae_int_t cc;
3081 
3082     *task0 = 0;
3083     *task1 = 0;
3084 
3085     ae_assert(tasksize>=2, "TiledSplit: TaskSize<2", _state);
3086     ae_assert(tasksize>tilesize, "TiledSplit: TaskSize<=TileSize", _state);
3087     cc = chunkscount(tasksize, tilesize, _state);
3088     ae_assert(cc>=2, "TiledSplit: integrity check failed", _state);
3089     *task0 = idivup(cc, 2, _state)*tilesize;
3090     *task1 = tasksize-(*task0);
3091     ae_assert(*task0>=1, "TiledSplit: internal error", _state);
3092     ae_assert(*task1>=1, "TiledSplit: internal error", _state);
3093     ae_assert(*task0%tilesize==0, "TiledSplit: internal error", _state);
3094     ae_assert(*task0>=(*task1), "TiledSplit: internal error", _state);
3095 }
3096 
3097 
3098 /*************************************************************************
3099 This function searches integer array. Elements in this array are actually
3100 records, each NRec elements wide. Each record has unique header - NHeader
3101 integer values, which identify it. Records are lexicographically sorted by
3102 header.
3103 
3104 Records are identified by their index, not offset (offset = NRec*index).
3105 
3106 This function searches A (records with indices [I0,I1)) for a record with
3107 header B. It returns index of this record (not offset!), or -1 on failure.
3108 
3109   -- ALGLIB --
3110      Copyright 28.03.2011 by Bochkanov Sergey
3111 *************************************************************************/
recsearch(ae_vector * a,ae_int_t nrec,ae_int_t nheader,ae_int_t i0,ae_int_t i1,ae_vector * b,ae_state * _state)3112 ae_int_t recsearch(/* Integer */ ae_vector* a,
3113      ae_int_t nrec,
3114      ae_int_t nheader,
3115      ae_int_t i0,
3116      ae_int_t i1,
3117      /* Integer */ ae_vector* b,
3118      ae_state *_state)
3119 {
3120     ae_int_t mididx;
3121     ae_int_t cflag;
3122     ae_int_t k;
3123     ae_int_t offs;
3124     ae_int_t result;
3125 
3126 
3127     result = -1;
3128     for(;;)
3129     {
3130         if( i0>=i1 )
3131         {
3132             break;
3133         }
3134         mididx = (i0+i1)/2;
3135         offs = nrec*mididx;
3136         cflag = 0;
3137         for(k=0; k<=nheader-1; k++)
3138         {
3139             if( a->ptr.p_int[offs+k]<b->ptr.p_int[k] )
3140             {
3141                 cflag = -1;
3142                 break;
3143             }
3144             if( a->ptr.p_int[offs+k]>b->ptr.p_int[k] )
3145             {
3146                 cflag = 1;
3147                 break;
3148             }
3149         }
3150         if( cflag==0 )
3151         {
3152             result = mididx;
3153             return result;
3154         }
3155         if( cflag<0 )
3156         {
3157             i0 = mididx+1;
3158         }
3159         else
3160         {
3161             i1 = mididx;
3162         }
3163     }
3164     return result;
3165 }
3166 
3167 
3168 /*************************************************************************
3169 This function is used in parallel functions for recurrent division of large
3170 task into two smaller tasks.
3171 
3172 It has following properties:
3173 * it works only for TaskSize>=2 (assertion is thrown otherwise)
3174 * for TaskSize=2, it returns Task0=1, Task1=1
3175 * in case TaskSize is odd,  Task0=TaskSize-1, Task1=1
3176 * in case TaskSize is even, Task0 and Task1 are approximately TaskSize/2
3177   and both Task0 and Task1 are even, Task0>=Task1
3178 
3179   -- ALGLIB --
3180      Copyright 07.04.2013 by Bochkanov Sergey
3181 *************************************************************************/
splitlengtheven(ae_int_t tasksize,ae_int_t * task0,ae_int_t * task1,ae_state * _state)3182 void splitlengtheven(ae_int_t tasksize,
3183      ae_int_t* task0,
3184      ae_int_t* task1,
3185      ae_state *_state)
3186 {
3187 
3188     *task0 = 0;
3189     *task1 = 0;
3190 
3191     ae_assert(tasksize>=2, "SplitLengthEven: TaskSize<2", _state);
3192     if( tasksize==2 )
3193     {
3194         *task0 = 1;
3195         *task1 = 1;
3196         return;
3197     }
3198     if( tasksize%2==0 )
3199     {
3200 
3201         /*
3202          * Even division
3203          */
3204         *task0 = tasksize/2;
3205         *task1 = tasksize/2;
3206         if( *task0%2!=0 )
3207         {
3208             *task0 = *task0+1;
3209             *task1 = *task1-1;
3210         }
3211     }
3212     else
3213     {
3214 
3215         /*
3216          * Odd task size, split trailing odd part from it.
3217          */
3218         *task0 = tasksize-1;
3219         *task1 = 1;
3220     }
3221     ae_assert(*task0>=1, "SplitLengthEven: internal error", _state);
3222     ae_assert(*task1>=1, "SplitLengthEven: internal error", _state);
3223 }
3224 
3225 
3226 /*************************************************************************
3227 This function is used to calculate number of chunks (including partial,
3228 non-complete chunks) in some set. It expects that ChunkSize>=1, TaskSize>=0.
3229 Assertion is thrown otherwise.
3230 
3231 Function result is equivalent to Ceil(TaskSize/ChunkSize), but with guarantees
3232 that rounding errors won't ruin results.
3233 
3234   -- ALGLIB --
3235      Copyright 21.01.2015 by Bochkanov Sergey
3236 *************************************************************************/
chunkscount(ae_int_t tasksize,ae_int_t chunksize,ae_state * _state)3237 ae_int_t chunkscount(ae_int_t tasksize,
3238      ae_int_t chunksize,
3239      ae_state *_state)
3240 {
3241     ae_int_t result;
3242 
3243 
3244     ae_assert(tasksize>=0, "ChunksCount: TaskSize<0", _state);
3245     ae_assert(chunksize>=1, "ChunksCount: ChunkSize<1", _state);
3246     result = tasksize/chunksize;
3247     if( tasksize%chunksize!=0 )
3248     {
3249         result = result+1;
3250     }
3251     return result;
3252 }
3253 
3254 
3255 /*************************************************************************
3256 Returns maximum density for level 2 sparse/dense functions. Density values
3257 below one returned by this function are better to handle via sparse Level 2
3258 functionality.
3259 
3260   -- ALGLIB routine --
3261      10.01.2019
3262      Bochkanov Sergey
3263 *************************************************************************/
sparselevel2density(ae_state * _state)3264 double sparselevel2density(ae_state *_state)
3265 {
3266     double result;
3267 
3268 
3269     result = 0.1;
3270     return result;
3271 }
3272 
3273 
3274 /*************************************************************************
3275 Returns A-tile size for a matrix.
3276 
3277 A-tiles are smallest tiles (32x32), suitable for processing by ALGLIB  own
3278 implementation of Level 3 linear algebra.
3279 
3280   -- ALGLIB routine --
3281      10.01.2019
3282      Bochkanov Sergey
3283 *************************************************************************/
matrixtilesizea(ae_state * _state)3284 ae_int_t matrixtilesizea(ae_state *_state)
3285 {
3286     ae_int_t result;
3287 
3288 
3289     result = 32;
3290     return result;
3291 }
3292 
3293 
3294 /*************************************************************************
3295 Returns B-tile size for a matrix.
3296 
3297 B-tiles are larger  tiles (64x64), suitable for parallel execution or for
3298 processing by vendor's implementation of Level 3 linear algebra.
3299 
3300   -- ALGLIB routine --
3301      10.01.2019
3302      Bochkanov Sergey
3303 *************************************************************************/
matrixtilesizeb(ae_state * _state)3304 ae_int_t matrixtilesizeb(ae_state *_state)
3305 {
3306 #ifndef ALGLIB_INTERCEPTS_MKL
3307     ae_int_t result;
3308 
3309 
3310     result = 64;
3311     return result;
3312 #else
3313     return _ialglib_i_matrixtilesizeb();
3314 #endif
3315 }
3316 
3317 
3318 /*************************************************************************
3319 This function returns minimum cost of task which is feasible for
3320 multithreaded processing. It returns real number in order to avoid overflow
3321 problems.
3322 
3323   -- ALGLIB --
3324      Copyright 10.01.2018 by Bochkanov Sergey
3325 *************************************************************************/
smpactivationlevel(ae_state * _state)3326 double smpactivationlevel(ae_state *_state)
3327 {
3328     double nn;
3329     double result;
3330 
3331 
3332     nn = (double)(2*matrixtilesizeb(_state));
3333     result = ae_maxreal(0.95*2*nn*nn*nn, 1.0E7, _state);
3334     return result;
3335 }
3336 
3337 
3338 /*************************************************************************
3339 This function returns minimum cost of task which is feasible for
3340 spawn (given that multithreading is active).
3341 
3342 It returns real number in order to avoid overflow problems.
3343 
3344   -- ALGLIB --
3345      Copyright 10.01.2018 by Bochkanov Sergey
3346 *************************************************************************/
spawnlevel(ae_state * _state)3347 double spawnlevel(ae_state *_state)
3348 {
3349     double nn;
3350     double result;
3351 
3352 
3353     nn = (double)(2*matrixtilesizea(_state));
3354     result = 0.95*2*nn*nn*nn;
3355     return result;
3356 }
3357 
3358 
3359 /*************************************************************************
3360 --- OBSOLETE FUNCTION, USE TILED SPLIT INSTEAD ---
3361 
3362 This function is used in parallel functions for recurrent division of large
3363 task into two smaller tasks.
3364 
3365 It has following properties:
3366 * it works only for TaskSize>=2 and ChunkSize>=2
3367   (assertion is thrown otherwise)
3368 * Task0+Task1=TaskSize, Task0>0, Task1>0
3369 * Task0 and Task1 are close to each other
3370 * in case TaskSize>ChunkSize, Task0 is always divisible by ChunkSize
3371 
3372   -- ALGLIB --
3373      Copyright 07.04.2013 by Bochkanov Sergey
3374 *************************************************************************/
splitlength(ae_int_t tasksize,ae_int_t chunksize,ae_int_t * task0,ae_int_t * task1,ae_state * _state)3375 void splitlength(ae_int_t tasksize,
3376      ae_int_t chunksize,
3377      ae_int_t* task0,
3378      ae_int_t* task1,
3379      ae_state *_state)
3380 {
3381 
3382     *task0 = 0;
3383     *task1 = 0;
3384 
3385     ae_assert(chunksize>=2, "SplitLength: ChunkSize<2", _state);
3386     ae_assert(tasksize>=2, "SplitLength: TaskSize<2", _state);
3387     *task0 = tasksize/2;
3388     if( *task0>chunksize&&*task0%chunksize!=0 )
3389     {
3390         *task0 = *task0-*task0%chunksize;
3391     }
3392     *task1 = tasksize-(*task0);
3393     ae_assert(*task0>=1, "SplitLength: internal error", _state);
3394     ae_assert(*task1>=1, "SplitLength: internal error", _state);
3395 }
3396 
3397 
3398 /*************************************************************************
3399 Outputs vector A[I0,I1-1] to trace log using either:
3400 a)  6-digit exponential format (no trace flags is set)
3401 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3402 b)  6-ditit fixed-point format ('PREC.F6' trace flag is set)
3403 
3404 This function checks trace flags every time it is called.
3405 *************************************************************************/
tracevectorautoprec(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)3406 void tracevectorautoprec(/* Real    */ ae_vector* a,
3407      ae_int_t i0,
3408      ae_int_t i1,
3409      ae_state *_state)
3410 {
3411     ae_int_t i;
3412     ae_int_t prectouse;
3413 
3414 
3415 
3416     /*
3417      * Determine precision to use
3418      */
3419     prectouse = 0;
3420     if( ae_is_trace_enabled("PREC.E15") )
3421     {
3422         prectouse = 1;
3423     }
3424     if( ae_is_trace_enabled("PREC.F6") )
3425     {
3426         prectouse = 2;
3427     }
3428 
3429     /*
3430      * Output
3431      */
3432     ae_trace("[ ");
3433     for(i=i0; i<=i1-1; i++)
3434     {
3435         if( prectouse==0 )
3436         {
3437             ae_trace("%14.6e",
3438                 (double)(a->ptr.p_double[i]));
3439         }
3440         if( prectouse==1 )
3441         {
3442             ae_trace("%23.15e",
3443                 (double)(a->ptr.p_double[i]));
3444         }
3445         if( prectouse==2 )
3446         {
3447             ae_trace("%13.6f",
3448                 (double)(a->ptr.p_double[i]));
3449         }
3450         if( i<i1-1 )
3451         {
3452             ae_trace(" ");
3453         }
3454     }
3455     ae_trace(" ]");
3456 }
3457 
3458 
3459 /*************************************************************************
3460 Outputs row A[I,J0..J1-1] to trace log using either:
3461 a)  6-digit exponential format (no trace flags is set)
3462 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3463 b)  6-ditit fixed-point format ('PREC.F6' trace flag is set)
3464 
3465 This function checks trace flags every time it is called.
3466 *************************************************************************/
tracerowautoprec(ae_matrix * a,ae_int_t i,ae_int_t j0,ae_int_t j1,ae_state * _state)3467 void tracerowautoprec(/* Real    */ ae_matrix* a,
3468      ae_int_t i,
3469      ae_int_t j0,
3470      ae_int_t j1,
3471      ae_state *_state)
3472 {
3473     ae_int_t j;
3474     ae_int_t prectouse;
3475 
3476 
3477 
3478     /*
3479      * Determine precision to use
3480      */
3481     prectouse = 0;
3482     if( ae_is_trace_enabled("PREC.E15") )
3483     {
3484         prectouse = 1;
3485     }
3486     if( ae_is_trace_enabled("PREC.F6") )
3487     {
3488         prectouse = 2;
3489     }
3490 
3491     /*
3492      * Output
3493      */
3494     ae_trace("[ ");
3495     for(j=j0; j<=j1-1; j++)
3496     {
3497         if( prectouse==0 )
3498         {
3499             ae_trace("%14.6e",
3500                 (double)(a->ptr.pp_double[i][j]));
3501         }
3502         if( prectouse==1 )
3503         {
3504             ae_trace("%23.15e",
3505                 (double)(a->ptr.pp_double[i][j]));
3506         }
3507         if( prectouse==2 )
3508         {
3509             ae_trace("%13.6f",
3510                 (double)(a->ptr.pp_double[i][j]));
3511         }
3512         if( j<j1-1 )
3513         {
3514             ae_trace(" ");
3515         }
3516     }
3517     ae_trace(" ]");
3518 }
3519 
3520 
3521 /*************************************************************************
3522 Unscales/unshifts vector A[N] by computing A*Scl+Sft and outputs result to
3523 trace log using either:
3524 a)  6-digit exponential format (no trace flags is set)
3525 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3526 b)  6-ditit fixed-point format ('PREC.F6' trace flag is set)
3527 
3528 This function checks trace flags every time it is called.
3529 Both Scl and Sft can be omitted.
3530 *************************************************************************/
tracevectorunscaledunshiftedautoprec(ae_vector * x,ae_int_t n,ae_vector * scl,ae_bool applyscl,ae_vector * sft,ae_bool applysft,ae_state * _state)3531 void tracevectorunscaledunshiftedautoprec(/* Real    */ ae_vector* x,
3532      ae_int_t n,
3533      /* Real    */ ae_vector* scl,
3534      ae_bool applyscl,
3535      /* Real    */ ae_vector* sft,
3536      ae_bool applysft,
3537      ae_state *_state)
3538 {
3539     ae_int_t i;
3540     ae_int_t prectouse;
3541     double v;
3542 
3543 
3544 
3545     /*
3546      * Determine precision to use
3547      */
3548     prectouse = 0;
3549     if( ae_is_trace_enabled("PREC.E15") )
3550     {
3551         prectouse = 1;
3552     }
3553     if( ae_is_trace_enabled("PREC.F6") )
3554     {
3555         prectouse = 2;
3556     }
3557 
3558     /*
3559      * Output
3560      */
3561     ae_trace("[ ");
3562     for(i=0; i<=n-1; i++)
3563     {
3564         v = x->ptr.p_double[i];
3565         if( applyscl )
3566         {
3567             v = v*scl->ptr.p_double[i];
3568         }
3569         if( applysft )
3570         {
3571             v = v+sft->ptr.p_double[i];
3572         }
3573         if( prectouse==0 )
3574         {
3575             ae_trace("%14.6e",
3576                 (double)(v));
3577         }
3578         if( prectouse==1 )
3579         {
3580             ae_trace("%23.15e",
3581                 (double)(v));
3582         }
3583         if( prectouse==2 )
3584         {
3585             ae_trace("%13.6f",
3586                 (double)(v));
3587         }
3588         if( i<n-1 )
3589         {
3590             ae_trace(" ");
3591         }
3592     }
3593     ae_trace(" ]");
3594 }
3595 
3596 
3597 /*************************************************************************
3598 Outputs vector of 1-norms of rows [I0,I1-1] of A[I0...I1-1,J0...J1-1]   to
3599 trace log using either:
3600 a)  6-digit exponential format (no trace flags is set)
3601 b) 15-ditit exponential format ('PREC.E15' trace flag is set)
3602 b)  6-ditit fixed-point format ('PREC.F6' trace flag is set)
3603 
3604 This function checks trace flags every time it is called.
3605 *************************************************************************/
tracerownrm1autoprec(ae_matrix * a,ae_int_t i0,ae_int_t i1,ae_int_t j0,ae_int_t j1,ae_state * _state)3606 void tracerownrm1autoprec(/* Real    */ ae_matrix* a,
3607      ae_int_t i0,
3608      ae_int_t i1,
3609      ae_int_t j0,
3610      ae_int_t j1,
3611      ae_state *_state)
3612 {
3613     ae_int_t i;
3614     ae_int_t j;
3615     double v;
3616     ae_int_t prectouse;
3617 
3618 
3619 
3620     /*
3621      * Determine precision to use
3622      */
3623     prectouse = 0;
3624     if( ae_is_trace_enabled("PREC.E15") )
3625     {
3626         prectouse = 1;
3627     }
3628     if( ae_is_trace_enabled("PREC.F6") )
3629     {
3630         prectouse = 2;
3631     }
3632 
3633     /*
3634      * Output
3635      */
3636     ae_trace("[ ");
3637     for(i=i0; i<=i1-1; i++)
3638     {
3639         v = (double)(0);
3640         for(j=j0; j<=j1-1; j++)
3641         {
3642             v = ae_maxreal(v, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
3643         }
3644         if( prectouse==0 )
3645         {
3646             ae_trace("%14.6e",
3647                 (double)(v));
3648         }
3649         if( prectouse==1 )
3650         {
3651             ae_trace("%23.15e",
3652                 (double)(v));
3653         }
3654         if( prectouse==2 )
3655         {
3656             ae_trace("%13.6f",
3657                 (double)(v));
3658         }
3659         if( i<i1-1 )
3660         {
3661             ae_trace(" ");
3662         }
3663     }
3664     ae_trace(" ]");
3665 }
3666 
3667 
3668 /*************************************************************************
3669 Outputs vector A[I0,I1-1] to trace log using E8 precision
3670 *************************************************************************/
tracevectore6(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_state * _state)3671 void tracevectore6(/* Real    */ ae_vector* a,
3672      ae_int_t i0,
3673      ae_int_t i1,
3674      ae_state *_state)
3675 {
3676     ae_int_t i;
3677 
3678 
3679     ae_trace("[ ");
3680     for(i=i0; i<=i1-1; i++)
3681     {
3682         ae_trace("%14.6e",
3683             (double)(a->ptr.p_double[i]));
3684         if( i<i1-1 )
3685         {
3686             ae_trace(" ");
3687         }
3688     }
3689     ae_trace(" ]");
3690 }
3691 
3692 
3693 /*************************************************************************
3694 Outputs vector A[I0,I1-1] to trace log using E8 or E15 precision
3695 *************************************************************************/
tracevectore615(ae_vector * a,ae_int_t i0,ae_int_t i1,ae_bool usee15,ae_state * _state)3696 void tracevectore615(/* Real    */ ae_vector* a,
3697      ae_int_t i0,
3698      ae_int_t i1,
3699      ae_bool usee15,
3700      ae_state *_state)
3701 {
3702     ae_int_t i;
3703 
3704 
3705     ae_trace("[ ");
3706     for(i=i0; i<=i1-1; i++)
3707     {
3708         if( usee15 )
3709         {
3710             ae_trace("%23.15e",
3711                 (double)(a->ptr.p_double[i]));
3712         }
3713         else
3714         {
3715             ae_trace("%14.6e",
3716                 (double)(a->ptr.p_double[i]));
3717         }
3718         if( i<i1-1 )
3719         {
3720             ae_trace(" ");
3721         }
3722     }
3723     ae_trace(" ]");
3724 }
3725 
3726 
3727 /*************************************************************************
3728 Outputs vector of 1-norms of rows [I0,I1-1] of A[I0...I1-1,J0...J1-1]   to
3729 trace log using E8 precision
3730 *************************************************************************/
tracerownrm1e6(ae_matrix * a,ae_int_t i0,ae_int_t i1,ae_int_t j0,ae_int_t j1,ae_state * _state)3731 void tracerownrm1e6(/* Real    */ ae_matrix* a,
3732      ae_int_t i0,
3733      ae_int_t i1,
3734      ae_int_t j0,
3735      ae_int_t j1,
3736      ae_state *_state)
3737 {
3738     ae_int_t i;
3739     ae_int_t j;
3740     double v;
3741 
3742 
3743     ae_trace("[ ");
3744     for(i=i0; i<=i1-1; i++)
3745     {
3746         v = (double)(0);
3747         for(j=j0; j<=j1-1; j++)
3748         {
3749             v = ae_maxreal(v, ae_fabs(a->ptr.pp_double[i][j], _state), _state);
3750         }
3751         ae_trace("%14.6e",
3752             (double)(v));
3753         if( i<i1-1 )
3754         {
3755             ae_trace(" ");
3756         }
3757     }
3758     ae_trace(" ]");
3759 }
3760 
3761 
_apbuffers_init(void * _p,ae_state * _state,ae_bool make_automatic)3762 void _apbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
3763 {
3764     apbuffers *p = (apbuffers*)_p;
3765     ae_touch_ptr((void*)p);
3766     ae_vector_init(&p->ba0, 0, DT_BOOL, _state, make_automatic);
3767     ae_vector_init(&p->ia0, 0, DT_INT, _state, make_automatic);
3768     ae_vector_init(&p->ia1, 0, DT_INT, _state, make_automatic);
3769     ae_vector_init(&p->ia2, 0, DT_INT, _state, make_automatic);
3770     ae_vector_init(&p->ia3, 0, DT_INT, _state, make_automatic);
3771     ae_vector_init(&p->ra0, 0, DT_REAL, _state, make_automatic);
3772     ae_vector_init(&p->ra1, 0, DT_REAL, _state, make_automatic);
3773     ae_vector_init(&p->ra2, 0, DT_REAL, _state, make_automatic);
3774     ae_vector_init(&p->ra3, 0, DT_REAL, _state, make_automatic);
3775     ae_matrix_init(&p->rm0, 0, 0, DT_REAL, _state, make_automatic);
3776     ae_matrix_init(&p->rm1, 0, 0, DT_REAL, _state, make_automatic);
3777 }
3778 
3779 
_apbuffers_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3780 void _apbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3781 {
3782     apbuffers *dst = (apbuffers*)_dst;
3783     apbuffers *src = (apbuffers*)_src;
3784     ae_vector_init_copy(&dst->ba0, &src->ba0, _state, make_automatic);
3785     ae_vector_init_copy(&dst->ia0, &src->ia0, _state, make_automatic);
3786     ae_vector_init_copy(&dst->ia1, &src->ia1, _state, make_automatic);
3787     ae_vector_init_copy(&dst->ia2, &src->ia2, _state, make_automatic);
3788     ae_vector_init_copy(&dst->ia3, &src->ia3, _state, make_automatic);
3789     ae_vector_init_copy(&dst->ra0, &src->ra0, _state, make_automatic);
3790     ae_vector_init_copy(&dst->ra1, &src->ra1, _state, make_automatic);
3791     ae_vector_init_copy(&dst->ra2, &src->ra2, _state, make_automatic);
3792     ae_vector_init_copy(&dst->ra3, &src->ra3, _state, make_automatic);
3793     ae_matrix_init_copy(&dst->rm0, &src->rm0, _state, make_automatic);
3794     ae_matrix_init_copy(&dst->rm1, &src->rm1, _state, make_automatic);
3795 }
3796 
3797 
_apbuffers_clear(void * _p)3798 void _apbuffers_clear(void* _p)
3799 {
3800     apbuffers *p = (apbuffers*)_p;
3801     ae_touch_ptr((void*)p);
3802     ae_vector_clear(&p->ba0);
3803     ae_vector_clear(&p->ia0);
3804     ae_vector_clear(&p->ia1);
3805     ae_vector_clear(&p->ia2);
3806     ae_vector_clear(&p->ia3);
3807     ae_vector_clear(&p->ra0);
3808     ae_vector_clear(&p->ra1);
3809     ae_vector_clear(&p->ra2);
3810     ae_vector_clear(&p->ra3);
3811     ae_matrix_clear(&p->rm0);
3812     ae_matrix_clear(&p->rm1);
3813 }
3814 
3815 
_apbuffers_destroy(void * _p)3816 void _apbuffers_destroy(void* _p)
3817 {
3818     apbuffers *p = (apbuffers*)_p;
3819     ae_touch_ptr((void*)p);
3820     ae_vector_destroy(&p->ba0);
3821     ae_vector_destroy(&p->ia0);
3822     ae_vector_destroy(&p->ia1);
3823     ae_vector_destroy(&p->ia2);
3824     ae_vector_destroy(&p->ia3);
3825     ae_vector_destroy(&p->ra0);
3826     ae_vector_destroy(&p->ra1);
3827     ae_vector_destroy(&p->ra2);
3828     ae_vector_destroy(&p->ra3);
3829     ae_matrix_destroy(&p->rm0);
3830     ae_matrix_destroy(&p->rm1);
3831 }
3832 
3833 
_sboolean_init(void * _p,ae_state * _state,ae_bool make_automatic)3834 void _sboolean_init(void* _p, ae_state *_state, ae_bool make_automatic)
3835 {
3836     sboolean *p = (sboolean*)_p;
3837     ae_touch_ptr((void*)p);
3838 }
3839 
3840 
_sboolean_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3841 void _sboolean_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3842 {
3843     sboolean *dst = (sboolean*)_dst;
3844     sboolean *src = (sboolean*)_src;
3845     dst->val = src->val;
3846 }
3847 
3848 
_sboolean_clear(void * _p)3849 void _sboolean_clear(void* _p)
3850 {
3851     sboolean *p = (sboolean*)_p;
3852     ae_touch_ptr((void*)p);
3853 }
3854 
3855 
_sboolean_destroy(void * _p)3856 void _sboolean_destroy(void* _p)
3857 {
3858     sboolean *p = (sboolean*)_p;
3859     ae_touch_ptr((void*)p);
3860 }
3861 
3862 
_sbooleanarray_init(void * _p,ae_state * _state,ae_bool make_automatic)3863 void _sbooleanarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
3864 {
3865     sbooleanarray *p = (sbooleanarray*)_p;
3866     ae_touch_ptr((void*)p);
3867     ae_vector_init(&p->val, 0, DT_BOOL, _state, make_automatic);
3868 }
3869 
3870 
_sbooleanarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3871 void _sbooleanarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3872 {
3873     sbooleanarray *dst = (sbooleanarray*)_dst;
3874     sbooleanarray *src = (sbooleanarray*)_src;
3875     ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
3876 }
3877 
3878 
_sbooleanarray_clear(void * _p)3879 void _sbooleanarray_clear(void* _p)
3880 {
3881     sbooleanarray *p = (sbooleanarray*)_p;
3882     ae_touch_ptr((void*)p);
3883     ae_vector_clear(&p->val);
3884 }
3885 
3886 
_sbooleanarray_destroy(void * _p)3887 void _sbooleanarray_destroy(void* _p)
3888 {
3889     sbooleanarray *p = (sbooleanarray*)_p;
3890     ae_touch_ptr((void*)p);
3891     ae_vector_destroy(&p->val);
3892 }
3893 
3894 
_sinteger_init(void * _p,ae_state * _state,ae_bool make_automatic)3895 void _sinteger_init(void* _p, ae_state *_state, ae_bool make_automatic)
3896 {
3897     sinteger *p = (sinteger*)_p;
3898     ae_touch_ptr((void*)p);
3899 }
3900 
3901 
_sinteger_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3902 void _sinteger_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3903 {
3904     sinteger *dst = (sinteger*)_dst;
3905     sinteger *src = (sinteger*)_src;
3906     dst->val = src->val;
3907 }
3908 
3909 
_sinteger_clear(void * _p)3910 void _sinteger_clear(void* _p)
3911 {
3912     sinteger *p = (sinteger*)_p;
3913     ae_touch_ptr((void*)p);
3914 }
3915 
3916 
_sinteger_destroy(void * _p)3917 void _sinteger_destroy(void* _p)
3918 {
3919     sinteger *p = (sinteger*)_p;
3920     ae_touch_ptr((void*)p);
3921 }
3922 
3923 
_sintegerarray_init(void * _p,ae_state * _state,ae_bool make_automatic)3924 void _sintegerarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
3925 {
3926     sintegerarray *p = (sintegerarray*)_p;
3927     ae_touch_ptr((void*)p);
3928     ae_vector_init(&p->val, 0, DT_INT, _state, make_automatic);
3929 }
3930 
3931 
_sintegerarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3932 void _sintegerarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3933 {
3934     sintegerarray *dst = (sintegerarray*)_dst;
3935     sintegerarray *src = (sintegerarray*)_src;
3936     ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
3937 }
3938 
3939 
_sintegerarray_clear(void * _p)3940 void _sintegerarray_clear(void* _p)
3941 {
3942     sintegerarray *p = (sintegerarray*)_p;
3943     ae_touch_ptr((void*)p);
3944     ae_vector_clear(&p->val);
3945 }
3946 
3947 
_sintegerarray_destroy(void * _p)3948 void _sintegerarray_destroy(void* _p)
3949 {
3950     sintegerarray *p = (sintegerarray*)_p;
3951     ae_touch_ptr((void*)p);
3952     ae_vector_destroy(&p->val);
3953 }
3954 
3955 
_sreal_init(void * _p,ae_state * _state,ae_bool make_automatic)3956 void _sreal_init(void* _p, ae_state *_state, ae_bool make_automatic)
3957 {
3958     sreal *p = (sreal*)_p;
3959     ae_touch_ptr((void*)p);
3960 }
3961 
3962 
_sreal_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3963 void _sreal_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3964 {
3965     sreal *dst = (sreal*)_dst;
3966     sreal *src = (sreal*)_src;
3967     dst->val = src->val;
3968 }
3969 
3970 
_sreal_clear(void * _p)3971 void _sreal_clear(void* _p)
3972 {
3973     sreal *p = (sreal*)_p;
3974     ae_touch_ptr((void*)p);
3975 }
3976 
3977 
_sreal_destroy(void * _p)3978 void _sreal_destroy(void* _p)
3979 {
3980     sreal *p = (sreal*)_p;
3981     ae_touch_ptr((void*)p);
3982 }
3983 
3984 
_srealarray_init(void * _p,ae_state * _state,ae_bool make_automatic)3985 void _srealarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
3986 {
3987     srealarray *p = (srealarray*)_p;
3988     ae_touch_ptr((void*)p);
3989     ae_vector_init(&p->val, 0, DT_REAL, _state, make_automatic);
3990 }
3991 
3992 
_srealarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)3993 void _srealarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
3994 {
3995     srealarray *dst = (srealarray*)_dst;
3996     srealarray *src = (srealarray*)_src;
3997     ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
3998 }
3999 
4000 
_srealarray_clear(void * _p)4001 void _srealarray_clear(void* _p)
4002 {
4003     srealarray *p = (srealarray*)_p;
4004     ae_touch_ptr((void*)p);
4005     ae_vector_clear(&p->val);
4006 }
4007 
4008 
_srealarray_destroy(void * _p)4009 void _srealarray_destroy(void* _p)
4010 {
4011     srealarray *p = (srealarray*)_p;
4012     ae_touch_ptr((void*)p);
4013     ae_vector_destroy(&p->val);
4014 }
4015 
4016 
_scomplex_init(void * _p,ae_state * _state,ae_bool make_automatic)4017 void _scomplex_init(void* _p, ae_state *_state, ae_bool make_automatic)
4018 {
4019     scomplex *p = (scomplex*)_p;
4020     ae_touch_ptr((void*)p);
4021 }
4022 
4023 
_scomplex_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)4024 void _scomplex_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
4025 {
4026     scomplex *dst = (scomplex*)_dst;
4027     scomplex *src = (scomplex*)_src;
4028     dst->val = src->val;
4029 }
4030 
4031 
_scomplex_clear(void * _p)4032 void _scomplex_clear(void* _p)
4033 {
4034     scomplex *p = (scomplex*)_p;
4035     ae_touch_ptr((void*)p);
4036 }
4037 
4038 
_scomplex_destroy(void * _p)4039 void _scomplex_destroy(void* _p)
4040 {
4041     scomplex *p = (scomplex*)_p;
4042     ae_touch_ptr((void*)p);
4043 }
4044 
4045 
_scomplexarray_init(void * _p,ae_state * _state,ae_bool make_automatic)4046 void _scomplexarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
4047 {
4048     scomplexarray *p = (scomplexarray*)_p;
4049     ae_touch_ptr((void*)p);
4050     ae_vector_init(&p->val, 0, DT_COMPLEX, _state, make_automatic);
4051 }
4052 
4053 
_scomplexarray_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)4054 void _scomplexarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
4055 {
4056     scomplexarray *dst = (scomplexarray*)_dst;
4057     scomplexarray *src = (scomplexarray*)_src;
4058     ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic);
4059 }
4060 
4061 
_scomplexarray_clear(void * _p)4062 void _scomplexarray_clear(void* _p)
4063 {
4064     scomplexarray *p = (scomplexarray*)_p;
4065     ae_touch_ptr((void*)p);
4066     ae_vector_clear(&p->val);
4067 }
4068 
4069 
_scomplexarray_destroy(void * _p)4070 void _scomplexarray_destroy(void* _p)
4071 {
4072     scomplexarray *p = (scomplexarray*)_p;
4073     ae_touch_ptr((void*)p);
4074     ae_vector_destroy(&p->val);
4075 }
4076 
4077 
4078 #endif
4079 #if defined(AE_COMPILE_ABLASF) || !defined(AE_PARTIAL_BUILD)
4080 
4081 
4082 #ifdef ALGLIB_NO_FAST_KERNELS
4083 /*************************************************************************
4084 Computes dot product (X,Y) for elements [0,N) of X[] and Y[]
4085 
4086 INPUT PARAMETERS:
4087     N       -   vector length
4088     X       -   array[N], vector to process
4089     Y       -   array[N], vector to process
4090 
4091 RESULT:
4092     (X,Y)
4093 
4094   -- ALGLIB --
4095      Copyright 20.01.2020 by Bochkanov Sergey
4096 *************************************************************************/
rdotv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)4097 double rdotv(ae_int_t n,
4098      /* Real    */ ae_vector* x,
4099      /* Real    */ ae_vector* y,
4100      ae_state *_state)
4101 {
4102     ae_int_t i;
4103     double result;
4104 
4105 
4106     result = (double)(0);
4107     for(i=0; i<=n-1; i++)
4108     {
4109         result = result+x->ptr.p_double[i]*y->ptr.p_double[i];
4110     }
4111     return result;
4112 }
4113 #endif
4114 
4115 
4116 #ifdef ALGLIB_NO_FAST_KERNELS
4117 /*************************************************************************
4118 Computes dot product (X,A[i]) for elements [0,N) of vector X[] and row A[i,*]
4119 
4120 INPUT PARAMETERS:
4121     N       -   vector length
4122     X       -   array[N], vector to process
4123     A       -   array[?,N], matrix to process
4124     I       -   row index
4125 
4126 RESULT:
4127     (X,Ai)
4128 
4129   -- ALGLIB --
4130      Copyright 20.01.2020 by Bochkanov Sergey
4131 *************************************************************************/
rdotvr(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t i,ae_state * _state)4132 double rdotvr(ae_int_t n,
4133      /* Real    */ ae_vector* x,
4134      /* Real    */ ae_matrix* a,
4135      ae_int_t i,
4136      ae_state *_state)
4137 {
4138     ae_int_t j;
4139     double result;
4140 
4141 
4142     result = (double)(0);
4143     for(j=0; j<=n-1; j++)
4144     {
4145         result = result+x->ptr.p_double[j]*a->ptr.pp_double[i][j];
4146     }
4147     return result;
4148 }
4149 #endif
4150 
4151 
4152 #ifdef ALGLIB_NO_FAST_KERNELS
4153 /*************************************************************************
4154 Computes dot product (X,A[i]) for rows A[ia,*] and B[ib,*]
4155 
4156 INPUT PARAMETERS:
4157     N       -   vector length
4158     X       -   array[N], vector to process
4159     A       -   array[?,N], matrix to process
4160     I       -   row index
4161 
4162 RESULT:
4163     (X,Ai)
4164 
4165   -- ALGLIB --
4166      Copyright 20.01.2020 by Bochkanov Sergey
4167 *************************************************************************/
rdotrr(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_matrix * b,ae_int_t ib,ae_state * _state)4168 double rdotrr(ae_int_t n,
4169      /* Real    */ ae_matrix* a,
4170      ae_int_t ia,
4171      /* Real    */ ae_matrix* b,
4172      ae_int_t ib,
4173      ae_state *_state)
4174 {
4175     ae_int_t j;
4176     double result;
4177 
4178 
4179     result = (double)(0);
4180     for(j=0; j<=n-1; j++)
4181     {
4182         result = result+a->ptr.pp_double[ia][j]*b->ptr.pp_double[ib][j];
4183     }
4184     return result;
4185 }
4186 #endif
4187 
4188 
4189 #ifdef ALGLIB_NO_FAST_KERNELS
4190 /*************************************************************************
4191 Computes dot product (X,X) for elements [0,N) of X[]
4192 
4193 INPUT PARAMETERS:
4194     N       -   vector length
4195     X       -   array[N], vector to process
4196 
4197 RESULT:
4198     (X,X)
4199 
4200   -- ALGLIB --
4201      Copyright 20.01.2020 by Bochkanov Sergey
4202 *************************************************************************/
rdotv2(ae_int_t n,ae_vector * x,ae_state * _state)4203 double rdotv2(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
4204 {
4205     ae_int_t i;
4206     double v;
4207     double result;
4208 
4209 
4210     result = (double)(0);
4211     for(i=0; i<=n-1; i++)
4212     {
4213         v = x->ptr.p_double[i];
4214         result = result+v*v;
4215     }
4216     return result;
4217 }
4218 #endif
4219 
4220 
4221 #ifdef ALGLIB_NO_FAST_KERNELS
4222 /*************************************************************************
4223 Performs inplace addition of Y[] to X[]
4224 
4225 INPUT PARAMETERS:
4226     N       -   vector length
4227     Alpha   -   multiplier
4228     Y       -   array[N], vector to process
4229     X       -   array[N], vector to process
4230 
4231 RESULT:
4232     X := X + alpha*Y
4233 
4234   -- ALGLIB --
4235      Copyright 20.01.2020 by Bochkanov Sergey
4236 *************************************************************************/
raddv(ae_int_t n,double alpha,ae_vector * y,ae_vector * x,ae_state * _state)4237 void raddv(ae_int_t n,
4238      double alpha,
4239      /* Real    */ ae_vector* y,
4240      /* Real    */ ae_vector* x,
4241      ae_state *_state)
4242 {
4243     ae_int_t i;
4244 
4245 
4246     for(i=0; i<=n-1; i++)
4247     {
4248         x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.p_double[i];
4249     }
4250 }
4251 #endif
4252 
4253 
4254 #ifdef ALGLIB_NO_FAST_KERNELS
4255 /*************************************************************************
4256 Performs inplace addition of Y[] to X[]
4257 
4258 INPUT PARAMETERS:
4259     N       -   vector length
4260     Alpha   -   multiplier
4261     Y       -   source vector
4262     OffsY   -   source offset
4263     X       -   destination vector
4264     OffsX   -   destination offset
4265 
4266 RESULT:
4267     X := X + alpha*Y
4268 
4269   -- ALGLIB --
4270      Copyright 20.01.2020 by Bochkanov Sergey
4271 *************************************************************************/
raddvx(ae_int_t n,double alpha,ae_vector * y,ae_int_t offsy,ae_vector * x,ae_int_t offsx,ae_state * _state)4272 void raddvx(ae_int_t n,
4273      double alpha,
4274      /* Real    */ ae_vector* y,
4275      ae_int_t offsy,
4276      /* Real    */ ae_vector* x,
4277      ae_int_t offsx,
4278      ae_state *_state)
4279 {
4280     ae_int_t i;
4281 
4282 
4283     for(i=0; i<=n-1; i++)
4284     {
4285         x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]+alpha*y->ptr.p_double[offsy+i];
4286     }
4287 }
4288 #endif
4289 
4290 
4291 /*************************************************************************
4292 Performs inplace addition of vector Y[] to column X[]
4293 
4294 INPUT PARAMETERS:
4295     N       -   vector length
4296     Alpha   -   multiplier
4297     Y       -   vector to add
4298     X       -   target column ColIdx
4299 
4300 RESULT:
4301     X := X + alpha*Y
4302 
4303   -- ALGLIB --
4304      Copyright 20.01.2020 by Bochkanov Sergey
4305 *************************************************************************/
raddvc(ae_int_t n,double alpha,ae_vector * y,ae_matrix * x,ae_int_t colidx,ae_state * _state)4306 void raddvc(ae_int_t n,
4307      double alpha,
4308      /* Real    */ ae_vector* y,
4309      /* Real    */ ae_matrix* x,
4310      ae_int_t colidx,
4311      ae_state *_state)
4312 {
4313     ae_int_t i;
4314 
4315 
4316     for(i=0; i<=n-1; i++)
4317     {
4318         x->ptr.pp_double[i][colidx] = x->ptr.pp_double[i][colidx]+alpha*y->ptr.p_double[i];
4319     }
4320 }
4321 
4322 
4323 #ifdef ALGLIB_NO_FAST_KERNELS
4324 /*************************************************************************
4325 Performs inplace addition of vector Y[] to row X[]
4326 
4327 INPUT PARAMETERS:
4328     N       -   vector length
4329     Alpha   -   multiplier
4330     Y       -   vector to add
4331     X       -   target row RowIdx
4332 
4333 RESULT:
4334     X := X + alpha*Y
4335 
4336   -- ALGLIB --
4337      Copyright 20.01.2020 by Bochkanov Sergey
4338 *************************************************************************/
raddvr(ae_int_t n,double alpha,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4339 void raddvr(ae_int_t n,
4340      double alpha,
4341      /* Real    */ ae_vector* y,
4342      /* Real    */ ae_matrix* x,
4343      ae_int_t rowidx,
4344      ae_state *_state)
4345 {
4346     ae_int_t i;
4347 
4348 
4349     for(i=0; i<=n-1; i++)
4350     {
4351         x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]+alpha*y->ptr.p_double[i];
4352     }
4353 }
4354 #endif
4355 
4356 
4357 #ifdef ALGLIB_NO_FAST_KERNELS
4358 /*************************************************************************
4359 Performs componentwise multiplication of vector X[] by vector Y[]
4360 
4361 INPUT PARAMETERS:
4362     N       -   vector length
4363     Y       -   vector to multiply by
4364     X       -   target vector
4365 
4366 RESULT:
4367     X := componentwise(X*Y)
4368 
4369   -- ALGLIB --
4370      Copyright 20.01.2020 by Bochkanov Sergey
4371 *************************************************************************/
rmergemulv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)4372 void rmergemulv(ae_int_t n,
4373      /* Real    */ ae_vector* y,
4374      /* Real    */ ae_vector* x,
4375      ae_state *_state)
4376 {
4377     ae_int_t i;
4378 
4379 
4380     for(i=0; i<=n-1; i++)
4381     {
4382         x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.p_double[i];
4383     }
4384 }
4385 #endif
4386 
4387 
4388 #ifdef ALGLIB_NO_FAST_KERNELS
4389 /*************************************************************************
4390 Performs componentwise multiplication of row X[] by vector Y[]
4391 
4392 INPUT PARAMETERS:
4393     N       -   vector length
4394     Y       -   vector to multiply by
4395     X       -   target row RowIdx
4396 
4397 RESULT:
4398     X := componentwise(X*Y)
4399 
4400   -- ALGLIB --
4401      Copyright 20.01.2020 by Bochkanov Sergey
4402 *************************************************************************/
rmergemulvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4403 void rmergemulvr(ae_int_t n,
4404      /* Real    */ ae_vector* y,
4405      /* Real    */ ae_matrix* x,
4406      ae_int_t rowidx,
4407      ae_state *_state)
4408 {
4409     ae_int_t i;
4410 
4411 
4412     for(i=0; i<=n-1; i++)
4413     {
4414         x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*y->ptr.p_double[i];
4415     }
4416 }
4417 #endif
4418 
4419 
4420 #ifdef ALGLIB_NO_FAST_KERNELS
4421 /*************************************************************************
4422 Performs componentwise multiplication of row X[] by vector Y[]
4423 
4424 INPUT PARAMETERS:
4425     N       -   vector length
4426     Y       -   vector to multiply by
4427     X       -   target row RowIdx
4428 
4429 RESULT:
4430     X := componentwise(X*Y)
4431 
4432   -- ALGLIB --
4433      Copyright 20.01.2020 by Bochkanov Sergey
4434 *************************************************************************/
rmergemulrv(ae_int_t n,ae_matrix * y,ae_int_t rowidx,ae_vector * x,ae_state * _state)4435 void rmergemulrv(ae_int_t n,
4436      /* Real    */ ae_matrix* y,
4437      ae_int_t rowidx,
4438      /* Real    */ ae_vector* x,
4439      ae_state *_state)
4440 {
4441     ae_int_t i;
4442 
4443 
4444     for(i=0; i<=n-1; i++)
4445     {
4446         x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.pp_double[rowidx][i];
4447     }
4448 }
4449 #endif
4450 
4451 
4452 #ifdef ALGLIB_NO_FAST_KERNELS
4453 /*************************************************************************
4454 Performs componentwise max of vector X[] and vector Y[]
4455 
4456 INPUT PARAMETERS:
4457     N       -   vector length
4458     Y       -   vector to multiply by
4459     X       -   target vector
4460 
4461 RESULT:
4462     X := componentwise_max(X,Y)
4463 
4464   -- ALGLIB --
4465      Copyright 20.01.2020 by Bochkanov Sergey
4466 *************************************************************************/
rmergemaxv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)4467 void rmergemaxv(ae_int_t n,
4468      /* Real    */ ae_vector* y,
4469      /* Real    */ ae_vector* x,
4470      ae_state *_state)
4471 {
4472     ae_int_t i;
4473 
4474 
4475     for(i=0; i<=n-1; i++)
4476     {
4477         x->ptr.p_double[i] = ae_maxreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
4478     }
4479 }
4480 #endif
4481 
4482 
4483 #ifdef ALGLIB_NO_FAST_KERNELS
4484 /*************************************************************************
4485 Performs componentwise max of row X[] and vector Y[]
4486 
4487 INPUT PARAMETERS:
4488     N       -   vector length
4489     Y       -   vector to multiply by
4490     X       -   target row RowIdx
4491 
4492 RESULT:
4493     X := componentwise_max(X,Y)
4494 
4495   -- ALGLIB --
4496      Copyright 20.01.2020 by Bochkanov Sergey
4497 *************************************************************************/
rmergemaxvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4498 void rmergemaxvr(ae_int_t n,
4499      /* Real    */ ae_vector* y,
4500      /* Real    */ ae_matrix* x,
4501      ae_int_t rowidx,
4502      ae_state *_state)
4503 {
4504     ae_int_t i;
4505 
4506 
4507     for(i=0; i<=n-1; i++)
4508     {
4509         x->ptr.pp_double[rowidx][i] = ae_maxreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
4510     }
4511 }
4512 #endif
4513 
4514 
4515 #ifdef ALGLIB_NO_FAST_KERNELS
4516 /*************************************************************************
4517 Performs componentwise max of row X[I] and vector Y[]
4518 
4519 INPUT PARAMETERS:
4520     N       -   vector length
4521     X       -   matrix, I-th row is source
4522     X       -   target row RowIdx
4523 
4524 RESULT:
4525     Y := componentwise_max(Y,X)
4526 
4527   -- ALGLIB --
4528      Copyright 20.01.2020 by Bochkanov Sergey
4529 *************************************************************************/
rmergemaxrv(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_vector * y,ae_state * _state)4530 void rmergemaxrv(ae_int_t n,
4531      /* Real    */ ae_matrix* x,
4532      ae_int_t rowidx,
4533      /* Real    */ ae_vector* y,
4534      ae_state *_state)
4535 {
4536     ae_int_t i;
4537 
4538 
4539     for(i=0; i<=n-1; i++)
4540     {
4541         y->ptr.p_double[i] = ae_maxreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
4542     }
4543 }
4544 #endif
4545 
4546 
4547 #ifdef ALGLIB_NO_FAST_KERNELS
4548 /*************************************************************************
4549 Performs componentwise max of vector X[] and vector Y[]
4550 
4551 INPUT PARAMETERS:
4552     N       -   vector length
4553     Y       -   vector to multiply by
4554     X       -   target vector
4555 
4556 RESULT:
4557     X := componentwise_max(X,Y)
4558 
4559   -- ALGLIB --
4560      Copyright 20.01.2020 by Bochkanov Sergey
4561 *************************************************************************/
rmergeminv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)4562 void rmergeminv(ae_int_t n,
4563      /* Real    */ ae_vector* y,
4564      /* Real    */ ae_vector* x,
4565      ae_state *_state)
4566 {
4567     ae_int_t i;
4568 
4569 
4570     for(i=0; i<=n-1; i++)
4571     {
4572         x->ptr.p_double[i] = ae_minreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
4573     }
4574 }
4575 #endif
4576 
4577 
4578 #ifdef ALGLIB_NO_FAST_KERNELS
4579 /*************************************************************************
4580 Performs componentwise max of row X[] and vector Y[]
4581 
4582 INPUT PARAMETERS:
4583     N       -   vector length
4584     Y       -   vector to multiply by
4585     X       -   target row RowIdx
4586 
4587 RESULT:
4588     X := componentwise_max(X,Y)
4589 
4590   -- ALGLIB --
4591      Copyright 20.01.2020 by Bochkanov Sergey
4592 *************************************************************************/
rmergeminvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4593 void rmergeminvr(ae_int_t n,
4594      /* Real    */ ae_vector* y,
4595      /* Real    */ ae_matrix* x,
4596      ae_int_t rowidx,
4597      ae_state *_state)
4598 {
4599     ae_int_t i;
4600 
4601 
4602     for(i=0; i<=n-1; i++)
4603     {
4604         x->ptr.pp_double[rowidx][i] = ae_minreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
4605     }
4606 }
4607 #endif
4608 
4609 
4610 #ifdef ALGLIB_NO_FAST_KERNELS
4611 /*************************************************************************
4612 Performs componentwise max of row X[I] and vector Y[]
4613 
4614 INPUT PARAMETERS:
4615     N       -   vector length
4616     X       -   matrix, I-th row is source
4617     X       -   target row RowIdx
4618 
4619 RESULT:
4620     X := componentwise_max(X,Y)
4621 
4622   -- ALGLIB --
4623      Copyright 20.01.2020 by Bochkanov Sergey
4624 *************************************************************************/
rmergeminrv(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_vector * y,ae_state * _state)4625 void rmergeminrv(ae_int_t n,
4626      /* Real    */ ae_matrix* x,
4627      ae_int_t rowidx,
4628      /* Real    */ ae_vector* y,
4629      ae_state *_state)
4630 {
4631     ae_int_t i;
4632 
4633 
4634     for(i=0; i<=n-1; i++)
4635     {
4636         y->ptr.p_double[i] = ae_minreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
4637     }
4638 }
4639 #endif
4640 
4641 
4642 #ifdef ALGLIB_NO_FAST_KERNELS
4643 /*************************************************************************
4644 Performs inplace addition of Y[RIdx,...] to X[]
4645 
4646 INPUT PARAMETERS:
4647     N       -   vector length
4648     Alpha   -   multiplier
4649     Y       -   array[?,N], matrix whose RIdx-th row is added
4650     RIdx    -   row index
4651     X       -   array[N], vector to process
4652 
4653 RESULT:
4654     X := X + alpha*Y
4655 
4656   -- ALGLIB --
4657      Copyright 20.01.2020 by Bochkanov Sergey
4658 *************************************************************************/
raddrv(ae_int_t n,double alpha,ae_matrix * y,ae_int_t ridx,ae_vector * x,ae_state * _state)4659 void raddrv(ae_int_t n,
4660      double alpha,
4661      /* Real    */ ae_matrix* y,
4662      ae_int_t ridx,
4663      /* Real    */ ae_vector* x,
4664      ae_state *_state)
4665 {
4666     ae_int_t i;
4667 
4668 
4669     for(i=0; i<=n-1; i++)
4670     {
4671         x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.pp_double[ridx][i];
4672     }
4673 }
4674 #endif
4675 
4676 
4677 #ifdef ALGLIB_NO_FAST_KERNELS
4678 /*************************************************************************
4679 Performs inplace addition of Y[RIdx,...] to X[RIdxDst]
4680 
4681 INPUT PARAMETERS:
4682     N       -   vector length
4683     Alpha   -   multiplier
4684     Y       -   array[?,N], matrix whose RIdxSrc-th row is added
4685     RIdxSrc -   source row index
4686     X       -   array[?,N], matrix whose RIdxDst-th row is target
4687     RIdxDst -   destination row index
4688 
4689 RESULT:
4690     X := X + alpha*Y
4691 
4692   -- ALGLIB --
4693      Copyright 20.01.2020 by Bochkanov Sergey
4694 *************************************************************************/
raddrr(ae_int_t n,double alpha,ae_matrix * y,ae_int_t ridxsrc,ae_matrix * x,ae_int_t ridxdst,ae_state * _state)4695 void raddrr(ae_int_t n,
4696      double alpha,
4697      /* Real    */ ae_matrix* y,
4698      ae_int_t ridxsrc,
4699      /* Real    */ ae_matrix* x,
4700      ae_int_t ridxdst,
4701      ae_state *_state)
4702 {
4703     ae_int_t i;
4704 
4705 
4706     for(i=0; i<=n-1; i++)
4707     {
4708         x->ptr.pp_double[ridxdst][i] = x->ptr.pp_double[ridxdst][i]+alpha*y->ptr.pp_double[ridxsrc][i];
4709     }
4710 }
4711 #endif
4712 
4713 
4714 #ifdef ALGLIB_NO_FAST_KERNELS
4715 /*************************************************************************
4716 Performs inplace multiplication of X[] by V
4717 
4718 INPUT PARAMETERS:
4719     N       -   vector length
4720     X       -   array[N], vector to process
4721     V       -   multiplier
4722 
4723 OUTPUT PARAMETERS:
4724     X       -   elements 0...N-1 multiplied by V
4725 
4726   -- ALGLIB --
4727      Copyright 20.01.2020 by Bochkanov Sergey
4728 *************************************************************************/
rmulv(ae_int_t n,double v,ae_vector * x,ae_state * _state)4729 void rmulv(ae_int_t n,
4730      double v,
4731      /* Real    */ ae_vector* x,
4732      ae_state *_state)
4733 {
4734     ae_int_t i;
4735 
4736 
4737     for(i=0; i<=n-1; i++)
4738     {
4739         x->ptr.p_double[i] = x->ptr.p_double[i]*v;
4740     }
4741 }
4742 #endif
4743 
4744 
4745 #ifdef ALGLIB_NO_FAST_KERNELS
4746 /*************************************************************************
4747 Performs inplace multiplication of X[] by V
4748 
4749 INPUT PARAMETERS:
4750     N       -   row length
4751     X       -   array[?,N], row to process
4752     V       -   multiplier
4753 
4754 OUTPUT PARAMETERS:
4755     X       -   elements 0...N-1 of row RowIdx are multiplied by V
4756 
4757   -- ALGLIB --
4758      Copyright 20.01.2020 by Bochkanov Sergey
4759 *************************************************************************/
rmulr(ae_int_t n,double v,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4760 void rmulr(ae_int_t n,
4761      double v,
4762      /* Real    */ ae_matrix* x,
4763      ae_int_t rowidx,
4764      ae_state *_state)
4765 {
4766     ae_int_t i;
4767 
4768 
4769     for(i=0; i<=n-1; i++)
4770     {
4771         x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*v;
4772     }
4773 }
4774 #endif
4775 
4776 
4777 #ifdef ALGLIB_NO_FAST_KERNELS
4778 /*************************************************************************
4779 Performs inplace multiplication of X[OffsX:OffsX+N-1] by V
4780 
4781 INPUT PARAMETERS:
4782     N       -   subvector length
4783     X       -   vector to process
4784     V       -   multiplier
4785 
4786 OUTPUT PARAMETERS:
4787     X       -   elements OffsX:OffsX+N-1 multiplied by V
4788 
4789   -- ALGLIB --
4790      Copyright 20.01.2020 by Bochkanov Sergey
4791 *************************************************************************/
rmulvx(ae_int_t n,double v,ae_vector * x,ae_int_t offsx,ae_state * _state)4792 void rmulvx(ae_int_t n,
4793      double v,
4794      /* Real    */ ae_vector* x,
4795      ae_int_t offsx,
4796      ae_state *_state)
4797 {
4798     ae_int_t i;
4799 
4800 
4801     for(i=0; i<=n-1; i++)
4802     {
4803         x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]*v;
4804     }
4805 }
4806 #endif
4807 
4808 
4809 #ifdef ALGLIB_NO_FAST_KERNELS
4810 /*************************************************************************
4811 Returns maximum X
4812 
4813 INPUT PARAMETERS:
4814     N       -   vector length
4815     X       -   array[N], vector to process
4816 
4817 OUTPUT PARAMETERS:
4818     max(X[i])
4819     zero for N=0
4820 
4821   -- ALGLIB --
4822      Copyright 20.01.2020 by Bochkanov Sergey
4823 *************************************************************************/
rmaxv(ae_int_t n,ae_vector * x,ae_state * _state)4824 double rmaxv(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
4825 {
4826     ae_int_t i;
4827     double v;
4828     double result;
4829 
4830 
4831     if( n<=0 )
4832     {
4833         result = (double)(0);
4834         return result;
4835     }
4836     result = x->ptr.p_double[0];
4837     for(i=1; i<=n-1; i++)
4838     {
4839         v = x->ptr.p_double[i];
4840         if( v>result )
4841         {
4842             result = v;
4843         }
4844     }
4845     return result;
4846 }
4847 #endif
4848 
4849 
4850 #ifdef ALGLIB_NO_FAST_KERNELS
4851 /*************************************************************************
4852 Returns maximum |X|
4853 
4854 INPUT PARAMETERS:
4855     N       -   vector length
4856     X       -   array[N], vector to process
4857 
4858 OUTPUT PARAMETERS:
4859     max(|X[i]|)
4860     zero for N=0
4861 
4862   -- ALGLIB --
4863      Copyright 20.01.2020 by Bochkanov Sergey
4864 *************************************************************************/
rmaxabsv(ae_int_t n,ae_vector * x,ae_state * _state)4865 double rmaxabsv(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
4866 {
4867     ae_int_t i;
4868     double v;
4869     double result;
4870 
4871 
4872     result = (double)(0);
4873     for(i=0; i<=n-1; i++)
4874     {
4875         v = ae_fabs(x->ptr.p_double[i], _state);
4876         if( v>result )
4877         {
4878             result = v;
4879         }
4880     }
4881     return result;
4882 }
4883 #endif
4884 
4885 
4886 #ifdef ALGLIB_NO_FAST_KERNELS
4887 /*************************************************************************
4888 Returns maximum X
4889 
4890 INPUT PARAMETERS:
4891     N       -   vector length
4892     X       -   matrix to process, RowIdx-th row is processed
4893 
4894 OUTPUT PARAMETERS:
4895     max(X[RowIdx,i])
4896     zero for N=0
4897 
4898   -- ALGLIB --
4899      Copyright 20.01.2020 by Bochkanov Sergey
4900 *************************************************************************/
rmaxr(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4901 double rmaxr(ae_int_t n,
4902      /* Real    */ ae_matrix* x,
4903      ae_int_t rowidx,
4904      ae_state *_state)
4905 {
4906     ae_int_t i;
4907     double v;
4908     double result;
4909 
4910 
4911     if( n<=0 )
4912     {
4913         result = (double)(0);
4914         return result;
4915     }
4916     result = x->ptr.pp_double[rowidx][0];
4917     for(i=1; i<=n-1; i++)
4918     {
4919         v = x->ptr.pp_double[rowidx][i];
4920         if( v>result )
4921         {
4922             result = v;
4923         }
4924     }
4925     return result;
4926 }
4927 #endif
4928 
4929 
4930 #ifdef ALGLIB_NO_FAST_KERNELS
4931 /*************************************************************************
4932 Returns maximum |X|
4933 
4934 INPUT PARAMETERS:
4935     N       -   vector length
4936     X       -   matrix to process, RowIdx-th row is processed
4937 
4938 OUTPUT PARAMETERS:
4939     max(|X[RowIdx,i]|)
4940     zero for N=0
4941 
4942   -- ALGLIB --
4943      Copyright 20.01.2020 by Bochkanov Sergey
4944 *************************************************************************/
rmaxabsr(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_state * _state)4945 double rmaxabsr(ae_int_t n,
4946      /* Real    */ ae_matrix* x,
4947      ae_int_t rowidx,
4948      ae_state *_state)
4949 {
4950     ae_int_t i;
4951     double v;
4952     double result;
4953 
4954 
4955     result = (double)(0);
4956     for(i=0; i<=n-1; i++)
4957     {
4958         v = ae_fabs(x->ptr.pp_double[rowidx][i], _state);
4959         if( v>result )
4960         {
4961             result = v;
4962         }
4963     }
4964     return result;
4965 }
4966 #endif
4967 
4968 
4969 #ifdef ALGLIB_NO_FAST_KERNELS
4970 /*************************************************************************
4971 Sets vector X[] to V
4972 
4973 INPUT PARAMETERS:
4974     N       -   vector length
4975     V       -   value to set
4976     X       -   array[N]
4977 
4978 OUTPUT PARAMETERS:
4979     X       -   leading N elements are replaced by V
4980 
4981   -- ALGLIB --
4982      Copyright 20.01.2020 by Bochkanov Sergey
4983 *************************************************************************/
rsetv(ae_int_t n,double v,ae_vector * x,ae_state * _state)4984 void rsetv(ae_int_t n,
4985      double v,
4986      /* Real    */ ae_vector* x,
4987      ae_state *_state)
4988 {
4989     ae_int_t j;
4990 
4991 
4992     for(j=0; j<=n-1; j++)
4993     {
4994         x->ptr.p_double[j] = v;
4995     }
4996 }
4997 #endif
4998 
4999 
5000 #ifdef ALGLIB_NO_FAST_KERNELS
5001 /*************************************************************************
5002 Sets X[OffsX:OffsX+N-1] to V
5003 
5004 INPUT PARAMETERS:
5005     N       -   subvector length
5006     V       -   value to set
5007     X       -   array[N]
5008 
5009 OUTPUT PARAMETERS:
5010     X       -   X[OffsX:OffsX+N-1] is replaced by V
5011 
5012   -- ALGLIB --
5013      Copyright 20.01.2020 by Bochkanov Sergey
5014 *************************************************************************/
rsetvx(ae_int_t n,double v,ae_vector * x,ae_int_t offsx,ae_state * _state)5015 void rsetvx(ae_int_t n,
5016      double v,
5017      /* Real    */ ae_vector* x,
5018      ae_int_t offsx,
5019      ae_state *_state)
5020 {
5021     ae_int_t j;
5022 
5023 
5024     for(j=0; j<=n-1; j++)
5025     {
5026         x->ptr.p_double[offsx+j] = v;
5027     }
5028 }
5029 #endif
5030 
5031 
5032 #ifdef ALGLIB_NO_FAST_KERNELS
5033 /*************************************************************************
5034 Sets vector X[] to V
5035 
5036 INPUT PARAMETERS:
5037     N       -   vector length
5038     V       -   value to set
5039     X       -   array[N]
5040 
5041 OUTPUT PARAMETERS:
5042     X       -   leading N elements are replaced by V
5043 
5044   -- ALGLIB --
5045      Copyright 20.01.2020 by Bochkanov Sergey
5046 *************************************************************************/
isetv(ae_int_t n,ae_int_t v,ae_vector * x,ae_state * _state)5047 void isetv(ae_int_t n,
5048      ae_int_t v,
5049      /* Integer */ ae_vector* x,
5050      ae_state *_state)
5051 {
5052     ae_int_t j;
5053 
5054 
5055     for(j=0; j<=n-1; j++)
5056     {
5057         x->ptr.p_int[j] = v;
5058     }
5059 }
5060 #endif
5061 
5062 
5063 #ifdef ALGLIB_NO_FAST_KERNELS
5064 /*************************************************************************
5065 Sets vector X[] to V
5066 
5067 INPUT PARAMETERS:
5068     N       -   vector length
5069     V       -   value to set
5070     X       -   array[N]
5071 
5072 OUTPUT PARAMETERS:
5073     X       -   leading N elements are replaced by V
5074 
5075   -- ALGLIB --
5076      Copyright 20.01.2020 by Bochkanov Sergey
5077 *************************************************************************/
bsetv(ae_int_t n,ae_bool v,ae_vector * x,ae_state * _state)5078 void bsetv(ae_int_t n,
5079      ae_bool v,
5080      /* Boolean */ ae_vector* x,
5081      ae_state *_state)
5082 {
5083     ae_int_t j;
5084 
5085 
5086     for(j=0; j<=n-1; j++)
5087     {
5088         x->ptr.p_bool[j] = v;
5089     }
5090 }
5091 #endif
5092 
5093 
5094 #ifdef ALGLIB_NO_FAST_KERNELS
5095 /*************************************************************************
5096 Sets matrix A[] to V
5097 
5098 INPUT PARAMETERS:
5099     M, N    -   rows/cols count
5100     V       -   value to set
5101     A       -   array[M,N]
5102 
5103 OUTPUT PARAMETERS:
5104     A       -   leading M rows, N cols are replaced by V
5105 
5106   -- ALGLIB --
5107      Copyright 20.01.2020 by Bochkanov Sergey
5108 *************************************************************************/
rsetm(ae_int_t m,ae_int_t n,double v,ae_matrix * a,ae_state * _state)5109 void rsetm(ae_int_t m,
5110      ae_int_t n,
5111      double v,
5112      /* Real    */ ae_matrix* a,
5113      ae_state *_state)
5114 {
5115     ae_int_t i;
5116     ae_int_t j;
5117 
5118 
5119     for(i=0; i<=m-1; i++)
5120     {
5121         for(j=0; j<=n-1; j++)
5122         {
5123             a->ptr.pp_double[i][j] = v;
5124         }
5125     }
5126 }
5127 #endif
5128 
5129 
5130 /*************************************************************************
5131 Sets vector X[] to V, reallocating X[] if too small
5132 
5133 INPUT PARAMETERS:
5134     N       -   vector length
5135     V       -   value to set
5136     X       -   possibly preallocated array
5137 
5138 OUTPUT PARAMETERS:
5139     X       -   leading N elements are replaced by V; array is reallocated
5140                 if its length is less than N.
5141 
5142   -- ALGLIB --
5143      Copyright 20.01.2020 by Bochkanov Sergey
5144 *************************************************************************/
rsetallocv(ae_int_t n,double v,ae_vector * x,ae_state * _state)5145 void rsetallocv(ae_int_t n,
5146      double v,
5147      /* Real    */ ae_vector* x,
5148      ae_state *_state)
5149 {
5150 
5151 
5152     if( x->cnt<n )
5153     {
5154         ae_vector_set_length(x, n, _state);
5155     }
5156     rsetv(n, v, x, _state);
5157 }
5158 
5159 
5160 /*************************************************************************
5161 Sets vector A[] to V, reallocating A[] if too small.
5162 
5163 INPUT PARAMETERS:
5164     M       -   rows count
5165     N       -   cols count
5166     V       -   value to set
5167     A       -   possibly preallocated matrix
5168 
5169 OUTPUT PARAMETERS:
5170     A       -   leading M rows, N cols are replaced by V; the matrix is
5171                 reallocated if its rows/cols count is less than M/N.
5172 
5173   -- ALGLIB --
5174      Copyright 20.01.2020 by Bochkanov Sergey
5175 *************************************************************************/
rsetallocm(ae_int_t m,ae_int_t n,double v,ae_matrix * a,ae_state * _state)5176 void rsetallocm(ae_int_t m,
5177      ae_int_t n,
5178      double v,
5179      /* Real    */ ae_matrix* a,
5180      ae_state *_state)
5181 {
5182 
5183 
5184     if( a->rows<m||a->cols<n )
5185     {
5186         ae_matrix_set_length(a, m, n, _state);
5187     }
5188     rsetm(m, n, v, a, _state);
5189 }
5190 
5191 
5192 /*************************************************************************
5193 Reallocates X[] if its length is less than required value. Does not change
5194 its length and contents if it is large enough.
5195 
5196 INPUT PARAMETERS:
5197     N       -   desired vector length
5198     X       -   possibly preallocated array
5199 
5200 OUTPUT PARAMETERS:
5201     X       -   length(X)>=N
5202 
5203   -- ALGLIB --
5204      Copyright 20.01.2020 by Bochkanov Sergey
5205 *************************************************************************/
rallocv(ae_int_t n,ae_vector * x,ae_state * _state)5206 void rallocv(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
5207 {
5208 
5209 
5210     if( x->cnt<n )
5211     {
5212         ae_vector_set_length(x, n, _state);
5213     }
5214 }
5215 
5216 
5217 /*************************************************************************
5218 Reallocates X[] if its length is less than required value. Does not change
5219 its length and contents if it is large enough.
5220 
5221 INPUT PARAMETERS:
5222     N       -   desired vector length
5223     X       -   possibly preallocated array
5224 
5225 OUTPUT PARAMETERS:
5226     X       -   length(X)>=N
5227 
5228   -- ALGLIB --
5229      Copyright 20.01.2020 by Bochkanov Sergey
5230 *************************************************************************/
iallocv(ae_int_t n,ae_vector * x,ae_state * _state)5231 void iallocv(ae_int_t n, /* Integer */ ae_vector* x, ae_state *_state)
5232 {
5233 
5234 
5235     if( x->cnt<n )
5236     {
5237         ae_vector_set_length(x, n, _state);
5238     }
5239 }
5240 
5241 
5242 /*************************************************************************
5243 Reallocates X[] if its length is less than required value. Does not change
5244 its length and contents if it is large enough.
5245 
5246 INPUT PARAMETERS:
5247     N       -   desired vector length
5248     X       -   possibly preallocated array
5249 
5250 OUTPUT PARAMETERS:
5251     X       -   length(X)>=N
5252 
5253   -- ALGLIB --
5254      Copyright 20.01.2020 by Bochkanov Sergey
5255 *************************************************************************/
ballocv(ae_int_t n,ae_vector * x,ae_state * _state)5256 void ballocv(ae_int_t n, /* Boolean */ ae_vector* x, ae_state *_state)
5257 {
5258 
5259 
5260     if( x->cnt<n )
5261     {
5262         ae_vector_set_length(x, n, _state);
5263     }
5264 }
5265 
5266 
5267 /*************************************************************************
5268 Reallocates matrix if its rows or cols count is less than  required.  Does
5269 not change its size if it is exactly that size or larger.
5270 
5271 INPUT PARAMETERS:
5272     M       -   rows count
5273     N       -   cols count
5274     A       -   possibly preallocated matrix
5275 
5276 OUTPUT PARAMETERS:
5277     A       -   size is at least M*N
5278 
5279   -- ALGLIB --
5280      Copyright 20.01.2020 by Bochkanov Sergey
5281 *************************************************************************/
rallocm(ae_int_t m,ae_int_t n,ae_matrix * a,ae_state * _state)5282 void rallocm(ae_int_t m,
5283      ae_int_t n,
5284      /* Real    */ ae_matrix* a,
5285      ae_state *_state)
5286 {
5287 
5288 
5289     if( a->rows<m||a->cols<n )
5290     {
5291         ae_matrix_set_length(a, m, n, _state);
5292     }
5293 }
5294 
5295 
5296 /*************************************************************************
5297 Sets vector X[] to V, reallocating X[] if too small
5298 
5299 INPUT PARAMETERS:
5300     N       -   vector length
5301     V       -   value to set
5302     X       -   possibly preallocated array
5303 
5304 OUTPUT PARAMETERS:
5305     X       -   leading N elements are replaced by V; array is reallocated
5306                 if its length is less than N.
5307 
5308   -- ALGLIB --
5309      Copyright 20.01.2020 by Bochkanov Sergey
5310 *************************************************************************/
isetallocv(ae_int_t n,ae_int_t v,ae_vector * x,ae_state * _state)5311 void isetallocv(ae_int_t n,
5312      ae_int_t v,
5313      /* Integer */ ae_vector* x,
5314      ae_state *_state)
5315 {
5316 
5317 
5318     if( x->cnt<n )
5319     {
5320         ae_vector_set_length(x, n, _state);
5321     }
5322     isetv(n, v, x, _state);
5323 }
5324 
5325 
5326 /*************************************************************************
5327 Sets vector X[] to V, reallocating X[] if too small
5328 
5329 INPUT PARAMETERS:
5330     N       -   vector length
5331     V       -   value to set
5332     X       -   possibly preallocated array
5333 
5334 OUTPUT PARAMETERS:
5335     X       -   leading N elements are replaced by V; array is reallocated
5336                 if its length is less than N.
5337 
5338   -- ALGLIB --
5339      Copyright 20.01.2020 by Bochkanov Sergey
5340 *************************************************************************/
bsetallocv(ae_int_t n,ae_bool v,ae_vector * x,ae_state * _state)5341 void bsetallocv(ae_int_t n,
5342      ae_bool v,
5343      /* Boolean */ ae_vector* x,
5344      ae_state *_state)
5345 {
5346 
5347 
5348     if( x->cnt<n )
5349     {
5350         ae_vector_set_length(x, n, _state);
5351     }
5352     bsetv(n, v, x, _state);
5353 }
5354 
5355 
5356 #ifdef ALGLIB_NO_FAST_KERNELS
5357 /*************************************************************************
5358 Sets row I of A[,] to V
5359 
5360 INPUT PARAMETERS:
5361     N       -   vector length
5362     V       -   value to set
5363     A       -   array[N,N] or larger
5364     I       -   row index
5365 
5366 OUTPUT PARAMETERS:
5367     A       -   leading N elements of I-th row are replaced by V
5368 
5369   -- ALGLIB --
5370      Copyright 20.01.2020 by Bochkanov Sergey
5371 *************************************************************************/
rsetr(ae_int_t n,double v,ae_matrix * a,ae_int_t i,ae_state * _state)5372 void rsetr(ae_int_t n,
5373      double v,
5374      /* Real    */ ae_matrix* a,
5375      ae_int_t i,
5376      ae_state *_state)
5377 {
5378     ae_int_t j;
5379 
5380 
5381     for(j=0; j<=n-1; j++)
5382     {
5383         a->ptr.pp_double[i][j] = v;
5384     }
5385 }
5386 #endif
5387 
5388 
5389 /*************************************************************************
5390 Sets col J of A[,] to V
5391 
5392 INPUT PARAMETERS:
5393     N       -   vector length
5394     V       -   value to set
5395     A       -   array[N,N] or larger
5396     J       -   col index
5397 
5398 OUTPUT PARAMETERS:
5399     A       -   leading N elements of I-th col are replaced by V
5400 
5401   -- ALGLIB --
5402      Copyright 20.01.2020 by Bochkanov Sergey
5403 *************************************************************************/
rsetc(ae_int_t n,double v,ae_matrix * a,ae_int_t j,ae_state * _state)5404 void rsetc(ae_int_t n,
5405      double v,
5406      /* Real    */ ae_matrix* a,
5407      ae_int_t j,
5408      ae_state *_state)
5409 {
5410     ae_int_t i;
5411 
5412 
5413     for(i=0; i<=n-1; i++)
5414     {
5415         a->ptr.pp_double[i][j] = v;
5416     }
5417 }
5418 
5419 
5420 #ifdef ALGLIB_NO_FAST_KERNELS
5421 /*************************************************************************
5422 Copies vector X[] to Y[]
5423 
5424 INPUT PARAMETERS:
5425     N       -   vector length
5426     X       -   array[N], source
5427     Y       -   preallocated array[N]
5428 
5429 OUTPUT PARAMETERS:
5430     Y       -   leading N elements are replaced by X
5431 
5432 
5433 NOTE: destination and source should NOT overlap
5434 
5435   -- ALGLIB --
5436      Copyright 20.01.2020 by Bochkanov Sergey
5437 *************************************************************************/
rcopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5438 void rcopyv(ae_int_t n,
5439      /* Real    */ ae_vector* x,
5440      /* Real    */ ae_vector* y,
5441      ae_state *_state)
5442 {
5443     ae_int_t j;
5444 
5445 
5446     for(j=0; j<=n-1; j++)
5447     {
5448         y->ptr.p_double[j] = x->ptr.p_double[j];
5449     }
5450 }
5451 #endif
5452 
5453 
5454 #ifdef ALGLIB_NO_FAST_KERNELS
5455 /*************************************************************************
5456 Copies vector X[] to Y[]
5457 
5458 INPUT PARAMETERS:
5459     N       -   vector length
5460     X       -   array[N], source
5461     Y       -   preallocated array[N]
5462 
5463 OUTPUT PARAMETERS:
5464     Y       -   leading N elements are replaced by X
5465 
5466 
5467 NOTE: destination and source should NOT overlap
5468 
5469   -- ALGLIB --
5470      Copyright 20.01.2020 by Bochkanov Sergey
5471 *************************************************************************/
bcopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5472 void bcopyv(ae_int_t n,
5473      /* Boolean */ ae_vector* x,
5474      /* Boolean */ ae_vector* y,
5475      ae_state *_state)
5476 {
5477     ae_int_t j;
5478 
5479 
5480     for(j=0; j<=n-1; j++)
5481     {
5482         y->ptr.p_bool[j] = x->ptr.p_bool[j];
5483     }
5484 }
5485 #endif
5486 
5487 
5488 #ifdef ALGLIB_NO_FAST_KERNELS
5489 /*************************************************************************
5490 Copies vector X[] to Y[], extended version
5491 
5492 INPUT PARAMETERS:
5493     N       -   vector length
5494     X       -   source array
5495     OffsX   -   source offset
5496     Y       -   preallocated array[N]
5497     OffsY   -   destination offset
5498 
5499 OUTPUT PARAMETERS:
5500     Y       -   N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
5501 
5502 NOTE: destination and source should NOT overlap
5503 
5504   -- ALGLIB --
5505      Copyright 20.01.2020 by Bochkanov Sergey
5506 *************************************************************************/
rcopyvx(ae_int_t n,ae_vector * x,ae_int_t offsx,ae_vector * y,ae_int_t offsy,ae_state * _state)5507 void rcopyvx(ae_int_t n,
5508      /* Real    */ ae_vector* x,
5509      ae_int_t offsx,
5510      /* Real    */ ae_vector* y,
5511      ae_int_t offsy,
5512      ae_state *_state)
5513 {
5514     ae_int_t j;
5515 
5516 
5517     for(j=0; j<=n-1; j++)
5518     {
5519         y->ptr.p_double[offsy+j] = x->ptr.p_double[offsx+j];
5520     }
5521 }
5522 #endif
5523 
5524 
5525 /*************************************************************************
5526 Copies vector X[] to Y[], resizing Y[] if needed.
5527 
5528 INPUT PARAMETERS:
5529     N       -   vector length
5530     X       -   array[N], source
5531     Y       -   possibly preallocated array[N] (resized if needed)
5532 
5533 OUTPUT PARAMETERS:
5534     Y       -   leading N elements are replaced by X
5535 
5536   -- ALGLIB --
5537      Copyright 20.01.2020 by Bochkanov Sergey
5538 *************************************************************************/
rcopyallocv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5539 void rcopyallocv(ae_int_t n,
5540      /* Real    */ ae_vector* x,
5541      /* Real    */ ae_vector* y,
5542      ae_state *_state)
5543 {
5544 
5545 
5546     if( y->cnt<n )
5547     {
5548         ae_vector_set_length(y, n, _state);
5549     }
5550     rcopyv(n, x, y, _state);
5551 }
5552 
5553 
5554 /*************************************************************************
5555 Copies matrix X[] to Y[], resizing Y[] if needed. On resize, dimensions of
5556 Y[] are increased - but not decreased.
5557 
5558 INPUT PARAMETERS:
5559     M       -   rows count
5560     N       -   cols count
5561     X       -   array[M,N], source
5562     Y       -   possibly preallocated array[M,N] (resized if needed)
5563 
5564 OUTPUT PARAMETERS:
5565     Y       -   leading [M,N] elements are replaced by X
5566 
5567   -- ALGLIB --
5568      Copyright 20.01.2020 by Bochkanov Sergey
5569 *************************************************************************/
rcopym(ae_int_t m,ae_int_t n,ae_matrix * x,ae_matrix * y,ae_state * _state)5570 void rcopym(ae_int_t m,
5571      ae_int_t n,
5572      /* Real    */ ae_matrix* x,
5573      /* Real    */ ae_matrix* y,
5574      ae_state *_state)
5575 {
5576     ae_int_t i;
5577     ae_int_t j;
5578 
5579 
5580     if( m==0||n==0 )
5581     {
5582         return;
5583     }
5584     for(i=0; i<=m-1; i++)
5585     {
5586         for(j=0; j<=n-1; j++)
5587         {
5588             y->ptr.pp_double[i][j] = x->ptr.pp_double[i][j];
5589         }
5590     }
5591 }
5592 
5593 
5594 /*************************************************************************
5595 Copies matrix X[] to Y[], resizing Y[] if needed. On resize, dimensions of
5596 Y[] are increased - but not decreased.
5597 
5598 INPUT PARAMETERS:
5599     M       -   rows count
5600     N       -   cols count
5601     X       -   array[M,N], source
5602     Y       -   possibly preallocated array[M,N] (resized if needed)
5603 
5604 OUTPUT PARAMETERS:
5605     Y       -   leading [M,N] elements are replaced by X
5606 
5607   -- ALGLIB --
5608      Copyright 20.01.2020 by Bochkanov Sergey
5609 *************************************************************************/
rcopyallocm(ae_int_t m,ae_int_t n,ae_matrix * x,ae_matrix * y,ae_state * _state)5610 void rcopyallocm(ae_int_t m,
5611      ae_int_t n,
5612      /* Real    */ ae_matrix* x,
5613      /* Real    */ ae_matrix* y,
5614      ae_state *_state)
5615 {
5616 
5617 
5618     if( m==0||n==0 )
5619     {
5620         return;
5621     }
5622     if( y->rows<m||y->cols<n )
5623     {
5624         ae_matrix_set_length(y, ae_maxint(m, y->rows, _state), ae_maxint(n, y->cols, _state), _state);
5625     }
5626     rcopym(m, n, x, y, _state);
5627 }
5628 
5629 
5630 /*************************************************************************
5631 Copies vector X[] to Y[], resizing Y[] if needed.
5632 
5633 INPUT PARAMETERS:
5634     N       -   vector length
5635     X       -   array[N], source
5636     Y       -   possibly preallocated array[N] (resized if needed)
5637 
5638 OUTPUT PARAMETERS:
5639     Y       -   leading N elements are replaced by X
5640 
5641   -- ALGLIB --
5642      Copyright 20.01.2020 by Bochkanov Sergey
5643 *************************************************************************/
icopyallocv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5644 void icopyallocv(ae_int_t n,
5645      /* Integer */ ae_vector* x,
5646      /* Integer */ ae_vector* y,
5647      ae_state *_state)
5648 {
5649 
5650 
5651     if( y->cnt<n )
5652     {
5653         ae_vector_set_length(y, n, _state);
5654     }
5655     icopyv(n, x, y, _state);
5656 }
5657 
5658 
5659 /*************************************************************************
5660 Copies vector X[] to Y[], resizing Y[] if needed.
5661 
5662 INPUT PARAMETERS:
5663     N       -   vector length
5664     X       -   array[N], source
5665     Y       -   possibly preallocated array[N] (resized if needed)
5666 
5667 OUTPUT PARAMETERS:
5668     Y       -   leading N elements are replaced by X
5669 
5670   -- ALGLIB --
5671      Copyright 20.01.2020 by Bochkanov Sergey
5672 *************************************************************************/
bcopyallocv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5673 void bcopyallocv(ae_int_t n,
5674      /* Boolean */ ae_vector* x,
5675      /* Boolean */ ae_vector* y,
5676      ae_state *_state)
5677 {
5678 
5679 
5680     if( y->cnt<n )
5681     {
5682         ae_vector_set_length(y, n, _state);
5683     }
5684     bcopyv(n, x, y, _state);
5685 }
5686 
5687 
5688 #ifdef ALGLIB_NO_FAST_KERNELS
5689 /*************************************************************************
5690 Copies vector X[] to Y[]
5691 
5692 INPUT PARAMETERS:
5693     N       -   vector length
5694     X       -   source array
5695     Y       -   preallocated array[N]
5696 
5697 OUTPUT PARAMETERS:
5698     Y       -   X copied to Y
5699 
5700   -- ALGLIB --
5701      Copyright 20.01.2020 by Bochkanov Sergey
5702 *************************************************************************/
icopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)5703 void icopyv(ae_int_t n,
5704      /* Integer */ ae_vector* x,
5705      /* Integer */ ae_vector* y,
5706      ae_state *_state)
5707 {
5708     ae_int_t j;
5709 
5710 
5711     for(j=0; j<=n-1; j++)
5712     {
5713         y->ptr.p_int[j] = x->ptr.p_int[j];
5714     }
5715 }
5716 #endif
5717 
5718 
5719 #ifdef ALGLIB_NO_FAST_KERNELS
5720 /*************************************************************************
5721 Copies vector X[] to Y[], extended version
5722 
5723 INPUT PARAMETERS:
5724     N       -   vector length
5725     X       -   source array
5726     OffsX   -   source offset
5727     Y       -   preallocated array[N]
5728     OffsY   -   destination offset
5729 
5730 OUTPUT PARAMETERS:
5731     Y       -   N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
5732 
5733 NOTE: destination and source should NOT overlap
5734 
5735   -- ALGLIB --
5736      Copyright 20.01.2020 by Bochkanov Sergey
5737 *************************************************************************/
icopyvx(ae_int_t n,ae_vector * x,ae_int_t offsx,ae_vector * y,ae_int_t offsy,ae_state * _state)5738 void icopyvx(ae_int_t n,
5739      /* Integer */ ae_vector* x,
5740      ae_int_t offsx,
5741      /* Integer */ ae_vector* y,
5742      ae_int_t offsy,
5743      ae_state *_state)
5744 {
5745     ae_int_t j;
5746 
5747 
5748     for(j=0; j<=n-1; j++)
5749     {
5750         y->ptr.p_int[offsy+j] = x->ptr.p_int[offsx+j];
5751     }
5752 }
5753 #endif
5754 
5755 
5756 /*************************************************************************
5757 Grows X, i.e. changes its size in such a way that:
5758 a) contents is preserved
5759 b) new size is at least N
5760 c) actual size can be larger than N, so subsequent grow() calls can return
5761    without reallocation
5762 
5763   -- ALGLIB --
5764      Copyright 20.03.2009 by Bochkanov Sergey
5765 *************************************************************************/
igrowv(ae_int_t newn,ae_vector * x,ae_state * _state)5766 void igrowv(ae_int_t newn, /* Integer */ ae_vector* x, ae_state *_state)
5767 {
5768     ae_frame _frame_block;
5769     ae_vector oldx;
5770     ae_int_t oldn;
5771 
5772     ae_frame_make(_state, &_frame_block);
5773     memset(&oldx, 0, sizeof(oldx));
5774     ae_vector_init(&oldx, 0, DT_INT, _state, ae_true);
5775 
5776     if( x->cnt>=newn )
5777     {
5778         ae_frame_leave(_state);
5779         return;
5780     }
5781     oldn = x->cnt;
5782     newn = ae_maxint(newn, ae_round(1.8*oldn+1, _state), _state);
5783     ae_swap_vectors(x, &oldx);
5784     ae_vector_set_length(x, newn, _state);
5785     icopyv(oldn, &oldx, x, _state);
5786     ae_frame_leave(_state);
5787 }
5788 
5789 
5790 /*************************************************************************
5791 Grows X, i.e. changes its size in such a way that:
5792 a) contents is preserved
5793 b) new size is at least N
5794 c) actual size can be larger than N, so subsequent grow() calls can return
5795    without reallocation
5796 
5797   -- ALGLIB --
5798      Copyright 20.03.2009 by Bochkanov Sergey
5799 *************************************************************************/
rgrowv(ae_int_t newn,ae_vector * x,ae_state * _state)5800 void rgrowv(ae_int_t newn, /* Real    */ ae_vector* x, ae_state *_state)
5801 {
5802     ae_frame _frame_block;
5803     ae_vector oldx;
5804     ae_int_t oldn;
5805 
5806     ae_frame_make(_state, &_frame_block);
5807     memset(&oldx, 0, sizeof(oldx));
5808     ae_vector_init(&oldx, 0, DT_REAL, _state, ae_true);
5809 
5810     if( x->cnt>=newn )
5811     {
5812         ae_frame_leave(_state);
5813         return;
5814     }
5815     oldn = x->cnt;
5816     newn = ae_maxint(newn, ae_round(1.8*oldn+1, _state), _state);
5817     ae_swap_vectors(x, &oldx);
5818     ae_vector_set_length(x, newn, _state);
5819     rcopyv(oldn, &oldx, x, _state);
5820     ae_frame_leave(_state);
5821 }
5822 
5823 
5824 #ifdef ALGLIB_NO_FAST_KERNELS
5825 /*************************************************************************
5826 Performs copying with multiplication of V*X[] to Y[]
5827 
5828 INPUT PARAMETERS:
5829     N       -   vector length
5830     V       -   multiplier
5831     X       -   array[N], source
5832     Y       -   preallocated array[N]
5833 
5834 OUTPUT PARAMETERS:
5835     Y       -   array[N], Y = V*X
5836 
5837   -- ALGLIB --
5838      Copyright 20.01.2020 by Bochkanov Sergey
5839 *************************************************************************/
rcopymulv(ae_int_t n,double v,ae_vector * x,ae_vector * y,ae_state * _state)5840 void rcopymulv(ae_int_t n,
5841      double v,
5842      /* Real    */ ae_vector* x,
5843      /* Real    */ ae_vector* y,
5844      ae_state *_state)
5845 {
5846     ae_int_t i;
5847 
5848 
5849     for(i=0; i<=n-1; i++)
5850     {
5851         y->ptr.p_double[i] = v*x->ptr.p_double[i];
5852     }
5853 }
5854 #endif
5855 
5856 
5857 #ifdef ALGLIB_NO_FAST_KERNELS
5858 /*************************************************************************
5859 Performs copying with multiplication of V*X[] to Y[I,*]
5860 
5861 INPUT PARAMETERS:
5862     N       -   vector length
5863     V       -   multiplier
5864     X       -   array[N], source
5865     Y       -   preallocated array[?,N]
5866     RIdx    -   destination row index
5867 
5868 OUTPUT PARAMETERS:
5869     Y       -   Y[RIdx,...] = V*X
5870 
5871   -- ALGLIB --
5872      Copyright 20.01.2020 by Bochkanov Sergey
5873 *************************************************************************/
rcopymulvr(ae_int_t n,double v,ae_vector * x,ae_matrix * y,ae_int_t ridx,ae_state * _state)5874 void rcopymulvr(ae_int_t n,
5875      double v,
5876      /* Real    */ ae_vector* x,
5877      /* Real    */ ae_matrix* y,
5878      ae_int_t ridx,
5879      ae_state *_state)
5880 {
5881     ae_int_t i;
5882 
5883 
5884     for(i=0; i<=n-1; i++)
5885     {
5886         y->ptr.pp_double[ridx][i] = v*x->ptr.p_double[i];
5887     }
5888 }
5889 #endif
5890 
5891 
5892 /*************************************************************************
5893 Performs copying with multiplication of V*X[] to Y[*,J]
5894 
5895 INPUT PARAMETERS:
5896     N       -   vector length
5897     V       -   multiplier
5898     X       -   array[N], source
5899     Y       -   preallocated array[N,?]
5900     CIdx    -   destination rocol index
5901 
5902 OUTPUT PARAMETERS:
5903     Y       -   Y[RIdx,...] = V*X
5904 
5905   -- ALGLIB --
5906      Copyright 20.01.2020 by Bochkanov Sergey
5907 *************************************************************************/
rcopymulvc(ae_int_t n,double v,ae_vector * x,ae_matrix * y,ae_int_t cidx,ae_state * _state)5908 void rcopymulvc(ae_int_t n,
5909      double v,
5910      /* Real    */ ae_vector* x,
5911      /* Real    */ ae_matrix* y,
5912      ae_int_t cidx,
5913      ae_state *_state)
5914 {
5915     ae_int_t i;
5916 
5917 
5918     for(i=0; i<=n-1; i++)
5919     {
5920         y->ptr.pp_double[i][cidx] = v*x->ptr.p_double[i];
5921     }
5922 }
5923 
5924 
5925 #ifdef ALGLIB_NO_FAST_KERNELS
5926 /*************************************************************************
5927 Copies vector X[] to row I of A[,]
5928 
5929 INPUT PARAMETERS:
5930     N       -   vector length
5931     X       -   array[N], source
5932     A       -   preallocated 2D array large enough to store result
5933     I       -   destination row index
5934 
5935 OUTPUT PARAMETERS:
5936     A       -   leading N elements of I-th row are replaced by X
5937 
5938   -- ALGLIB --
5939      Copyright 20.01.2020 by Bochkanov Sergey
5940 *************************************************************************/
rcopyvr(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t i,ae_state * _state)5941 void rcopyvr(ae_int_t n,
5942      /* Real    */ ae_vector* x,
5943      /* Real    */ ae_matrix* a,
5944      ae_int_t i,
5945      ae_state *_state)
5946 {
5947     ae_int_t j;
5948 
5949 
5950     for(j=0; j<=n-1; j++)
5951     {
5952         a->ptr.pp_double[i][j] = x->ptr.p_double[j];
5953     }
5954 }
5955 #endif
5956 
5957 
5958 #ifdef ALGLIB_NO_FAST_KERNELS
5959 /*************************************************************************
5960 Copies row I of A[,] to vector X[]
5961 
5962 INPUT PARAMETERS:
5963     N       -   vector length
5964     A       -   2D array, source
5965     I       -   source row index
5966     X       -   preallocated destination
5967 
5968 OUTPUT PARAMETERS:
5969     X       -   array[N], destination
5970 
5971   -- ALGLIB --
5972      Copyright 20.01.2020 by Bochkanov Sergey
5973 *************************************************************************/
rcopyrv(ae_int_t n,ae_matrix * a,ae_int_t i,ae_vector * x,ae_state * _state)5974 void rcopyrv(ae_int_t n,
5975      /* Real    */ ae_matrix* a,
5976      ae_int_t i,
5977      /* Real    */ ae_vector* x,
5978      ae_state *_state)
5979 {
5980     ae_int_t j;
5981 
5982 
5983     for(j=0; j<=n-1; j++)
5984     {
5985         x->ptr.p_double[j] = a->ptr.pp_double[i][j];
5986     }
5987 }
5988 #endif
5989 
5990 
5991 #ifdef ALGLIB_NO_FAST_KERNELS
5992 /*************************************************************************
5993 Copies row I of A[,] to row K of B[,].
5994 
5995 A[i,...] and B[k,...] may overlap.
5996 
5997 INPUT PARAMETERS:
5998     N       -   vector length
5999     A       -   2D array, source
6000     I       -   source row index
6001     B       -   preallocated destination
6002     K       -   destination row index
6003 
6004 OUTPUT PARAMETERS:
6005     B       -   row K overwritten
6006 
6007   -- ALGLIB --
6008      Copyright 20.01.2020 by Bochkanov Sergey
6009 *************************************************************************/
rcopyrr(ae_int_t n,ae_matrix * a,ae_int_t i,ae_matrix * b,ae_int_t k,ae_state * _state)6010 void rcopyrr(ae_int_t n,
6011      /* Real    */ ae_matrix* a,
6012      ae_int_t i,
6013      /* Real    */ ae_matrix* b,
6014      ae_int_t k,
6015      ae_state *_state)
6016 {
6017     ae_int_t j;
6018 
6019 
6020     for(j=0; j<=n-1; j++)
6021     {
6022         b->ptr.pp_double[k][j] = a->ptr.pp_double[i][j];
6023     }
6024 }
6025 #endif
6026 
6027 
6028 /*************************************************************************
6029 Copies vector X[] to column J of A[,]
6030 
6031 INPUT PARAMETERS:
6032     N       -   vector length
6033     X       -   array[N], source
6034     A       -   preallocated 2D array large enough to store result
6035     J       -   destination col index
6036 
6037 OUTPUT PARAMETERS:
6038     A       -   leading N elements of J-th column are replaced by X
6039 
6040   -- ALGLIB --
6041      Copyright 20.01.2020 by Bochkanov Sergey
6042 *************************************************************************/
rcopyvc(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t j,ae_state * _state)6043 void rcopyvc(ae_int_t n,
6044      /* Real    */ ae_vector* x,
6045      /* Real    */ ae_matrix* a,
6046      ae_int_t j,
6047      ae_state *_state)
6048 {
6049     ae_int_t i;
6050 
6051 
6052     for(i=0; i<=n-1; i++)
6053     {
6054         a->ptr.pp_double[i][j] = x->ptr.p_double[i];
6055     }
6056 }
6057 
6058 
6059 /*************************************************************************
6060 Copies column J of A[,] to vector X[]
6061 
6062 INPUT PARAMETERS:
6063     N       -   vector length
6064     A       -   source 2D array
6065     J       -   source col index
6066 
6067 OUTPUT PARAMETERS:
6068     X       -   preallocated array[N], destination
6069 
6070   -- ALGLIB --
6071      Copyright 20.01.2020 by Bochkanov Sergey
6072 *************************************************************************/
rcopycv(ae_int_t n,ae_matrix * a,ae_int_t j,ae_vector * x,ae_state * _state)6073 void rcopycv(ae_int_t n,
6074      /* Real    */ ae_matrix* a,
6075      ae_int_t j,
6076      /* Real    */ ae_vector* x,
6077      ae_state *_state)
6078 {
6079     ae_int_t i;
6080 
6081 
6082     for(i=0; i<=n-1; i++)
6083     {
6084         x->ptr.p_double[i] = a->ptr.pp_double[i][j];
6085     }
6086 }
6087 
6088 
6089 #ifdef ALGLIB_NO_FAST_KERNELS
6090 /*************************************************************************
6091 Matrix-vector product: y := alpha*op(A)*x + beta*y
6092 
6093 NOTE: this  function  expects  Y  to  be  large enough to store result. No
6094       automatic preallocation happens for  smaller  arrays.  No  integrity
6095       checks is performed for sizes of A, x, y.
6096 
6097 INPUT PARAMETERS:
6098     M   -   number of rows of op(A)
6099     N   -   number of columns of op(A)
6100     Alpha-  coefficient
6101     A   -   source matrix
6102     OpA -   operation type:
6103             * OpA=0     =>  op(A) = A
6104             * OpA=1     =>  op(A) = A^T
6105     X   -   input vector, has at least N elements
6106     Beta-   coefficient
6107     Y   -   preallocated output array, has at least M elements
6108 
6109 OUTPUT PARAMETERS:
6110     Y   -   vector which stores result
6111 
6112 HANDLING OF SPECIAL CASES:
6113     * if M=0, then subroutine does nothing. It does not even touch arrays.
6114     * if N=0 or Alpha=0.0, then:
6115       * if Beta=0, then Y is filled by zeros. A and X are  not  referenced
6116         at all. Initial values of Y are ignored (we do not  multiply  Y by
6117         zero, we just rewrite it by zeros)
6118       * if Beta<>0, then Y is replaced by Beta*Y
6119     * if M>0, N>0, Alpha<>0, but  Beta=0,  then  Y  is  replaced  by  A*x;
6120        initial state of Y is ignored (rewritten by  A*x,  without  initial
6121        multiplication by zeros).
6122 
6123 
6124   -- ALGLIB routine --
6125 
6126      01.09.2021
6127      Bochkanov Sergey
6128 *************************************************************************/
rgemv(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t opa,ae_vector * x,double beta,ae_vector * y,ae_state * _state)6129 void rgemv(ae_int_t m,
6130      ae_int_t n,
6131      double alpha,
6132      /* Real    */ ae_matrix* a,
6133      ae_int_t opa,
6134      /* Real    */ ae_vector* x,
6135      double beta,
6136      /* Real    */ ae_vector* y,
6137      ae_state *_state)
6138 {
6139     ae_int_t i;
6140     ae_int_t j;
6141     double v;
6142 
6143 
6144 
6145     /*
6146      * Properly premultiply Y by Beta.
6147      *
6148      * Quick exit for M=0, N=0 or Alpha=0.
6149      * After this block we have M>0, N>0, Alpha<>0.
6150      */
6151     if( m<=0 )
6152     {
6153         return;
6154     }
6155     if( ae_fp_neq(beta,(double)(0)) )
6156     {
6157         rmulv(m, beta, y, _state);
6158     }
6159     else
6160     {
6161         rsetv(m, 0.0, y, _state);
6162     }
6163     if( n<=0||ae_fp_eq(alpha,0.0) )
6164     {
6165         return;
6166     }
6167 
6168     /*
6169      * Generic code
6170      */
6171     if( opa==0 )
6172     {
6173 
6174         /*
6175          * y += A*x
6176          */
6177         for(i=0; i<=m-1; i++)
6178         {
6179             v = (double)(0);
6180             for(j=0; j<=n-1; j++)
6181             {
6182                 v = v+a->ptr.pp_double[i][j]*x->ptr.p_double[j];
6183             }
6184             y->ptr.p_double[i] = alpha*v+y->ptr.p_double[i];
6185         }
6186         return;
6187     }
6188     if( opa==1 )
6189     {
6190 
6191         /*
6192          * y += A^T*x
6193          */
6194         for(i=0; i<=n-1; i++)
6195         {
6196             v = alpha*x->ptr.p_double[i];
6197             for(j=0; j<=m-1; j++)
6198             {
6199                 y->ptr.p_double[j] = y->ptr.p_double[j]+v*a->ptr.pp_double[i][j];
6200             }
6201         }
6202         return;
6203     }
6204 }
6205 #endif
6206 
6207 
6208 #ifdef ALGLIB_NO_FAST_KERNELS
6209 /*************************************************************************
6210 Matrix-vector product: y := alpha*op(A)*x + beta*y
6211 
6212 Here x, y, A are subvectors/submatrices of larger vectors/matrices.
6213 
6214 NOTE: this  function  expects  Y  to  be  large enough to store result. No
6215       automatic preallocation happens for  smaller  arrays.  No  integrity
6216       checks is performed for sizes of A, x, y.
6217 
6218 INPUT PARAMETERS:
6219     M   -   number of rows of op(A)
6220     N   -   number of columns of op(A)
6221     Alpha-  coefficient
6222     A   -   source matrix
6223     IA  -   submatrix offset (row index)
6224     JA  -   submatrix offset (column index)
6225     OpA -   operation type:
6226             * OpA=0     =>  op(A) = A
6227             * OpA=1     =>  op(A) = A^T
6228     X   -   input vector, has at least N+IX elements
6229     IX  -   subvector offset
6230     Beta-   coefficient
6231     Y   -   preallocated output array, has at least M+IY elements
6232     IY  -   subvector offset
6233 
6234 OUTPUT PARAMETERS:
6235     Y   -   vector which stores result
6236 
6237 HANDLING OF SPECIAL CASES:
6238     * if M=0, then subroutine does nothing. It does not even touch arrays.
6239     * if N=0 or Alpha=0.0, then:
6240       * if Beta=0, then Y is filled by zeros. A and X are  not  referenced
6241         at all. Initial values of Y are ignored (we do not  multiply  Y by
6242         zero, we just rewrite it by zeros)
6243       * if Beta<>0, then Y is replaced by Beta*Y
6244     * if M>0, N>0, Alpha<>0, but  Beta=0,  then  Y  is  replaced  by  A*x;
6245        initial state of Y is ignored (rewritten by  A*x,  without  initial
6246        multiplication by zeros).
6247 
6248 
6249   -- ALGLIB routine --
6250 
6251      01.09.2021
6252      Bochkanov Sergey
6253 *************************************************************************/
rgemvx(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)6254 void rgemvx(ae_int_t m,
6255      ae_int_t n,
6256      double alpha,
6257      /* Real    */ ae_matrix* a,
6258      ae_int_t ia,
6259      ae_int_t ja,
6260      ae_int_t opa,
6261      /* Real    */ ae_vector* x,
6262      ae_int_t ix,
6263      double beta,
6264      /* Real    */ ae_vector* y,
6265      ae_int_t iy,
6266      ae_state *_state)
6267 {
6268     ae_int_t i;
6269     ae_int_t j;
6270     double v;
6271 
6272 
6273 
6274     /*
6275      * Properly premultiply Y by Beta.
6276      *
6277      * Quick exit for M=0, N=0 or Alpha=0.
6278      * After this block we have M>0, N>0, Alpha<>0.
6279      */
6280     if( m<=0 )
6281     {
6282         return;
6283     }
6284     if( ae_fp_neq(beta,(double)(0)) )
6285     {
6286         rmulvx(m, beta, y, iy, _state);
6287     }
6288     else
6289     {
6290         rsetvx(m, 0.0, y, iy, _state);
6291     }
6292     if( n<=0||ae_fp_eq(alpha,0.0) )
6293     {
6294         return;
6295     }
6296 
6297     /*
6298      * Generic code
6299      */
6300     if( opa==0 )
6301     {
6302 
6303         /*
6304          * y += A*x
6305          */
6306         for(i=0; i<=m-1; i++)
6307         {
6308             v = (double)(0);
6309             for(j=0; j<=n-1; j++)
6310             {
6311                 v = v+a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
6312             }
6313             y->ptr.p_double[iy+i] = alpha*v+y->ptr.p_double[iy+i];
6314         }
6315         return;
6316     }
6317     if( opa==1 )
6318     {
6319 
6320         /*
6321          * y += A^T*x
6322          */
6323         for(i=0; i<=n-1; i++)
6324         {
6325             v = alpha*x->ptr.p_double[ix+i];
6326             for(j=0; j<=m-1; j++)
6327             {
6328                 y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*a->ptr.pp_double[ia+i][ja+j];
6329             }
6330         }
6331         return;
6332     }
6333 }
6334 #endif
6335 
6336 
6337 #ifdef ALGLIB_NO_FAST_KERNELS
6338 /*************************************************************************
6339 Rank-1 correction: A := A + alpha*u*v'
6340 
6341 NOTE: this  function  expects  A  to  be  large enough to store result. No
6342       automatic preallocation happens for  smaller  arrays.  No  integrity
6343       checks is performed for sizes of A, u, v.
6344 
6345 INPUT PARAMETERS:
6346     M   -   number of rows
6347     N   -   number of columns
6348     A   -   target MxN matrix
6349     Alpha-  coefficient
6350     U   -   vector #1
6351     V   -   vector #2
6352 
6353 
6354   -- ALGLIB routine --
6355      07.09.2021
6356      Bochkanov Sergey
6357 *************************************************************************/
rger(ae_int_t m,ae_int_t n,double alpha,ae_vector * u,ae_vector * v,ae_matrix * a,ae_state * _state)6358 void rger(ae_int_t m,
6359      ae_int_t n,
6360      double alpha,
6361      /* Real    */ ae_vector* u,
6362      /* Real    */ ae_vector* v,
6363      /* Real    */ ae_matrix* a,
6364      ae_state *_state)
6365 {
6366     ae_int_t i;
6367     ae_int_t j;
6368     double s;
6369 
6370 
6371     if( (m<=0||n<=0)||ae_fp_eq(alpha,(double)(0)) )
6372     {
6373         return;
6374     }
6375     for(i=0; i<=m-1; i++)
6376     {
6377         s = alpha*u->ptr.p_double[i];
6378         for(j=0; j<=n-1; j++)
6379         {
6380             a->ptr.pp_double[i][j] = a->ptr.pp_double[i][j]+s*v->ptr.p_double[j];
6381         }
6382     }
6383 }
6384 #endif
6385 
6386 
6387 #ifdef ALGLIB_NO_FAST_KERNELS
6388 /*************************************************************************
6389 This subroutine solves linear system op(A)*x=b where:
6390 * A is NxN upper/lower triangular/unitriangular matrix
6391 * X and B are Nx1 vectors
6392 * "op" may be identity transformation or transposition
6393 
6394 Solution replaces X.
6395 
6396 IMPORTANT: * no overflow/underflow/denegeracy tests is performed.
6397            * no integrity checks for operand sizes, out-of-bounds accesses
6398              and so on is performed
6399 
6400 INPUT PARAMETERS
6401     N   -   matrix size, N>=0
6402     A       -   matrix, actial matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
6403     IA      -   submatrix offset
6404     JA      -   submatrix offset
6405     IsUpper -   whether matrix is upper triangular
6406     IsUnit  -   whether matrix is unitriangular
6407     OpType  -   transformation type:
6408                 * 0 - no transformation
6409                 * 1 - transposition
6410     X       -   right part, actual vector is stored in X[IX:IX+N-1]
6411     IX      -   offset
6412 
6413 OUTPUT PARAMETERS
6414     X       -   solution replaces elements X[IX:IX+N-1]
6415 
6416   -- ALGLIB routine --
6417      (c) 07.09.2021 Bochkanov Sergey
6418 *************************************************************************/
rtrsvx(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_int_t ix,ae_state * _state)6419 void rtrsvx(ae_int_t n,
6420      /* Real    */ ae_matrix* a,
6421      ae_int_t ia,
6422      ae_int_t ja,
6423      ae_bool isupper,
6424      ae_bool isunit,
6425      ae_int_t optype,
6426      /* Real    */ ae_vector* x,
6427      ae_int_t ix,
6428      ae_state *_state)
6429 {
6430     ae_int_t i;
6431     ae_int_t j;
6432     double v;
6433 
6434 
6435     if( n<=0 )
6436     {
6437         return;
6438     }
6439     if( optype==0&&isupper )
6440     {
6441         for(i=n-1; i>=0; i--)
6442         {
6443             v = x->ptr.p_double[ix+i];
6444             for(j=i+1; j<=n-1; j++)
6445             {
6446                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
6447             }
6448             if( !isunit )
6449             {
6450                 v = v/a->ptr.pp_double[ia+i][ja+i];
6451             }
6452             x->ptr.p_double[ix+i] = v;
6453         }
6454         return;
6455     }
6456     if( optype==0&&!isupper )
6457     {
6458         for(i=0; i<=n-1; i++)
6459         {
6460             v = x->ptr.p_double[ix+i];
6461             for(j=0; j<=i-1; j++)
6462             {
6463                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
6464             }
6465             if( !isunit )
6466             {
6467                 v = v/a->ptr.pp_double[ia+i][ja+i];
6468             }
6469             x->ptr.p_double[ix+i] = v;
6470         }
6471         return;
6472     }
6473     if( optype==1&&isupper )
6474     {
6475         for(i=0; i<=n-1; i++)
6476         {
6477             v = x->ptr.p_double[ix+i];
6478             if( !isunit )
6479             {
6480                 v = v/a->ptr.pp_double[ia+i][ja+i];
6481             }
6482             x->ptr.p_double[ix+i] = v;
6483             if( v==0 )
6484             {
6485                 continue;
6486             }
6487             for(j=i+1; j<=n-1; j++)
6488             {
6489                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
6490             }
6491         }
6492         return;
6493     }
6494     if( optype==1&&!isupper )
6495     {
6496         for(i=n-1; i>=0; i--)
6497         {
6498             v = x->ptr.p_double[ix+i];
6499             if( !isunit )
6500             {
6501                 v = v/a->ptr.pp_double[ia+i][ja+i];
6502             }
6503             x->ptr.p_double[ix+i] = v;
6504             if( v==0 )
6505             {
6506                 continue;
6507             }
6508             for(j=0; j<=i-1; j++)
6509             {
6510                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
6511             }
6512         }
6513         return;
6514     }
6515     ae_assert(ae_false, "rTRSVX: unexpected operation type", _state);
6516 }
6517 #endif
6518 
6519 
6520 /*************************************************************************
6521 Fast kernel
6522 
6523   -- ALGLIB routine --
6524      19.01.2010
6525      Bochkanov Sergey
6526 *************************************************************************/
rmatrixgerf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double ralpha,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)6527 ae_bool rmatrixgerf(ae_int_t m,
6528      ae_int_t n,
6529      /* Real    */ ae_matrix* a,
6530      ae_int_t ia,
6531      ae_int_t ja,
6532      double ralpha,
6533      /* Real    */ ae_vector* u,
6534      ae_int_t iu,
6535      /* Real    */ ae_vector* v,
6536      ae_int_t iv,
6537      ae_state *_state)
6538 {
6539 #ifndef ALGLIB_INTERCEPTS_ABLAS
6540     ae_bool result;
6541 
6542 
6543     result = ae_false;
6544     return result;
6545 #else
6546     return _ialglib_i_rmatrixgerf(m, n, a, ia, ja, ralpha, u, iu, v, iv);
6547 #endif
6548 }
6549 
6550 
6551 /*************************************************************************
6552 Fast kernel
6553 
6554   -- ALGLIB routine --
6555      19.01.2010
6556      Bochkanov Sergey
6557 *************************************************************************/
cmatrixrank1f(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)6558 ae_bool cmatrixrank1f(ae_int_t m,
6559      ae_int_t n,
6560      /* Complex */ ae_matrix* a,
6561      ae_int_t ia,
6562      ae_int_t ja,
6563      /* Complex */ ae_vector* u,
6564      ae_int_t iu,
6565      /* Complex */ ae_vector* v,
6566      ae_int_t iv,
6567      ae_state *_state)
6568 {
6569 #ifndef ALGLIB_INTERCEPTS_ABLAS
6570     ae_bool result;
6571 
6572 
6573     result = ae_false;
6574     return result;
6575 #else
6576     return _ialglib_i_cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
6577 #endif
6578 }
6579 
6580 
6581 /*************************************************************************
6582 Fast kernel
6583 
6584   -- ALGLIB routine --
6585      19.01.2010
6586      Bochkanov Sergey
6587 *************************************************************************/
rmatrixrank1f(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)6588 ae_bool rmatrixrank1f(ae_int_t m,
6589      ae_int_t n,
6590      /* Real    */ ae_matrix* a,
6591      ae_int_t ia,
6592      ae_int_t ja,
6593      /* Real    */ ae_vector* u,
6594      ae_int_t iu,
6595      /* Real    */ ae_vector* v,
6596      ae_int_t iv,
6597      ae_state *_state)
6598 {
6599 #ifndef ALGLIB_INTERCEPTS_ABLAS
6600     ae_bool result;
6601 
6602 
6603     result = ae_false;
6604     return result;
6605 #else
6606     return _ialglib_i_rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
6607 #endif
6608 }
6609 
6610 
6611 /*************************************************************************
6612 Fast kernel
6613 
6614   -- ALGLIB routine --
6615      19.01.2010
6616      Bochkanov Sergey
6617 *************************************************************************/
cmatrixrighttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6618 ae_bool cmatrixrighttrsmf(ae_int_t m,
6619      ae_int_t n,
6620      /* Complex */ ae_matrix* a,
6621      ae_int_t i1,
6622      ae_int_t j1,
6623      ae_bool isupper,
6624      ae_bool isunit,
6625      ae_int_t optype,
6626      /* Complex */ ae_matrix* x,
6627      ae_int_t i2,
6628      ae_int_t j2,
6629      ae_state *_state)
6630 {
6631 #ifndef ALGLIB_INTERCEPTS_ABLAS
6632     ae_bool result;
6633 
6634 
6635     result = ae_false;
6636     return result;
6637 #else
6638     return _ialglib_i_cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6639 #endif
6640 }
6641 
6642 
6643 /*************************************************************************
6644 Fast kernel
6645 
6646   -- ALGLIB routine --
6647      19.01.2010
6648      Bochkanov Sergey
6649 *************************************************************************/
cmatrixlefttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6650 ae_bool cmatrixlefttrsmf(ae_int_t m,
6651      ae_int_t n,
6652      /* Complex */ ae_matrix* a,
6653      ae_int_t i1,
6654      ae_int_t j1,
6655      ae_bool isupper,
6656      ae_bool isunit,
6657      ae_int_t optype,
6658      /* Complex */ ae_matrix* x,
6659      ae_int_t i2,
6660      ae_int_t j2,
6661      ae_state *_state)
6662 {
6663 #ifndef ALGLIB_INTERCEPTS_ABLAS
6664     ae_bool result;
6665 
6666 
6667     result = ae_false;
6668     return result;
6669 #else
6670     return _ialglib_i_cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6671 #endif
6672 }
6673 
6674 
6675 /*************************************************************************
6676 Fast kernel
6677 
6678   -- ALGLIB routine --
6679      19.01.2010
6680      Bochkanov Sergey
6681 *************************************************************************/
rmatrixrighttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6682 ae_bool rmatrixrighttrsmf(ae_int_t m,
6683      ae_int_t n,
6684      /* Real    */ ae_matrix* a,
6685      ae_int_t i1,
6686      ae_int_t j1,
6687      ae_bool isupper,
6688      ae_bool isunit,
6689      ae_int_t optype,
6690      /* Real    */ ae_matrix* x,
6691      ae_int_t i2,
6692      ae_int_t j2,
6693      ae_state *_state)
6694 {
6695 #ifndef ALGLIB_INTERCEPTS_ABLAS
6696     ae_bool result;
6697 
6698 
6699     result = ae_false;
6700     return result;
6701 #else
6702     return _ialglib_i_rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6703 #endif
6704 }
6705 
6706 
6707 /*************************************************************************
6708 Fast kernel
6709 
6710   -- ALGLIB routine --
6711      19.01.2010
6712      Bochkanov Sergey
6713 *************************************************************************/
rmatrixlefttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)6714 ae_bool rmatrixlefttrsmf(ae_int_t m,
6715      ae_int_t n,
6716      /* Real    */ ae_matrix* a,
6717      ae_int_t i1,
6718      ae_int_t j1,
6719      ae_bool isupper,
6720      ae_bool isunit,
6721      ae_int_t optype,
6722      /* Real    */ ae_matrix* x,
6723      ae_int_t i2,
6724      ae_int_t j2,
6725      ae_state *_state)
6726 {
6727 #ifndef ALGLIB_INTERCEPTS_ABLAS
6728     ae_bool result;
6729 
6730 
6731     result = ae_false;
6732     return result;
6733 #else
6734     return _ialglib_i_rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
6735 #endif
6736 }
6737 
6738 
6739 /*************************************************************************
6740 Fast kernel
6741 
6742   -- ALGLIB routine --
6743      19.01.2010
6744      Bochkanov Sergey
6745 *************************************************************************/
cmatrixherkf(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)6746 ae_bool cmatrixherkf(ae_int_t n,
6747      ae_int_t k,
6748      double alpha,
6749      /* Complex */ ae_matrix* a,
6750      ae_int_t ia,
6751      ae_int_t ja,
6752      ae_int_t optypea,
6753      double beta,
6754      /* Complex */ ae_matrix* c,
6755      ae_int_t ic,
6756      ae_int_t jc,
6757      ae_bool isupper,
6758      ae_state *_state)
6759 {
6760 #ifndef ALGLIB_INTERCEPTS_ABLAS
6761     ae_bool result;
6762 
6763 
6764     result = ae_false;
6765     return result;
6766 #else
6767     return _ialglib_i_cmatrixherkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
6768 #endif
6769 }
6770 
6771 
6772 /*************************************************************************
6773 Fast kernel
6774 
6775   -- ALGLIB routine --
6776      19.01.2010
6777      Bochkanov Sergey
6778 *************************************************************************/
rmatrixsyrkf(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)6779 ae_bool rmatrixsyrkf(ae_int_t n,
6780      ae_int_t k,
6781      double alpha,
6782      /* Real    */ ae_matrix* a,
6783      ae_int_t ia,
6784      ae_int_t ja,
6785      ae_int_t optypea,
6786      double beta,
6787      /* Real    */ ae_matrix* c,
6788      ae_int_t ic,
6789      ae_int_t jc,
6790      ae_bool isupper,
6791      ae_state *_state)
6792 {
6793 #ifndef ALGLIB_INTERCEPTS_ABLAS
6794     ae_bool result;
6795 
6796 
6797     result = ae_false;
6798     return result;
6799 #else
6800     return _ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
6801 #endif
6802 }
6803 
6804 
6805 /*************************************************************************
6806 Fast kernel
6807 
6808   -- ALGLIB routine --
6809      19.01.2010
6810      Bochkanov Sergey
6811 *************************************************************************/
cmatrixgemmf(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)6812 ae_bool cmatrixgemmf(ae_int_t m,
6813      ae_int_t n,
6814      ae_int_t k,
6815      ae_complex alpha,
6816      /* Complex */ ae_matrix* a,
6817      ae_int_t ia,
6818      ae_int_t ja,
6819      ae_int_t optypea,
6820      /* Complex */ ae_matrix* b,
6821      ae_int_t ib,
6822      ae_int_t jb,
6823      ae_int_t optypeb,
6824      ae_complex beta,
6825      /* Complex */ ae_matrix* c,
6826      ae_int_t ic,
6827      ae_int_t jc,
6828      ae_state *_state)
6829 {
6830 #ifndef ALGLIB_INTERCEPTS_ABLAS
6831     ae_bool result;
6832 
6833 
6834     result = ae_false;
6835     return result;
6836 #else
6837     return _ialglib_i_cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
6838 #endif
6839 }
6840 
6841 
6842 /*************************************************************************
6843 CMatrixGEMM kernel, basecase code for CMatrixGEMM.
6844 
6845 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
6846 * C is MxN general matrix
6847 * op1(A) is MxK matrix
6848 * op2(B) is KxN matrix
6849 * "op" may be identity transformation, transposition, conjugate transposition
6850 
6851 Additional info:
6852 * multiplication result replaces C. If Beta=0, C elements are not used in
6853   calculations (not multiplied by zero - just not referenced)
6854 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
6855 * if both Beta and Alpha are zero, C is filled by zeros.
6856 
6857 IMPORTANT:
6858 
6859 This function does NOT preallocate output matrix C, it MUST be preallocated
6860 by caller prior to calling this function. In case C does not have  enough
6861 space to store result, exception will be generated.
6862 
6863 INPUT PARAMETERS
6864     M       -   matrix size, M>0
6865     N       -   matrix size, N>0
6866     K       -   matrix size, K>0
6867     Alpha   -   coefficient
6868     A       -   matrix
6869     IA      -   submatrix offset
6870     JA      -   submatrix offset
6871     OpTypeA -   transformation type:
6872                 * 0 - no transformation
6873                 * 1 - transposition
6874                 * 2 - conjugate transposition
6875     B       -   matrix
6876     IB      -   submatrix offset
6877     JB      -   submatrix offset
6878     OpTypeB -   transformation type:
6879                 * 0 - no transformation
6880                 * 1 - transposition
6881                 * 2 - conjugate transposition
6882     Beta    -   coefficient
6883     C       -   PREALLOCATED output matrix
6884     IC      -   submatrix offset
6885     JC      -   submatrix offset
6886 
6887   -- ALGLIB routine --
6888      27.03.2013
6889      Bochkanov Sergey
6890 *************************************************************************/
cmatrixgemmk(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)6891 void cmatrixgemmk(ae_int_t m,
6892      ae_int_t n,
6893      ae_int_t k,
6894      ae_complex alpha,
6895      /* Complex */ ae_matrix* a,
6896      ae_int_t ia,
6897      ae_int_t ja,
6898      ae_int_t optypea,
6899      /* Complex */ ae_matrix* b,
6900      ae_int_t ib,
6901      ae_int_t jb,
6902      ae_int_t optypeb,
6903      ae_complex beta,
6904      /* Complex */ ae_matrix* c,
6905      ae_int_t ic,
6906      ae_int_t jc,
6907      ae_state *_state)
6908 {
6909     ae_int_t i;
6910     ae_int_t j;
6911     ae_complex v;
6912     ae_complex v00;
6913     ae_complex v01;
6914     ae_complex v10;
6915     ae_complex v11;
6916     double v00x;
6917     double v00y;
6918     double v01x;
6919     double v01y;
6920     double v10x;
6921     double v10y;
6922     double v11x;
6923     double v11y;
6924     double a0x;
6925     double a0y;
6926     double a1x;
6927     double a1y;
6928     double b0x;
6929     double b0y;
6930     double b1x;
6931     double b1y;
6932     ae_int_t idxa0;
6933     ae_int_t idxa1;
6934     ae_int_t idxb0;
6935     ae_int_t idxb1;
6936     ae_int_t i0;
6937     ae_int_t i1;
6938     ae_int_t ik;
6939     ae_int_t j0;
6940     ae_int_t j1;
6941     ae_int_t jk;
6942     ae_int_t t;
6943     ae_int_t offsa;
6944     ae_int_t offsb;
6945 
6946 
6947 
6948     /*
6949      * if matrix size is zero
6950      */
6951     if( m==0||n==0 )
6952     {
6953         return;
6954     }
6955 
6956     /*
6957      * Try optimized code
6958      */
6959     if( cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
6960     {
6961         return;
6962     }
6963 
6964     /*
6965      * if K=0 or Alpha=0, then C=Beta*C
6966      */
6967     if( k==0||ae_c_eq_d(alpha,(double)(0)) )
6968     {
6969         if( ae_c_neq_d(beta,(double)(1)) )
6970         {
6971             if( ae_c_neq_d(beta,(double)(0)) )
6972             {
6973                 for(i=0; i<=m-1; i++)
6974                 {
6975                     for(j=0; j<=n-1; j++)
6976                     {
6977                         c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]);
6978                     }
6979                 }
6980             }
6981             else
6982             {
6983                 for(i=0; i<=m-1; i++)
6984                 {
6985                     for(j=0; j<=n-1; j++)
6986                     {
6987                         c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_i(0);
6988                     }
6989                 }
6990             }
6991         }
6992         return;
6993     }
6994 
6995     /*
6996      * This phase is not really necessary, but compiler complains
6997      * about "possibly uninitialized variables"
6998      */
6999     a0x = (double)(0);
7000     a0y = (double)(0);
7001     a1x = (double)(0);
7002     a1y = (double)(0);
7003     b0x = (double)(0);
7004     b0y = (double)(0);
7005     b1x = (double)(0);
7006     b1y = (double)(0);
7007 
7008     /*
7009      * General case
7010      */
7011     i = 0;
7012     while(i<m)
7013     {
7014         j = 0;
7015         while(j<n)
7016         {
7017 
7018             /*
7019              * Choose between specialized 4x4 code and general code
7020              */
7021             if( i+2<=m&&j+2<=n )
7022             {
7023 
7024                 /*
7025                  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
7026                  *
7027                  * This submatrix is calculated as sum of K rank-1 products,
7028                  * with operands cached in local variables in order to speed
7029                  * up operations with arrays.
7030                  */
7031                 v00x = 0.0;
7032                 v00y = 0.0;
7033                 v01x = 0.0;
7034                 v01y = 0.0;
7035                 v10x = 0.0;
7036                 v10y = 0.0;
7037                 v11x = 0.0;
7038                 v11y = 0.0;
7039                 if( optypea==0 )
7040                 {
7041                     idxa0 = ia+i+0;
7042                     idxa1 = ia+i+1;
7043                     offsa = ja;
7044                 }
7045                 else
7046                 {
7047                     idxa0 = ja+i+0;
7048                     idxa1 = ja+i+1;
7049                     offsa = ia;
7050                 }
7051                 if( optypeb==0 )
7052                 {
7053                     idxb0 = jb+j+0;
7054                     idxb1 = jb+j+1;
7055                     offsb = ib;
7056                 }
7057                 else
7058                 {
7059                     idxb0 = ib+j+0;
7060                     idxb1 = ib+j+1;
7061                     offsb = jb;
7062                 }
7063                 for(t=0; t<=k-1; t++)
7064                 {
7065                     if( optypea==0 )
7066                     {
7067                         a0x = a->ptr.pp_complex[idxa0][offsa].x;
7068                         a0y = a->ptr.pp_complex[idxa0][offsa].y;
7069                         a1x = a->ptr.pp_complex[idxa1][offsa].x;
7070                         a1y = a->ptr.pp_complex[idxa1][offsa].y;
7071                     }
7072                     if( optypea==1 )
7073                     {
7074                         a0x = a->ptr.pp_complex[offsa][idxa0].x;
7075                         a0y = a->ptr.pp_complex[offsa][idxa0].y;
7076                         a1x = a->ptr.pp_complex[offsa][idxa1].x;
7077                         a1y = a->ptr.pp_complex[offsa][idxa1].y;
7078                     }
7079                     if( optypea==2 )
7080                     {
7081                         a0x = a->ptr.pp_complex[offsa][idxa0].x;
7082                         a0y = -a->ptr.pp_complex[offsa][idxa0].y;
7083                         a1x = a->ptr.pp_complex[offsa][idxa1].x;
7084                         a1y = -a->ptr.pp_complex[offsa][idxa1].y;
7085                     }
7086                     if( optypeb==0 )
7087                     {
7088                         b0x = b->ptr.pp_complex[offsb][idxb0].x;
7089                         b0y = b->ptr.pp_complex[offsb][idxb0].y;
7090                         b1x = b->ptr.pp_complex[offsb][idxb1].x;
7091                         b1y = b->ptr.pp_complex[offsb][idxb1].y;
7092                     }
7093                     if( optypeb==1 )
7094                     {
7095                         b0x = b->ptr.pp_complex[idxb0][offsb].x;
7096                         b0y = b->ptr.pp_complex[idxb0][offsb].y;
7097                         b1x = b->ptr.pp_complex[idxb1][offsb].x;
7098                         b1y = b->ptr.pp_complex[idxb1][offsb].y;
7099                     }
7100                     if( optypeb==2 )
7101                     {
7102                         b0x = b->ptr.pp_complex[idxb0][offsb].x;
7103                         b0y = -b->ptr.pp_complex[idxb0][offsb].y;
7104                         b1x = b->ptr.pp_complex[idxb1][offsb].x;
7105                         b1y = -b->ptr.pp_complex[idxb1][offsb].y;
7106                     }
7107                     v00x = v00x+a0x*b0x-a0y*b0y;
7108                     v00y = v00y+a0x*b0y+a0y*b0x;
7109                     v01x = v01x+a0x*b1x-a0y*b1y;
7110                     v01y = v01y+a0x*b1y+a0y*b1x;
7111                     v10x = v10x+a1x*b0x-a1y*b0y;
7112                     v10y = v10y+a1x*b0y+a1y*b0x;
7113                     v11x = v11x+a1x*b1x-a1y*b1y;
7114                     v11y = v11y+a1x*b1y+a1y*b1x;
7115                     offsa = offsa+1;
7116                     offsb = offsb+1;
7117                 }
7118                 v00.x = v00x;
7119                 v00.y = v00y;
7120                 v10.x = v10x;
7121                 v10.y = v10y;
7122                 v01.x = v01x;
7123                 v01.y = v01y;
7124                 v11.x = v11x;
7125                 v11.y = v11y;
7126                 if( ae_c_eq_d(beta,(double)(0)) )
7127                 {
7128                     c->ptr.pp_complex[ic+i+0][jc+j+0] = ae_c_mul(alpha,v00);
7129                     c->ptr.pp_complex[ic+i+0][jc+j+1] = ae_c_mul(alpha,v01);
7130                     c->ptr.pp_complex[ic+i+1][jc+j+0] = ae_c_mul(alpha,v10);
7131                     c->ptr.pp_complex[ic+i+1][jc+j+1] = ae_c_mul(alpha,v11);
7132                 }
7133                 else
7134                 {
7135                     c->ptr.pp_complex[ic+i+0][jc+j+0] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+0][jc+j+0]),ae_c_mul(alpha,v00));
7136                     c->ptr.pp_complex[ic+i+0][jc+j+1] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+0][jc+j+1]),ae_c_mul(alpha,v01));
7137                     c->ptr.pp_complex[ic+i+1][jc+j+0] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+1][jc+j+0]),ae_c_mul(alpha,v10));
7138                     c->ptr.pp_complex[ic+i+1][jc+j+1] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+1][jc+j+1]),ae_c_mul(alpha,v11));
7139                 }
7140             }
7141             else
7142             {
7143 
7144                 /*
7145                  * Determine submatrix [I0..I1]x[J0..J1] to process
7146                  */
7147                 i0 = i;
7148                 i1 = ae_minint(i+1, m-1, _state);
7149                 j0 = j;
7150                 j1 = ae_minint(j+1, n-1, _state);
7151 
7152                 /*
7153                  * Process submatrix
7154                  */
7155                 for(ik=i0; ik<=i1; ik++)
7156                 {
7157                     for(jk=j0; jk<=j1; jk++)
7158                     {
7159                         if( k==0||ae_c_eq_d(alpha,(double)(0)) )
7160                         {
7161                             v = ae_complex_from_i(0);
7162                         }
7163                         else
7164                         {
7165                             v = ae_complex_from_d(0.0);
7166                             if( optypea==0&&optypeb==0 )
7167                             {
7168                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ja,ja+k-1));
7169                             }
7170                             if( optypea==0&&optypeb==1 )
7171                             {
7172                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ja,ja+k-1));
7173                             }
7174                             if( optypea==0&&optypeb==2 )
7175                             {
7176                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ja,ja+k-1));
7177                             }
7178                             if( optypea==1&&optypeb==0 )
7179                             {
7180                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ia,ia+k-1));
7181                             }
7182                             if( optypea==1&&optypeb==1 )
7183                             {
7184                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ia,ia+k-1));
7185                             }
7186                             if( optypea==1&&optypeb==2 )
7187                             {
7188                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
7189                             }
7190                             if( optypea==2&&optypeb==0 )
7191                             {
7192                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ia,ia+k-1));
7193                             }
7194                             if( optypea==2&&optypeb==1 )
7195                             {
7196                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ia,ia+k-1));
7197                             }
7198                             if( optypea==2&&optypeb==2 )
7199                             {
7200                                 v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
7201                             }
7202                         }
7203                         if( ae_c_eq_d(beta,(double)(0)) )
7204                         {
7205                             c->ptr.pp_complex[ic+ik][jc+jk] = ae_c_mul(alpha,v);
7206                         }
7207                         else
7208                         {
7209                             c->ptr.pp_complex[ic+ik][jc+jk] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+ik][jc+jk]),ae_c_mul(alpha,v));
7210                         }
7211                     }
7212                 }
7213             }
7214             j = j+2;
7215         }
7216         i = i+2;
7217     }
7218 }
7219 
7220 
7221 /*************************************************************************
7222 RMatrixGEMM kernel, basecase code for RMatrixGEMM.
7223 
7224 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
7225 * C is MxN general matrix
7226 * op1(A) is MxK matrix
7227 * op2(B) is KxN matrix
7228 * "op" may be identity transformation, transposition
7229 
7230 Additional info:
7231 * multiplication result replaces C. If Beta=0, C elements are not used in
7232   calculations (not multiplied by zero - just not referenced)
7233 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
7234 * if both Beta and Alpha are zero, C is filled by zeros.
7235 
7236 IMPORTANT:
7237 
7238 This function does NOT preallocate output matrix C, it MUST be preallocated
7239 by caller prior to calling this function. In case C does not have  enough
7240 space to store result, exception will be generated.
7241 
7242 INPUT PARAMETERS
7243     M       -   matrix size, M>0
7244     N       -   matrix size, N>0
7245     K       -   matrix size, K>0
7246     Alpha   -   coefficient
7247     A       -   matrix
7248     IA      -   submatrix offset
7249     JA      -   submatrix offset
7250     OpTypeA -   transformation type:
7251                 * 0 - no transformation
7252                 * 1 - transposition
7253     B       -   matrix
7254     IB      -   submatrix offset
7255     JB      -   submatrix offset
7256     OpTypeB -   transformation type:
7257                 * 0 - no transformation
7258                 * 1 - transposition
7259     Beta    -   coefficient
7260     C       -   PREALLOCATED output matrix
7261     IC      -   submatrix offset
7262     JC      -   submatrix offset
7263 
7264   -- ALGLIB routine --
7265      27.03.2013
7266      Bochkanov Sergey
7267 *************************************************************************/
rmatrixgemmk(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7268 void rmatrixgemmk(ae_int_t m,
7269      ae_int_t n,
7270      ae_int_t k,
7271      double alpha,
7272      /* Real    */ ae_matrix* a,
7273      ae_int_t ia,
7274      ae_int_t ja,
7275      ae_int_t optypea,
7276      /* Real    */ ae_matrix* b,
7277      ae_int_t ib,
7278      ae_int_t jb,
7279      ae_int_t optypeb,
7280      double beta,
7281      /* Real    */ ae_matrix* c,
7282      ae_int_t ic,
7283      ae_int_t jc,
7284      ae_state *_state)
7285 {
7286     ae_int_t i;
7287     ae_int_t j;
7288 
7289 
7290 
7291     /*
7292      * if matrix size is zero
7293      */
7294     if( m==0||n==0 )
7295     {
7296         return;
7297     }
7298 
7299     /*
7300      * Try optimized code
7301      */
7302     if( ablasf_rgemm32basecase(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
7303     {
7304         return;
7305     }
7306 
7307     /*
7308      * if K=0 or Alpha=0, then C=Beta*C
7309      */
7310     if( k==0||ae_fp_eq(alpha,(double)(0)) )
7311     {
7312         if( ae_fp_neq(beta,(double)(1)) )
7313         {
7314             if( ae_fp_neq(beta,(double)(0)) )
7315             {
7316                 for(i=0; i<=m-1; i++)
7317                 {
7318                     for(j=0; j<=n-1; j++)
7319                     {
7320                         c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j];
7321                     }
7322                 }
7323             }
7324             else
7325             {
7326                 for(i=0; i<=m-1; i++)
7327                 {
7328                     for(j=0; j<=n-1; j++)
7329                     {
7330                         c->ptr.pp_double[ic+i][jc+j] = (double)(0);
7331                     }
7332                 }
7333             }
7334         }
7335         return;
7336     }
7337 
7338     /*
7339      * Call specialized code.
7340      *
7341      * NOTE: specialized code was moved to separate function because of strange
7342      *       issues with instructions cache on some systems; Having too long
7343      *       functions significantly slows down internal loop of the algorithm.
7344      */
7345     if( optypea==0&&optypeb==0 )
7346     {
7347         rmatrixgemmk44v00(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7348     }
7349     if( optypea==0&&optypeb!=0 )
7350     {
7351         rmatrixgemmk44v01(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7352     }
7353     if( optypea!=0&&optypeb==0 )
7354     {
7355         rmatrixgemmk44v10(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7356     }
7357     if( optypea!=0&&optypeb!=0 )
7358     {
7359         rmatrixgemmk44v11(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
7360     }
7361 }
7362 
7363 
7364 /*************************************************************************
7365 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
7366 with OpTypeA=0 and OpTypeB=0.
7367 
7368 Additional info:
7369 * this function requires that Alpha<>0 (assertion is thrown otherwise)
7370 
7371 INPUT PARAMETERS
7372     M       -   matrix size, M>0
7373     N       -   matrix size, N>0
7374     K       -   matrix size, K>0
7375     Alpha   -   coefficient
7376     A       -   matrix
7377     IA      -   submatrix offset
7378     JA      -   submatrix offset
7379     B       -   matrix
7380     IB      -   submatrix offset
7381     JB      -   submatrix offset
7382     Beta    -   coefficient
7383     C       -   PREALLOCATED output matrix
7384     IC      -   submatrix offset
7385     JC      -   submatrix offset
7386 
7387   -- ALGLIB routine --
7388      27.03.2013
7389      Bochkanov Sergey
7390 *************************************************************************/
rmatrixgemmk44v00(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7391 void rmatrixgemmk44v00(ae_int_t m,
7392      ae_int_t n,
7393      ae_int_t k,
7394      double alpha,
7395      /* Real    */ ae_matrix* a,
7396      ae_int_t ia,
7397      ae_int_t ja,
7398      /* Real    */ ae_matrix* b,
7399      ae_int_t ib,
7400      ae_int_t jb,
7401      double beta,
7402      /* Real    */ ae_matrix* c,
7403      ae_int_t ic,
7404      ae_int_t jc,
7405      ae_state *_state)
7406 {
7407     ae_int_t i;
7408     ae_int_t j;
7409     double v;
7410     double v00;
7411     double v01;
7412     double v02;
7413     double v03;
7414     double v10;
7415     double v11;
7416     double v12;
7417     double v13;
7418     double v20;
7419     double v21;
7420     double v22;
7421     double v23;
7422     double v30;
7423     double v31;
7424     double v32;
7425     double v33;
7426     double a0;
7427     double a1;
7428     double a2;
7429     double a3;
7430     double b0;
7431     double b1;
7432     double b2;
7433     double b3;
7434     ae_int_t idxa0;
7435     ae_int_t idxa1;
7436     ae_int_t idxa2;
7437     ae_int_t idxa3;
7438     ae_int_t idxb0;
7439     ae_int_t idxb1;
7440     ae_int_t idxb2;
7441     ae_int_t idxb3;
7442     ae_int_t i0;
7443     ae_int_t i1;
7444     ae_int_t ik;
7445     ae_int_t j0;
7446     ae_int_t j1;
7447     ae_int_t jk;
7448     ae_int_t t;
7449     ae_int_t offsa;
7450     ae_int_t offsb;
7451 
7452 
7453     ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
7454 
7455     /*
7456      * if matrix size is zero
7457      */
7458     if( m==0||n==0 )
7459     {
7460         return;
7461     }
7462 
7463     /*
7464      * A*B
7465      */
7466     i = 0;
7467     while(i<m)
7468     {
7469         j = 0;
7470         while(j<n)
7471         {
7472 
7473             /*
7474              * Choose between specialized 4x4 code and general code
7475              */
7476             if( i+4<=m&&j+4<=n )
7477             {
7478 
7479                 /*
7480                  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
7481                  *
7482                  * This submatrix is calculated as sum of K rank-1 products,
7483                  * with operands cached in local variables in order to speed
7484                  * up operations with arrays.
7485                  */
7486                 idxa0 = ia+i+0;
7487                 idxa1 = ia+i+1;
7488                 idxa2 = ia+i+2;
7489                 idxa3 = ia+i+3;
7490                 offsa = ja;
7491                 idxb0 = jb+j+0;
7492                 idxb1 = jb+j+1;
7493                 idxb2 = jb+j+2;
7494                 idxb3 = jb+j+3;
7495                 offsb = ib;
7496                 v00 = 0.0;
7497                 v01 = 0.0;
7498                 v02 = 0.0;
7499                 v03 = 0.0;
7500                 v10 = 0.0;
7501                 v11 = 0.0;
7502                 v12 = 0.0;
7503                 v13 = 0.0;
7504                 v20 = 0.0;
7505                 v21 = 0.0;
7506                 v22 = 0.0;
7507                 v23 = 0.0;
7508                 v30 = 0.0;
7509                 v31 = 0.0;
7510                 v32 = 0.0;
7511                 v33 = 0.0;
7512 
7513                 /*
7514                  * Different variants of internal loop
7515                  */
7516                 for(t=0; t<=k-1; t++)
7517                 {
7518                     a0 = a->ptr.pp_double[idxa0][offsa];
7519                     a1 = a->ptr.pp_double[idxa1][offsa];
7520                     b0 = b->ptr.pp_double[offsb][idxb0];
7521                     b1 = b->ptr.pp_double[offsb][idxb1];
7522                     v00 = v00+a0*b0;
7523                     v01 = v01+a0*b1;
7524                     v10 = v10+a1*b0;
7525                     v11 = v11+a1*b1;
7526                     a2 = a->ptr.pp_double[idxa2][offsa];
7527                     a3 = a->ptr.pp_double[idxa3][offsa];
7528                     v20 = v20+a2*b0;
7529                     v21 = v21+a2*b1;
7530                     v30 = v30+a3*b0;
7531                     v31 = v31+a3*b1;
7532                     b2 = b->ptr.pp_double[offsb][idxb2];
7533                     b3 = b->ptr.pp_double[offsb][idxb3];
7534                     v22 = v22+a2*b2;
7535                     v23 = v23+a2*b3;
7536                     v32 = v32+a3*b2;
7537                     v33 = v33+a3*b3;
7538                     v02 = v02+a0*b2;
7539                     v03 = v03+a0*b3;
7540                     v12 = v12+a1*b2;
7541                     v13 = v13+a1*b3;
7542                     offsa = offsa+1;
7543                     offsb = offsb+1;
7544                 }
7545                 if( ae_fp_eq(beta,(double)(0)) )
7546                 {
7547                     c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
7548                     c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
7549                     c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
7550                     c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
7551                     c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
7552                     c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
7553                     c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
7554                     c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
7555                     c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
7556                     c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
7557                     c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
7558                     c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
7559                     c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
7560                     c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
7561                     c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
7562                     c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
7563                 }
7564                 else
7565                 {
7566                     c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
7567                     c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
7568                     c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
7569                     c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
7570                     c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
7571                     c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
7572                     c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
7573                     c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
7574                     c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
7575                     c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
7576                     c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
7577                     c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
7578                     c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
7579                     c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
7580                     c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
7581                     c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
7582                 }
7583             }
7584             else
7585             {
7586 
7587                 /*
7588                  * Determine submatrix [I0..I1]x[J0..J1] to process
7589                  */
7590                 i0 = i;
7591                 i1 = ae_minint(i+3, m-1, _state);
7592                 j0 = j;
7593                 j1 = ae_minint(j+3, n-1, _state);
7594 
7595                 /*
7596                  * Process submatrix
7597                  */
7598                 for(ik=i0; ik<=i1; ik++)
7599                 {
7600                     for(jk=j0; jk<=j1; jk++)
7601                     {
7602                         if( k==0||ae_fp_eq(alpha,(double)(0)) )
7603                         {
7604                             v = (double)(0);
7605                         }
7606                         else
7607                         {
7608                             v = ae_v_dotproduct(&a->ptr.pp_double[ia+ik][ja], 1, &b->ptr.pp_double[ib][jb+jk], b->stride, ae_v_len(ja,ja+k-1));
7609                         }
7610                         if( ae_fp_eq(beta,(double)(0)) )
7611                         {
7612                             c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
7613                         }
7614                         else
7615                         {
7616                             c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
7617                         }
7618                     }
7619                 }
7620             }
7621             j = j+4;
7622         }
7623         i = i+4;
7624     }
7625 }
7626 
7627 
7628 /*************************************************************************
7629 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
7630 with OpTypeA=0 and OpTypeB=1.
7631 
7632 Additional info:
7633 * this function requires that Alpha<>0 (assertion is thrown otherwise)
7634 
7635 INPUT PARAMETERS
7636     M       -   matrix size, M>0
7637     N       -   matrix size, N>0
7638     K       -   matrix size, K>0
7639     Alpha   -   coefficient
7640     A       -   matrix
7641     IA      -   submatrix offset
7642     JA      -   submatrix offset
7643     B       -   matrix
7644     IB      -   submatrix offset
7645     JB      -   submatrix offset
7646     Beta    -   coefficient
7647     C       -   PREALLOCATED output matrix
7648     IC      -   submatrix offset
7649     JC      -   submatrix offset
7650 
7651   -- ALGLIB routine --
7652      27.03.2013
7653      Bochkanov Sergey
7654 *************************************************************************/
rmatrixgemmk44v01(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7655 void rmatrixgemmk44v01(ae_int_t m,
7656      ae_int_t n,
7657      ae_int_t k,
7658      double alpha,
7659      /* Real    */ ae_matrix* a,
7660      ae_int_t ia,
7661      ae_int_t ja,
7662      /* Real    */ ae_matrix* b,
7663      ae_int_t ib,
7664      ae_int_t jb,
7665      double beta,
7666      /* Real    */ ae_matrix* c,
7667      ae_int_t ic,
7668      ae_int_t jc,
7669      ae_state *_state)
7670 {
7671     ae_int_t i;
7672     ae_int_t j;
7673     double v;
7674     double v00;
7675     double v01;
7676     double v02;
7677     double v03;
7678     double v10;
7679     double v11;
7680     double v12;
7681     double v13;
7682     double v20;
7683     double v21;
7684     double v22;
7685     double v23;
7686     double v30;
7687     double v31;
7688     double v32;
7689     double v33;
7690     double a0;
7691     double a1;
7692     double a2;
7693     double a3;
7694     double b0;
7695     double b1;
7696     double b2;
7697     double b3;
7698     ae_int_t idxa0;
7699     ae_int_t idxa1;
7700     ae_int_t idxa2;
7701     ae_int_t idxa3;
7702     ae_int_t idxb0;
7703     ae_int_t idxb1;
7704     ae_int_t idxb2;
7705     ae_int_t idxb3;
7706     ae_int_t i0;
7707     ae_int_t i1;
7708     ae_int_t ik;
7709     ae_int_t j0;
7710     ae_int_t j1;
7711     ae_int_t jk;
7712     ae_int_t t;
7713     ae_int_t offsa;
7714     ae_int_t offsb;
7715 
7716 
7717     ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
7718 
7719     /*
7720      * if matrix size is zero
7721      */
7722     if( m==0||n==0 )
7723     {
7724         return;
7725     }
7726 
7727     /*
7728      * A*B'
7729      */
7730     i = 0;
7731     while(i<m)
7732     {
7733         j = 0;
7734         while(j<n)
7735         {
7736 
7737             /*
7738              * Choose between specialized 4x4 code and general code
7739              */
7740             if( i+4<=m&&j+4<=n )
7741             {
7742 
7743                 /*
7744                  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
7745                  *
7746                  * This submatrix is calculated as sum of K rank-1 products,
7747                  * with operands cached in local variables in order to speed
7748                  * up operations with arrays.
7749                  */
7750                 idxa0 = ia+i+0;
7751                 idxa1 = ia+i+1;
7752                 idxa2 = ia+i+2;
7753                 idxa3 = ia+i+3;
7754                 offsa = ja;
7755                 idxb0 = ib+j+0;
7756                 idxb1 = ib+j+1;
7757                 idxb2 = ib+j+2;
7758                 idxb3 = ib+j+3;
7759                 offsb = jb;
7760                 v00 = 0.0;
7761                 v01 = 0.0;
7762                 v02 = 0.0;
7763                 v03 = 0.0;
7764                 v10 = 0.0;
7765                 v11 = 0.0;
7766                 v12 = 0.0;
7767                 v13 = 0.0;
7768                 v20 = 0.0;
7769                 v21 = 0.0;
7770                 v22 = 0.0;
7771                 v23 = 0.0;
7772                 v30 = 0.0;
7773                 v31 = 0.0;
7774                 v32 = 0.0;
7775                 v33 = 0.0;
7776                 for(t=0; t<=k-1; t++)
7777                 {
7778                     a0 = a->ptr.pp_double[idxa0][offsa];
7779                     a1 = a->ptr.pp_double[idxa1][offsa];
7780                     b0 = b->ptr.pp_double[idxb0][offsb];
7781                     b1 = b->ptr.pp_double[idxb1][offsb];
7782                     v00 = v00+a0*b0;
7783                     v01 = v01+a0*b1;
7784                     v10 = v10+a1*b0;
7785                     v11 = v11+a1*b1;
7786                     a2 = a->ptr.pp_double[idxa2][offsa];
7787                     a3 = a->ptr.pp_double[idxa3][offsa];
7788                     v20 = v20+a2*b0;
7789                     v21 = v21+a2*b1;
7790                     v30 = v30+a3*b0;
7791                     v31 = v31+a3*b1;
7792                     b2 = b->ptr.pp_double[idxb2][offsb];
7793                     b3 = b->ptr.pp_double[idxb3][offsb];
7794                     v22 = v22+a2*b2;
7795                     v23 = v23+a2*b3;
7796                     v32 = v32+a3*b2;
7797                     v33 = v33+a3*b3;
7798                     v02 = v02+a0*b2;
7799                     v03 = v03+a0*b3;
7800                     v12 = v12+a1*b2;
7801                     v13 = v13+a1*b3;
7802                     offsa = offsa+1;
7803                     offsb = offsb+1;
7804                 }
7805                 if( ae_fp_eq(beta,(double)(0)) )
7806                 {
7807                     c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
7808                     c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
7809                     c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
7810                     c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
7811                     c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
7812                     c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
7813                     c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
7814                     c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
7815                     c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
7816                     c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
7817                     c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
7818                     c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
7819                     c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
7820                     c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
7821                     c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
7822                     c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
7823                 }
7824                 else
7825                 {
7826                     c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
7827                     c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
7828                     c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
7829                     c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
7830                     c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
7831                     c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
7832                     c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
7833                     c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
7834                     c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
7835                     c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
7836                     c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
7837                     c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
7838                     c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
7839                     c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
7840                     c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
7841                     c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
7842                 }
7843             }
7844             else
7845             {
7846 
7847                 /*
7848                  * Determine submatrix [I0..I1]x[J0..J1] to process
7849                  */
7850                 i0 = i;
7851                 i1 = ae_minint(i+3, m-1, _state);
7852                 j0 = j;
7853                 j1 = ae_minint(j+3, n-1, _state);
7854 
7855                 /*
7856                  * Process submatrix
7857                  */
7858                 for(ik=i0; ik<=i1; ik++)
7859                 {
7860                     for(jk=j0; jk<=j1; jk++)
7861                     {
7862                         if( k==0||ae_fp_eq(alpha,(double)(0)) )
7863                         {
7864                             v = (double)(0);
7865                         }
7866                         else
7867                         {
7868                             v = ae_v_dotproduct(&a->ptr.pp_double[ia+ik][ja], 1, &b->ptr.pp_double[ib+jk][jb], 1, ae_v_len(ja,ja+k-1));
7869                         }
7870                         if( ae_fp_eq(beta,(double)(0)) )
7871                         {
7872                             c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
7873                         }
7874                         else
7875                         {
7876                             c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
7877                         }
7878                     }
7879                 }
7880             }
7881             j = j+4;
7882         }
7883         i = i+4;
7884     }
7885 }
7886 
7887 
7888 /*************************************************************************
7889 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
7890 with OpTypeA=1 and OpTypeB=0.
7891 
7892 Additional info:
7893 * this function requires that Alpha<>0 (assertion is thrown otherwise)
7894 
7895 INPUT PARAMETERS
7896     M       -   matrix size, M>0
7897     N       -   matrix size, N>0
7898     K       -   matrix size, K>0
7899     Alpha   -   coefficient
7900     A       -   matrix
7901     IA      -   submatrix offset
7902     JA      -   submatrix offset
7903     B       -   matrix
7904     IB      -   submatrix offset
7905     JB      -   submatrix offset
7906     Beta    -   coefficient
7907     C       -   PREALLOCATED output matrix
7908     IC      -   submatrix offset
7909     JC      -   submatrix offset
7910 
7911   -- ALGLIB routine --
7912      27.03.2013
7913      Bochkanov Sergey
7914 *************************************************************************/
rmatrixgemmk44v10(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)7915 void rmatrixgemmk44v10(ae_int_t m,
7916      ae_int_t n,
7917      ae_int_t k,
7918      double alpha,
7919      /* Real    */ ae_matrix* a,
7920      ae_int_t ia,
7921      ae_int_t ja,
7922      /* Real    */ ae_matrix* b,
7923      ae_int_t ib,
7924      ae_int_t jb,
7925      double beta,
7926      /* Real    */ ae_matrix* c,
7927      ae_int_t ic,
7928      ae_int_t jc,
7929      ae_state *_state)
7930 {
7931     ae_int_t i;
7932     ae_int_t j;
7933     double v;
7934     double v00;
7935     double v01;
7936     double v02;
7937     double v03;
7938     double v10;
7939     double v11;
7940     double v12;
7941     double v13;
7942     double v20;
7943     double v21;
7944     double v22;
7945     double v23;
7946     double v30;
7947     double v31;
7948     double v32;
7949     double v33;
7950     double a0;
7951     double a1;
7952     double a2;
7953     double a3;
7954     double b0;
7955     double b1;
7956     double b2;
7957     double b3;
7958     ae_int_t idxa0;
7959     ae_int_t idxa1;
7960     ae_int_t idxa2;
7961     ae_int_t idxa3;
7962     ae_int_t idxb0;
7963     ae_int_t idxb1;
7964     ae_int_t idxb2;
7965     ae_int_t idxb3;
7966     ae_int_t i0;
7967     ae_int_t i1;
7968     ae_int_t ik;
7969     ae_int_t j0;
7970     ae_int_t j1;
7971     ae_int_t jk;
7972     ae_int_t t;
7973     ae_int_t offsa;
7974     ae_int_t offsb;
7975 
7976 
7977     ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
7978 
7979     /*
7980      * if matrix size is zero
7981      */
7982     if( m==0||n==0 )
7983     {
7984         return;
7985     }
7986 
7987     /*
7988      * A'*B
7989      */
7990     i = 0;
7991     while(i<m)
7992     {
7993         j = 0;
7994         while(j<n)
7995         {
7996 
7997             /*
7998              * Choose between specialized 4x4 code and general code
7999              */
8000             if( i+4<=m&&j+4<=n )
8001             {
8002 
8003                 /*
8004                  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
8005                  *
8006                  * This submatrix is calculated as sum of K rank-1 products,
8007                  * with operands cached in local variables in order to speed
8008                  * up operations with arrays.
8009                  */
8010                 idxa0 = ja+i+0;
8011                 idxa1 = ja+i+1;
8012                 idxa2 = ja+i+2;
8013                 idxa3 = ja+i+3;
8014                 offsa = ia;
8015                 idxb0 = jb+j+0;
8016                 idxb1 = jb+j+1;
8017                 idxb2 = jb+j+2;
8018                 idxb3 = jb+j+3;
8019                 offsb = ib;
8020                 v00 = 0.0;
8021                 v01 = 0.0;
8022                 v02 = 0.0;
8023                 v03 = 0.0;
8024                 v10 = 0.0;
8025                 v11 = 0.0;
8026                 v12 = 0.0;
8027                 v13 = 0.0;
8028                 v20 = 0.0;
8029                 v21 = 0.0;
8030                 v22 = 0.0;
8031                 v23 = 0.0;
8032                 v30 = 0.0;
8033                 v31 = 0.0;
8034                 v32 = 0.0;
8035                 v33 = 0.0;
8036                 for(t=0; t<=k-1; t++)
8037                 {
8038                     a0 = a->ptr.pp_double[offsa][idxa0];
8039                     a1 = a->ptr.pp_double[offsa][idxa1];
8040                     b0 = b->ptr.pp_double[offsb][idxb0];
8041                     b1 = b->ptr.pp_double[offsb][idxb1];
8042                     v00 = v00+a0*b0;
8043                     v01 = v01+a0*b1;
8044                     v10 = v10+a1*b0;
8045                     v11 = v11+a1*b1;
8046                     a2 = a->ptr.pp_double[offsa][idxa2];
8047                     a3 = a->ptr.pp_double[offsa][idxa3];
8048                     v20 = v20+a2*b0;
8049                     v21 = v21+a2*b1;
8050                     v30 = v30+a3*b0;
8051                     v31 = v31+a3*b1;
8052                     b2 = b->ptr.pp_double[offsb][idxb2];
8053                     b3 = b->ptr.pp_double[offsb][idxb3];
8054                     v22 = v22+a2*b2;
8055                     v23 = v23+a2*b3;
8056                     v32 = v32+a3*b2;
8057                     v33 = v33+a3*b3;
8058                     v02 = v02+a0*b2;
8059                     v03 = v03+a0*b3;
8060                     v12 = v12+a1*b2;
8061                     v13 = v13+a1*b3;
8062                     offsa = offsa+1;
8063                     offsb = offsb+1;
8064                 }
8065                 if( ae_fp_eq(beta,(double)(0)) )
8066                 {
8067                     c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
8068                     c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
8069                     c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
8070                     c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
8071                     c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
8072                     c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
8073                     c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
8074                     c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
8075                     c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
8076                     c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
8077                     c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
8078                     c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
8079                     c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
8080                     c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
8081                     c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
8082                     c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
8083                 }
8084                 else
8085                 {
8086                     c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
8087                     c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
8088                     c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
8089                     c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
8090                     c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
8091                     c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
8092                     c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
8093                     c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
8094                     c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
8095                     c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
8096                     c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
8097                     c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
8098                     c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
8099                     c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
8100                     c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
8101                     c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
8102                 }
8103             }
8104             else
8105             {
8106 
8107                 /*
8108                  * Determine submatrix [I0..I1]x[J0..J1] to process
8109                  */
8110                 i0 = i;
8111                 i1 = ae_minint(i+3, m-1, _state);
8112                 j0 = j;
8113                 j1 = ae_minint(j+3, n-1, _state);
8114 
8115                 /*
8116                  * Process submatrix
8117                  */
8118                 for(ik=i0; ik<=i1; ik++)
8119                 {
8120                     for(jk=j0; jk<=j1; jk++)
8121                     {
8122                         if( k==0||ae_fp_eq(alpha,(double)(0)) )
8123                         {
8124                             v = (double)(0);
8125                         }
8126                         else
8127                         {
8128                             v = 0.0;
8129                             v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+ik], a->stride, &b->ptr.pp_double[ib][jb+jk], b->stride, ae_v_len(ia,ia+k-1));
8130                         }
8131                         if( ae_fp_eq(beta,(double)(0)) )
8132                         {
8133                             c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
8134                         }
8135                         else
8136                         {
8137                             c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
8138                         }
8139                     }
8140                 }
8141             }
8142             j = j+4;
8143         }
8144         i = i+4;
8145     }
8146 }
8147 
8148 
8149 /*************************************************************************
8150 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for sitation
8151 with OpTypeA=1 and OpTypeB=1.
8152 
8153 Additional info:
8154 * this function requires that Alpha<>0 (assertion is thrown otherwise)
8155 
8156 INPUT PARAMETERS
8157     M       -   matrix size, M>0
8158     N       -   matrix size, N>0
8159     K       -   matrix size, K>0
8160     Alpha   -   coefficient
8161     A       -   matrix
8162     IA      -   submatrix offset
8163     JA      -   submatrix offset
8164     B       -   matrix
8165     IB      -   submatrix offset
8166     JB      -   submatrix offset
8167     Beta    -   coefficient
8168     C       -   PREALLOCATED output matrix
8169     IC      -   submatrix offset
8170     JC      -   submatrix offset
8171 
8172   -- ALGLIB routine --
8173      27.03.2013
8174      Bochkanov Sergey
8175 *************************************************************************/
rmatrixgemmk44v11(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_matrix * b,ae_int_t ib,ae_int_t jb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)8176 void rmatrixgemmk44v11(ae_int_t m,
8177      ae_int_t n,
8178      ae_int_t k,
8179      double alpha,
8180      /* Real    */ ae_matrix* a,
8181      ae_int_t ia,
8182      ae_int_t ja,
8183      /* Real    */ ae_matrix* b,
8184      ae_int_t ib,
8185      ae_int_t jb,
8186      double beta,
8187      /* Real    */ ae_matrix* c,
8188      ae_int_t ic,
8189      ae_int_t jc,
8190      ae_state *_state)
8191 {
8192     ae_int_t i;
8193     ae_int_t j;
8194     double v;
8195     double v00;
8196     double v01;
8197     double v02;
8198     double v03;
8199     double v10;
8200     double v11;
8201     double v12;
8202     double v13;
8203     double v20;
8204     double v21;
8205     double v22;
8206     double v23;
8207     double v30;
8208     double v31;
8209     double v32;
8210     double v33;
8211     double a0;
8212     double a1;
8213     double a2;
8214     double a3;
8215     double b0;
8216     double b1;
8217     double b2;
8218     double b3;
8219     ae_int_t idxa0;
8220     ae_int_t idxa1;
8221     ae_int_t idxa2;
8222     ae_int_t idxa3;
8223     ae_int_t idxb0;
8224     ae_int_t idxb1;
8225     ae_int_t idxb2;
8226     ae_int_t idxb3;
8227     ae_int_t i0;
8228     ae_int_t i1;
8229     ae_int_t ik;
8230     ae_int_t j0;
8231     ae_int_t j1;
8232     ae_int_t jk;
8233     ae_int_t t;
8234     ae_int_t offsa;
8235     ae_int_t offsb;
8236 
8237 
8238     ae_assert(ae_fp_neq(alpha,(double)(0)), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
8239 
8240     /*
8241      * if matrix size is zero
8242      */
8243     if( m==0||n==0 )
8244     {
8245         return;
8246     }
8247 
8248     /*
8249      * A'*B'
8250      */
8251     i = 0;
8252     while(i<m)
8253     {
8254         j = 0;
8255         while(j<n)
8256         {
8257 
8258             /*
8259              * Choose between specialized 4x4 code and general code
8260              */
8261             if( i+4<=m&&j+4<=n )
8262             {
8263 
8264                 /*
8265                  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
8266                  *
8267                  * This submatrix is calculated as sum of K rank-1 products,
8268                  * with operands cached in local variables in order to speed
8269                  * up operations with arrays.
8270                  */
8271                 idxa0 = ja+i+0;
8272                 idxa1 = ja+i+1;
8273                 idxa2 = ja+i+2;
8274                 idxa3 = ja+i+3;
8275                 offsa = ia;
8276                 idxb0 = ib+j+0;
8277                 idxb1 = ib+j+1;
8278                 idxb2 = ib+j+2;
8279                 idxb3 = ib+j+3;
8280                 offsb = jb;
8281                 v00 = 0.0;
8282                 v01 = 0.0;
8283                 v02 = 0.0;
8284                 v03 = 0.0;
8285                 v10 = 0.0;
8286                 v11 = 0.0;
8287                 v12 = 0.0;
8288                 v13 = 0.0;
8289                 v20 = 0.0;
8290                 v21 = 0.0;
8291                 v22 = 0.0;
8292                 v23 = 0.0;
8293                 v30 = 0.0;
8294                 v31 = 0.0;
8295                 v32 = 0.0;
8296                 v33 = 0.0;
8297                 for(t=0; t<=k-1; t++)
8298                 {
8299                     a0 = a->ptr.pp_double[offsa][idxa0];
8300                     a1 = a->ptr.pp_double[offsa][idxa1];
8301                     b0 = b->ptr.pp_double[idxb0][offsb];
8302                     b1 = b->ptr.pp_double[idxb1][offsb];
8303                     v00 = v00+a0*b0;
8304                     v01 = v01+a0*b1;
8305                     v10 = v10+a1*b0;
8306                     v11 = v11+a1*b1;
8307                     a2 = a->ptr.pp_double[offsa][idxa2];
8308                     a3 = a->ptr.pp_double[offsa][idxa3];
8309                     v20 = v20+a2*b0;
8310                     v21 = v21+a2*b1;
8311                     v30 = v30+a3*b0;
8312                     v31 = v31+a3*b1;
8313                     b2 = b->ptr.pp_double[idxb2][offsb];
8314                     b3 = b->ptr.pp_double[idxb3][offsb];
8315                     v22 = v22+a2*b2;
8316                     v23 = v23+a2*b3;
8317                     v32 = v32+a3*b2;
8318                     v33 = v33+a3*b3;
8319                     v02 = v02+a0*b2;
8320                     v03 = v03+a0*b3;
8321                     v12 = v12+a1*b2;
8322                     v13 = v13+a1*b3;
8323                     offsa = offsa+1;
8324                     offsb = offsb+1;
8325                 }
8326                 if( ae_fp_eq(beta,(double)(0)) )
8327                 {
8328                     c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
8329                     c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
8330                     c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
8331                     c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
8332                     c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
8333                     c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
8334                     c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
8335                     c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
8336                     c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
8337                     c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
8338                     c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
8339                     c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
8340                     c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
8341                     c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
8342                     c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
8343                     c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
8344                 }
8345                 else
8346                 {
8347                     c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
8348                     c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
8349                     c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
8350                     c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
8351                     c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
8352                     c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
8353                     c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
8354                     c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
8355                     c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
8356                     c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
8357                     c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
8358                     c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
8359                     c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
8360                     c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
8361                     c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
8362                     c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
8363                 }
8364             }
8365             else
8366             {
8367 
8368                 /*
8369                  * Determine submatrix [I0..I1]x[J0..J1] to process
8370                  */
8371                 i0 = i;
8372                 i1 = ae_minint(i+3, m-1, _state);
8373                 j0 = j;
8374                 j1 = ae_minint(j+3, n-1, _state);
8375 
8376                 /*
8377                  * Process submatrix
8378                  */
8379                 for(ik=i0; ik<=i1; ik++)
8380                 {
8381                     for(jk=j0; jk<=j1; jk++)
8382                     {
8383                         if( k==0||ae_fp_eq(alpha,(double)(0)) )
8384                         {
8385                             v = (double)(0);
8386                         }
8387                         else
8388                         {
8389                             v = 0.0;
8390                             v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+ik], a->stride, &b->ptr.pp_double[ib+jk][jb], 1, ae_v_len(ia,ia+k-1));
8391                         }
8392                         if( ae_fp_eq(beta,(double)(0)) )
8393                         {
8394                             c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
8395                         }
8396                         else
8397                         {
8398                             c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
8399                         }
8400                     }
8401                 }
8402             }
8403             j = j+4;
8404         }
8405         i = i+4;
8406     }
8407 }
8408 
8409 
8410 #ifdef ALGLIB_NO_FAST_KERNELS
8411 /*************************************************************************
8412 Fast kernel (new version with AVX2/SSE2)
8413 
8414   -- ALGLIB routine --
8415      19.01.2010
8416      Bochkanov Sergey
8417 *************************************************************************/
ablasf_rgemm32basecase(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)8418 static ae_bool ablasf_rgemm32basecase(ae_int_t m,
8419      ae_int_t n,
8420      ae_int_t k,
8421      double alpha,
8422      /* Real    */ ae_matrix* a,
8423      ae_int_t ia,
8424      ae_int_t ja,
8425      ae_int_t optypea,
8426      /* Real    */ ae_matrix* b,
8427      ae_int_t ib,
8428      ae_int_t jb,
8429      ae_int_t optypeb,
8430      double beta,
8431      /* Real    */ ae_matrix* c,
8432      ae_int_t ic,
8433      ae_int_t jc,
8434      ae_state *_state)
8435 {
8436     ae_bool result;
8437 
8438 
8439     result = ae_false;
8440     return result;
8441 }
8442 #endif
8443 
8444 
8445 #endif
8446 #if defined(AE_COMPILE_HBLAS) || !defined(AE_PARTIAL_BUILD)
8447 
8448 
hermitianmatrixvectormultiply(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,ae_complex alpha,ae_vector * y,ae_state * _state)8449 void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a,
8450      ae_bool isupper,
8451      ae_int_t i1,
8452      ae_int_t i2,
8453      /* Complex */ ae_vector* x,
8454      ae_complex alpha,
8455      /* Complex */ ae_vector* y,
8456      ae_state *_state)
8457 {
8458     ae_int_t i;
8459     ae_int_t ba1;
8460     ae_int_t by1;
8461     ae_int_t by2;
8462     ae_int_t bx1;
8463     ae_int_t bx2;
8464     ae_int_t n;
8465     ae_complex v;
8466 
8467 
8468     n = i2-i1+1;
8469     if( n<=0 )
8470     {
8471         return;
8472     }
8473 
8474     /*
8475      * Let A = L + D + U, where
8476      *  L is strictly lower triangular (main diagonal is zero)
8477      *  D is diagonal
8478      *  U is strictly upper triangular (main diagonal is zero)
8479      *
8480      * A*x = L*x + D*x + U*x
8481      *
8482      * Calculate D*x first
8483      */
8484     for(i=i1; i<=i2; i++)
8485     {
8486         y->ptr.p_complex[i-i1+1] = ae_c_mul(a->ptr.pp_complex[i][i],x->ptr.p_complex[i-i1+1]);
8487     }
8488 
8489     /*
8490      * Add L*x + U*x
8491      */
8492     if( isupper )
8493     {
8494         for(i=i1; i<=i2-1; i++)
8495         {
8496 
8497             /*
8498              * Add L*x to the result
8499              */
8500             v = x->ptr.p_complex[i-i1+1];
8501             by1 = i-i1+2;
8502             by2 = n;
8503             ba1 = i+1;
8504             ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
8505 
8506             /*
8507              * Add U*x to the result
8508              */
8509             bx1 = i-i1+2;
8510             bx2 = n;
8511             ba1 = i+1;
8512             v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
8513             y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
8514         }
8515     }
8516     else
8517     {
8518         for(i=i1+1; i<=i2; i++)
8519         {
8520 
8521             /*
8522              * Add L*x to the result
8523              */
8524             bx1 = 1;
8525             bx2 = i-i1;
8526             ba1 = i1;
8527             v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
8528             y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
8529 
8530             /*
8531              * Add U*x to the result
8532              */
8533             v = x->ptr.p_complex[i-i1+1];
8534             by1 = 1;
8535             by2 = i-i1;
8536             ba1 = i1;
8537             ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
8538         }
8539     }
8540     ae_v_cmulc(&y->ptr.p_complex[1], 1, ae_v_len(1,n), alpha);
8541 }
8542 
8543 
hermitianrank2update(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,ae_vector * y,ae_vector * t,ae_complex alpha,ae_state * _state)8544 void hermitianrank2update(/* Complex */ ae_matrix* a,
8545      ae_bool isupper,
8546      ae_int_t i1,
8547      ae_int_t i2,
8548      /* Complex */ ae_vector* x,
8549      /* Complex */ ae_vector* y,
8550      /* Complex */ ae_vector* t,
8551      ae_complex alpha,
8552      ae_state *_state)
8553 {
8554     ae_int_t i;
8555     ae_int_t tp1;
8556     ae_int_t tp2;
8557     ae_complex v;
8558 
8559 
8560     if( isupper )
8561     {
8562         for(i=i1; i<=i2; i++)
8563         {
8564             tp1 = i+1-i1;
8565             tp2 = i2-i1+1;
8566             v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
8567             ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8568             v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
8569             ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8570             ae_v_cadd(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i,i2));
8571         }
8572     }
8573     else
8574     {
8575         for(i=i1; i<=i2; i++)
8576         {
8577             tp1 = 1;
8578             tp2 = i+1-i1;
8579             v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
8580             ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8581             v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
8582             ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
8583             ae_v_cadd(&a->ptr.pp_complex[i][i1], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i1,i));
8584         }
8585     }
8586 }
8587 
8588 
8589 #endif
8590 #if defined(AE_COMPILE_CREFLECTIONS) || !defined(AE_PARTIAL_BUILD)
8591 
8592 
8593 /*************************************************************************
8594 Generation of an elementary complex reflection transformation
8595 
8596 The subroutine generates elementary complex reflection H of  order  N,  so
8597 that, for a given X, the following equality holds true:
8598 
8599      ( X(1) )   ( Beta )
8600 H' * (  ..  ) = (  0   ),   H'*H = I,   Beta is a real number
8601      ( X(n) )   (  0   )
8602 
8603 where
8604 
8605               ( V(1) )
8606 H = 1 - Tau * (  ..  ) * ( conj(V(1)), ..., conj(V(n)) )
8607               ( V(n) )
8608 
8609 where the first component of vector V equals 1.
8610 
8611 Input parameters:
8612     X   -   vector. Array with elements [1..N].
8613     N   -   reflection order.
8614 
8615 Output parameters:
8616     X   -   components from 2 to N are replaced by vector V.
8617             The first component is replaced with parameter Beta.
8618     Tau -   scalar value Tau.
8619 
8620 This subroutine is the modification of CLARFG subroutines  from the LAPACK
8621 library. It has similar functionality except for the fact that it  doesn't
8622 handle errors when intermediate results cause an overflow.
8623 
8624   -- LAPACK auxiliary routine (version 3.0) --
8625      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8626      Courant Institute, Argonne National Lab, and Rice University
8627      September 30, 1994
8628 *************************************************************************/
complexgeneratereflection(ae_vector * x,ae_int_t n,ae_complex * tau,ae_state * _state)8629 void complexgeneratereflection(/* Complex */ ae_vector* x,
8630      ae_int_t n,
8631      ae_complex* tau,
8632      ae_state *_state)
8633 {
8634     ae_int_t j;
8635     ae_complex alpha;
8636     double alphi;
8637     double alphr;
8638     double beta;
8639     double xnorm;
8640     double mx;
8641     ae_complex t;
8642     double s;
8643     ae_complex v;
8644 
8645     tau->x = 0;
8646     tau->y = 0;
8647 
8648     if( n<=0 )
8649     {
8650         *tau = ae_complex_from_i(0);
8651         return;
8652     }
8653 
8654     /*
8655      * Scale if needed (to avoid overflow/underflow during intermediate
8656      * calculations).
8657      */
8658     mx = (double)(0);
8659     for(j=1; j<=n; j++)
8660     {
8661         mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
8662     }
8663     s = (double)(1);
8664     if( ae_fp_neq(mx,(double)(0)) )
8665     {
8666         if( ae_fp_less(mx,(double)(1)) )
8667         {
8668             s = ae_sqrt(ae_minrealnumber, _state);
8669             v = ae_complex_from_d(1/s);
8670             ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
8671         }
8672         else
8673         {
8674             s = ae_sqrt(ae_maxrealnumber, _state);
8675             v = ae_complex_from_d(1/s);
8676             ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
8677         }
8678     }
8679 
8680     /*
8681      * calculate
8682      */
8683     alpha = x->ptr.p_complex[1];
8684     mx = (double)(0);
8685     for(j=2; j<=n; j++)
8686     {
8687         mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
8688     }
8689     xnorm = (double)(0);
8690     if( ae_fp_neq(mx,(double)(0)) )
8691     {
8692         for(j=2; j<=n; j++)
8693         {
8694             t = ae_c_div_d(x->ptr.p_complex[j],mx);
8695             xnorm = xnorm+ae_c_mul(t,ae_c_conj(t, _state)).x;
8696         }
8697         xnorm = ae_sqrt(xnorm, _state)*mx;
8698     }
8699     alphr = alpha.x;
8700     alphi = alpha.y;
8701     if( ae_fp_eq(xnorm,(double)(0))&&ae_fp_eq(alphi,(double)(0)) )
8702     {
8703         *tau = ae_complex_from_i(0);
8704         x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
8705         return;
8706     }
8707     mx = ae_maxreal(ae_fabs(alphr, _state), ae_fabs(alphi, _state), _state);
8708     mx = ae_maxreal(mx, ae_fabs(xnorm, _state), _state);
8709     beta = -mx*ae_sqrt(ae_sqr(alphr/mx, _state)+ae_sqr(alphi/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
8710     if( ae_fp_less(alphr,(double)(0)) )
8711     {
8712         beta = -beta;
8713     }
8714     tau->x = (beta-alphr)/beta;
8715     tau->y = -alphi/beta;
8716     alpha = ae_c_d_div(1,ae_c_sub_d(alpha,beta));
8717     if( n>1 )
8718     {
8719         ae_v_cmulc(&x->ptr.p_complex[2], 1, ae_v_len(2,n), alpha);
8720     }
8721     alpha = ae_complex_from_d(beta);
8722     x->ptr.p_complex[1] = alpha;
8723 
8724     /*
8725      * Scale back
8726      */
8727     x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
8728 }
8729 
8730 
8731 /*************************************************************************
8732 Application of an elementary reflection to a rectangular matrix of size MxN
8733 
8734 The  algorithm  pre-multiplies  the  matrix  by  an  elementary reflection
8735 transformation  which  is  given  by  column  V  and  scalar  Tau (see the
8736 description of the GenerateReflection). Not the whole matrix  but  only  a
8737 part of it is transformed (rows from M1 to M2, columns from N1 to N2). Only
8738 the elements of this submatrix are changed.
8739 
8740 Note: the matrix is multiplied by H, not by H'.   If  it  is  required  to
8741 multiply the matrix by H', it is necessary to pass Conj(Tau) instead of Tau.
8742 
8743 Input parameters:
8744     C       -   matrix to be transformed.
8745     Tau     -   scalar defining transformation.
8746     V       -   column defining transformation.
8747                 Array whose index ranges within [1..M2-M1+1]
8748     M1, M2  -   range of rows to be transformed.
8749     N1, N2  -   range of columns to be transformed.
8750     WORK    -   working array whose index goes from N1 to N2.
8751 
8752 Output parameters:
8753     C       -   the result of multiplying the input matrix C by the
8754                 transformation matrix which is given by Tau and V.
8755                 If N1>N2 or M1>M2, C is not modified.
8756 
8757   -- LAPACK auxiliary routine (version 3.0) --
8758      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8759      Courant Institute, Argonne National Lab, and Rice University
8760      September 30, 1994
8761 *************************************************************************/
complexapplyreflectionfromtheleft(ae_matrix * c,ae_complex tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)8762 void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c,
8763      ae_complex tau,
8764      /* Complex */ ae_vector* v,
8765      ae_int_t m1,
8766      ae_int_t m2,
8767      ae_int_t n1,
8768      ae_int_t n2,
8769      /* Complex */ ae_vector* work,
8770      ae_state *_state)
8771 {
8772     ae_complex t;
8773     ae_int_t i;
8774 
8775 
8776     if( (ae_c_eq_d(tau,(double)(0))||n1>n2)||m1>m2 )
8777     {
8778         return;
8779     }
8780 
8781     /*
8782      * w := C^T * conj(v)
8783      */
8784     for(i=n1; i<=n2; i++)
8785     {
8786         work->ptr.p_complex[i] = ae_complex_from_i(0);
8787     }
8788     for(i=m1; i<=m2; i++)
8789     {
8790         t = ae_c_conj(v->ptr.p_complex[i+1-m1], _state);
8791         ae_v_caddc(&work->ptr.p_complex[n1], 1, &c->ptr.pp_complex[i][n1], 1, "N", ae_v_len(n1,n2), t);
8792     }
8793 
8794     /*
8795      * C := C - tau * v * w^T
8796      */
8797     for(i=m1; i<=m2; i++)
8798     {
8799         t = ae_c_mul(v->ptr.p_complex[i-m1+1],tau);
8800         ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &work->ptr.p_complex[n1], 1, "N", ae_v_len(n1,n2), t);
8801     }
8802 }
8803 
8804 
8805 /*************************************************************************
8806 Application of an elementary reflection to a rectangular matrix of size MxN
8807 
8808 The  algorithm  post-multiplies  the  matrix  by  an elementary reflection
8809 transformation  which  is  given  by  column  V  and  scalar  Tau (see the
8810 description  of  the  GenerateReflection). Not the whole matrix but only a
8811 part  of  it  is  transformed (rows from M1 to M2, columns from N1 to N2).
8812 Only the elements of this submatrix are changed.
8813 
8814 Input parameters:
8815     C       -   matrix to be transformed.
8816     Tau     -   scalar defining transformation.
8817     V       -   column defining transformation.
8818                 Array whose index ranges within [1..N2-N1+1]
8819     M1, M2  -   range of rows to be transformed.
8820     N1, N2  -   range of columns to be transformed.
8821     WORK    -   working array whose index goes from M1 to M2.
8822 
8823 Output parameters:
8824     C       -   the result of multiplying the input matrix C by the
8825                 transformation matrix which is given by Tau and V.
8826                 If N1>N2 or M1>M2, C is not modified.
8827 
8828   -- LAPACK auxiliary routine (version 3.0) --
8829      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
8830      Courant Institute, Argonne National Lab, and Rice University
8831      September 30, 1994
8832 *************************************************************************/
complexapplyreflectionfromtheright(ae_matrix * c,ae_complex tau,ae_vector * v,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * work,ae_state * _state)8833 void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c,
8834      ae_complex tau,
8835      /* Complex */ ae_vector* v,
8836      ae_int_t m1,
8837      ae_int_t m2,
8838      ae_int_t n1,
8839      ae_int_t n2,
8840      /* Complex */ ae_vector* work,
8841      ae_state *_state)
8842 {
8843     ae_complex t;
8844     ae_int_t i;
8845     ae_int_t vm;
8846 
8847 
8848     if( (ae_c_eq_d(tau,(double)(0))||n1>n2)||m1>m2 )
8849     {
8850         return;
8851     }
8852 
8853     /*
8854      * w := C * v
8855      */
8856     vm = n2-n1+1;
8857     for(i=m1; i<=m2; i++)
8858     {
8859         t = ae_v_cdotproduct(&c->ptr.pp_complex[i][n1], 1, "N", &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2));
8860         work->ptr.p_complex[i] = t;
8861     }
8862 
8863     /*
8864      * C := C - w * conj(v^T)
8865      */
8866     ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
8867     for(i=m1; i<=m2; i++)
8868     {
8869         t = ae_c_mul(work->ptr.p_complex[i],tau);
8870         ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2), t);
8871     }
8872     ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
8873 }
8874 
8875 
8876 #endif
8877 #if defined(AE_COMPILE_SBLAS) || !defined(AE_PARTIAL_BUILD)
8878 
8879 
symmetricmatrixvectormultiply(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,double alpha,ae_vector * y,ae_state * _state)8880 void symmetricmatrixvectormultiply(/* Real    */ ae_matrix* a,
8881      ae_bool isupper,
8882      ae_int_t i1,
8883      ae_int_t i2,
8884      /* Real    */ ae_vector* x,
8885      double alpha,
8886      /* Real    */ ae_vector* y,
8887      ae_state *_state)
8888 {
8889     ae_int_t i;
8890     ae_int_t ba1;
8891     ae_int_t ba2;
8892     ae_int_t by1;
8893     ae_int_t by2;
8894     ae_int_t bx1;
8895     ae_int_t bx2;
8896     ae_int_t n;
8897     double v;
8898 
8899 
8900     n = i2-i1+1;
8901     if( n<=0 )
8902     {
8903         return;
8904     }
8905 
8906     /*
8907      * Let A = L + D + U, where
8908      *  L is strictly lower triangular (main diagonal is zero)
8909      *  D is diagonal
8910      *  U is strictly upper triangular (main diagonal is zero)
8911      *
8912      * A*x = L*x + D*x + U*x
8913      *
8914      * Calculate D*x first
8915      */
8916     for(i=i1; i<=i2; i++)
8917     {
8918         y->ptr.p_double[i-i1+1] = a->ptr.pp_double[i][i]*x->ptr.p_double[i-i1+1];
8919     }
8920 
8921     /*
8922      * Add L*x + U*x
8923      */
8924     if( isupper )
8925     {
8926         for(i=i1; i<=i2-1; i++)
8927         {
8928 
8929             /*
8930              * Add L*x to the result
8931              */
8932             v = x->ptr.p_double[i-i1+1];
8933             by1 = i-i1+2;
8934             by2 = n;
8935             ba1 = i+1;
8936             ba2 = i2;
8937             ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
8938 
8939             /*
8940              * Add U*x to the result
8941              */
8942             bx1 = i-i1+2;
8943             bx2 = n;
8944             ba1 = i+1;
8945             ba2 = i2;
8946             v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
8947             y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
8948         }
8949     }
8950     else
8951     {
8952         for(i=i1+1; i<=i2; i++)
8953         {
8954 
8955             /*
8956              * Add L*x to the result
8957              */
8958             bx1 = 1;
8959             bx2 = i-i1;
8960             ba1 = i1;
8961             ba2 = i-1;
8962             v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
8963             y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
8964 
8965             /*
8966              * Add U*x to the result
8967              */
8968             v = x->ptr.p_double[i-i1+1];
8969             by1 = 1;
8970             by2 = i-i1;
8971             ba1 = i1;
8972             ba2 = i-1;
8973             ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
8974         }
8975     }
8976     ae_v_muld(&y->ptr.p_double[1], 1, ae_v_len(1,n), alpha);
8977     touchint(&ba2, _state);
8978 }
8979 
8980 
symmetricrank2update(ae_matrix * a,ae_bool isupper,ae_int_t i1,ae_int_t i2,ae_vector * x,ae_vector * y,ae_vector * t,double alpha,ae_state * _state)8981 void symmetricrank2update(/* Real    */ ae_matrix* a,
8982      ae_bool isupper,
8983      ae_int_t i1,
8984      ae_int_t i2,
8985      /* Real    */ ae_vector* x,
8986      /* Real    */ ae_vector* y,
8987      /* Real    */ ae_vector* t,
8988      double alpha,
8989      ae_state *_state)
8990 {
8991     ae_int_t i;
8992     ae_int_t tp1;
8993     ae_int_t tp2;
8994     double v;
8995 
8996 
8997     if( isupper )
8998     {
8999         for(i=i1; i<=i2; i++)
9000         {
9001             tp1 = i+1-i1;
9002             tp2 = i2-i1+1;
9003             v = x->ptr.p_double[i+1-i1];
9004             ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9005             v = y->ptr.p_double[i+1-i1];
9006             ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9007             ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
9008             ae_v_add(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i,i2));
9009         }
9010     }
9011     else
9012     {
9013         for(i=i1; i<=i2; i++)
9014         {
9015             tp1 = 1;
9016             tp2 = i+1-i1;
9017             v = x->ptr.p_double[i+1-i1];
9018             ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9019             v = y->ptr.p_double[i+1-i1];
9020             ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
9021             ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
9022             ae_v_add(&a->ptr.pp_double[i][i1], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i1,i));
9023         }
9024     }
9025 }
9026 
9027 
9028 #endif
9029 #if defined(AE_COMPILE_ABLASMKL) || !defined(AE_PARTIAL_BUILD)
9030 
9031 
9032 /*************************************************************************
9033 MKL-based kernel
9034 
9035   -- ALGLIB routine --
9036      12.10.2017
9037      Bochkanov Sergey
9038 *************************************************************************/
rmatrixgermkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double alpha,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)9039 ae_bool rmatrixgermkl(ae_int_t m,
9040      ae_int_t n,
9041      /* Real    */ ae_matrix* a,
9042      ae_int_t ia,
9043      ae_int_t ja,
9044      double alpha,
9045      /* Real    */ ae_vector* u,
9046      ae_int_t iu,
9047      /* Real    */ ae_vector* v,
9048      ae_int_t iv,
9049      ae_state *_state)
9050 {
9051 #ifndef ALGLIB_INTERCEPTS_MKL
9052     ae_bool result;
9053 
9054 
9055     result = ae_false;
9056     return result;
9057 #else
9058     return _ialglib_i_rmatrixgermkl(m, n, a, ia, ja, alpha, u, iu, v, iv);
9059 #endif
9060 }
9061 
9062 
9063 /*************************************************************************
9064 MKL-based kernel
9065 
9066   -- ALGLIB routine --
9067      12.10.2017
9068      Bochkanov Sergey
9069 *************************************************************************/
cmatrixrank1mkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)9070 ae_bool cmatrixrank1mkl(ae_int_t m,
9071      ae_int_t n,
9072      /* Complex */ ae_matrix* a,
9073      ae_int_t ia,
9074      ae_int_t ja,
9075      /* Complex */ ae_vector* u,
9076      ae_int_t iu,
9077      /* Complex */ ae_vector* v,
9078      ae_int_t iv,
9079      ae_state *_state)
9080 {
9081 #ifndef ALGLIB_INTERCEPTS_MKL
9082     ae_bool result;
9083 
9084 
9085     result = ae_false;
9086     return result;
9087 #else
9088     return _ialglib_i_cmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv);
9089 #endif
9090 }
9091 
9092 
9093 /*************************************************************************
9094 MKL-based kernel
9095 
9096   -- ALGLIB routine --
9097      12.10.2017
9098      Bochkanov Sergey
9099 *************************************************************************/
rmatrixrank1mkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t iu,ae_vector * v,ae_int_t iv,ae_state * _state)9100 ae_bool rmatrixrank1mkl(ae_int_t m,
9101      ae_int_t n,
9102      /* Real    */ ae_matrix* a,
9103      ae_int_t ia,
9104      ae_int_t ja,
9105      /* Real    */ ae_vector* u,
9106      ae_int_t iu,
9107      /* Real    */ ae_vector* v,
9108      ae_int_t iv,
9109      ae_state *_state)
9110 {
9111 #ifndef ALGLIB_INTERCEPTS_MKL
9112     ae_bool result;
9113 
9114 
9115     result = ae_false;
9116     return result;
9117 #else
9118     return _ialglib_i_rmatrixrank1mkl(m, n, a, ia, ja, u, iu, v, iv);
9119 #endif
9120 }
9121 
9122 
9123 /*************************************************************************
9124 MKL-based kernel
9125 
9126   -- ALGLIB routine --
9127      12.10.2017
9128      Bochkanov Sergey
9129 *************************************************************************/
cmatrixmvmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)9130 ae_bool cmatrixmvmkl(ae_int_t m,
9131      ae_int_t n,
9132      /* Complex */ ae_matrix* a,
9133      ae_int_t ia,
9134      ae_int_t ja,
9135      ae_int_t opa,
9136      /* Complex */ ae_vector* x,
9137      ae_int_t ix,
9138      /* Complex */ ae_vector* y,
9139      ae_int_t iy,
9140      ae_state *_state)
9141 {
9142 #ifndef ALGLIB_INTERCEPTS_MKL
9143     ae_bool result;
9144 
9145 
9146     result = ae_false;
9147     return result;
9148 #else
9149     return _ialglib_i_cmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy);
9150 #endif
9151 }
9152 
9153 
9154 /*************************************************************************
9155 MKL-based kernel
9156 
9157   -- ALGLIB routine --
9158      12.10.2017
9159      Bochkanov Sergey
9160 *************************************************************************/
rmatrixmvmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,ae_vector * y,ae_int_t iy,ae_state * _state)9161 ae_bool rmatrixmvmkl(ae_int_t m,
9162      ae_int_t n,
9163      /* Real    */ ae_matrix* a,
9164      ae_int_t ia,
9165      ae_int_t ja,
9166      ae_int_t opa,
9167      /* Real    */ ae_vector* x,
9168      ae_int_t ix,
9169      /* Real    */ ae_vector* y,
9170      ae_int_t iy,
9171      ae_state *_state)
9172 {
9173 #ifndef ALGLIB_INTERCEPTS_MKL
9174     ae_bool result;
9175 
9176 
9177     result = ae_false;
9178     return result;
9179 #else
9180     return _ialglib_i_rmatrixmvmkl(m, n, a, ia, ja, opa, x, ix, y, iy);
9181 #endif
9182 }
9183 
9184 
9185 /*************************************************************************
9186 MKL-based kernel
9187 
9188   -- ALGLIB routine --
9189      12.10.2017
9190      Bochkanov Sergey
9191 *************************************************************************/
rmatrixgemvmkl(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)9192 ae_bool rmatrixgemvmkl(ae_int_t m,
9193      ae_int_t n,
9194      double alpha,
9195      /* Real    */ ae_matrix* a,
9196      ae_int_t ia,
9197      ae_int_t ja,
9198      ae_int_t opa,
9199      /* Real    */ ae_vector* x,
9200      ae_int_t ix,
9201      double beta,
9202      /* Real    */ ae_vector* y,
9203      ae_int_t iy,
9204      ae_state *_state)
9205 {
9206 #ifndef ALGLIB_INTERCEPTS_MKL
9207     ae_bool result;
9208 
9209 
9210     result = ae_false;
9211     return result;
9212 #else
9213     return _ialglib_i_rmatrixgemvmkl(m, n, alpha, a, ia, ja, opa, x, ix, beta, y, iy);
9214 #endif
9215 }
9216 
9217 
9218 /*************************************************************************
9219 MKL-based kernel
9220 
9221   -- ALGLIB routine --
9222      12.10.2017
9223      Bochkanov Sergey
9224 *************************************************************************/
rmatrixtrsvmkl(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_vector * x,ae_int_t ix,ae_state * _state)9225 ae_bool rmatrixtrsvmkl(ae_int_t n,
9226      /* Real    */ ae_matrix* a,
9227      ae_int_t ia,
9228      ae_int_t ja,
9229      ae_bool isupper,
9230      ae_bool isunit,
9231      ae_int_t optype,
9232      /* Real    */ ae_vector* x,
9233      ae_int_t ix,
9234      ae_state *_state)
9235 {
9236 #ifndef ALGLIB_INTERCEPTS_MKL
9237     ae_bool result;
9238 
9239 
9240     result = ae_false;
9241     return result;
9242 #else
9243     return _ialglib_i_rmatrixtrsvmkl(n, a, ia, ja, isupper, isunit, optype, x, ix);
9244 #endif
9245 }
9246 
9247 
9248 /*************************************************************************
9249 MKL-based kernel
9250 
9251   -- ALGLIB routine --
9252      01.10.2013
9253      Bochkanov Sergey
9254 *************************************************************************/
rmatrixsyrkmkl(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)9255 ae_bool rmatrixsyrkmkl(ae_int_t n,
9256      ae_int_t k,
9257      double alpha,
9258      /* Real    */ ae_matrix* a,
9259      ae_int_t ia,
9260      ae_int_t ja,
9261      ae_int_t optypea,
9262      double beta,
9263      /* Real    */ ae_matrix* c,
9264      ae_int_t ic,
9265      ae_int_t jc,
9266      ae_bool isupper,
9267      ae_state *_state)
9268 {
9269 #ifndef ALGLIB_INTERCEPTS_MKL
9270     ae_bool result;
9271 
9272 
9273     result = ae_false;
9274     return result;
9275 #else
9276     return _ialglib_i_rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
9277 #endif
9278 }
9279 
9280 
9281 /*************************************************************************
9282 MKL-based kernel
9283 
9284   -- ALGLIB routine --
9285      01.10.2013
9286      Bochkanov Sergey
9287 *************************************************************************/
cmatrixherkmkl(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper,ae_state * _state)9288 ae_bool cmatrixherkmkl(ae_int_t n,
9289      ae_int_t k,
9290      double alpha,
9291      /* Complex */ ae_matrix* a,
9292      ae_int_t ia,
9293      ae_int_t ja,
9294      ae_int_t optypea,
9295      double beta,
9296      /* Complex */ ae_matrix* c,
9297      ae_int_t ic,
9298      ae_int_t jc,
9299      ae_bool isupper,
9300      ae_state *_state)
9301 {
9302 #ifndef ALGLIB_INTERCEPTS_MKL
9303     ae_bool result;
9304 
9305 
9306     result = ae_false;
9307     return result;
9308 #else
9309     return _ialglib_i_cmatrixherkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
9310 #endif
9311 }
9312 
9313 
9314 /*************************************************************************
9315 MKL-based kernel
9316 
9317   -- ALGLIB routine --
9318      01.10.2013
9319      Bochkanov Sergey
9320 *************************************************************************/
rmatrixgemmmkl(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)9321 ae_bool rmatrixgemmmkl(ae_int_t m,
9322      ae_int_t n,
9323      ae_int_t k,
9324      double alpha,
9325      /* Real    */ ae_matrix* a,
9326      ae_int_t ia,
9327      ae_int_t ja,
9328      ae_int_t optypea,
9329      /* Real    */ ae_matrix* b,
9330      ae_int_t ib,
9331      ae_int_t jb,
9332      ae_int_t optypeb,
9333      double beta,
9334      /* Real    */ ae_matrix* c,
9335      ae_int_t ic,
9336      ae_int_t jc,
9337      ae_state *_state)
9338 {
9339 #ifndef ALGLIB_INTERCEPTS_MKL
9340     ae_bool result;
9341 
9342 
9343     result = ae_false;
9344     return result;
9345 #else
9346     return _ialglib_i_rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
9347 #endif
9348 }
9349 
9350 
9351 /*************************************************************************
9352 MKL-based kernel
9353 
9354   -- ALGLIB routine --
9355      01.10.2017
9356      Bochkanov Sergey
9357 *************************************************************************/
rmatrixsymvmkl(ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_bool isupper,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)9358 ae_bool rmatrixsymvmkl(ae_int_t n,
9359      double alpha,
9360      /* Real    */ ae_matrix* a,
9361      ae_int_t ia,
9362      ae_int_t ja,
9363      ae_bool isupper,
9364      /* Real    */ ae_vector* x,
9365      ae_int_t ix,
9366      double beta,
9367      /* Real    */ ae_vector* y,
9368      ae_int_t iy,
9369      ae_state *_state)
9370 {
9371 #ifndef ALGLIB_INTERCEPTS_MKL
9372     ae_bool result;
9373 
9374 
9375     result = ae_false;
9376     return result;
9377 #else
9378     return _ialglib_i_rmatrixsymvmkl(n, alpha, a, ia, ja, isupper, x, ix, beta, y, iy);
9379 #endif
9380 }
9381 
9382 
9383 /*************************************************************************
9384 MKL-based kernel
9385 
9386   -- ALGLIB routine --
9387      16.10.2014
9388      Bochkanov Sergey
9389 *************************************************************************/
cmatrixgemmmkl(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_state * _state)9390 ae_bool cmatrixgemmmkl(ae_int_t m,
9391      ae_int_t n,
9392      ae_int_t k,
9393      ae_complex alpha,
9394      /* Complex */ ae_matrix* a,
9395      ae_int_t ia,
9396      ae_int_t ja,
9397      ae_int_t optypea,
9398      /* Complex */ ae_matrix* b,
9399      ae_int_t ib,
9400      ae_int_t jb,
9401      ae_int_t optypeb,
9402      ae_complex beta,
9403      /* Complex */ ae_matrix* c,
9404      ae_int_t ic,
9405      ae_int_t jc,
9406      ae_state *_state)
9407 {
9408 #ifndef ALGLIB_INTERCEPTS_MKL
9409     ae_bool result;
9410 
9411 
9412     result = ae_false;
9413     return result;
9414 #else
9415     return _ialglib_i_cmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
9416 #endif
9417 }
9418 
9419 
9420 /*************************************************************************
9421 MKL-based kernel
9422 
9423   -- ALGLIB routine --
9424      16.10.2014
9425      Bochkanov Sergey
9426 *************************************************************************/
cmatrixlefttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9427 ae_bool cmatrixlefttrsmmkl(ae_int_t m,
9428      ae_int_t n,
9429      /* Complex */ ae_matrix* a,
9430      ae_int_t i1,
9431      ae_int_t j1,
9432      ae_bool isupper,
9433      ae_bool isunit,
9434      ae_int_t optype,
9435      /* Complex */ ae_matrix* x,
9436      ae_int_t i2,
9437      ae_int_t j2,
9438      ae_state *_state)
9439 {
9440 #ifndef ALGLIB_INTERCEPTS_MKL
9441     ae_bool result;
9442 
9443 
9444     result = ae_false;
9445     return result;
9446 #else
9447     return _ialglib_i_cmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9448 #endif
9449 }
9450 
9451 
9452 /*************************************************************************
9453 MKL-based kernel
9454 
9455   -- ALGLIB routine --
9456      16.10.2014
9457      Bochkanov Sergey
9458 *************************************************************************/
cmatrixrighttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9459 ae_bool cmatrixrighttrsmmkl(ae_int_t m,
9460      ae_int_t n,
9461      /* Complex */ ae_matrix* a,
9462      ae_int_t i1,
9463      ae_int_t j1,
9464      ae_bool isupper,
9465      ae_bool isunit,
9466      ae_int_t optype,
9467      /* Complex */ ae_matrix* x,
9468      ae_int_t i2,
9469      ae_int_t j2,
9470      ae_state *_state)
9471 {
9472 #ifndef ALGLIB_INTERCEPTS_MKL
9473     ae_bool result;
9474 
9475 
9476     result = ae_false;
9477     return result;
9478 #else
9479     return _ialglib_i_cmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9480 #endif
9481 }
9482 
9483 
9484 /*************************************************************************
9485 MKL-based kernel
9486 
9487   -- ALGLIB routine --
9488      16.10.2014
9489      Bochkanov Sergey
9490 *************************************************************************/
rmatrixlefttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9491 ae_bool rmatrixlefttrsmmkl(ae_int_t m,
9492      ae_int_t n,
9493      /* Real    */ ae_matrix* a,
9494      ae_int_t i1,
9495      ae_int_t j1,
9496      ae_bool isupper,
9497      ae_bool isunit,
9498      ae_int_t optype,
9499      /* Real    */ ae_matrix* x,
9500      ae_int_t i2,
9501      ae_int_t j2,
9502      ae_state *_state)
9503 {
9504 #ifndef ALGLIB_INTERCEPTS_MKL
9505     ae_bool result;
9506 
9507 
9508     result = ae_false;
9509     return result;
9510 #else
9511     return _ialglib_i_rmatrixlefttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9512 #endif
9513 }
9514 
9515 
9516 /*************************************************************************
9517 MKL-based kernel
9518 
9519   -- ALGLIB routine --
9520      16.10.2014
9521      Bochkanov Sergey
9522 *************************************************************************/
rmatrixrighttrsmmkl(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2,ae_state * _state)9523 ae_bool rmatrixrighttrsmmkl(ae_int_t m,
9524      ae_int_t n,
9525      /* Real    */ ae_matrix* a,
9526      ae_int_t i1,
9527      ae_int_t j1,
9528      ae_bool isupper,
9529      ae_bool isunit,
9530      ae_int_t optype,
9531      /* Real    */ ae_matrix* x,
9532      ae_int_t i2,
9533      ae_int_t j2,
9534      ae_state *_state)
9535 {
9536 #ifndef ALGLIB_INTERCEPTS_MKL
9537     ae_bool result;
9538 
9539 
9540     result = ae_false;
9541     return result;
9542 #else
9543     return _ialglib_i_rmatrixrighttrsmmkl(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
9544 #endif
9545 }
9546 
9547 
9548 /*************************************************************************
9549 MKL-based kernel.
9550 
9551 NOTE:
9552 
9553 if function returned False, CholResult is NOT modified. Not ever referenced!
9554 if function returned True, CholResult is set to status of Cholesky decomposition
9555 (True on succeess).
9556 
9557   -- ALGLIB routine --
9558      16.10.2014
9559      Bochkanov Sergey
9560 *************************************************************************/
spdmatrixcholeskymkl(ae_matrix * a,ae_int_t offs,ae_int_t n,ae_bool isupper,ae_bool * cholresult,ae_state * _state)9561 ae_bool spdmatrixcholeskymkl(/* Real    */ ae_matrix* a,
9562      ae_int_t offs,
9563      ae_int_t n,
9564      ae_bool isupper,
9565      ae_bool* cholresult,
9566      ae_state *_state)
9567 {
9568 #ifndef ALGLIB_INTERCEPTS_MKL
9569     ae_bool result;
9570 
9571 
9572     result = ae_false;
9573     return result;
9574 #else
9575     return _ialglib_i_spdmatrixcholeskymkl(a, offs, n, isupper, cholresult);
9576 #endif
9577 }
9578 
9579 
9580 /*************************************************************************
9581 MKL-based kernel.
9582 
9583   -- ALGLIB routine --
9584      20.10.2014
9585      Bochkanov Sergey
9586 *************************************************************************/
rmatrixplumkl(ae_matrix * a,ae_int_t offs,ae_int_t m,ae_int_t n,ae_vector * pivots,ae_state * _state)9587 ae_bool rmatrixplumkl(/* Real    */ ae_matrix* a,
9588      ae_int_t offs,
9589      ae_int_t m,
9590      ae_int_t n,
9591      /* Integer */ ae_vector* pivots,
9592      ae_state *_state)
9593 {
9594 #ifndef ALGLIB_INTERCEPTS_MKL
9595     ae_bool result;
9596 
9597 
9598     result = ae_false;
9599     return result;
9600 #else
9601     return _ialglib_i_rmatrixplumkl(a, offs, m, n, pivots);
9602 #endif
9603 }
9604 
9605 
9606 /*************************************************************************
9607 MKL-based kernel.
9608 
9609 NOTE: this function needs preallocated output/temporary arrays.
9610       D and E must be at least max(M,N)-wide.
9611 
9612   -- ALGLIB routine --
9613      20.10.2014
9614      Bochkanov Sergey
9615 *************************************************************************/
rmatrixbdmkl(ae_matrix * a,ae_int_t m,ae_int_t n,ae_vector * d,ae_vector * e,ae_vector * tauq,ae_vector * taup,ae_state * _state)9616 ae_bool rmatrixbdmkl(/* Real    */ ae_matrix* a,
9617      ae_int_t m,
9618      ae_int_t n,
9619      /* Real    */ ae_vector* d,
9620      /* Real    */ ae_vector* e,
9621      /* Real    */ ae_vector* tauq,
9622      /* Real    */ ae_vector* taup,
9623      ae_state *_state)
9624 {
9625 #ifndef ALGLIB_INTERCEPTS_MKL
9626     ae_bool result;
9627 
9628 
9629     result = ae_false;
9630     return result;
9631 #else
9632     return _ialglib_i_rmatrixbdmkl(a, m, n, d, e, tauq, taup);
9633 #endif
9634 }
9635 
9636 
9637 /*************************************************************************
9638 MKL-based kernel.
9639 
9640 If ByQ is True,  TauP is not used (can be empty array).
9641 If ByQ is False, TauQ is not used (can be empty array).
9642 
9643   -- ALGLIB routine --
9644      20.10.2014
9645      Bochkanov Sergey
9646 *************************************************************************/
rmatrixbdmultiplybymkl(ae_matrix * qp,ae_int_t m,ae_int_t n,ae_vector * tauq,ae_vector * taup,ae_matrix * z,ae_int_t zrows,ae_int_t zcolumns,ae_bool byq,ae_bool fromtheright,ae_bool dotranspose,ae_state * _state)9647 ae_bool rmatrixbdmultiplybymkl(/* Real    */ ae_matrix* qp,
9648      ae_int_t m,
9649      ae_int_t n,
9650      /* Real    */ ae_vector* tauq,
9651      /* Real    */ ae_vector* taup,
9652      /* Real    */ ae_matrix* z,
9653      ae_int_t zrows,
9654      ae_int_t zcolumns,
9655      ae_bool byq,
9656      ae_bool fromtheright,
9657      ae_bool dotranspose,
9658      ae_state *_state)
9659 {
9660 #ifndef ALGLIB_INTERCEPTS_MKL
9661     ae_bool result;
9662 
9663 
9664     result = ae_false;
9665     return result;
9666 #else
9667     return _ialglib_i_rmatrixbdmultiplybymkl(qp, m, n, tauq, taup, z, zrows, zcolumns, byq, fromtheright, dotranspose);
9668 #endif
9669 }
9670 
9671 
9672 /*************************************************************************
9673 MKL-based kernel.
9674 
9675 NOTE: Tau must be preallocated array with at least N-1 elements.
9676 
9677   -- ALGLIB routine --
9678      20.10.2014
9679      Bochkanov Sergey
9680 *************************************************************************/
rmatrixhessenbergmkl(ae_matrix * a,ae_int_t n,ae_vector * tau,ae_state * _state)9681 ae_bool rmatrixhessenbergmkl(/* Real    */ ae_matrix* a,
9682      ae_int_t n,
9683      /* Real    */ ae_vector* tau,
9684      ae_state *_state)
9685 {
9686 #ifndef ALGLIB_INTERCEPTS_MKL
9687     ae_bool result;
9688 
9689 
9690     result = ae_false;
9691     return result;
9692 #else
9693     return _ialglib_i_rmatrixhessenbergmkl(a, n, tau);
9694 #endif
9695 }
9696 
9697 
9698 /*************************************************************************
9699 MKL-based kernel.
9700 
9701 NOTE: Q must be preallocated N*N array
9702 
9703   -- ALGLIB routine --
9704      20.10.2014
9705      Bochkanov Sergey
9706 *************************************************************************/
rmatrixhessenbergunpackqmkl(ae_matrix * a,ae_int_t n,ae_vector * tau,ae_matrix * q,ae_state * _state)9707 ae_bool rmatrixhessenbergunpackqmkl(/* Real    */ ae_matrix* a,
9708      ae_int_t n,
9709      /* Real    */ ae_vector* tau,
9710      /* Real    */ ae_matrix* q,
9711      ae_state *_state)
9712 {
9713 #ifndef ALGLIB_INTERCEPTS_MKL
9714     ae_bool result;
9715 
9716 
9717     result = ae_false;
9718     return result;
9719 #else
9720     return _ialglib_i_rmatrixhessenbergunpackqmkl(a, n, tau, q);
9721 #endif
9722 }
9723 
9724 
9725 /*************************************************************************
9726 MKL-based kernel.
9727 
9728 NOTE: Tau, D, E must be preallocated arrays;
9729       length(E)=length(Tau)=N-1 (or larger)
9730       length(D)=N (or larger)
9731 
9732   -- ALGLIB routine --
9733      20.10.2014
9734      Bochkanov Sergey
9735 *************************************************************************/
smatrixtdmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_vector * d,ae_vector * e,ae_state * _state)9736 ae_bool smatrixtdmkl(/* Real    */ ae_matrix* a,
9737      ae_int_t n,
9738      ae_bool isupper,
9739      /* Real    */ ae_vector* tau,
9740      /* Real    */ ae_vector* d,
9741      /* Real    */ ae_vector* e,
9742      ae_state *_state)
9743 {
9744 #ifndef ALGLIB_INTERCEPTS_MKL
9745     ae_bool result;
9746 
9747 
9748     result = ae_false;
9749     return result;
9750 #else
9751     return _ialglib_i_smatrixtdmkl(a, n, isupper, tau, d, e);
9752 #endif
9753 }
9754 
9755 
9756 /*************************************************************************
9757 MKL-based kernel.
9758 
9759 NOTE: Q must be preallocated N*N array
9760 
9761   -- ALGLIB routine --
9762      20.10.2014
9763      Bochkanov Sergey
9764 *************************************************************************/
smatrixtdunpackqmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_matrix * q,ae_state * _state)9765 ae_bool smatrixtdunpackqmkl(/* Real    */ ae_matrix* a,
9766      ae_int_t n,
9767      ae_bool isupper,
9768      /* Real    */ ae_vector* tau,
9769      /* Real    */ ae_matrix* q,
9770      ae_state *_state)
9771 {
9772 #ifndef ALGLIB_INTERCEPTS_MKL
9773     ae_bool result;
9774 
9775 
9776     result = ae_false;
9777     return result;
9778 #else
9779     return _ialglib_i_smatrixtdunpackqmkl(a, n, isupper, tau, q);
9780 #endif
9781 }
9782 
9783 
9784 /*************************************************************************
9785 MKL-based kernel.
9786 
9787 NOTE: Tau, D, E must be preallocated arrays;
9788       length(E)=length(Tau)=N-1 (or larger)
9789       length(D)=N (or larger)
9790 
9791   -- ALGLIB routine --
9792      20.10.2014
9793      Bochkanov Sergey
9794 *************************************************************************/
hmatrixtdmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_vector * d,ae_vector * e,ae_state * _state)9795 ae_bool hmatrixtdmkl(/* Complex */ ae_matrix* a,
9796      ae_int_t n,
9797      ae_bool isupper,
9798      /* Complex */ ae_vector* tau,
9799      /* Real    */ ae_vector* d,
9800      /* Real    */ ae_vector* e,
9801      ae_state *_state)
9802 {
9803 #ifndef ALGLIB_INTERCEPTS_MKL
9804     ae_bool result;
9805 
9806 
9807     result = ae_false;
9808     return result;
9809 #else
9810     return _ialglib_i_hmatrixtdmkl(a, n, isupper, tau, d, e);
9811 #endif
9812 }
9813 
9814 
9815 /*************************************************************************
9816 MKL-based kernel.
9817 
9818 NOTE: Q must be preallocated N*N array
9819 
9820   -- ALGLIB routine --
9821      20.10.2014
9822      Bochkanov Sergey
9823 *************************************************************************/
hmatrixtdunpackqmkl(ae_matrix * a,ae_int_t n,ae_bool isupper,ae_vector * tau,ae_matrix * q,ae_state * _state)9824 ae_bool hmatrixtdunpackqmkl(/* Complex */ ae_matrix* a,
9825      ae_int_t n,
9826      ae_bool isupper,
9827      /* Complex */ ae_vector* tau,
9828      /* Complex */ ae_matrix* q,
9829      ae_state *_state)
9830 {
9831 #ifndef ALGLIB_INTERCEPTS_MKL
9832     ae_bool result;
9833 
9834 
9835     result = ae_false;
9836     return result;
9837 #else
9838     return _ialglib_i_hmatrixtdunpackqmkl(a, n, isupper, tau, q);
9839 #endif
9840 }
9841 
9842 
9843 /*************************************************************************
9844 MKL-based kernel.
9845 
9846 Returns True if MKL was present and handled request (MKL  completion  code
9847 is returned as separate output parameter).
9848 
9849 D and E are pre-allocated arrays with length N (both of them!). On output,
9850 D constraints singular values, and E is destroyed.
9851 
9852 SVDResult is modified if and only if MKL is present.
9853 
9854   -- ALGLIB routine --
9855      20.10.2014
9856      Bochkanov Sergey
9857 *************************************************************************/
rmatrixbdsvdmkl(ae_vector * d,ae_vector * e,ae_int_t n,ae_bool isupper,ae_matrix * u,ae_int_t nru,ae_matrix * c,ae_int_t ncc,ae_matrix * vt,ae_int_t ncvt,ae_bool * svdresult,ae_state * _state)9858 ae_bool rmatrixbdsvdmkl(/* Real    */ ae_vector* d,
9859      /* Real    */ ae_vector* e,
9860      ae_int_t n,
9861      ae_bool isupper,
9862      /* Real    */ ae_matrix* u,
9863      ae_int_t nru,
9864      /* Real    */ ae_matrix* c,
9865      ae_int_t ncc,
9866      /* Real    */ ae_matrix* vt,
9867      ae_int_t ncvt,
9868      ae_bool* svdresult,
9869      ae_state *_state)
9870 {
9871 #ifndef ALGLIB_INTERCEPTS_MKL
9872     ae_bool result;
9873 
9874 
9875     result = ae_false;
9876     return result;
9877 #else
9878     return _ialglib_i_rmatrixbdsvdmkl(d, e, n, isupper, u, nru, c, ncc, vt, ncvt, svdresult);
9879 #endif
9880 }
9881 
9882 
9883 /*************************************************************************
9884 MKL-based DHSEQR kernel.
9885 
9886 Returns True if MKL was present and handled request.
9887 
9888 WR and WI are pre-allocated arrays with length N.
9889 Z is pre-allocated array[N,N].
9890 
9891   -- ALGLIB routine --
9892      20.10.2014
9893      Bochkanov Sergey
9894 *************************************************************************/
rmatrixinternalschurdecompositionmkl(ae_matrix * h,ae_int_t n,ae_int_t tneeded,ae_int_t zneeded,ae_vector * wr,ae_vector * wi,ae_matrix * z,ae_int_t * info,ae_state * _state)9895 ae_bool rmatrixinternalschurdecompositionmkl(/* Real    */ ae_matrix* h,
9896      ae_int_t n,
9897      ae_int_t tneeded,
9898      ae_int_t zneeded,
9899      /* Real    */ ae_vector* wr,
9900      /* Real    */ ae_vector* wi,
9901      /* Real    */ ae_matrix* z,
9902      ae_int_t* info,
9903      ae_state *_state)
9904 {
9905 #ifndef ALGLIB_INTERCEPTS_MKL
9906     ae_bool result;
9907 
9908 
9909     result = ae_false;
9910     return result;
9911 #else
9912     return _ialglib_i_rmatrixinternalschurdecompositionmkl(h, n, tneeded, zneeded, wr, wi, z, info);
9913 #endif
9914 }
9915 
9916 
9917 /*************************************************************************
9918 MKL-based DTREVC kernel.
9919 
9920 Returns True if MKL was present and handled request.
9921 
9922 NOTE: this function does NOT support HOWMNY=3!!!!
9923 
9924 VL and VR are pre-allocated arrays with length N*N, if required. If particalar
9925 variables is not required, it can be dummy (empty) array.
9926 
9927   -- ALGLIB routine --
9928      20.10.2014
9929      Bochkanov Sergey
9930 *************************************************************************/
rmatrixinternaltrevcmkl(ae_matrix * t,ae_int_t n,ae_int_t side,ae_int_t howmny,ae_matrix * vl,ae_matrix * vr,ae_int_t * m,ae_int_t * info,ae_state * _state)9931 ae_bool rmatrixinternaltrevcmkl(/* Real    */ ae_matrix* t,
9932      ae_int_t n,
9933      ae_int_t side,
9934      ae_int_t howmny,
9935      /* Real    */ ae_matrix* vl,
9936      /* Real    */ ae_matrix* vr,
9937      ae_int_t* m,
9938      ae_int_t* info,
9939      ae_state *_state)
9940 {
9941 #ifndef ALGLIB_INTERCEPTS_MKL
9942     ae_bool result;
9943 
9944 
9945     result = ae_false;
9946     return result;
9947 #else
9948     return _ialglib_i_rmatrixinternaltrevcmkl(t, n, side, howmny, vl, vr, m, info);
9949 #endif
9950 }
9951 
9952 
9953 /*************************************************************************
9954 MKL-based kernel.
9955 
9956 Returns True if MKL was present and handled request (MKL  completion  code
9957 is returned as separate output parameter).
9958 
9959 D and E are pre-allocated arrays with length N (both of them!). On output,
9960 D constraints eigenvalues, and E is destroyed.
9961 
9962 Z is preallocated array[N,N] for ZNeeded<>0; ignored for ZNeeded=0.
9963 
9964 EVDResult is modified if and only if MKL is present.
9965 
9966   -- ALGLIB routine --
9967      20.10.2014
9968      Bochkanov Sergey
9969 *************************************************************************/
smatrixtdevdmkl(ae_vector * d,ae_vector * e,ae_int_t n,ae_int_t zneeded,ae_matrix * z,ae_bool * evdresult,ae_state * _state)9970 ae_bool smatrixtdevdmkl(/* Real    */ ae_vector* d,
9971      /* Real    */ ae_vector* e,
9972      ae_int_t n,
9973      ae_int_t zneeded,
9974      /* Real    */ ae_matrix* z,
9975      ae_bool* evdresult,
9976      ae_state *_state)
9977 {
9978 #ifndef ALGLIB_INTERCEPTS_MKL
9979     ae_bool result;
9980 
9981 
9982     result = ae_false;
9983     return result;
9984 #else
9985     return _ialglib_i_smatrixtdevdmkl(d, e, n, zneeded, z, evdresult);
9986 #endif
9987 }
9988 
9989 
9990 /*************************************************************************
9991 MKL-based kernel.
9992 
9993 Returns True if MKL was present and handled request (MKL  completion  code
9994 is returned as separate output parameter).
9995 
9996 D and E are pre-allocated arrays with length N (both of them!). On output,
9997 D constraints eigenvalues, and E is destroyed.
9998 
9999 Z is preallocated array[N,N] for ZNeeded<>0; ignored for ZNeeded=0.
10000 
10001 EVDResult is modified if and only if MKL is present.
10002 
10003   -- ALGLIB routine --
10004      20.10.2014
10005      Bochkanov Sergey
10006 *************************************************************************/
sparsegemvcrsmkl(ae_int_t opa,ae_int_t arows,ae_int_t acols,double alpha,ae_vector * vals,ae_vector * cidx,ae_vector * ridx,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)10007 ae_bool sparsegemvcrsmkl(ae_int_t opa,
10008      ae_int_t arows,
10009      ae_int_t acols,
10010      double alpha,
10011      /* Real    */ ae_vector* vals,
10012      /* Integer */ ae_vector* cidx,
10013      /* Integer */ ae_vector* ridx,
10014      /* Real    */ ae_vector* x,
10015      ae_int_t ix,
10016      double beta,
10017      /* Real    */ ae_vector* y,
10018      ae_int_t iy,
10019      ae_state *_state)
10020 {
10021 #ifndef ALGLIB_INTERCEPTS_MKL
10022     ae_bool result;
10023 
10024 
10025     result = ae_false;
10026     return result;
10027 #else
10028     return _ialglib_i_sparsegemvcrsmkl(opa, arows, acols, alpha, vals, cidx, ridx, x, ix, beta, y, iy);
10029 #endif
10030 }
10031 
10032 
10033 #endif
10034 #if defined(AE_COMPILE_SCODES) || !defined(AE_PARTIAL_BUILD)
10035 
10036 
getrdfserializationcode(ae_state * _state)10037 ae_int_t getrdfserializationcode(ae_state *_state)
10038 {
10039     ae_int_t result;
10040 
10041 
10042     result = 1;
10043     return result;
10044 }
10045 
10046 
getkdtreeserializationcode(ae_state * _state)10047 ae_int_t getkdtreeserializationcode(ae_state *_state)
10048 {
10049     ae_int_t result;
10050 
10051 
10052     result = 2;
10053     return result;
10054 }
10055 
10056 
getmlpserializationcode(ae_state * _state)10057 ae_int_t getmlpserializationcode(ae_state *_state)
10058 {
10059     ae_int_t result;
10060 
10061 
10062     result = 3;
10063     return result;
10064 }
10065 
10066 
getmlpeserializationcode(ae_state * _state)10067 ae_int_t getmlpeserializationcode(ae_state *_state)
10068 {
10069     ae_int_t result;
10070 
10071 
10072     result = 4;
10073     return result;
10074 }
10075 
10076 
getrbfserializationcode(ae_state * _state)10077 ae_int_t getrbfserializationcode(ae_state *_state)
10078 {
10079     ae_int_t result;
10080 
10081 
10082     result = 5;
10083     return result;
10084 }
10085 
10086 
getspline2dserializationcode(ae_state * _state)10087 ae_int_t getspline2dserializationcode(ae_state *_state)
10088 {
10089     ae_int_t result;
10090 
10091 
10092     result = 6;
10093     return result;
10094 }
10095 
10096 
getidwserializationcode(ae_state * _state)10097 ae_int_t getidwserializationcode(ae_state *_state)
10098 {
10099     ae_int_t result;
10100 
10101 
10102     result = 7;
10103     return result;
10104 }
10105 
10106 
getsparsematrixserializationcode(ae_state * _state)10107 ae_int_t getsparsematrixserializationcode(ae_state *_state)
10108 {
10109     ae_int_t result;
10110 
10111 
10112     result = 8;
10113     return result;
10114 }
10115 
10116 
getknnserializationcode(ae_state * _state)10117 ae_int_t getknnserializationcode(ae_state *_state)
10118 {
10119     ae_int_t result;
10120 
10121 
10122     result = 108;
10123     return result;
10124 }
10125 
10126 
getlptestserializationcode(ae_state * _state)10127 ae_int_t getlptestserializationcode(ae_state *_state)
10128 {
10129     ae_int_t result;
10130 
10131 
10132     result = 200;
10133     return result;
10134 }
10135 
10136 
10137 #endif
10138 #if defined(AE_COMPILE_TSORT) || !defined(AE_PARTIAL_BUILD)
10139 
10140 
10141 /*************************************************************************
10142 This function sorts array of real keys by ascending.
10143 
10144 Its results are:
10145 * sorted array A
10146 * permutation tables P1, P2
10147 
10148 Algorithm outputs permutation tables using two formats:
10149 * as usual permutation of [0..N-1]. If P1[i]=j, then sorted A[i]  contains
10150   value which was moved there from J-th position.
10151 * as a sequence of pairwise permutations. Sorted A[] may  be  obtained  by
10152   swaping A[i] and A[P2[i]] for all i from 0 to N-1.
10153 
10154 INPUT PARAMETERS:
10155     A       -   unsorted array
10156     N       -   array size
10157 
10158 OUPUT PARAMETERS:
10159     A       -   sorted array
10160     P1, P2  -   permutation tables, array[N]
10161 
10162 NOTES:
10163     this function assumes that A[] is finite; it doesn't checks that
10164     condition. All other conditions (size of input arrays, etc.) are not
10165     checked too.
10166 
10167   -- ALGLIB --
10168      Copyright 14.05.2008 by Bochkanov Sergey
10169 *************************************************************************/
tagsort(ae_vector * a,ae_int_t n,ae_vector * p1,ae_vector * p2,ae_state * _state)10170 void tagsort(/* Real    */ ae_vector* a,
10171      ae_int_t n,
10172      /* Integer */ ae_vector* p1,
10173      /* Integer */ ae_vector* p2,
10174      ae_state *_state)
10175 {
10176     ae_frame _frame_block;
10177     apbuffers buf;
10178 
10179     ae_frame_make(_state, &_frame_block);
10180     memset(&buf, 0, sizeof(buf));
10181     ae_vector_clear(p1);
10182     ae_vector_clear(p2);
10183     _apbuffers_init(&buf, _state, ae_true);
10184 
10185     tagsortbuf(a, n, p1, p2, &buf, _state);
10186     ae_frame_leave(_state);
10187 }
10188 
10189 
10190 /*************************************************************************
10191 Buffered variant of TagSort, which accepts preallocated output arrays as
10192 well as special structure for buffered allocations. If arrays are too
10193 short, they are reallocated. If they are large enough, no memory
10194 allocation is done.
10195 
10196 It is intended to be used in the performance-critical parts of code, where
10197 additional allocations can lead to severe performance degradation
10198 
10199   -- ALGLIB --
10200      Copyright 14.05.2008 by Bochkanov Sergey
10201 *************************************************************************/
tagsortbuf(ae_vector * a,ae_int_t n,ae_vector * p1,ae_vector * p2,apbuffers * buf,ae_state * _state)10202 void tagsortbuf(/* Real    */ ae_vector* a,
10203      ae_int_t n,
10204      /* Integer */ ae_vector* p1,
10205      /* Integer */ ae_vector* p2,
10206      apbuffers* buf,
10207      ae_state *_state)
10208 {
10209     ae_int_t i;
10210     ae_int_t lv;
10211     ae_int_t lp;
10212     ae_int_t rv;
10213     ae_int_t rp;
10214 
10215 
10216 
10217     /*
10218      * Special cases
10219      */
10220     if( n<=0 )
10221     {
10222         return;
10223     }
10224     if( n==1 )
10225     {
10226         ivectorsetlengthatleast(p1, 1, _state);
10227         ivectorsetlengthatleast(p2, 1, _state);
10228         p1->ptr.p_int[0] = 0;
10229         p2->ptr.p_int[0] = 0;
10230         return;
10231     }
10232 
10233     /*
10234      * General case, N>1: prepare permutations table P1
10235      */
10236     ivectorsetlengthatleast(p1, n, _state);
10237     for(i=0; i<=n-1; i++)
10238     {
10239         p1->ptr.p_int[i] = i;
10240     }
10241 
10242     /*
10243      * General case, N>1: sort, update P1
10244      */
10245     rvectorsetlengthatleast(&buf->ra0, n, _state);
10246     ivectorsetlengthatleast(&buf->ia0, n, _state);
10247     tagsortfasti(a, p1, &buf->ra0, &buf->ia0, n, _state);
10248 
10249     /*
10250      * General case, N>1: fill permutations table P2
10251      *
10252      * To fill P2 we maintain two arrays:
10253      * * PV (Buf.IA0), Position(Value). PV[i] contains position of I-th key at the moment
10254      * * VP (Buf.IA1), Value(Position). VP[i] contains key which has position I at the moment
10255      *
10256      * At each step we making permutation of two items:
10257      *   Left, which is given by position/value pair LP/LV
10258      *   and Right, which is given by RP/RV
10259      * and updating PV[] and VP[] correspondingly.
10260      */
10261     ivectorsetlengthatleast(&buf->ia0, n, _state);
10262     ivectorsetlengthatleast(&buf->ia1, n, _state);
10263     ivectorsetlengthatleast(p2, n, _state);
10264     for(i=0; i<=n-1; i++)
10265     {
10266         buf->ia0.ptr.p_int[i] = i;
10267         buf->ia1.ptr.p_int[i] = i;
10268     }
10269     for(i=0; i<=n-1; i++)
10270     {
10271 
10272         /*
10273          * calculate LP, LV, RP, RV
10274          */
10275         lp = i;
10276         lv = buf->ia1.ptr.p_int[lp];
10277         rv = p1->ptr.p_int[i];
10278         rp = buf->ia0.ptr.p_int[rv];
10279 
10280         /*
10281          * Fill P2
10282          */
10283         p2->ptr.p_int[i] = rp;
10284 
10285         /*
10286          * update PV and VP
10287          */
10288         buf->ia1.ptr.p_int[lp] = rv;
10289         buf->ia1.ptr.p_int[rp] = lv;
10290         buf->ia0.ptr.p_int[lv] = rp;
10291         buf->ia0.ptr.p_int[rv] = lp;
10292     }
10293 }
10294 
10295 
10296 /*************************************************************************
10297 Same as TagSort, but optimized for real keys and integer labels.
10298 
10299 A is sorted, and same permutations are applied to B.
10300 
10301 NOTES:
10302 1.  this function assumes that A[] is finite; it doesn't checks that
10303     condition. All other conditions (size of input arrays, etc.) are not
10304     checked too.
10305 2.  this function uses two buffers, BufA and BufB, each is N elements large.
10306     They may be preallocated (which will save some time) or not, in which
10307     case function will automatically allocate memory.
10308 
10309   -- ALGLIB --
10310      Copyright 11.12.2008 by Bochkanov Sergey
10311 *************************************************************************/
tagsortfasti(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t n,ae_state * _state)10312 void tagsortfasti(/* Real    */ ae_vector* a,
10313      /* Integer */ ae_vector* b,
10314      /* Real    */ ae_vector* bufa,
10315      /* Integer */ ae_vector* bufb,
10316      ae_int_t n,
10317      ae_state *_state)
10318 {
10319     ae_int_t i;
10320     ae_int_t j;
10321     ae_bool isascending;
10322     ae_bool isdescending;
10323     double tmpr;
10324     ae_int_t tmpi;
10325 
10326 
10327 
10328     /*
10329      * Special case
10330      */
10331     if( n<=1 )
10332     {
10333         return;
10334     }
10335 
10336     /*
10337      * Test for already sorted set
10338      */
10339     isascending = ae_true;
10340     isdescending = ae_true;
10341     for(i=1; i<=n-1; i++)
10342     {
10343         isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
10344         isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
10345     }
10346     if( isascending )
10347     {
10348         return;
10349     }
10350     if( isdescending )
10351     {
10352         for(i=0; i<=n-1; i++)
10353         {
10354             j = n-1-i;
10355             if( j<=i )
10356             {
10357                 break;
10358             }
10359             tmpr = a->ptr.p_double[i];
10360             a->ptr.p_double[i] = a->ptr.p_double[j];
10361             a->ptr.p_double[j] = tmpr;
10362             tmpi = b->ptr.p_int[i];
10363             b->ptr.p_int[i] = b->ptr.p_int[j];
10364             b->ptr.p_int[j] = tmpi;
10365         }
10366         return;
10367     }
10368 
10369     /*
10370      * General case
10371      */
10372     if( bufa->cnt<n )
10373     {
10374         ae_vector_set_length(bufa, n, _state);
10375     }
10376     if( bufb->cnt<n )
10377     {
10378         ae_vector_set_length(bufb, n, _state);
10379     }
10380     tsort_tagsortfastirec(a, b, bufa, bufb, 0, n-1, _state);
10381 }
10382 
10383 
10384 /*************************************************************************
10385 Same as TagSort, but optimized for real keys and real labels.
10386 
10387 A is sorted, and same permutations are applied to B.
10388 
10389 NOTES:
10390 1.  this function assumes that A[] is finite; it doesn't checks that
10391     condition. All other conditions (size of input arrays, etc.) are not
10392     checked too.
10393 2.  this function uses two buffers, BufA and BufB, each is N elements large.
10394     They may be preallocated (which will save some time) or not, in which
10395     case function will automatically allocate memory.
10396 
10397   -- ALGLIB --
10398      Copyright 11.12.2008 by Bochkanov Sergey
10399 *************************************************************************/
tagsortfastr(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t n,ae_state * _state)10400 void tagsortfastr(/* Real    */ ae_vector* a,
10401      /* Real    */ ae_vector* b,
10402      /* Real    */ ae_vector* bufa,
10403      /* Real    */ ae_vector* bufb,
10404      ae_int_t n,
10405      ae_state *_state)
10406 {
10407     ae_int_t i;
10408     ae_int_t j;
10409     ae_bool isascending;
10410     ae_bool isdescending;
10411     double tmpr;
10412 
10413 
10414 
10415     /*
10416      * Special case
10417      */
10418     if( n<=1 )
10419     {
10420         return;
10421     }
10422 
10423     /*
10424      * Test for already sorted set
10425      */
10426     isascending = ae_true;
10427     isdescending = ae_true;
10428     for(i=1; i<=n-1; i++)
10429     {
10430         isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
10431         isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
10432     }
10433     if( isascending )
10434     {
10435         return;
10436     }
10437     if( isdescending )
10438     {
10439         for(i=0; i<=n-1; i++)
10440         {
10441             j = n-1-i;
10442             if( j<=i )
10443             {
10444                 break;
10445             }
10446             tmpr = a->ptr.p_double[i];
10447             a->ptr.p_double[i] = a->ptr.p_double[j];
10448             a->ptr.p_double[j] = tmpr;
10449             tmpr = b->ptr.p_double[i];
10450             b->ptr.p_double[i] = b->ptr.p_double[j];
10451             b->ptr.p_double[j] = tmpr;
10452         }
10453         return;
10454     }
10455 
10456     /*
10457      * General case
10458      */
10459     if( bufa->cnt<n )
10460     {
10461         ae_vector_set_length(bufa, n, _state);
10462     }
10463     if( bufb->cnt<n )
10464     {
10465         ae_vector_set_length(bufb, n, _state);
10466     }
10467     tsort_tagsortfastrrec(a, b, bufa, bufb, 0, n-1, _state);
10468 }
10469 
10470 
10471 /*************************************************************************
10472 Same as TagSort, but optimized for real keys without labels.
10473 
10474 A is sorted, and that's all.
10475 
10476 NOTES:
10477 1.  this function assumes that A[] is finite; it doesn't checks that
10478     condition. All other conditions (size of input arrays, etc.) are not
10479     checked too.
10480 2.  this function uses buffer, BufA, which is N elements large. It may be
10481     preallocated (which will save some time) or not, in which case
10482     function will automatically allocate memory.
10483 
10484   -- ALGLIB --
10485      Copyright 11.12.2008 by Bochkanov Sergey
10486 *************************************************************************/
tagsortfast(ae_vector * a,ae_vector * bufa,ae_int_t n,ae_state * _state)10487 void tagsortfast(/* Real    */ ae_vector* a,
10488      /* Real    */ ae_vector* bufa,
10489      ae_int_t n,
10490      ae_state *_state)
10491 {
10492     ae_int_t i;
10493     ae_int_t j;
10494     ae_bool isascending;
10495     ae_bool isdescending;
10496     double tmpr;
10497 
10498 
10499 
10500     /*
10501      * Special case
10502      */
10503     if( n<=1 )
10504     {
10505         return;
10506     }
10507 
10508     /*
10509      * Test for already sorted set
10510      */
10511     isascending = ae_true;
10512     isdescending = ae_true;
10513     for(i=1; i<=n-1; i++)
10514     {
10515         isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
10516         isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
10517     }
10518     if( isascending )
10519     {
10520         return;
10521     }
10522     if( isdescending )
10523     {
10524         for(i=0; i<=n-1; i++)
10525         {
10526             j = n-1-i;
10527             if( j<=i )
10528             {
10529                 break;
10530             }
10531             tmpr = a->ptr.p_double[i];
10532             a->ptr.p_double[i] = a->ptr.p_double[j];
10533             a->ptr.p_double[j] = tmpr;
10534         }
10535         return;
10536     }
10537 
10538     /*
10539      * General case
10540      */
10541     if( bufa->cnt<n )
10542     {
10543         ae_vector_set_length(bufa, n, _state);
10544     }
10545     tsort_tagsortfastrec(a, bufa, 0, n-1, _state);
10546 }
10547 
10548 
10549 /*************************************************************************
10550 Sorting function optimized for integer keys and real labels, can be used
10551 to sort middle of the array
10552 
10553 A is sorted, and same permutations are applied to B.
10554 
10555 NOTES:
10556     this function assumes that A[] is finite; it doesn't checks that
10557     condition. All other conditions (size of input arrays, etc.) are not
10558     checked too.
10559 
10560   -- ALGLIB --
10561      Copyright 11.12.2008 by Bochkanov Sergey
10562 *************************************************************************/
tagsortmiddleir(ae_vector * a,ae_vector * b,ae_int_t offset,ae_int_t n,ae_state * _state)10563 void tagsortmiddleir(/* Integer */ ae_vector* a,
10564      /* Real    */ ae_vector* b,
10565      ae_int_t offset,
10566      ae_int_t n,
10567      ae_state *_state)
10568 {
10569     ae_int_t i;
10570     ae_int_t k;
10571     ae_int_t t;
10572     ae_int_t tmp;
10573     double tmpr;
10574     ae_int_t p0;
10575     ae_int_t p1;
10576     ae_int_t at;
10577     ae_int_t ak;
10578     ae_int_t ak1;
10579     double bt;
10580 
10581 
10582 
10583     /*
10584      * Special cases
10585      */
10586     if( n<=1 )
10587     {
10588         return;
10589     }
10590 
10591     /*
10592      * General case, N>1: sort, update B
10593      */
10594     for(i=2; i<=n; i++)
10595     {
10596         t = i;
10597         while(t!=1)
10598         {
10599             k = t/2;
10600             p0 = offset+k-1;
10601             p1 = offset+t-1;
10602             ak = a->ptr.p_int[p0];
10603             at = a->ptr.p_int[p1];
10604             if( ak>=at )
10605             {
10606                 break;
10607             }
10608             a->ptr.p_int[p0] = at;
10609             a->ptr.p_int[p1] = ak;
10610             tmpr = b->ptr.p_double[p0];
10611             b->ptr.p_double[p0] = b->ptr.p_double[p1];
10612             b->ptr.p_double[p1] = tmpr;
10613             t = k;
10614         }
10615     }
10616     for(i=n-1; i>=1; i--)
10617     {
10618         p0 = offset+0;
10619         p1 = offset+i;
10620         tmp = a->ptr.p_int[p1];
10621         a->ptr.p_int[p1] = a->ptr.p_int[p0];
10622         a->ptr.p_int[p0] = tmp;
10623         at = tmp;
10624         tmpr = b->ptr.p_double[p1];
10625         b->ptr.p_double[p1] = b->ptr.p_double[p0];
10626         b->ptr.p_double[p0] = tmpr;
10627         bt = tmpr;
10628         t = 0;
10629         for(;;)
10630         {
10631             k = 2*t+1;
10632             if( k+1>i )
10633             {
10634                 break;
10635             }
10636             p0 = offset+t;
10637             p1 = offset+k;
10638             ak = a->ptr.p_int[p1];
10639             if( k+1<i )
10640             {
10641                 ak1 = a->ptr.p_int[p1+1];
10642                 if( ak1>ak )
10643                 {
10644                     ak = ak1;
10645                     p1 = p1+1;
10646                     k = k+1;
10647                 }
10648             }
10649             if( at>=ak )
10650             {
10651                 break;
10652             }
10653             a->ptr.p_int[p1] = at;
10654             a->ptr.p_int[p0] = ak;
10655             b->ptr.p_double[p0] = b->ptr.p_double[p1];
10656             b->ptr.p_double[p1] = bt;
10657             t = k;
10658         }
10659     }
10660 }
10661 
10662 
10663 /*************************************************************************
10664 Sorting function optimized for integer keys and real labels, can be used
10665 to sort middle of the array
10666 
10667 A is sorted, and same permutations are applied to B.
10668 
10669 NOTES:
10670     this function assumes that A[] is finite; it doesn't checks that
10671     condition. All other conditions (size of input arrays, etc.) are not
10672     checked too.
10673 
10674   -- ALGLIB --
10675      Copyright 11.12.2008 by Bochkanov Sergey
10676 *************************************************************************/
tagsortmiddlei(ae_vector * a,ae_int_t offset,ae_int_t n,ae_state * _state)10677 void tagsortmiddlei(/* Integer */ ae_vector* a,
10678      ae_int_t offset,
10679      ae_int_t n,
10680      ae_state *_state)
10681 {
10682     ae_int_t i;
10683     ae_int_t k;
10684     ae_int_t t;
10685     ae_int_t tmp;
10686     ae_int_t p0;
10687     ae_int_t p1;
10688     ae_int_t at;
10689     ae_int_t ak;
10690     ae_int_t ak1;
10691 
10692 
10693 
10694     /*
10695      * Special cases
10696      */
10697     if( n<=1 )
10698     {
10699         return;
10700     }
10701 
10702     /*
10703      * General case, N>1: sort, update B
10704      */
10705     for(i=2; i<=n; i++)
10706     {
10707         t = i;
10708         while(t!=1)
10709         {
10710             k = t/2;
10711             p0 = offset+k-1;
10712             p1 = offset+t-1;
10713             ak = a->ptr.p_int[p0];
10714             at = a->ptr.p_int[p1];
10715             if( ak>=at )
10716             {
10717                 break;
10718             }
10719             a->ptr.p_int[p0] = at;
10720             a->ptr.p_int[p1] = ak;
10721             t = k;
10722         }
10723     }
10724     for(i=n-1; i>=1; i--)
10725     {
10726         p0 = offset+0;
10727         p1 = offset+i;
10728         tmp = a->ptr.p_int[p1];
10729         a->ptr.p_int[p1] = a->ptr.p_int[p0];
10730         a->ptr.p_int[p0] = tmp;
10731         at = tmp;
10732         t = 0;
10733         for(;;)
10734         {
10735             k = 2*t+1;
10736             if( k+1>i )
10737             {
10738                 break;
10739             }
10740             p0 = offset+t;
10741             p1 = offset+k;
10742             ak = a->ptr.p_int[p1];
10743             if( k+1<i )
10744             {
10745                 ak1 = a->ptr.p_int[p1+1];
10746                 if( ak1>ak )
10747                 {
10748                     ak = ak1;
10749                     p1 = p1+1;
10750                     k = k+1;
10751                 }
10752             }
10753             if( at>=ak )
10754             {
10755                 break;
10756             }
10757             a->ptr.p_int[p1] = at;
10758             a->ptr.p_int[p0] = ak;
10759             t = k;
10760         }
10761     }
10762 }
10763 
10764 
10765 /*************************************************************************
10766 Sorting function optimized for integer values (only keys, no labels),  can
10767 be used to sort middle of the array
10768 
10769   -- ALGLIB --
10770      Copyright 11.12.2008 by Bochkanov Sergey
10771 *************************************************************************/
sortmiddlei(ae_vector * a,ae_int_t offset,ae_int_t n,ae_state * _state)10772 void sortmiddlei(/* Integer */ ae_vector* a,
10773      ae_int_t offset,
10774      ae_int_t n,
10775      ae_state *_state)
10776 {
10777     ae_int_t i;
10778     ae_int_t k;
10779     ae_int_t t;
10780     ae_int_t tmp;
10781     ae_int_t p0;
10782     ae_int_t p1;
10783     ae_int_t at;
10784     ae_int_t ak;
10785     ae_int_t ak1;
10786 
10787 
10788 
10789     /*
10790      * Special cases
10791      */
10792     if( n<=1 )
10793     {
10794         return;
10795     }
10796 
10797     /*
10798      * General case, N>1: sort, update B
10799      */
10800     for(i=2; i<=n; i++)
10801     {
10802         t = i;
10803         while(t!=1)
10804         {
10805             k = t/2;
10806             p0 = offset+k-1;
10807             p1 = offset+t-1;
10808             ak = a->ptr.p_int[p0];
10809             at = a->ptr.p_int[p1];
10810             if( ak>=at )
10811             {
10812                 break;
10813             }
10814             a->ptr.p_int[p0] = at;
10815             a->ptr.p_int[p1] = ak;
10816             t = k;
10817         }
10818     }
10819     for(i=n-1; i>=1; i--)
10820     {
10821         p0 = offset+0;
10822         p1 = offset+i;
10823         tmp = a->ptr.p_int[p1];
10824         a->ptr.p_int[p1] = a->ptr.p_int[p0];
10825         a->ptr.p_int[p0] = tmp;
10826         at = tmp;
10827         t = 0;
10828         for(;;)
10829         {
10830             k = 2*t+1;
10831             if( k+1>i )
10832             {
10833                 break;
10834             }
10835             p0 = offset+t;
10836             p1 = offset+k;
10837             ak = a->ptr.p_int[p1];
10838             if( k+1<i )
10839             {
10840                 ak1 = a->ptr.p_int[p1+1];
10841                 if( ak1>ak )
10842                 {
10843                     ak = ak1;
10844                     p1 = p1+1;
10845                     k = k+1;
10846                 }
10847             }
10848             if( at>=ak )
10849             {
10850                 break;
10851             }
10852             a->ptr.p_int[p1] = at;
10853             a->ptr.p_int[p0] = ak;
10854             t = k;
10855         }
10856     }
10857 }
10858 
10859 
10860 /*************************************************************************
10861 Heap operations: adds element to the heap
10862 
10863 PARAMETERS:
10864     A       -   heap itself, must be at least array[0..N]
10865     B       -   array of integer tags, which are updated according to
10866                 permutations in the heap
10867     N       -   size of the heap (without new element).
10868                 updated on output
10869     VA      -   value of the element being added
10870     VB      -   value of the tag
10871 
10872   -- ALGLIB --
10873      Copyright 28.02.2010 by Bochkanov Sergey
10874 *************************************************************************/
tagheappushi(ae_vector * a,ae_vector * b,ae_int_t * n,double va,ae_int_t vb,ae_state * _state)10875 void tagheappushi(/* Real    */ ae_vector* a,
10876      /* Integer */ ae_vector* b,
10877      ae_int_t* n,
10878      double va,
10879      ae_int_t vb,
10880      ae_state *_state)
10881 {
10882     ae_int_t j;
10883     ae_int_t k;
10884     double v;
10885 
10886 
10887     if( *n<0 )
10888     {
10889         return;
10890     }
10891 
10892     /*
10893      * N=0 is a special case
10894      */
10895     if( *n==0 )
10896     {
10897         a->ptr.p_double[0] = va;
10898         b->ptr.p_int[0] = vb;
10899         *n = *n+1;
10900         return;
10901     }
10902 
10903     /*
10904      * add current point to the heap
10905      * (add to the bottom, then move up)
10906      *
10907      * we don't write point to the heap
10908      * until its final position is determined
10909      * (it allow us to reduce number of array access operations)
10910      */
10911     j = *n;
10912     *n = *n+1;
10913     while(j>0)
10914     {
10915         k = (j-1)/2;
10916         v = a->ptr.p_double[k];
10917         if( v<va )
10918         {
10919 
10920             /*
10921              * swap with higher element
10922              */
10923             a->ptr.p_double[j] = v;
10924             b->ptr.p_int[j] = b->ptr.p_int[k];
10925             j = k;
10926         }
10927         else
10928         {
10929 
10930             /*
10931              * element in its place. terminate.
10932              */
10933             break;
10934         }
10935     }
10936     a->ptr.p_double[j] = va;
10937     b->ptr.p_int[j] = vb;
10938 }
10939 
10940 
10941 /*************************************************************************
10942 Heap operations: replaces top element with new element
10943 (which is moved down)
10944 
10945 PARAMETERS:
10946     A       -   heap itself, must be at least array[0..N-1]
10947     B       -   array of integer tags, which are updated according to
10948                 permutations in the heap
10949     N       -   size of the heap
10950     VA      -   value of the element which replaces top element
10951     VB      -   value of the tag
10952 
10953   -- ALGLIB --
10954      Copyright 28.02.2010 by Bochkanov Sergey
10955 *************************************************************************/
tagheapreplacetopi(ae_vector * a,ae_vector * b,ae_int_t n,double va,ae_int_t vb,ae_state * _state)10956 void tagheapreplacetopi(/* Real    */ ae_vector* a,
10957      /* Integer */ ae_vector* b,
10958      ae_int_t n,
10959      double va,
10960      ae_int_t vb,
10961      ae_state *_state)
10962 {
10963     ae_int_t j;
10964     ae_int_t k1;
10965     ae_int_t k2;
10966     double v;
10967     double v1;
10968     double v2;
10969 
10970 
10971     if( n<1 )
10972     {
10973         return;
10974     }
10975 
10976     /*
10977      * N=1 is a special case
10978      */
10979     if( n==1 )
10980     {
10981         a->ptr.p_double[0] = va;
10982         b->ptr.p_int[0] = vb;
10983         return;
10984     }
10985 
10986     /*
10987      * move down through heap:
10988      * * J  -   current element
10989      * * K1 -   first child (always exists)
10990      * * K2 -   second child (may not exists)
10991      *
10992      * we don't write point to the heap
10993      * until its final position is determined
10994      * (it allow us to reduce number of array access operations)
10995      */
10996     j = 0;
10997     k1 = 1;
10998     k2 = 2;
10999     while(k1<n)
11000     {
11001         if( k2>=n )
11002         {
11003 
11004             /*
11005              * only one child.
11006              *
11007              * swap and terminate (because this child
11008              * have no siblings due to heap structure)
11009              */
11010             v = a->ptr.p_double[k1];
11011             if( v>va )
11012             {
11013                 a->ptr.p_double[j] = v;
11014                 b->ptr.p_int[j] = b->ptr.p_int[k1];
11015                 j = k1;
11016             }
11017             break;
11018         }
11019         else
11020         {
11021 
11022             /*
11023              * two childs
11024              */
11025             v1 = a->ptr.p_double[k1];
11026             v2 = a->ptr.p_double[k2];
11027             if( v1>v2 )
11028             {
11029                 if( va<v1 )
11030                 {
11031                     a->ptr.p_double[j] = v1;
11032                     b->ptr.p_int[j] = b->ptr.p_int[k1];
11033                     j = k1;
11034                 }
11035                 else
11036                 {
11037                     break;
11038                 }
11039             }
11040             else
11041             {
11042                 if( va<v2 )
11043                 {
11044                     a->ptr.p_double[j] = v2;
11045                     b->ptr.p_int[j] = b->ptr.p_int[k2];
11046                     j = k2;
11047                 }
11048                 else
11049                 {
11050                     break;
11051                 }
11052             }
11053             k1 = 2*j+1;
11054             k2 = 2*j+2;
11055         }
11056     }
11057     a->ptr.p_double[j] = va;
11058     b->ptr.p_int[j] = vb;
11059 }
11060 
11061 
11062 /*************************************************************************
11063 Heap operations: pops top element from the heap
11064 
11065 PARAMETERS:
11066     A       -   heap itself, must be at least array[0..N-1]
11067     B       -   array of integer tags, which are updated according to
11068                 permutations in the heap
11069     N       -   size of the heap, N>=1
11070 
11071 On output top element is moved to A[N-1], B[N-1], heap is reordered, N is
11072 decreased by 1.
11073 
11074   -- ALGLIB --
11075      Copyright 28.02.2010 by Bochkanov Sergey
11076 *************************************************************************/
tagheappopi(ae_vector * a,ae_vector * b,ae_int_t * n,ae_state * _state)11077 void tagheappopi(/* Real    */ ae_vector* a,
11078      /* Integer */ ae_vector* b,
11079      ae_int_t* n,
11080      ae_state *_state)
11081 {
11082     double va;
11083     ae_int_t vb;
11084 
11085 
11086     if( *n<1 )
11087     {
11088         return;
11089     }
11090 
11091     /*
11092      * N=1 is a special case
11093      */
11094     if( *n==1 )
11095     {
11096         *n = 0;
11097         return;
11098     }
11099 
11100     /*
11101      * swap top element and last element,
11102      * then reorder heap
11103      */
11104     va = a->ptr.p_double[*n-1];
11105     vb = b->ptr.p_int[*n-1];
11106     a->ptr.p_double[*n-1] = a->ptr.p_double[0];
11107     b->ptr.p_int[*n-1] = b->ptr.p_int[0];
11108     *n = *n-1;
11109     tagheapreplacetopi(a, b, *n, va, vb, _state);
11110 }
11111 
11112 
11113 /*************************************************************************
11114 Search first element less than T in sorted array.
11115 
11116 PARAMETERS:
11117     A - sorted array by ascending from 0 to N-1
11118     N - number of elements in array
11119     T - the desired element
11120 
11121 RESULT:
11122     The very first element's index, which isn't less than T.
11123 In the case when there aren't such elements, returns N.
11124 *************************************************************************/
lowerbound(ae_vector * a,ae_int_t n,double t,ae_state * _state)11125 ae_int_t lowerbound(/* Real    */ ae_vector* a,
11126      ae_int_t n,
11127      double t,
11128      ae_state *_state)
11129 {
11130     ae_int_t l;
11131     ae_int_t half;
11132     ae_int_t first;
11133     ae_int_t middle;
11134     ae_int_t result;
11135 
11136 
11137     l = n;
11138     first = 0;
11139     while(l>0)
11140     {
11141         half = l/2;
11142         middle = first+half;
11143         if( ae_fp_less(a->ptr.p_double[middle],t) )
11144         {
11145             first = middle+1;
11146             l = l-half-1;
11147         }
11148         else
11149         {
11150             l = half;
11151         }
11152     }
11153     result = first;
11154     return result;
11155 }
11156 
11157 
11158 /*************************************************************************
11159 Search first element more than T in sorted array.
11160 
11161 PARAMETERS:
11162     A - sorted array by ascending from 0 to N-1
11163     N - number of elements in array
11164     T - the desired element
11165 
11166     RESULT:
11167     The very first element's index, which more than T.
11168 In the case when there aren't such elements, returns N.
11169 *************************************************************************/
upperbound(ae_vector * a,ae_int_t n,double t,ae_state * _state)11170 ae_int_t upperbound(/* Real    */ ae_vector* a,
11171      ae_int_t n,
11172      double t,
11173      ae_state *_state)
11174 {
11175     ae_int_t l;
11176     ae_int_t half;
11177     ae_int_t first;
11178     ae_int_t middle;
11179     ae_int_t result;
11180 
11181 
11182     l = n;
11183     first = 0;
11184     while(l>0)
11185     {
11186         half = l/2;
11187         middle = first+half;
11188         if( ae_fp_less(t,a->ptr.p_double[middle]) )
11189         {
11190             l = half;
11191         }
11192         else
11193         {
11194             first = middle+1;
11195             l = l-half-1;
11196         }
11197     }
11198     result = first;
11199     return result;
11200 }
11201 
11202 
11203 /*************************************************************************
11204 Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
11205 applies same permutations to B.
11206 
11207   -- ALGLIB --
11208      Copyright 06.09.2010 by Bochkanov Sergey
11209 *************************************************************************/
tsort_tagsortfastirec(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t i1,ae_int_t i2,ae_state * _state)11210 static void tsort_tagsortfastirec(/* Real    */ ae_vector* a,
11211      /* Integer */ ae_vector* b,
11212      /* Real    */ ae_vector* bufa,
11213      /* Integer */ ae_vector* bufb,
11214      ae_int_t i1,
11215      ae_int_t i2,
11216      ae_state *_state)
11217 {
11218     ae_int_t i;
11219     ae_int_t j;
11220     ae_int_t k;
11221     ae_int_t cntless;
11222     ae_int_t cnteq;
11223     ae_int_t cntgreater;
11224     double tmpr;
11225     ae_int_t tmpi;
11226     double v0;
11227     double v1;
11228     double v2;
11229     double vp;
11230 
11231 
11232 
11233     /*
11234      * Fast exit
11235      */
11236     if( i2<=i1 )
11237     {
11238         return;
11239     }
11240 
11241     /*
11242      * Non-recursive sort for small arrays
11243      */
11244     if( i2-i1<=16 )
11245     {
11246         for(j=i1+1; j<=i2; j++)
11247         {
11248 
11249             /*
11250              * Search elements [I1..J-1] for place to insert Jth element.
11251              *
11252              * This code stops immediately if we can leave A[J] at J-th position
11253              * (all elements have same value of A[J] larger than any of them)
11254              */
11255             tmpr = a->ptr.p_double[j];
11256             tmpi = j;
11257             for(k=j-1; k>=i1; k--)
11258             {
11259                 if( a->ptr.p_double[k]<=tmpr )
11260                 {
11261                     break;
11262                 }
11263                 tmpi = k;
11264             }
11265             k = tmpi;
11266 
11267             /*
11268              * Insert Jth element into Kth position
11269              */
11270             if( k!=j )
11271             {
11272                 tmpr = a->ptr.p_double[j];
11273                 tmpi = b->ptr.p_int[j];
11274                 for(i=j-1; i>=k; i--)
11275                 {
11276                     a->ptr.p_double[i+1] = a->ptr.p_double[i];
11277                     b->ptr.p_int[i+1] = b->ptr.p_int[i];
11278                 }
11279                 a->ptr.p_double[k] = tmpr;
11280                 b->ptr.p_int[k] = tmpi;
11281             }
11282         }
11283         return;
11284     }
11285 
11286     /*
11287      * Quicksort: choose pivot
11288      * Here we assume that I2-I1>=2
11289      */
11290     v0 = a->ptr.p_double[i1];
11291     v1 = a->ptr.p_double[i1+(i2-i1)/2];
11292     v2 = a->ptr.p_double[i2];
11293     if( v0>v1 )
11294     {
11295         tmpr = v1;
11296         v1 = v0;
11297         v0 = tmpr;
11298     }
11299     if( v1>v2 )
11300     {
11301         tmpr = v2;
11302         v2 = v1;
11303         v1 = tmpr;
11304     }
11305     if( v0>v1 )
11306     {
11307         tmpr = v1;
11308         v1 = v0;
11309         v0 = tmpr;
11310     }
11311     vp = v1;
11312 
11313     /*
11314      * now pass through A/B and:
11315      * * move elements that are LESS than VP to the left of A/B
11316      * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
11317      * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
11318      * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
11319      * * move elements from the left of BufA/BufB to the end of A/B
11320      */
11321     cntless = 0;
11322     cnteq = 0;
11323     cntgreater = 0;
11324     for(i=i1; i<=i2; i++)
11325     {
11326         v0 = a->ptr.p_double[i];
11327         if( v0<vp )
11328         {
11329 
11330             /*
11331              * LESS
11332              */
11333             k = i1+cntless;
11334             if( i!=k )
11335             {
11336                 a->ptr.p_double[k] = v0;
11337                 b->ptr.p_int[k] = b->ptr.p_int[i];
11338             }
11339             cntless = cntless+1;
11340             continue;
11341         }
11342         if( v0==vp )
11343         {
11344 
11345             /*
11346              * EQUAL
11347              */
11348             k = i2-cnteq;
11349             bufa->ptr.p_double[k] = v0;
11350             bufb->ptr.p_int[k] = b->ptr.p_int[i];
11351             cnteq = cnteq+1;
11352             continue;
11353         }
11354 
11355         /*
11356          * GREATER
11357          */
11358         k = i1+cntgreater;
11359         bufa->ptr.p_double[k] = v0;
11360         bufb->ptr.p_int[k] = b->ptr.p_int[i];
11361         cntgreater = cntgreater+1;
11362     }
11363     for(i=0; i<=cnteq-1; i++)
11364     {
11365         j = i1+cntless+cnteq-1-i;
11366         k = i2+i-(cnteq-1);
11367         a->ptr.p_double[j] = bufa->ptr.p_double[k];
11368         b->ptr.p_int[j] = bufb->ptr.p_int[k];
11369     }
11370     for(i=0; i<=cntgreater-1; i++)
11371     {
11372         j = i1+cntless+cnteq+i;
11373         k = i1+i;
11374         a->ptr.p_double[j] = bufa->ptr.p_double[k];
11375         b->ptr.p_int[j] = bufb->ptr.p_int[k];
11376     }
11377 
11378     /*
11379      * Sort left and right parts of the array (ignoring middle part)
11380      */
11381     tsort_tagsortfastirec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
11382     tsort_tagsortfastirec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
11383 }
11384 
11385 
11386 /*************************************************************************
11387 Internal TagSortFastR: sorts A[I1...I2] (both bounds are included),
11388 applies same permutations to B.
11389 
11390   -- ALGLIB --
11391      Copyright 06.09.2010 by Bochkanov Sergey
11392 *************************************************************************/
tsort_tagsortfastrrec(ae_vector * a,ae_vector * b,ae_vector * bufa,ae_vector * bufb,ae_int_t i1,ae_int_t i2,ae_state * _state)11393 static void tsort_tagsortfastrrec(/* Real    */ ae_vector* a,
11394      /* Real    */ ae_vector* b,
11395      /* Real    */ ae_vector* bufa,
11396      /* Real    */ ae_vector* bufb,
11397      ae_int_t i1,
11398      ae_int_t i2,
11399      ae_state *_state)
11400 {
11401     ae_int_t i;
11402     ae_int_t j;
11403     ae_int_t k;
11404     double tmpr;
11405     double tmpr2;
11406     ae_int_t tmpi;
11407     ae_int_t cntless;
11408     ae_int_t cnteq;
11409     ae_int_t cntgreater;
11410     double v0;
11411     double v1;
11412     double v2;
11413     double vp;
11414 
11415 
11416 
11417     /*
11418      * Fast exit
11419      */
11420     if( i2<=i1 )
11421     {
11422         return;
11423     }
11424 
11425     /*
11426      * Non-recursive sort for small arrays
11427      */
11428     if( i2-i1<=16 )
11429     {
11430         for(j=i1+1; j<=i2; j++)
11431         {
11432 
11433             /*
11434              * Search elements [I1..J-1] for place to insert Jth element.
11435              *
11436              * This code stops immediatly if we can leave A[J] at J-th position
11437              * (all elements have same value of A[J] larger than any of them)
11438              */
11439             tmpr = a->ptr.p_double[j];
11440             tmpi = j;
11441             for(k=j-1; k>=i1; k--)
11442             {
11443                 if( a->ptr.p_double[k]<=tmpr )
11444                 {
11445                     break;
11446                 }
11447                 tmpi = k;
11448             }
11449             k = tmpi;
11450 
11451             /*
11452              * Insert Jth element into Kth position
11453              */
11454             if( k!=j )
11455             {
11456                 tmpr = a->ptr.p_double[j];
11457                 tmpr2 = b->ptr.p_double[j];
11458                 for(i=j-1; i>=k; i--)
11459                 {
11460                     a->ptr.p_double[i+1] = a->ptr.p_double[i];
11461                     b->ptr.p_double[i+1] = b->ptr.p_double[i];
11462                 }
11463                 a->ptr.p_double[k] = tmpr;
11464                 b->ptr.p_double[k] = tmpr2;
11465             }
11466         }
11467         return;
11468     }
11469 
11470     /*
11471      * Quicksort: choose pivot
11472      * Here we assume that I2-I1>=16
11473      */
11474     v0 = a->ptr.p_double[i1];
11475     v1 = a->ptr.p_double[i1+(i2-i1)/2];
11476     v2 = a->ptr.p_double[i2];
11477     if( v0>v1 )
11478     {
11479         tmpr = v1;
11480         v1 = v0;
11481         v0 = tmpr;
11482     }
11483     if( v1>v2 )
11484     {
11485         tmpr = v2;
11486         v2 = v1;
11487         v1 = tmpr;
11488     }
11489     if( v0>v1 )
11490     {
11491         tmpr = v1;
11492         v1 = v0;
11493         v0 = tmpr;
11494     }
11495     vp = v1;
11496 
11497     /*
11498      * now pass through A/B and:
11499      * * move elements that are LESS than VP to the left of A/B
11500      * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
11501      * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
11502      * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
11503      * * move elements from the left of BufA/BufB to the end of A/B
11504      */
11505     cntless = 0;
11506     cnteq = 0;
11507     cntgreater = 0;
11508     for(i=i1; i<=i2; i++)
11509     {
11510         v0 = a->ptr.p_double[i];
11511         if( v0<vp )
11512         {
11513 
11514             /*
11515              * LESS
11516              */
11517             k = i1+cntless;
11518             if( i!=k )
11519             {
11520                 a->ptr.p_double[k] = v0;
11521                 b->ptr.p_double[k] = b->ptr.p_double[i];
11522             }
11523             cntless = cntless+1;
11524             continue;
11525         }
11526         if( v0==vp )
11527         {
11528 
11529             /*
11530              * EQUAL
11531              */
11532             k = i2-cnteq;
11533             bufa->ptr.p_double[k] = v0;
11534             bufb->ptr.p_double[k] = b->ptr.p_double[i];
11535             cnteq = cnteq+1;
11536             continue;
11537         }
11538 
11539         /*
11540          * GREATER
11541          */
11542         k = i1+cntgreater;
11543         bufa->ptr.p_double[k] = v0;
11544         bufb->ptr.p_double[k] = b->ptr.p_double[i];
11545         cntgreater = cntgreater+1;
11546     }
11547     for(i=0; i<=cnteq-1; i++)
11548     {
11549         j = i1+cntless+cnteq-1-i;
11550         k = i2+i-(cnteq-1);
11551         a->ptr.p_double[j] = bufa->ptr.p_double[k];
11552         b->ptr.p_double[j] = bufb->ptr.p_double[k];
11553     }
11554     for(i=0; i<=cntgreater-1; i++)
11555     {
11556         j = i1+cntless+cnteq+i;
11557         k = i1+i;
11558         a->ptr.p_double[j] = bufa->ptr.p_double[k];
11559         b->ptr.p_double[j] = bufb->ptr.p_double[k];
11560     }
11561 
11562     /*
11563      * Sort left and right parts of the array (ignoring middle part)
11564      */
11565     tsort_tagsortfastrrec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
11566     tsort_tagsortfastrrec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
11567 }
11568 
11569 
11570 /*************************************************************************
11571 Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
11572 applies same permutations to B.
11573 
11574   -- ALGLIB --
11575      Copyright 06.09.2010 by Bochkanov Sergey
11576 *************************************************************************/
tsort_tagsortfastrec(ae_vector * a,ae_vector * bufa,ae_int_t i1,ae_int_t i2,ae_state * _state)11577 static void tsort_tagsortfastrec(/* Real    */ ae_vector* a,
11578      /* Real    */ ae_vector* bufa,
11579      ae_int_t i1,
11580      ae_int_t i2,
11581      ae_state *_state)
11582 {
11583     ae_int_t cntless;
11584     ae_int_t cnteq;
11585     ae_int_t cntgreater;
11586     ae_int_t i;
11587     ae_int_t j;
11588     ae_int_t k;
11589     double tmpr;
11590     ae_int_t tmpi;
11591     double v0;
11592     double v1;
11593     double v2;
11594     double vp;
11595 
11596 
11597 
11598     /*
11599      * Fast exit
11600      */
11601     if( i2<=i1 )
11602     {
11603         return;
11604     }
11605 
11606     /*
11607      * Non-recursive sort for small arrays
11608      */
11609     if( i2-i1<=16 )
11610     {
11611         for(j=i1+1; j<=i2; j++)
11612         {
11613 
11614             /*
11615              * Search elements [I1..J-1] for place to insert Jth element.
11616              *
11617              * This code stops immediatly if we can leave A[J] at J-th position
11618              * (all elements have same value of A[J] larger than any of them)
11619              */
11620             tmpr = a->ptr.p_double[j];
11621             tmpi = j;
11622             for(k=j-1; k>=i1; k--)
11623             {
11624                 if( a->ptr.p_double[k]<=tmpr )
11625                 {
11626                     break;
11627                 }
11628                 tmpi = k;
11629             }
11630             k = tmpi;
11631 
11632             /*
11633              * Insert Jth element into Kth position
11634              */
11635             if( k!=j )
11636             {
11637                 tmpr = a->ptr.p_double[j];
11638                 for(i=j-1; i>=k; i--)
11639                 {
11640                     a->ptr.p_double[i+1] = a->ptr.p_double[i];
11641                 }
11642                 a->ptr.p_double[k] = tmpr;
11643             }
11644         }
11645         return;
11646     }
11647 
11648     /*
11649      * Quicksort: choose pivot
11650      * Here we assume that I2-I1>=16
11651      */
11652     v0 = a->ptr.p_double[i1];
11653     v1 = a->ptr.p_double[i1+(i2-i1)/2];
11654     v2 = a->ptr.p_double[i2];
11655     if( v0>v1 )
11656     {
11657         tmpr = v1;
11658         v1 = v0;
11659         v0 = tmpr;
11660     }
11661     if( v1>v2 )
11662     {
11663         tmpr = v2;
11664         v2 = v1;
11665         v1 = tmpr;
11666     }
11667     if( v0>v1 )
11668     {
11669         tmpr = v1;
11670         v1 = v0;
11671         v0 = tmpr;
11672     }
11673     vp = v1;
11674 
11675     /*
11676      * now pass through A/B and:
11677      * * move elements that are LESS than VP to the left of A/B
11678      * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
11679      * * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order
11680      * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
11681      * * move elements from the left of BufA/BufB to the end of A/B
11682      */
11683     cntless = 0;
11684     cnteq = 0;
11685     cntgreater = 0;
11686     for(i=i1; i<=i2; i++)
11687     {
11688         v0 = a->ptr.p_double[i];
11689         if( v0<vp )
11690         {
11691 
11692             /*
11693              * LESS
11694              */
11695             k = i1+cntless;
11696             if( i!=k )
11697             {
11698                 a->ptr.p_double[k] = v0;
11699             }
11700             cntless = cntless+1;
11701             continue;
11702         }
11703         if( v0==vp )
11704         {
11705 
11706             /*
11707              * EQUAL
11708              */
11709             k = i2-cnteq;
11710             bufa->ptr.p_double[k] = v0;
11711             cnteq = cnteq+1;
11712             continue;
11713         }
11714 
11715         /*
11716          * GREATER
11717          */
11718         k = i1+cntgreater;
11719         bufa->ptr.p_double[k] = v0;
11720         cntgreater = cntgreater+1;
11721     }
11722     for(i=0; i<=cnteq-1; i++)
11723     {
11724         j = i1+cntless+cnteq-1-i;
11725         k = i2+i-(cnteq-1);
11726         a->ptr.p_double[j] = bufa->ptr.p_double[k];
11727     }
11728     for(i=0; i<=cntgreater-1; i++)
11729     {
11730         j = i1+cntless+cnteq+i;
11731         k = i1+i;
11732         a->ptr.p_double[j] = bufa->ptr.p_double[k];
11733     }
11734 
11735     /*
11736      * Sort left and right parts of the array (ignoring middle part)
11737      */
11738     tsort_tagsortfastrec(a, bufa, i1, i1+cntless-1, _state);
11739     tsort_tagsortfastrec(a, bufa, i1+cntless+cnteq, i2, _state);
11740 }
11741 
11742 
11743 #endif
11744 #if defined(AE_COMPILE_BLAS) || !defined(AE_PARTIAL_BUILD)
11745 
11746 
vectornorm2(ae_vector * x,ae_int_t i1,ae_int_t i2,ae_state * _state)11747 double vectornorm2(/* Real    */ ae_vector* x,
11748      ae_int_t i1,
11749      ae_int_t i2,
11750      ae_state *_state)
11751 {
11752     ae_int_t n;
11753     ae_int_t ix;
11754     double absxi;
11755     double scl;
11756     double ssq;
11757     double result;
11758 
11759 
11760     n = i2-i1+1;
11761     if( n<1 )
11762     {
11763         result = (double)(0);
11764         return result;
11765     }
11766     if( n==1 )
11767     {
11768         result = ae_fabs(x->ptr.p_double[i1], _state);
11769         return result;
11770     }
11771     scl = (double)(0);
11772     ssq = (double)(1);
11773     for(ix=i1; ix<=i2; ix++)
11774     {
11775         if( ae_fp_neq(x->ptr.p_double[ix],(double)(0)) )
11776         {
11777             absxi = ae_fabs(x->ptr.p_double[ix], _state);
11778             if( ae_fp_less(scl,absxi) )
11779             {
11780                 ssq = 1+ssq*ae_sqr(scl/absxi, _state);
11781                 scl = absxi;
11782             }
11783             else
11784             {
11785                 ssq = ssq+ae_sqr(absxi/scl, _state);
11786             }
11787         }
11788     }
11789     result = scl*ae_sqrt(ssq, _state);
11790     return result;
11791 }
11792 
11793 
vectoridxabsmax(ae_vector * x,ae_int_t i1,ae_int_t i2,ae_state * _state)11794 ae_int_t vectoridxabsmax(/* Real    */ ae_vector* x,
11795      ae_int_t i1,
11796      ae_int_t i2,
11797      ae_state *_state)
11798 {
11799     ae_int_t i;
11800     ae_int_t result;
11801 
11802 
11803     result = i1;
11804     for(i=i1+1; i<=i2; i++)
11805     {
11806         if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[result], _state)) )
11807         {
11808             result = i;
11809         }
11810     }
11811     return result;
11812 }
11813 
11814 
columnidxabsmax(ae_matrix * x,ae_int_t i1,ae_int_t i2,ae_int_t j,ae_state * _state)11815 ae_int_t columnidxabsmax(/* Real    */ ae_matrix* x,
11816      ae_int_t i1,
11817      ae_int_t i2,
11818      ae_int_t j,
11819      ae_state *_state)
11820 {
11821     ae_int_t i;
11822     ae_int_t result;
11823 
11824 
11825     result = i1;
11826     for(i=i1+1; i<=i2; i++)
11827     {
11828         if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[result][j], _state)) )
11829         {
11830             result = i;
11831         }
11832     }
11833     return result;
11834 }
11835 
11836 
rowidxabsmax(ae_matrix * x,ae_int_t j1,ae_int_t j2,ae_int_t i,ae_state * _state)11837 ae_int_t rowidxabsmax(/* Real    */ ae_matrix* x,
11838      ae_int_t j1,
11839      ae_int_t j2,
11840      ae_int_t i,
11841      ae_state *_state)
11842 {
11843     ae_int_t j;
11844     ae_int_t result;
11845 
11846 
11847     result = j1;
11848     for(j=j1+1; j<=j2; j++)
11849     {
11850         if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[i][result], _state)) )
11851         {
11852             result = j;
11853         }
11854     }
11855     return result;
11856 }
11857 
11858 
upperhessenberg1norm(ae_matrix * a,ae_int_t i1,ae_int_t i2,ae_int_t j1,ae_int_t j2,ae_vector * work,ae_state * _state)11859 double upperhessenberg1norm(/* Real    */ ae_matrix* a,
11860      ae_int_t i1,
11861      ae_int_t i2,
11862      ae_int_t j1,
11863      ae_int_t j2,
11864      /* Real    */ ae_vector* work,
11865      ae_state *_state)
11866 {
11867     ae_int_t i;
11868     ae_int_t j;
11869     double result;
11870 
11871 
11872     ae_assert(i2-i1==j2-j1, "UpperHessenberg1Norm: I2-I1<>J2-J1!", _state);
11873     for(j=j1; j<=j2; j++)
11874     {
11875         work->ptr.p_double[j] = (double)(0);
11876     }
11877     for(i=i1; i<=i2; i++)
11878     {
11879         for(j=ae_maxint(j1, j1+i-i1-1, _state); j<=j2; j++)
11880         {
11881             work->ptr.p_double[j] = work->ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
11882         }
11883     }
11884     result = (double)(0);
11885     for(j=j1; j<=j2; j++)
11886     {
11887         result = ae_maxreal(result, work->ptr.p_double[j], _state);
11888     }
11889     return result;
11890 }
11891 
11892 
copymatrix(ae_matrix * a,ae_int_t is1,ae_int_t is2,ae_int_t js1,ae_int_t js2,ae_matrix * b,ae_int_t id1,ae_int_t id2,ae_int_t jd1,ae_int_t jd2,ae_state * _state)11893 void copymatrix(/* Real    */ ae_matrix* a,
11894      ae_int_t is1,
11895      ae_int_t is2,
11896      ae_int_t js1,
11897      ae_int_t js2,
11898      /* Real    */ ae_matrix* b,
11899      ae_int_t id1,
11900      ae_int_t id2,
11901      ae_int_t jd1,
11902      ae_int_t jd2,
11903      ae_state *_state)
11904 {
11905     ae_int_t isrc;
11906     ae_int_t idst;
11907 
11908 
11909     if( is1>is2||js1>js2 )
11910     {
11911         return;
11912     }
11913     ae_assert(is2-is1==id2-id1, "CopyMatrix: different sizes!", _state);
11914     ae_assert(js2-js1==jd2-jd1, "CopyMatrix: different sizes!", _state);
11915     for(isrc=is1; isrc<=is2; isrc++)
11916     {
11917         idst = isrc-is1+id1;
11918         ae_v_move(&b->ptr.pp_double[idst][jd1], 1, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(jd1,jd2));
11919     }
11920 }
11921 
11922 
inplacetranspose(ae_matrix * a,ae_int_t i1,ae_int_t i2,ae_int_t j1,ae_int_t j2,ae_vector * work,ae_state * _state)11923 void inplacetranspose(/* Real    */ ae_matrix* a,
11924      ae_int_t i1,
11925      ae_int_t i2,
11926      ae_int_t j1,
11927      ae_int_t j2,
11928      /* Real    */ ae_vector* work,
11929      ae_state *_state)
11930 {
11931     ae_int_t i;
11932     ae_int_t j;
11933     ae_int_t ips;
11934     ae_int_t jps;
11935     ae_int_t l;
11936 
11937 
11938     if( i1>i2||j1>j2 )
11939     {
11940         return;
11941     }
11942     ae_assert(i1-i2==j1-j2, "InplaceTranspose error: incorrect array size!", _state);
11943     for(i=i1; i<=i2-1; i++)
11944     {
11945         j = j1+i-i1;
11946         ips = i+1;
11947         jps = j1+ips-i1;
11948         l = i2-i;
11949         ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ips][j], a->stride, ae_v_len(1,l));
11950         ae_v_move(&a->ptr.pp_double[ips][j], a->stride, &a->ptr.pp_double[i][jps], 1, ae_v_len(ips,i2));
11951         ae_v_move(&a->ptr.pp_double[i][jps], 1, &work->ptr.p_double[1], 1, ae_v_len(jps,j2));
11952     }
11953 }
11954 
11955 
copyandtranspose(ae_matrix * a,ae_int_t is1,ae_int_t is2,ae_int_t js1,ae_int_t js2,ae_matrix * b,ae_int_t id1,ae_int_t id2,ae_int_t jd1,ae_int_t jd2,ae_state * _state)11956 void copyandtranspose(/* Real    */ ae_matrix* a,
11957      ae_int_t is1,
11958      ae_int_t is2,
11959      ae_int_t js1,
11960      ae_int_t js2,
11961      /* Real    */ ae_matrix* b,
11962      ae_int_t id1,
11963      ae_int_t id2,
11964      ae_int_t jd1,
11965      ae_int_t jd2,
11966      ae_state *_state)
11967 {
11968     ae_int_t isrc;
11969     ae_int_t jdst;
11970 
11971 
11972     if( is1>is2||js1>js2 )
11973     {
11974         return;
11975     }
11976     ae_assert(is2-is1==jd2-jd1, "CopyAndTranspose: different sizes!", _state);
11977     ae_assert(js2-js1==id2-id1, "CopyAndTranspose: different sizes!", _state);
11978     for(isrc=is1; isrc<=is2; isrc++)
11979     {
11980         jdst = isrc-is1+jd1;
11981         ae_v_move(&b->ptr.pp_double[id1][jdst], b->stride, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(id1,id2));
11982     }
11983 }
11984 
11985 
matrixvectormultiply(ae_matrix * a,ae_int_t i1,ae_int_t i2,ae_int_t j1,ae_int_t j2,ae_bool trans,ae_vector * x,ae_int_t ix1,ae_int_t ix2,double alpha,ae_vector * y,ae_int_t iy1,ae_int_t iy2,double beta,ae_state * _state)11986 void matrixvectormultiply(/* Real    */ ae_matrix* a,
11987      ae_int_t i1,
11988      ae_int_t i2,
11989      ae_int_t j1,
11990      ae_int_t j2,
11991      ae_bool trans,
11992      /* Real    */ ae_vector* x,
11993      ae_int_t ix1,
11994      ae_int_t ix2,
11995      double alpha,
11996      /* Real    */ ae_vector* y,
11997      ae_int_t iy1,
11998      ae_int_t iy2,
11999      double beta,
12000      ae_state *_state)
12001 {
12002     ae_int_t i;
12003     double v;
12004 
12005 
12006     if( !trans )
12007     {
12008 
12009         /*
12010          * y := alpha*A*x + beta*y;
12011          */
12012         if( i1>i2||j1>j2 )
12013         {
12014             return;
12015         }
12016         ae_assert(j2-j1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state);
12017         ae_assert(i2-i1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state);
12018 
12019         /*
12020          * beta*y
12021          */
12022         if( ae_fp_eq(beta,(double)(0)) )
12023         {
12024             for(i=iy1; i<=iy2; i++)
12025             {
12026                 y->ptr.p_double[i] = (double)(0);
12027             }
12028         }
12029         else
12030         {
12031             ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
12032         }
12033 
12034         /*
12035          * alpha*A*x
12036          */
12037         for(i=i1; i<=i2; i++)
12038         {
12039             v = ae_v_dotproduct(&a->ptr.pp_double[i][j1], 1, &x->ptr.p_double[ix1], 1, ae_v_len(j1,j2));
12040             y->ptr.p_double[iy1+i-i1] = y->ptr.p_double[iy1+i-i1]+alpha*v;
12041         }
12042     }
12043     else
12044     {
12045 
12046         /*
12047          * y := alpha*A'*x + beta*y;
12048          */
12049         if( i1>i2||j1>j2 )
12050         {
12051             return;
12052         }
12053         ae_assert(i2-i1==ix2-ix1, "MatrixVectorMultiply: A and X dont match!", _state);
12054         ae_assert(j2-j1==iy2-iy1, "MatrixVectorMultiply: A and Y dont match!", _state);
12055 
12056         /*
12057          * beta*y
12058          */
12059         if( ae_fp_eq(beta,(double)(0)) )
12060         {
12061             for(i=iy1; i<=iy2; i++)
12062             {
12063                 y->ptr.p_double[i] = (double)(0);
12064             }
12065         }
12066         else
12067         {
12068             ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
12069         }
12070 
12071         /*
12072          * alpha*A'*x
12073          */
12074         for(i=i1; i<=i2; i++)
12075         {
12076             v = alpha*x->ptr.p_double[ix1+i-i1];
12077             ae_v_addd(&y->ptr.p_double[iy1], 1, &a->ptr.pp_double[i][j1], 1, ae_v_len(iy1,iy2), v);
12078         }
12079     }
12080 }
12081 
12082 
pythag2(double x,double y,ae_state * _state)12083 double pythag2(double x, double y, ae_state *_state)
12084 {
12085     double w;
12086     double xabs;
12087     double yabs;
12088     double z;
12089     double result;
12090 
12091 
12092     xabs = ae_fabs(x, _state);
12093     yabs = ae_fabs(y, _state);
12094     w = ae_maxreal(xabs, yabs, _state);
12095     z = ae_minreal(xabs, yabs, _state);
12096     if( ae_fp_eq(z,(double)(0)) )
12097     {
12098         result = w;
12099     }
12100     else
12101     {
12102         result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
12103     }
12104     return result;
12105 }
12106 
12107 
matrixmatrixmultiply(ae_matrix * a,ae_int_t ai1,ae_int_t ai2,ae_int_t aj1,ae_int_t aj2,ae_bool transa,ae_matrix * b,ae_int_t bi1,ae_int_t bi2,ae_int_t bj1,ae_int_t bj2,ae_bool transb,double alpha,ae_matrix * c,ae_int_t ci1,ae_int_t ci2,ae_int_t cj1,ae_int_t cj2,double beta,ae_vector * work,ae_state * _state)12108 void matrixmatrixmultiply(/* Real    */ ae_matrix* a,
12109      ae_int_t ai1,
12110      ae_int_t ai2,
12111      ae_int_t aj1,
12112      ae_int_t aj2,
12113      ae_bool transa,
12114      /* Real    */ ae_matrix* b,
12115      ae_int_t bi1,
12116      ae_int_t bi2,
12117      ae_int_t bj1,
12118      ae_int_t bj2,
12119      ae_bool transb,
12120      double alpha,
12121      /* Real    */ ae_matrix* c,
12122      ae_int_t ci1,
12123      ae_int_t ci2,
12124      ae_int_t cj1,
12125      ae_int_t cj2,
12126      double beta,
12127      /* Real    */ ae_vector* work,
12128      ae_state *_state)
12129 {
12130     ae_int_t arows;
12131     ae_int_t acols;
12132     ae_int_t brows;
12133     ae_int_t bcols;
12134     ae_int_t crows;
12135     ae_int_t i;
12136     ae_int_t j;
12137     ae_int_t k;
12138     ae_int_t l;
12139     ae_int_t r;
12140     double v;
12141 
12142 
12143 
12144     /*
12145      * Setup
12146      */
12147     if( !transa )
12148     {
12149         arows = ai2-ai1+1;
12150         acols = aj2-aj1+1;
12151     }
12152     else
12153     {
12154         arows = aj2-aj1+1;
12155         acols = ai2-ai1+1;
12156     }
12157     if( !transb )
12158     {
12159         brows = bi2-bi1+1;
12160         bcols = bj2-bj1+1;
12161     }
12162     else
12163     {
12164         brows = bj2-bj1+1;
12165         bcols = bi2-bi1+1;
12166     }
12167     ae_assert(acols==brows, "MatrixMatrixMultiply: incorrect matrix sizes!", _state);
12168     if( ((arows<=0||acols<=0)||brows<=0)||bcols<=0 )
12169     {
12170         return;
12171     }
12172     crows = arows;
12173 
12174     /*
12175      * Test WORK
12176      */
12177     i = ae_maxint(arows, acols, _state);
12178     i = ae_maxint(brows, i, _state);
12179     i = ae_maxint(i, bcols, _state);
12180     work->ptr.p_double[1] = (double)(0);
12181     work->ptr.p_double[i] = (double)(0);
12182 
12183     /*
12184      * Prepare C
12185      */
12186     if( ae_fp_eq(beta,(double)(0)) )
12187     {
12188         for(i=ci1; i<=ci2; i++)
12189         {
12190             for(j=cj1; j<=cj2; j++)
12191             {
12192                 c->ptr.pp_double[i][j] = (double)(0);
12193             }
12194         }
12195     }
12196     else
12197     {
12198         for(i=ci1; i<=ci2; i++)
12199         {
12200             ae_v_muld(&c->ptr.pp_double[i][cj1], 1, ae_v_len(cj1,cj2), beta);
12201         }
12202     }
12203 
12204     /*
12205      * A*B
12206      */
12207     if( !transa&&!transb )
12208     {
12209         for(l=ai1; l<=ai2; l++)
12210         {
12211             for(r=bi1; r<=bi2; r++)
12212             {
12213                 v = alpha*a->ptr.pp_double[l][aj1+r-bi1];
12214                 k = ci1+l-ai1;
12215                 ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
12216             }
12217         }
12218         return;
12219     }
12220 
12221     /*
12222      * A*B'
12223      */
12224     if( !transa&&transb )
12225     {
12226         if( arows*acols<brows*bcols )
12227         {
12228             for(r=bi1; r<=bi2; r++)
12229             {
12230                 for(l=ai1; l<=ai2; l++)
12231                 {
12232                     v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
12233                     c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
12234                 }
12235             }
12236             return;
12237         }
12238         else
12239         {
12240             for(l=ai1; l<=ai2; l++)
12241             {
12242                 for(r=bi1; r<=bi2; r++)
12243                 {
12244                     v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
12245                     c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
12246                 }
12247             }
12248             return;
12249         }
12250     }
12251 
12252     /*
12253      * A'*B
12254      */
12255     if( transa&&!transb )
12256     {
12257         for(l=aj1; l<=aj2; l++)
12258         {
12259             for(r=bi1; r<=bi2; r++)
12260             {
12261                 v = alpha*a->ptr.pp_double[ai1+r-bi1][l];
12262                 k = ci1+l-aj1;
12263                 ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
12264             }
12265         }
12266         return;
12267     }
12268 
12269     /*
12270      * A'*B'
12271      */
12272     if( transa&&transb )
12273     {
12274         if( arows*acols<brows*bcols )
12275         {
12276             for(r=bi1; r<=bi2; r++)
12277             {
12278                 k = cj1+r-bi1;
12279                 for(i=1; i<=crows; i++)
12280                 {
12281                     work->ptr.p_double[i] = 0.0;
12282                 }
12283                 for(l=ai1; l<=ai2; l++)
12284                 {
12285                     v = alpha*b->ptr.pp_double[r][bj1+l-ai1];
12286                     ae_v_addd(&work->ptr.p_double[1], 1, &a->ptr.pp_double[l][aj1], 1, ae_v_len(1,crows), v);
12287                 }
12288                 ae_v_add(&c->ptr.pp_double[ci1][k], c->stride, &work->ptr.p_double[1], 1, ae_v_len(ci1,ci2));
12289             }
12290             return;
12291         }
12292         else
12293         {
12294             for(l=aj1; l<=aj2; l++)
12295             {
12296                 k = ai2-ai1+1;
12297                 ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ai1][l], a->stride, ae_v_len(1,k));
12298                 for(r=bi1; r<=bi2; r++)
12299                 {
12300                     v = ae_v_dotproduct(&work->ptr.p_double[1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(1,k));
12301                     c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1]+alpha*v;
12302                 }
12303             }
12304             return;
12305         }
12306     }
12307 }
12308 
12309 
12310 #endif
12311 #if defined(AE_COMPILE_ROTATIONS) || !defined(AE_PARTIAL_BUILD)
12312 
12313 
12314 /*************************************************************************
12315 Application of a sequence of  elementary rotations to a matrix
12316 
12317 The algorithm pre-multiplies the matrix by a sequence of rotation
12318 transformations which is given by arrays C and S. Depending on the value
12319 of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
12320 rows are rotated, or the rows N and N-1, N-2 and N-3 and so on, are rotated.
12321 
12322 Not the whole matrix but only a part of it is transformed (rows from M1 to
12323 M2, columns from N1 to N2). Only the elements of this submatrix are changed.
12324 
12325 Input parameters:
12326     IsForward   -   the sequence of the rotation application.
12327     M1,M2       -   the range of rows to be transformed.
12328     N1, N2      -   the range of columns to be transformed.
12329     C,S         -   transformation coefficients.
12330                     Array whose index ranges within [1..M2-M1].
12331     A           -   processed matrix.
12332     WORK        -   working array whose index ranges within [N1..N2].
12333 
12334 Output parameters:
12335     A           -   transformed matrix.
12336 
12337 Utility subroutine.
12338 *************************************************************************/
applyrotationsfromtheleft(ae_bool isforward,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * c,ae_vector * s,ae_matrix * a,ae_vector * work,ae_state * _state)12339 void applyrotationsfromtheleft(ae_bool isforward,
12340      ae_int_t m1,
12341      ae_int_t m2,
12342      ae_int_t n1,
12343      ae_int_t n2,
12344      /* Real    */ ae_vector* c,
12345      /* Real    */ ae_vector* s,
12346      /* Real    */ ae_matrix* a,
12347      /* Real    */ ae_vector* work,
12348      ae_state *_state)
12349 {
12350     ae_int_t j;
12351     ae_int_t jp1;
12352     double ctemp;
12353     double stemp;
12354     double temp;
12355 
12356 
12357     if( m1>m2||n1>n2 )
12358     {
12359         return;
12360     }
12361 
12362     /*
12363      * Form  P * A
12364      */
12365     if( isforward )
12366     {
12367         if( n1!=n2 )
12368         {
12369 
12370             /*
12371              * Common case: N1<>N2
12372              */
12373             for(j=m1; j<=m2-1; j++)
12374             {
12375                 ctemp = c->ptr.p_double[j-m1+1];
12376                 stemp = s->ptr.p_double[j-m1+1];
12377                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12378                 {
12379                     jp1 = j+1;
12380                     ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
12381                     ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
12382                     ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
12383                     ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
12384                     ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
12385                 }
12386             }
12387         }
12388         else
12389         {
12390 
12391             /*
12392              * Special case: N1=N2
12393              */
12394             for(j=m1; j<=m2-1; j++)
12395             {
12396                 ctemp = c->ptr.p_double[j-m1+1];
12397                 stemp = s->ptr.p_double[j-m1+1];
12398                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12399                 {
12400                     temp = a->ptr.pp_double[j+1][n1];
12401                     a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
12402                     a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
12403                 }
12404             }
12405         }
12406     }
12407     else
12408     {
12409         if( n1!=n2 )
12410         {
12411 
12412             /*
12413              * Common case: N1<>N2
12414              */
12415             for(j=m2-1; j>=m1; j--)
12416             {
12417                 ctemp = c->ptr.p_double[j-m1+1];
12418                 stemp = s->ptr.p_double[j-m1+1];
12419                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12420                 {
12421                     jp1 = j+1;
12422                     ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
12423                     ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
12424                     ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
12425                     ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
12426                     ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
12427                 }
12428             }
12429         }
12430         else
12431         {
12432 
12433             /*
12434              * Special case: N1=N2
12435              */
12436             for(j=m2-1; j>=m1; j--)
12437             {
12438                 ctemp = c->ptr.p_double[j-m1+1];
12439                 stemp = s->ptr.p_double[j-m1+1];
12440                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12441                 {
12442                     temp = a->ptr.pp_double[j+1][n1];
12443                     a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
12444                     a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
12445                 }
12446             }
12447         }
12448     }
12449 }
12450 
12451 
12452 /*************************************************************************
12453 Application of a sequence of  elementary rotations to a matrix
12454 
12455 The algorithm post-multiplies the matrix by a sequence of rotation
12456 transformations which is given by arrays C and S. Depending on the value
12457 of the IsForward parameter either 1 and 2, 3 and 4 and so on (if IsForward=true)
12458 rows are rotated, or the rows N and N-1, N-2 and N-3 and so on are rotated.
12459 
12460 Not the whole matrix but only a part of it is transformed (rows from M1
12461 to M2, columns from N1 to N2). Only the elements of this submatrix are changed.
12462 
12463 Input parameters:
12464     IsForward   -   the sequence of the rotation application.
12465     M1,M2       -   the range of rows to be transformed.
12466     N1, N2      -   the range of columns to be transformed.
12467     C,S         -   transformation coefficients.
12468                     Array whose index ranges within [1..N2-N1].
12469     A           -   processed matrix.
12470     WORK        -   working array whose index ranges within [M1..M2].
12471 
12472 Output parameters:
12473     A           -   transformed matrix.
12474 
12475 Utility subroutine.
12476 *************************************************************************/
applyrotationsfromtheright(ae_bool isforward,ae_int_t m1,ae_int_t m2,ae_int_t n1,ae_int_t n2,ae_vector * c,ae_vector * s,ae_matrix * a,ae_vector * work,ae_state * _state)12477 void applyrotationsfromtheright(ae_bool isforward,
12478      ae_int_t m1,
12479      ae_int_t m2,
12480      ae_int_t n1,
12481      ae_int_t n2,
12482      /* Real    */ ae_vector* c,
12483      /* Real    */ ae_vector* s,
12484      /* Real    */ ae_matrix* a,
12485      /* Real    */ ae_vector* work,
12486      ae_state *_state)
12487 {
12488     ae_int_t j;
12489     ae_int_t jp1;
12490     double ctemp;
12491     double stemp;
12492     double temp;
12493 
12494 
12495 
12496     /*
12497      * Form A * P'
12498      */
12499     if( isforward )
12500     {
12501         if( m1!=m2 )
12502         {
12503 
12504             /*
12505              * Common case: M1<>M2
12506              */
12507             for(j=n1; j<=n2-1; j++)
12508             {
12509                 ctemp = c->ptr.p_double[j-n1+1];
12510                 stemp = s->ptr.p_double[j-n1+1];
12511                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12512                 {
12513                     jp1 = j+1;
12514                     ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
12515                     ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
12516                     ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
12517                     ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
12518                     ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
12519                 }
12520             }
12521         }
12522         else
12523         {
12524 
12525             /*
12526              * Special case: M1=M2
12527              */
12528             for(j=n1; j<=n2-1; j++)
12529             {
12530                 ctemp = c->ptr.p_double[j-n1+1];
12531                 stemp = s->ptr.p_double[j-n1+1];
12532                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12533                 {
12534                     temp = a->ptr.pp_double[m1][j+1];
12535                     a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
12536                     a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
12537                 }
12538             }
12539         }
12540     }
12541     else
12542     {
12543         if( m1!=m2 )
12544         {
12545 
12546             /*
12547              * Common case: M1<>M2
12548              */
12549             for(j=n2-1; j>=n1; j--)
12550             {
12551                 ctemp = c->ptr.p_double[j-n1+1];
12552                 stemp = s->ptr.p_double[j-n1+1];
12553                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12554                 {
12555                     jp1 = j+1;
12556                     ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
12557                     ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
12558                     ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
12559                     ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
12560                     ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
12561                 }
12562             }
12563         }
12564         else
12565         {
12566 
12567             /*
12568              * Special case: M1=M2
12569              */
12570             for(j=n2-1; j>=n1; j--)
12571             {
12572                 ctemp = c->ptr.p_double[j-n1+1];
12573                 stemp = s->ptr.p_double[j-n1+1];
12574                 if( ae_fp_neq(ctemp,(double)(1))||ae_fp_neq(stemp,(double)(0)) )
12575                 {
12576                     temp = a->ptr.pp_double[m1][j+1];
12577                     a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
12578                     a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
12579                 }
12580             }
12581         }
12582     }
12583 }
12584 
12585 
12586 /*************************************************************************
12587 The subroutine generates the elementary rotation, so that:
12588 
12589 [  CS  SN  ]  .  [ F ]  =  [ R ]
12590 [ -SN  CS  ]     [ G ]     [ 0 ]
12591 
12592 CS**2 + SN**2 = 1
12593 *************************************************************************/
generaterotation(double f,double g,double * cs,double * sn,double * r,ae_state * _state)12594 void generaterotation(double f,
12595      double g,
12596      double* cs,
12597      double* sn,
12598      double* r,
12599      ae_state *_state)
12600 {
12601     double f1;
12602     double g1;
12603 
12604     *cs = 0;
12605     *sn = 0;
12606     *r = 0;
12607 
12608     if( ae_fp_eq(g,(double)(0)) )
12609     {
12610         *cs = (double)(1);
12611         *sn = (double)(0);
12612         *r = f;
12613     }
12614     else
12615     {
12616         if( ae_fp_eq(f,(double)(0)) )
12617         {
12618             *cs = (double)(0);
12619             *sn = (double)(1);
12620             *r = g;
12621         }
12622         else
12623         {
12624             f1 = f;
12625             g1 = g;
12626             if( ae_fp_greater(ae_fabs(f1, _state),ae_fabs(g1, _state)) )
12627             {
12628                 *r = ae_fabs(f1, _state)*ae_sqrt(1+ae_sqr(g1/f1, _state), _state);
12629             }
12630             else
12631             {
12632                 *r = ae_fabs(g1, _state)*ae_sqrt(1+ae_sqr(f1/g1, _state), _state);
12633             }
12634             *cs = f1/(*r);
12635             *sn = g1/(*r);
12636             if( ae_fp_greater(ae_fabs(f, _state),ae_fabs(g, _state))&&ae_fp_less(*cs,(double)(0)) )
12637             {
12638                 *cs = -*cs;
12639                 *sn = -*sn;
12640                 *r = -*r;
12641             }
12642         }
12643     }
12644 }
12645 
12646 
12647 #endif
12648 #if defined(AE_COMPILE_BASICSTATOPS) || !defined(AE_PARTIAL_BUILD)
12649 
12650 
12651 /*************************************************************************
12652 Internal tied ranking subroutine.
12653 
12654 INPUT PARAMETERS:
12655     X       -   array to rank
12656     N       -   array size
12657     IsCentered- whether ranks are centered or not:
12658                 * True      -   ranks are centered in such way that  their
12659                                 sum is zero
12660                 * False     -   ranks are not centered
12661     Buf     -   temporary buffers
12662 
12663 NOTE: when IsCentered is True and all X[] are equal, this  function  fills
12664       X by zeros (exact zeros are used, not sum which is only approximately
12665       equal to zero).
12666 *************************************************************************/
rankx(ae_vector * x,ae_int_t n,ae_bool iscentered,apbuffers * buf,ae_state * _state)12667 void rankx(/* Real    */ ae_vector* x,
12668      ae_int_t n,
12669      ae_bool iscentered,
12670      apbuffers* buf,
12671      ae_state *_state)
12672 {
12673     ae_int_t i;
12674     ae_int_t j;
12675     ae_int_t k;
12676     double tmp;
12677     double voffs;
12678 
12679 
12680 
12681     /*
12682      * Prepare
12683      */
12684     if( n<1 )
12685     {
12686         return;
12687     }
12688     if( n==1 )
12689     {
12690         x->ptr.p_double[0] = (double)(0);
12691         return;
12692     }
12693     if( buf->ra1.cnt<n )
12694     {
12695         ae_vector_set_length(&buf->ra1, n, _state);
12696     }
12697     if( buf->ia1.cnt<n )
12698     {
12699         ae_vector_set_length(&buf->ia1, n, _state);
12700     }
12701     for(i=0; i<=n-1; i++)
12702     {
12703         buf->ra1.ptr.p_double[i] = x->ptr.p_double[i];
12704         buf->ia1.ptr.p_int[i] = i;
12705     }
12706     tagsortfasti(&buf->ra1, &buf->ia1, &buf->ra2, &buf->ia2, n, _state);
12707 
12708     /*
12709      * Special test for all values being equal
12710      */
12711     if( ae_fp_eq(buf->ra1.ptr.p_double[0],buf->ra1.ptr.p_double[n-1]) )
12712     {
12713         if( iscentered )
12714         {
12715             tmp = 0.0;
12716         }
12717         else
12718         {
12719             tmp = (double)(n-1)/(double)2;
12720         }
12721         for(i=0; i<=n-1; i++)
12722         {
12723             x->ptr.p_double[i] = tmp;
12724         }
12725         return;
12726     }
12727 
12728     /*
12729      * compute tied ranks
12730      */
12731     i = 0;
12732     while(i<=n-1)
12733     {
12734         j = i+1;
12735         while(j<=n-1)
12736         {
12737             if( ae_fp_neq(buf->ra1.ptr.p_double[j],buf->ra1.ptr.p_double[i]) )
12738             {
12739                 break;
12740             }
12741             j = j+1;
12742         }
12743         for(k=i; k<=j-1; k++)
12744         {
12745             buf->ra1.ptr.p_double[k] = (double)(i+j-1)/(double)2;
12746         }
12747         i = j;
12748     }
12749 
12750     /*
12751      * back to x
12752      */
12753     if( iscentered )
12754     {
12755         voffs = (double)(n-1)/(double)2;
12756     }
12757     else
12758     {
12759         voffs = 0.0;
12760     }
12761     for(i=0; i<=n-1; i++)
12762     {
12763         x->ptr.p_double[buf->ia1.ptr.p_int[i]] = buf->ra1.ptr.p_double[i]-voffs;
12764     }
12765 }
12766 
12767 
12768 /*************************************************************************
12769 Internal untied ranking subroutine.
12770 
12771 INPUT PARAMETERS:
12772     X       -   array to rank
12773     N       -   array size
12774     Buf     -   temporary buffers
12775 
12776 Returns untied ranks (in case of a tie ranks are resolved arbitrarily).
12777 *************************************************************************/
rankxuntied(ae_vector * x,ae_int_t n,apbuffers * buf,ae_state * _state)12778 void rankxuntied(/* Real    */ ae_vector* x,
12779      ae_int_t n,
12780      apbuffers* buf,
12781      ae_state *_state)
12782 {
12783     ae_int_t i;
12784 
12785 
12786 
12787     /*
12788      * Prepare
12789      */
12790     if( n<1 )
12791     {
12792         return;
12793     }
12794     if( n==1 )
12795     {
12796         x->ptr.p_double[0] = (double)(0);
12797         return;
12798     }
12799     if( buf->ra1.cnt<n )
12800     {
12801         ae_vector_set_length(&buf->ra1, n, _state);
12802     }
12803     if( buf->ia1.cnt<n )
12804     {
12805         ae_vector_set_length(&buf->ia1, n, _state);
12806     }
12807     for(i=0; i<=n-1; i++)
12808     {
12809         buf->ra1.ptr.p_double[i] = x->ptr.p_double[i];
12810         buf->ia1.ptr.p_int[i] = i;
12811     }
12812     tagsortfasti(&buf->ra1, &buf->ia1, &buf->ra2, &buf->ia2, n, _state);
12813     for(i=0; i<=n-1; i++)
12814     {
12815         x->ptr.p_double[buf->ia1.ptr.p_int[i]] = (double)(i);
12816     }
12817 }
12818 
12819 
12820 #endif
12821 #if defined(AE_COMPILE_TRLINSOLVE) || !defined(AE_PARTIAL_BUILD)
12822 
12823 
12824 /*************************************************************************
12825 Utility subroutine performing the "safe" solution of system of linear
12826 equations with triangular coefficient matrices.
12827 
12828 The subroutine uses scaling and solves the scaled system A*x=s*b (where  s
12829 is  a  scalar  value)  instead  of  A*x=b,  choosing  s  so  that x can be
12830 represented by a floating-point number. The closer the system  gets  to  a
12831 singular, the less s is. If the system is singular, s=0 and x contains the
12832 non-trivial solution of equation A*x=0.
12833 
12834 The feature of an algorithm is that it could not cause an  overflow  or  a
12835 division by zero regardless of the matrix used as the input.
12836 
12837 The algorithm can solve systems of equations with  upper/lower  triangular
12838 matrices,  with/without unit diagonal, and systems of type A*x=b or A'*x=b
12839 (where A' is a transposed matrix A).
12840 
12841 Input parameters:
12842     A       -   system matrix. Array whose indexes range within [0..N-1, 0..N-1].
12843     N       -   size of matrix A.
12844     X       -   right-hand member of a system.
12845                 Array whose index ranges within [0..N-1].
12846     IsUpper -   matrix type. If it is True, the system matrix is the upper
12847                 triangular and is located in  the  corresponding  part  of
12848                 matrix A.
12849     Trans   -   problem type. If it is True, the problem to be  solved  is
12850                 A'*x=b, otherwise it is A*x=b.
12851     Isunit  -   matrix type. If it is True, the system matrix has  a  unit
12852                 diagonal (the elements on the main diagonal are  not  used
12853                 in the calculation process), otherwise the matrix is considered
12854                 to be a general triangular matrix.
12855 
12856 Output parameters:
12857     X       -   solution. Array whose index ranges within [0..N-1].
12858     S       -   scaling factor.
12859 
12860   -- LAPACK auxiliary routine (version 3.0) --
12861      Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
12862      Courant Institute, Argonne National Lab, and Rice University
12863      June 30, 1992
12864 *************************************************************************/
rmatrixtrsafesolve(ae_matrix * a,ae_int_t n,ae_vector * x,double * s,ae_bool isupper,ae_bool istrans,ae_bool isunit,ae_state * _state)12865 void rmatrixtrsafesolve(/* Real    */ ae_matrix* a,
12866      ae_int_t n,
12867      /* Real    */ ae_vector* x,
12868      double* s,
12869      ae_bool isupper,
12870      ae_bool istrans,
12871      ae_bool isunit,
12872      ae_state *_state)
12873 {
12874     ae_frame _frame_block;
12875     ae_bool normin;
12876     ae_vector cnorm;
12877     ae_matrix a1;
12878     ae_vector x1;
12879     ae_int_t i;
12880 
12881     ae_frame_make(_state, &_frame_block);
12882     memset(&cnorm, 0, sizeof(cnorm));
12883     memset(&a1, 0, sizeof(a1));
12884     memset(&x1, 0, sizeof(x1));
12885     *s = 0;
12886     ae_vector_init(&cnorm, 0, DT_REAL, _state, ae_true);
12887     ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
12888     ae_vector_init(&x1, 0, DT_REAL, _state, ae_true);
12889 
12890 
12891     /*
12892      * From 0-based to 1-based
12893      */
12894     normin = ae_false;
12895     ae_matrix_set_length(&a1, n+1, n+1, _state);
12896     ae_vector_set_length(&x1, n+1, _state);
12897     for(i=1; i<=n; i++)
12898     {
12899         ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
12900     }
12901     ae_v_move(&x1.ptr.p_double[1], 1, &x->ptr.p_double[0], 1, ae_v_len(1,n));
12902 
12903     /*
12904      * Solve 1-based
12905      */
12906     safesolvetriangular(&a1, n, &x1, s, isupper, istrans, isunit, normin, &cnorm, _state);
12907 
12908     /*
12909      * From 1-based to 0-based
12910      */
12911     ae_v_move(&x->ptr.p_double[0], 1, &x1.ptr.p_double[1], 1, ae_v_len(0,n-1));
12912     ae_frame_leave(_state);
12913 }
12914 
12915 
12916 /*************************************************************************
12917 Obsolete 1-based subroutine.
12918 See RMatrixTRSafeSolve for 0-based replacement.
12919 *************************************************************************/
safesolvetriangular(ae_matrix * a,ae_int_t n,ae_vector * x,double * s,ae_bool isupper,ae_bool istrans,ae_bool isunit,ae_bool normin,ae_vector * cnorm,ae_state * _state)12920 void safesolvetriangular(/* Real    */ ae_matrix* a,
12921      ae_int_t n,
12922      /* Real    */ ae_vector* x,
12923      double* s,
12924      ae_bool isupper,
12925      ae_bool istrans,
12926      ae_bool isunit,
12927      ae_bool normin,
12928      /* Real    */ ae_vector* cnorm,
12929      ae_state *_state)
12930 {
12931     ae_int_t i;
12932     ae_int_t imax;
12933     ae_int_t j;
12934     ae_int_t jfirst;
12935     ae_int_t jinc;
12936     ae_int_t jlast;
12937     ae_int_t jm1;
12938     ae_int_t jp1;
12939     ae_int_t ip1;
12940     ae_int_t im1;
12941     ae_int_t k;
12942     ae_int_t flg;
12943     double v;
12944     double vd;
12945     double bignum;
12946     double grow;
12947     double rec;
12948     double smlnum;
12949     double sumj;
12950     double tjj;
12951     double tjjs;
12952     double tmax;
12953     double tscal;
12954     double uscal;
12955     double xbnd;
12956     double xj;
12957     double xmax;
12958     ae_bool notran;
12959     ae_bool upper;
12960     ae_bool nounit;
12961 
12962     *s = 0;
12963 
12964     upper = isupper;
12965     notran = !istrans;
12966     nounit = !isunit;
12967 
12968     /*
12969      * these initializers are not really necessary,
12970      * but without them compiler complains about uninitialized locals
12971      */
12972     tjjs = (double)(0);
12973 
12974     /*
12975      * Quick return if possible
12976      */
12977     if( n==0 )
12978     {
12979         return;
12980     }
12981 
12982     /*
12983      * Determine machine dependent parameters to control overflow.
12984      */
12985     smlnum = ae_minrealnumber/(ae_machineepsilon*2);
12986     bignum = 1/smlnum;
12987     *s = (double)(1);
12988     if( !normin )
12989     {
12990         ae_vector_set_length(cnorm, n+1, _state);
12991 
12992         /*
12993          * Compute the 1-norm of each column, not including the diagonal.
12994          */
12995         if( upper )
12996         {
12997 
12998             /*
12999              * A is upper triangular.
13000              */
13001             for(j=1; j<=n; j++)
13002             {
13003                 v = (double)(0);
13004                 for(k=1; k<=j-1; k++)
13005                 {
13006                     v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
13007                 }
13008                 cnorm->ptr.p_double[j] = v;
13009             }
13010         }
13011         else
13012         {
13013 
13014             /*
13015              * A is lower triangular.
13016              */
13017             for(j=1; j<=n-1; j++)
13018             {
13019                 v = (double)(0);
13020                 for(k=j+1; k<=n; k++)
13021                 {
13022                     v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
13023                 }
13024                 cnorm->ptr.p_double[j] = v;
13025             }
13026             cnorm->ptr.p_double[n] = (double)(0);
13027         }
13028     }
13029 
13030     /*
13031      * Scale the column norms by TSCAL if the maximum element in CNORM is
13032      * greater than BIGNUM.
13033      */
13034     imax = 1;
13035     for(k=2; k<=n; k++)
13036     {
13037         if( ae_fp_greater(cnorm->ptr.p_double[k],cnorm->ptr.p_double[imax]) )
13038         {
13039             imax = k;
13040         }
13041     }
13042     tmax = cnorm->ptr.p_double[imax];
13043     if( ae_fp_less_eq(tmax,bignum) )
13044     {
13045         tscal = (double)(1);
13046     }
13047     else
13048     {
13049         tscal = 1/(smlnum*tmax);
13050         ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), tscal);
13051     }
13052 
13053     /*
13054      * Compute a bound on the computed solution vector to see if the
13055      * Level 2 BLAS routine DTRSV can be used.
13056      */
13057     j = 1;
13058     for(k=2; k<=n; k++)
13059     {
13060         if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[j], _state)) )
13061         {
13062             j = k;
13063         }
13064     }
13065     xmax = ae_fabs(x->ptr.p_double[j], _state);
13066     xbnd = xmax;
13067     if( notran )
13068     {
13069 
13070         /*
13071          * Compute the growth in A * x = b.
13072          */
13073         if( upper )
13074         {
13075             jfirst = n;
13076             jlast = 1;
13077             jinc = -1;
13078         }
13079         else
13080         {
13081             jfirst = 1;
13082             jlast = n;
13083             jinc = 1;
13084         }
13085         if( ae_fp_neq(tscal,(double)(1)) )
13086         {
13087             grow = (double)(0);
13088         }
13089         else
13090         {
13091             if( nounit )
13092             {
13093 
13094                 /*
13095                  * A is non-unit triangular.
13096                  *
13097                  * Compute GROW = 1/G(j) and XBND = 1/M(j).
13098                  * Initially, G(0) = max{x(i), i=1,...,n}.
13099                  */
13100                 grow = 1/ae_maxreal(xbnd, smlnum, _state);
13101                 xbnd = grow;
13102                 j = jfirst;
13103                 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13104                 {
13105 
13106                     /*
13107                      * Exit the loop if the growth factor is too small.
13108                      */
13109                     if( ae_fp_less_eq(grow,smlnum) )
13110                     {
13111                         break;
13112                     }
13113 
13114                     /*
13115                      * M(j) = G(j-1) / abs(A(j,j))
13116                      */
13117                     tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
13118                     xbnd = ae_minreal(xbnd, ae_minreal((double)(1), tjj, _state)*grow, _state);
13119                     if( ae_fp_greater_eq(tjj+cnorm->ptr.p_double[j],smlnum) )
13120                     {
13121 
13122                         /*
13123                          * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
13124                          */
13125                         grow = grow*(tjj/(tjj+cnorm->ptr.p_double[j]));
13126                     }
13127                     else
13128                     {
13129 
13130                         /*
13131                          * G(j) could overflow, set GROW to 0.
13132                          */
13133                         grow = (double)(0);
13134                     }
13135                     if( j==jlast )
13136                     {
13137                         grow = xbnd;
13138                     }
13139                     j = j+jinc;
13140                 }
13141             }
13142             else
13143             {
13144 
13145                 /*
13146                  * A is unit triangular.
13147                  *
13148                  * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
13149                  */
13150                 grow = ae_minreal((double)(1), 1/ae_maxreal(xbnd, smlnum, _state), _state);
13151                 j = jfirst;
13152                 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13153                 {
13154 
13155                     /*
13156                      * Exit the loop if the growth factor is too small.
13157                      */
13158                     if( ae_fp_less_eq(grow,smlnum) )
13159                     {
13160                         break;
13161                     }
13162 
13163                     /*
13164                      * G(j) = G(j-1)*( 1 + CNORM(j) )
13165                      */
13166                     grow = grow*(1/(1+cnorm->ptr.p_double[j]));
13167                     j = j+jinc;
13168                 }
13169             }
13170         }
13171     }
13172     else
13173     {
13174 
13175         /*
13176          * Compute the growth in A' * x = b.
13177          */
13178         if( upper )
13179         {
13180             jfirst = 1;
13181             jlast = n;
13182             jinc = 1;
13183         }
13184         else
13185         {
13186             jfirst = n;
13187             jlast = 1;
13188             jinc = -1;
13189         }
13190         if( ae_fp_neq(tscal,(double)(1)) )
13191         {
13192             grow = (double)(0);
13193         }
13194         else
13195         {
13196             if( nounit )
13197             {
13198 
13199                 /*
13200                  * A is non-unit triangular.
13201                  *
13202                  * Compute GROW = 1/G(j) and XBND = 1/M(j).
13203                  * Initially, M(0) = max{x(i), i=1,...,n}.
13204                  */
13205                 grow = 1/ae_maxreal(xbnd, smlnum, _state);
13206                 xbnd = grow;
13207                 j = jfirst;
13208                 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13209                 {
13210 
13211                     /*
13212                      * Exit the loop if the growth factor is too small.
13213                      */
13214                     if( ae_fp_less_eq(grow,smlnum) )
13215                     {
13216                         break;
13217                     }
13218 
13219                     /*
13220                      * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
13221                      */
13222                     xj = 1+cnorm->ptr.p_double[j];
13223                     grow = ae_minreal(grow, xbnd/xj, _state);
13224 
13225                     /*
13226                      * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
13227                      */
13228                     tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
13229                     if( ae_fp_greater(xj,tjj) )
13230                     {
13231                         xbnd = xbnd*(tjj/xj);
13232                     }
13233                     if( j==jlast )
13234                     {
13235                         grow = ae_minreal(grow, xbnd, _state);
13236                     }
13237                     j = j+jinc;
13238                 }
13239             }
13240             else
13241             {
13242 
13243                 /*
13244                  * A is unit triangular.
13245                  *
13246                  * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
13247                  */
13248                 grow = ae_minreal((double)(1), 1/ae_maxreal(xbnd, smlnum, _state), _state);
13249                 j = jfirst;
13250                 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13251                 {
13252 
13253                     /*
13254                      * Exit the loop if the growth factor is too small.
13255                      */
13256                     if( ae_fp_less_eq(grow,smlnum) )
13257                     {
13258                         break;
13259                     }
13260 
13261                     /*
13262                      * G(j) = ( 1 + CNORM(j) )*G(j-1)
13263                      */
13264                     xj = 1+cnorm->ptr.p_double[j];
13265                     grow = grow/xj;
13266                     j = j+jinc;
13267                 }
13268             }
13269         }
13270     }
13271     if( ae_fp_greater(grow*tscal,smlnum) )
13272     {
13273 
13274         /*
13275          * Use the Level 2 BLAS solve if the reciprocal of the bound on
13276          * elements of X is not too small.
13277          */
13278         if( (upper&&notran)||(!upper&&!notran) )
13279         {
13280             if( nounit )
13281             {
13282                 vd = a->ptr.pp_double[n][n];
13283             }
13284             else
13285             {
13286                 vd = (double)(1);
13287             }
13288             x->ptr.p_double[n] = x->ptr.p_double[n]/vd;
13289             for(i=n-1; i>=1; i--)
13290             {
13291                 ip1 = i+1;
13292                 if( upper )
13293                 {
13294                     v = ae_v_dotproduct(&a->ptr.pp_double[i][ip1], 1, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
13295                 }
13296                 else
13297                 {
13298                     v = ae_v_dotproduct(&a->ptr.pp_double[ip1][i], a->stride, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
13299                 }
13300                 if( nounit )
13301                 {
13302                     vd = a->ptr.pp_double[i][i];
13303                 }
13304                 else
13305                 {
13306                     vd = (double)(1);
13307                 }
13308                 x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
13309             }
13310         }
13311         else
13312         {
13313             if( nounit )
13314             {
13315                 vd = a->ptr.pp_double[1][1];
13316             }
13317             else
13318             {
13319                 vd = (double)(1);
13320             }
13321             x->ptr.p_double[1] = x->ptr.p_double[1]/vd;
13322             for(i=2; i<=n; i++)
13323             {
13324                 im1 = i-1;
13325                 if( upper )
13326                 {
13327                     v = ae_v_dotproduct(&a->ptr.pp_double[1][i], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
13328                 }
13329                 else
13330                 {
13331                     v = ae_v_dotproduct(&a->ptr.pp_double[i][1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
13332                 }
13333                 if( nounit )
13334                 {
13335                     vd = a->ptr.pp_double[i][i];
13336                 }
13337                 else
13338                 {
13339                     vd = (double)(1);
13340                 }
13341                 x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
13342             }
13343         }
13344     }
13345     else
13346     {
13347 
13348         /*
13349          * Use a Level 1 BLAS solve, scaling intermediate results.
13350          */
13351         if( ae_fp_greater(xmax,bignum) )
13352         {
13353 
13354             /*
13355              * Scale X so that its components are less than or equal to
13356              * BIGNUM in absolute value.
13357              */
13358             *s = bignum/xmax;
13359             ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), *s);
13360             xmax = bignum;
13361         }
13362         if( notran )
13363         {
13364 
13365             /*
13366              * Solve A * x = b
13367              */
13368             j = jfirst;
13369             while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13370             {
13371 
13372                 /*
13373                  * Compute x(j) = b(j) / A(j,j), scaling x if necessary.
13374                  */
13375                 xj = ae_fabs(x->ptr.p_double[j], _state);
13376                 flg = 0;
13377                 if( nounit )
13378                 {
13379                     tjjs = a->ptr.pp_double[j][j]*tscal;
13380                 }
13381                 else
13382                 {
13383                     tjjs = tscal;
13384                     if( ae_fp_eq(tscal,(double)(1)) )
13385                     {
13386                         flg = 100;
13387                     }
13388                 }
13389                 if( flg!=100 )
13390                 {
13391                     tjj = ae_fabs(tjjs, _state);
13392                     if( ae_fp_greater(tjj,smlnum) )
13393                     {
13394 
13395                         /*
13396                          * abs(A(j,j)) > SMLNUM:
13397                          */
13398                         if( ae_fp_less(tjj,(double)(1)) )
13399                         {
13400                             if( ae_fp_greater(xj,tjj*bignum) )
13401                             {
13402 
13403                                 /*
13404                                  * Scale x by 1/b(j).
13405                                  */
13406                                 rec = 1/xj;
13407                                 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13408                                 *s = *s*rec;
13409                                 xmax = xmax*rec;
13410                             }
13411                         }
13412                         x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13413                         xj = ae_fabs(x->ptr.p_double[j], _state);
13414                     }
13415                     else
13416                     {
13417                         if( ae_fp_greater(tjj,(double)(0)) )
13418                         {
13419 
13420                             /*
13421                              * 0 < abs(A(j,j)) <= SMLNUM:
13422                              */
13423                             if( ae_fp_greater(xj,tjj*bignum) )
13424                             {
13425 
13426                                 /*
13427                                  * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
13428                                  * to avoid overflow when dividing by A(j,j).
13429                                  */
13430                                 rec = tjj*bignum/xj;
13431                                 if( ae_fp_greater(cnorm->ptr.p_double[j],(double)(1)) )
13432                                 {
13433 
13434                                     /*
13435                                      * Scale by 1/CNORM(j) to avoid overflow when
13436                                      * multiplying x(j) times column j.
13437                                      */
13438                                     rec = rec/cnorm->ptr.p_double[j];
13439                                 }
13440                                 ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13441                                 *s = *s*rec;
13442                                 xmax = xmax*rec;
13443                             }
13444                             x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13445                             xj = ae_fabs(x->ptr.p_double[j], _state);
13446                         }
13447                         else
13448                         {
13449 
13450                             /*
13451                              * A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
13452                              * scale = 0, and compute a solution to A*x = 0.
13453                              */
13454                             for(i=1; i<=n; i++)
13455                             {
13456                                 x->ptr.p_double[i] = (double)(0);
13457                             }
13458                             x->ptr.p_double[j] = (double)(1);
13459                             xj = (double)(1);
13460                             *s = (double)(0);
13461                             xmax = (double)(0);
13462                         }
13463                     }
13464                 }
13465 
13466                 /*
13467                  * Scale x if necessary to avoid overflow when adding a
13468                  * multiple of column j of A.
13469                  */
13470                 if( ae_fp_greater(xj,(double)(1)) )
13471                 {
13472                     rec = 1/xj;
13473                     if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xmax)*rec) )
13474                     {
13475 
13476                         /*
13477                          * Scale x by 1/(2*abs(x(j))).
13478                          */
13479                         rec = rec*0.5;
13480                         ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13481                         *s = *s*rec;
13482                     }
13483                 }
13484                 else
13485                 {
13486                     if( ae_fp_greater(xj*cnorm->ptr.p_double[j],bignum-xmax) )
13487                     {
13488 
13489                         /*
13490                          * Scale x by 1/2.
13491                          */
13492                         ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), 0.5);
13493                         *s = *s*0.5;
13494                     }
13495                 }
13496                 if( upper )
13497                 {
13498                     if( j>1 )
13499                     {
13500 
13501                         /*
13502                          * Compute the update
13503                          * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
13504                          */
13505                         v = x->ptr.p_double[j]*tscal;
13506                         jm1 = j-1;
13507                         ae_v_subd(&x->ptr.p_double[1], 1, &a->ptr.pp_double[1][j], a->stride, ae_v_len(1,jm1), v);
13508                         i = 1;
13509                         for(k=2; k<=j-1; k++)
13510                         {
13511                             if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
13512                             {
13513                                 i = k;
13514                             }
13515                         }
13516                         xmax = ae_fabs(x->ptr.p_double[i], _state);
13517                     }
13518                 }
13519                 else
13520                 {
13521                     if( j<n )
13522                     {
13523 
13524                         /*
13525                          * Compute the update
13526                          * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
13527                          */
13528                         jp1 = j+1;
13529                         v = x->ptr.p_double[j]*tscal;
13530                         ae_v_subd(&x->ptr.p_double[jp1], 1, &a->ptr.pp_double[jp1][j], a->stride, ae_v_len(jp1,n), v);
13531                         i = j+1;
13532                         for(k=j+2; k<=n; k++)
13533                         {
13534                             if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
13535                             {
13536                                 i = k;
13537                             }
13538                         }
13539                         xmax = ae_fabs(x->ptr.p_double[i], _state);
13540                     }
13541                 }
13542                 j = j+jinc;
13543             }
13544         }
13545         else
13546         {
13547 
13548             /*
13549              * Solve A' * x = b
13550              */
13551             j = jfirst;
13552             while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
13553             {
13554 
13555                 /*
13556                  * Compute x(j) = b(j) - sum A(k,j)*x(k).
13557                  *   k<>j
13558                  */
13559                 xj = ae_fabs(x->ptr.p_double[j], _state);
13560                 uscal = tscal;
13561                 rec = 1/ae_maxreal(xmax, (double)(1), _state);
13562                 if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xj)*rec) )
13563                 {
13564 
13565                     /*
13566                      * If x(j) could overflow, scale x by 1/(2*XMAX).
13567                      */
13568                     rec = rec*0.5;
13569                     if( nounit )
13570                     {
13571                         tjjs = a->ptr.pp_double[j][j]*tscal;
13572                     }
13573                     else
13574                     {
13575                         tjjs = tscal;
13576                     }
13577                     tjj = ae_fabs(tjjs, _state);
13578                     if( ae_fp_greater(tjj,(double)(1)) )
13579                     {
13580 
13581                         /*
13582                          * Divide by A(j,j) when scaling x if A(j,j) > 1.
13583                          */
13584                         rec = ae_minreal((double)(1), rec*tjj, _state);
13585                         uscal = uscal/tjjs;
13586                     }
13587                     if( ae_fp_less(rec,(double)(1)) )
13588                     {
13589                         ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13590                         *s = *s*rec;
13591                         xmax = xmax*rec;
13592                     }
13593                 }
13594                 sumj = (double)(0);
13595                 if( ae_fp_eq(uscal,(double)(1)) )
13596                 {
13597 
13598                     /*
13599                      * If the scaling needed for A in the dot product is 1,
13600                      * call DDOT to perform the dot product.
13601                      */
13602                     if( upper )
13603                     {
13604                         if( j>1 )
13605                         {
13606                             jm1 = j-1;
13607                             sumj = ae_v_dotproduct(&a->ptr.pp_double[1][j], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,jm1));
13608                         }
13609                         else
13610                         {
13611                             sumj = (double)(0);
13612                         }
13613                     }
13614                     else
13615                     {
13616                         if( j<n )
13617                         {
13618                             jp1 = j+1;
13619                             sumj = ae_v_dotproduct(&a->ptr.pp_double[jp1][j], a->stride, &x->ptr.p_double[jp1], 1, ae_v_len(jp1,n));
13620                         }
13621                     }
13622                 }
13623                 else
13624                 {
13625 
13626                     /*
13627                      * Otherwise, use in-line code for the dot product.
13628                      */
13629                     if( upper )
13630                     {
13631                         for(i=1; i<=j-1; i++)
13632                         {
13633                             v = a->ptr.pp_double[i][j]*uscal;
13634                             sumj = sumj+v*x->ptr.p_double[i];
13635                         }
13636                     }
13637                     else
13638                     {
13639                         if( j<n )
13640                         {
13641                             for(i=j+1; i<=n; i++)
13642                             {
13643                                 v = a->ptr.pp_double[i][j]*uscal;
13644                                 sumj = sumj+v*x->ptr.p_double[i];
13645                             }
13646                         }
13647                     }
13648                 }
13649                 if( ae_fp_eq(uscal,tscal) )
13650                 {
13651 
13652                     /*
13653                      * Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
13654                      * was not used to scale the dotproduct.
13655                      */
13656                     x->ptr.p_double[j] = x->ptr.p_double[j]-sumj;
13657                     xj = ae_fabs(x->ptr.p_double[j], _state);
13658                     flg = 0;
13659                     if( nounit )
13660                     {
13661                         tjjs = a->ptr.pp_double[j][j]*tscal;
13662                     }
13663                     else
13664                     {
13665                         tjjs = tscal;
13666                         if( ae_fp_eq(tscal,(double)(1)) )
13667                         {
13668                             flg = 150;
13669                         }
13670                     }
13671 
13672                     /*
13673                      * Compute x(j) = x(j) / A(j,j), scaling if necessary.
13674                      */
13675                     if( flg!=150 )
13676                     {
13677                         tjj = ae_fabs(tjjs, _state);
13678                         if( ae_fp_greater(tjj,smlnum) )
13679                         {
13680 
13681                             /*
13682                              * abs(A(j,j)) > SMLNUM:
13683                              */
13684                             if( ae_fp_less(tjj,(double)(1)) )
13685                             {
13686                                 if( ae_fp_greater(xj,tjj*bignum) )
13687                                 {
13688 
13689                                     /*
13690                                      * Scale X by 1/abs(x(j)).
13691                                      */
13692                                     rec = 1/xj;
13693                                     ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13694                                     *s = *s*rec;
13695                                     xmax = xmax*rec;
13696                                 }
13697                             }
13698                             x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13699                         }
13700                         else
13701                         {
13702                             if( ae_fp_greater(tjj,(double)(0)) )
13703                             {
13704 
13705                                 /*
13706                                  * 0 < abs(A(j,j)) <= SMLNUM:
13707                                  */
13708                                 if( ae_fp_greater(xj,tjj*bignum) )
13709                                 {
13710 
13711                                     /*
13712                                      * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
13713                                      */
13714                                     rec = tjj*bignum/xj;
13715                                     ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
13716                                     *s = *s*rec;
13717                                     xmax = xmax*rec;
13718                                 }
13719                                 x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
13720                             }
13721                             else
13722                             {
13723 
13724                                 /*
13725                                  * A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
13726                                  * scale = 0, and compute a solution to A'*x = 0.
13727                                  */
13728                                 for(i=1; i<=n; i++)
13729                                 {
13730                                     x->ptr.p_double[i] = (double)(0);
13731                                 }
13732                                 x->ptr.p_double[j] = (double)(1);
13733                                 *s = (double)(0);
13734                                 xmax = (double)(0);
13735                             }
13736                         }
13737                     }
13738                 }
13739                 else
13740                 {
13741 
13742                     /*
13743                      * Compute x(j) := x(j) / A(j,j)  - sumj if the dot
13744                      * product has already been divided by 1/A(j,j).
13745                      */
13746                     x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs-sumj;
13747                 }
13748                 xmax = ae_maxreal(xmax, ae_fabs(x->ptr.p_double[j], _state), _state);
13749                 j = j+jinc;
13750             }
13751         }
13752         *s = *s/tscal;
13753     }
13754 
13755     /*
13756      * Scale the column norms by 1/TSCAL for return.
13757      */
13758     if( ae_fp_neq(tscal,(double)(1)) )
13759     {
13760         v = 1/tscal;
13761         ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), v);
13762     }
13763 }
13764 
13765 
13766 #endif
13767 #if defined(AE_COMPILE_SAFESOLVE) || !defined(AE_PARTIAL_BUILD)
13768 
13769 
13770 /*************************************************************************
13771 Real implementation of CMatrixScaledTRSafeSolve
13772 
13773   -- ALGLIB routine --
13774      21.01.2010
13775      Bochkanov Sergey
13776 *************************************************************************/
rmatrixscaledtrsafesolve(ae_matrix * a,double sa,ae_int_t n,ae_vector * x,ae_bool isupper,ae_int_t trans,ae_bool isunit,double maxgrowth,ae_state * _state)13777 ae_bool rmatrixscaledtrsafesolve(/* Real    */ ae_matrix* a,
13778      double sa,
13779      ae_int_t n,
13780      /* Real    */ ae_vector* x,
13781      ae_bool isupper,
13782      ae_int_t trans,
13783      ae_bool isunit,
13784      double maxgrowth,
13785      ae_state *_state)
13786 {
13787     ae_frame _frame_block;
13788     double lnmax;
13789     double nrmb;
13790     double nrmx;
13791     ae_int_t i;
13792     ae_complex alpha;
13793     ae_complex beta;
13794     double vr;
13795     ae_complex cx;
13796     ae_vector tmp;
13797     ae_bool result;
13798 
13799     ae_frame_make(_state, &_frame_block);
13800     memset(&tmp, 0, sizeof(tmp));
13801     ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
13802 
13803     ae_assert(n>0, "RMatrixTRSafeSolve: incorrect N!", _state);
13804     ae_assert(trans==0||trans==1, "RMatrixTRSafeSolve: incorrect Trans!", _state);
13805     result = ae_true;
13806     lnmax = ae_log(ae_maxrealnumber, _state);
13807 
13808     /*
13809      * Quick return if possible
13810      */
13811     if( n<=0 )
13812     {
13813         ae_frame_leave(_state);
13814         return result;
13815     }
13816 
13817     /*
13818      * Load norms: right part and X
13819      */
13820     nrmb = (double)(0);
13821     for(i=0; i<=n-1; i++)
13822     {
13823         nrmb = ae_maxreal(nrmb, ae_fabs(x->ptr.p_double[i], _state), _state);
13824     }
13825     nrmx = (double)(0);
13826 
13827     /*
13828      * Solve
13829      */
13830     ae_vector_set_length(&tmp, n, _state);
13831     result = ae_true;
13832     if( isupper&&trans==0 )
13833     {
13834 
13835         /*
13836          * U*x = b
13837          */
13838         for(i=n-1; i>=0; i--)
13839         {
13840 
13841             /*
13842              * Task is reduced to alpha*x[i] = beta
13843              */
13844             if( isunit )
13845             {
13846                 alpha = ae_complex_from_d(sa);
13847             }
13848             else
13849             {
13850                 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13851             }
13852             if( i<n-1 )
13853             {
13854                 ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
13855                 vr = ae_v_dotproduct(&tmp.ptr.p_double[i+1], 1, &x->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
13856                 beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
13857             }
13858             else
13859             {
13860                 beta = ae_complex_from_d(x->ptr.p_double[i]);
13861             }
13862 
13863             /*
13864              * solve alpha*x[i] = beta
13865              */
13866             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13867             if( !result )
13868             {
13869                 ae_frame_leave(_state);
13870                 return result;
13871             }
13872             x->ptr.p_double[i] = cx.x;
13873         }
13874         ae_frame_leave(_state);
13875         return result;
13876     }
13877     if( !isupper&&trans==0 )
13878     {
13879 
13880         /*
13881          * L*x = b
13882          */
13883         for(i=0; i<=n-1; i++)
13884         {
13885 
13886             /*
13887              * Task is reduced to alpha*x[i] = beta
13888              */
13889             if( isunit )
13890             {
13891                 alpha = ae_complex_from_d(sa);
13892             }
13893             else
13894             {
13895                 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13896             }
13897             if( i>0 )
13898             {
13899                 ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
13900                 vr = ae_v_dotproduct(&tmp.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,i-1));
13901                 beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
13902             }
13903             else
13904             {
13905                 beta = ae_complex_from_d(x->ptr.p_double[i]);
13906             }
13907 
13908             /*
13909              * solve alpha*x[i] = beta
13910              */
13911             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13912             if( !result )
13913             {
13914                 ae_frame_leave(_state);
13915                 return result;
13916             }
13917             x->ptr.p_double[i] = cx.x;
13918         }
13919         ae_frame_leave(_state);
13920         return result;
13921     }
13922     if( isupper&&trans==1 )
13923     {
13924 
13925         /*
13926          * U^T*x = b
13927          */
13928         for(i=0; i<=n-1; i++)
13929         {
13930 
13931             /*
13932              * Task is reduced to alpha*x[i] = beta
13933              */
13934             if( isunit )
13935             {
13936                 alpha = ae_complex_from_d(sa);
13937             }
13938             else
13939             {
13940                 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13941             }
13942             beta = ae_complex_from_d(x->ptr.p_double[i]);
13943 
13944             /*
13945              * solve alpha*x[i] = beta
13946              */
13947             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13948             if( !result )
13949             {
13950                 ae_frame_leave(_state);
13951                 return result;
13952             }
13953             x->ptr.p_double[i] = cx.x;
13954 
13955             /*
13956              * update the rest of right part
13957              */
13958             if( i<n-1 )
13959             {
13960                 vr = cx.x;
13961                 ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
13962                 ae_v_subd(&x->ptr.p_double[i+1], 1, &tmp.ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), vr);
13963             }
13964         }
13965         ae_frame_leave(_state);
13966         return result;
13967     }
13968     if( !isupper&&trans==1 )
13969     {
13970 
13971         /*
13972          * L^T*x = b
13973          */
13974         for(i=n-1; i>=0; i--)
13975         {
13976 
13977             /*
13978              * Task is reduced to alpha*x[i] = beta
13979              */
13980             if( isunit )
13981             {
13982                 alpha = ae_complex_from_d(sa);
13983             }
13984             else
13985             {
13986                 alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
13987             }
13988             beta = ae_complex_from_d(x->ptr.p_double[i]);
13989 
13990             /*
13991              * solve alpha*x[i] = beta
13992              */
13993             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
13994             if( !result )
13995             {
13996                 ae_frame_leave(_state);
13997                 return result;
13998             }
13999             x->ptr.p_double[i] = cx.x;
14000 
14001             /*
14002              * update the rest of right part
14003              */
14004             if( i>0 )
14005             {
14006                 vr = cx.x;
14007                 ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
14008                 ae_v_subd(&x->ptr.p_double[0], 1, &tmp.ptr.p_double[0], 1, ae_v_len(0,i-1), vr);
14009             }
14010         }
14011         ae_frame_leave(_state);
14012         return result;
14013     }
14014     result = ae_false;
14015     ae_frame_leave(_state);
14016     return result;
14017 }
14018 
14019 
14020 /*************************************************************************
14021 Internal subroutine for safe solution of
14022 
14023     SA*op(A)=b
14024 
14025 where  A  is  NxN  upper/lower  triangular/unitriangular  matrix, op(A) is
14026 either identity transform, transposition or Hermitian transposition, SA is
14027 a scaling factor such that max(|SA*A[i,j]|) is close to 1.0 in magnutude.
14028 
14029 This subroutine  limits  relative  growth  of  solution  (in inf-norm)  by
14030 MaxGrowth,  returning  False  if  growth  exceeds MaxGrowth. Degenerate or
14031 near-degenerate matrices are handled correctly (False is returned) as long
14032 as MaxGrowth is significantly less than MaxRealNumber/norm(b).
14033 
14034   -- ALGLIB routine --
14035      21.01.2010
14036      Bochkanov Sergey
14037 *************************************************************************/
cmatrixscaledtrsafesolve(ae_matrix * a,double sa,ae_int_t n,ae_vector * x,ae_bool isupper,ae_int_t trans,ae_bool isunit,double maxgrowth,ae_state * _state)14038 ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a,
14039      double sa,
14040      ae_int_t n,
14041      /* Complex */ ae_vector* x,
14042      ae_bool isupper,
14043      ae_int_t trans,
14044      ae_bool isunit,
14045      double maxgrowth,
14046      ae_state *_state)
14047 {
14048     ae_frame _frame_block;
14049     double lnmax;
14050     double nrmb;
14051     double nrmx;
14052     ae_int_t i;
14053     ae_complex alpha;
14054     ae_complex beta;
14055     ae_complex vc;
14056     ae_vector tmp;
14057     ae_bool result;
14058 
14059     ae_frame_make(_state, &_frame_block);
14060     memset(&tmp, 0, sizeof(tmp));
14061     ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
14062 
14063     ae_assert(n>0, "CMatrixTRSafeSolve: incorrect N!", _state);
14064     ae_assert((trans==0||trans==1)||trans==2, "CMatrixTRSafeSolve: incorrect Trans!", _state);
14065     result = ae_true;
14066     lnmax = ae_log(ae_maxrealnumber, _state);
14067 
14068     /*
14069      * Quick return if possible
14070      */
14071     if( n<=0 )
14072     {
14073         ae_frame_leave(_state);
14074         return result;
14075     }
14076 
14077     /*
14078      * Load norms: right part and X
14079      */
14080     nrmb = (double)(0);
14081     for(i=0; i<=n-1; i++)
14082     {
14083         nrmb = ae_maxreal(nrmb, ae_c_abs(x->ptr.p_complex[i], _state), _state);
14084     }
14085     nrmx = (double)(0);
14086 
14087     /*
14088      * Solve
14089      */
14090     ae_vector_set_length(&tmp, n, _state);
14091     result = ae_true;
14092     if( isupper&&trans==0 )
14093     {
14094 
14095         /*
14096          * U*x = b
14097          */
14098         for(i=n-1; i>=0; i--)
14099         {
14100 
14101             /*
14102              * Task is reduced to alpha*x[i] = beta
14103              */
14104             if( isunit )
14105             {
14106                 alpha = ae_complex_from_d(sa);
14107             }
14108             else
14109             {
14110                 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14111             }
14112             if( i<n-1 )
14113             {
14114                 ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
14115                 vc = ae_v_cdotproduct(&tmp.ptr.p_complex[i+1], 1, "N", &x->ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1));
14116                 beta = ae_c_sub(x->ptr.p_complex[i],vc);
14117             }
14118             else
14119             {
14120                 beta = x->ptr.p_complex[i];
14121             }
14122 
14123             /*
14124              * solve alpha*x[i] = beta
14125              */
14126             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14127             if( !result )
14128             {
14129                 ae_frame_leave(_state);
14130                 return result;
14131             }
14132             x->ptr.p_complex[i] = vc;
14133         }
14134         ae_frame_leave(_state);
14135         return result;
14136     }
14137     if( !isupper&&trans==0 )
14138     {
14139 
14140         /*
14141          * L*x = b
14142          */
14143         for(i=0; i<=n-1; i++)
14144         {
14145 
14146             /*
14147              * Task is reduced to alpha*x[i] = beta
14148              */
14149             if( isunit )
14150             {
14151                 alpha = ae_complex_from_d(sa);
14152             }
14153             else
14154             {
14155                 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14156             }
14157             if( i>0 )
14158             {
14159                 ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
14160                 vc = ae_v_cdotproduct(&tmp.ptr.p_complex[0], 1, "N", &x->ptr.p_complex[0], 1, "N", ae_v_len(0,i-1));
14161                 beta = ae_c_sub(x->ptr.p_complex[i],vc);
14162             }
14163             else
14164             {
14165                 beta = x->ptr.p_complex[i];
14166             }
14167 
14168             /*
14169              * solve alpha*x[i] = beta
14170              */
14171             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14172             if( !result )
14173             {
14174                 ae_frame_leave(_state);
14175                 return result;
14176             }
14177             x->ptr.p_complex[i] = vc;
14178         }
14179         ae_frame_leave(_state);
14180         return result;
14181     }
14182     if( isupper&&trans==1 )
14183     {
14184 
14185         /*
14186          * U^T*x = b
14187          */
14188         for(i=0; i<=n-1; i++)
14189         {
14190 
14191             /*
14192              * Task is reduced to alpha*x[i] = beta
14193              */
14194             if( isunit )
14195             {
14196                 alpha = ae_complex_from_d(sa);
14197             }
14198             else
14199             {
14200                 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14201             }
14202             beta = x->ptr.p_complex[i];
14203 
14204             /*
14205              * solve alpha*x[i] = beta
14206              */
14207             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14208             if( !result )
14209             {
14210                 ae_frame_leave(_state);
14211                 return result;
14212             }
14213             x->ptr.p_complex[i] = vc;
14214 
14215             /*
14216              * update the rest of right part
14217              */
14218             if( i<n-1 )
14219             {
14220                 ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
14221                 ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
14222             }
14223         }
14224         ae_frame_leave(_state);
14225         return result;
14226     }
14227     if( !isupper&&trans==1 )
14228     {
14229 
14230         /*
14231          * L^T*x = b
14232          */
14233         for(i=n-1; i>=0; i--)
14234         {
14235 
14236             /*
14237              * Task is reduced to alpha*x[i] = beta
14238              */
14239             if( isunit )
14240             {
14241                 alpha = ae_complex_from_d(sa);
14242             }
14243             else
14244             {
14245                 alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
14246             }
14247             beta = x->ptr.p_complex[i];
14248 
14249             /*
14250              * solve alpha*x[i] = beta
14251              */
14252             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14253             if( !result )
14254             {
14255                 ae_frame_leave(_state);
14256                 return result;
14257             }
14258             x->ptr.p_complex[i] = vc;
14259 
14260             /*
14261              * update the rest of right part
14262              */
14263             if( i>0 )
14264             {
14265                 ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
14266                 ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
14267             }
14268         }
14269         ae_frame_leave(_state);
14270         return result;
14271     }
14272     if( isupper&&trans==2 )
14273     {
14274 
14275         /*
14276          * U^H*x = b
14277          */
14278         for(i=0; i<=n-1; i++)
14279         {
14280 
14281             /*
14282              * Task is reduced to alpha*x[i] = beta
14283              */
14284             if( isunit )
14285             {
14286                 alpha = ae_complex_from_d(sa);
14287             }
14288             else
14289             {
14290                 alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
14291             }
14292             beta = x->ptr.p_complex[i];
14293 
14294             /*
14295              * solve alpha*x[i] = beta
14296              */
14297             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14298             if( !result )
14299             {
14300                 ae_frame_leave(_state);
14301                 return result;
14302             }
14303             x->ptr.p_complex[i] = vc;
14304 
14305             /*
14306              * update the rest of right part
14307              */
14308             if( i<n-1 )
14309             {
14310                 ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "Conj", ae_v_len(i+1,n-1), sa);
14311                 ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
14312             }
14313         }
14314         ae_frame_leave(_state);
14315         return result;
14316     }
14317     if( !isupper&&trans==2 )
14318     {
14319 
14320         /*
14321          * L^T*x = b
14322          */
14323         for(i=n-1; i>=0; i--)
14324         {
14325 
14326             /*
14327              * Task is reduced to alpha*x[i] = beta
14328              */
14329             if( isunit )
14330             {
14331                 alpha = ae_complex_from_d(sa);
14332             }
14333             else
14334             {
14335                 alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
14336             }
14337             beta = x->ptr.p_complex[i];
14338 
14339             /*
14340              * solve alpha*x[i] = beta
14341              */
14342             result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
14343             if( !result )
14344             {
14345                 ae_frame_leave(_state);
14346                 return result;
14347             }
14348             x->ptr.p_complex[i] = vc;
14349 
14350             /*
14351              * update the rest of right part
14352              */
14353             if( i>0 )
14354             {
14355                 ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i-1), sa);
14356                 ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
14357             }
14358         }
14359         ae_frame_leave(_state);
14360         return result;
14361     }
14362     result = ae_false;
14363     ae_frame_leave(_state);
14364     return result;
14365 }
14366 
14367 
14368 /*************************************************************************
14369 complex basic solver-updater for reduced linear system
14370 
14371     alpha*x[i] = beta
14372 
14373 solves this equation and updates it in overlfow-safe manner (keeping track
14374 of relative growth of solution).
14375 
14376 Parameters:
14377     Alpha   -   alpha
14378     Beta    -   beta
14379     LnMax   -   precomputed Ln(MaxRealNumber)
14380     BNorm   -   inf-norm of b (right part of original system)
14381     MaxGrowth-  maximum growth of norm(x) relative to norm(b)
14382     XNorm   -   inf-norm of other components of X (which are already processed)
14383                 it is updated by CBasicSolveAndUpdate.
14384     X       -   solution
14385 
14386   -- ALGLIB routine --
14387      26.01.2009
14388      Bochkanov Sergey
14389 *************************************************************************/
safesolve_cbasicsolveandupdate(ae_complex alpha,ae_complex beta,double lnmax,double bnorm,double maxgrowth,double * xnorm,ae_complex * x,ae_state * _state)14390 static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
14391      ae_complex beta,
14392      double lnmax,
14393      double bnorm,
14394      double maxgrowth,
14395      double* xnorm,
14396      ae_complex* x,
14397      ae_state *_state)
14398 {
14399     double v;
14400     ae_bool result;
14401 
14402     x->x = 0;
14403     x->y = 0;
14404 
14405     result = ae_false;
14406     if( ae_c_eq_d(alpha,(double)(0)) )
14407     {
14408         return result;
14409     }
14410     if( ae_c_neq_d(beta,(double)(0)) )
14411     {
14412 
14413         /*
14414          * alpha*x[i]=beta
14415          */
14416         v = ae_log(ae_c_abs(beta, _state), _state)-ae_log(ae_c_abs(alpha, _state), _state);
14417         if( ae_fp_greater(v,lnmax) )
14418         {
14419             return result;
14420         }
14421         *x = ae_c_div(beta,alpha);
14422     }
14423     else
14424     {
14425 
14426         /*
14427          * alpha*x[i]=0
14428          */
14429         *x = ae_complex_from_i(0);
14430     }
14431 
14432     /*
14433      * update NrmX, test growth limit
14434      */
14435     *xnorm = ae_maxreal(*xnorm, ae_c_abs(*x, _state), _state);
14436     if( ae_fp_greater(*xnorm,maxgrowth*bnorm) )
14437     {
14438         return result;
14439     }
14440     result = ae_true;
14441     return result;
14442 }
14443 
14444 
14445 #endif
14446 #if defined(AE_COMPILE_XBLAS) || !defined(AE_PARTIAL_BUILD)
14447 
14448 
14449 /*************************************************************************
14450 More precise dot-product. Absolute error of  subroutine  result  is  about
14451 1 ulp of max(MX,V), where:
14452     MX = max( |a[i]*b[i]| )
14453     V  = |(a,b)|
14454 
14455 INPUT PARAMETERS
14456     A       -   array[0..N-1], vector 1
14457     B       -   array[0..N-1], vector 2
14458     N       -   vectors length, N<2^29.
14459     Temp    -   array[0..N-1], pre-allocated temporary storage
14460 
14461 OUTPUT PARAMETERS
14462     R       -   (A,B)
14463     RErr    -   estimate of error. This estimate accounts for both  errors
14464                 during  calculation  of  (A,B)  and  errors  introduced by
14465                 rounding of A and B to fit in double (about 1 ulp).
14466 
14467   -- ALGLIB --
14468      Copyright 24.08.2009 by Bochkanov Sergey
14469 *************************************************************************/
xdot(ae_vector * a,ae_vector * b,ae_int_t n,ae_vector * temp,double * r,double * rerr,ae_state * _state)14470 void xdot(/* Real    */ ae_vector* a,
14471      /* Real    */ ae_vector* b,
14472      ae_int_t n,
14473      /* Real    */ ae_vector* temp,
14474      double* r,
14475      double* rerr,
14476      ae_state *_state)
14477 {
14478     ae_int_t i;
14479     double mx;
14480     double v;
14481 
14482     *r = 0;
14483     *rerr = 0;
14484 
14485 
14486     /*
14487      * special cases:
14488      * * N=0
14489      */
14490     if( n==0 )
14491     {
14492         *r = (double)(0);
14493         *rerr = (double)(0);
14494         return;
14495     }
14496     mx = (double)(0);
14497     for(i=0; i<=n-1; i++)
14498     {
14499         v = a->ptr.p_double[i]*b->ptr.p_double[i];
14500         temp->ptr.p_double[i] = v;
14501         mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14502     }
14503     if( ae_fp_eq(mx,(double)(0)) )
14504     {
14505         *r = (double)(0);
14506         *rerr = (double)(0);
14507         return;
14508     }
14509     xblas_xsum(temp, mx, n, r, rerr, _state);
14510 }
14511 
14512 
14513 /*************************************************************************
14514 More precise complex dot-product. Absolute error of  subroutine  result is
14515 about 1 ulp of max(MX,V), where:
14516     MX = max( |a[i]*b[i]| )
14517     V  = |(a,b)|
14518 
14519 INPUT PARAMETERS
14520     A       -   array[0..N-1], vector 1
14521     B       -   array[0..N-1], vector 2
14522     N       -   vectors length, N<2^29.
14523     Temp    -   array[0..2*N-1], pre-allocated temporary storage
14524 
14525 OUTPUT PARAMETERS
14526     R       -   (A,B)
14527     RErr    -   estimate of error. This estimate accounts for both  errors
14528                 during  calculation  of  (A,B)  and  errors  introduced by
14529                 rounding of A and B to fit in double (about 1 ulp).
14530 
14531   -- ALGLIB --
14532      Copyright 27.01.2010 by Bochkanov Sergey
14533 *************************************************************************/
xcdot(ae_vector * a,ae_vector * b,ae_int_t n,ae_vector * temp,ae_complex * r,double * rerr,ae_state * _state)14534 void xcdot(/* Complex */ ae_vector* a,
14535      /* Complex */ ae_vector* b,
14536      ae_int_t n,
14537      /* Real    */ ae_vector* temp,
14538      ae_complex* r,
14539      double* rerr,
14540      ae_state *_state)
14541 {
14542     ae_int_t i;
14543     double mx;
14544     double v;
14545     double rerrx;
14546     double rerry;
14547 
14548     r->x = 0;
14549     r->y = 0;
14550     *rerr = 0;
14551 
14552 
14553     /*
14554      * special cases:
14555      * * N=0
14556      */
14557     if( n==0 )
14558     {
14559         *r = ae_complex_from_i(0);
14560         *rerr = (double)(0);
14561         return;
14562     }
14563 
14564     /*
14565      * calculate real part
14566      */
14567     mx = (double)(0);
14568     for(i=0; i<=n-1; i++)
14569     {
14570         v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].x;
14571         temp->ptr.p_double[2*i+0] = v;
14572         mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14573         v = -a->ptr.p_complex[i].y*b->ptr.p_complex[i].y;
14574         temp->ptr.p_double[2*i+1] = v;
14575         mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14576     }
14577     if( ae_fp_eq(mx,(double)(0)) )
14578     {
14579         r->x = (double)(0);
14580         rerrx = (double)(0);
14581     }
14582     else
14583     {
14584         xblas_xsum(temp, mx, 2*n, &r->x, &rerrx, _state);
14585     }
14586 
14587     /*
14588      * calculate imaginary part
14589      */
14590     mx = (double)(0);
14591     for(i=0; i<=n-1; i++)
14592     {
14593         v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].y;
14594         temp->ptr.p_double[2*i+0] = v;
14595         mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14596         v = a->ptr.p_complex[i].y*b->ptr.p_complex[i].x;
14597         temp->ptr.p_double[2*i+1] = v;
14598         mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
14599     }
14600     if( ae_fp_eq(mx,(double)(0)) )
14601     {
14602         r->y = (double)(0);
14603         rerry = (double)(0);
14604     }
14605     else
14606     {
14607         xblas_xsum(temp, mx, 2*n, &r->y, &rerry, _state);
14608     }
14609 
14610     /*
14611      * total error
14612      */
14613     if( ae_fp_eq(rerrx,(double)(0))&&ae_fp_eq(rerry,(double)(0)) )
14614     {
14615         *rerr = (double)(0);
14616     }
14617     else
14618     {
14619         *rerr = ae_maxreal(rerrx, rerry, _state)*ae_sqrt(1+ae_sqr(ae_minreal(rerrx, rerry, _state)/ae_maxreal(rerrx, rerry, _state), _state), _state);
14620     }
14621 }
14622 
14623 
14624 /*************************************************************************
14625 Internal subroutine for extra-precise calculation of SUM(w[i]).
14626 
14627 INPUT PARAMETERS:
14628     W   -   array[0..N-1], values to be added
14629             W is modified during calculations.
14630     MX  -   max(W[i])
14631     N   -   array size
14632 
14633 OUTPUT PARAMETERS:
14634     R   -   SUM(w[i])
14635     RErr-   error estimate for R
14636 
14637   -- ALGLIB --
14638      Copyright 24.08.2009 by Bochkanov Sergey
14639 *************************************************************************/
xblas_xsum(ae_vector * w,double mx,ae_int_t n,double * r,double * rerr,ae_state * _state)14640 static void xblas_xsum(/* Real    */ ae_vector* w,
14641      double mx,
14642      ae_int_t n,
14643      double* r,
14644      double* rerr,
14645      ae_state *_state)
14646 {
14647     ae_int_t i;
14648     ae_int_t k;
14649     ae_int_t ks;
14650     double v;
14651     double s;
14652     double ln2;
14653     double chunk;
14654     double invchunk;
14655     ae_bool allzeros;
14656 
14657     *r = 0;
14658     *rerr = 0;
14659 
14660 
14661     /*
14662      * special cases:
14663      * * N=0
14664      * * N is too large to use integer arithmetics
14665      */
14666     if( n==0 )
14667     {
14668         *r = (double)(0);
14669         *rerr = (double)(0);
14670         return;
14671     }
14672     if( ae_fp_eq(mx,(double)(0)) )
14673     {
14674         *r = (double)(0);
14675         *rerr = (double)(0);
14676         return;
14677     }
14678     ae_assert(n<536870912, "XDot: N is too large!", _state);
14679 
14680     /*
14681      * Prepare
14682      */
14683     ln2 = ae_log((double)(2), _state);
14684     *rerr = mx*ae_machineepsilon;
14685 
14686     /*
14687      * 1. find S such that 0.5<=S*MX<1
14688      * 2. multiply W by S, so task is normalized in some sense
14689      * 3. S:=1/S so we can obtain original vector multiplying by S
14690      */
14691     k = ae_round(ae_log(mx, _state)/ln2, _state);
14692     s = xblas_xfastpow((double)(2), -k, _state);
14693     if( !ae_isfinite(s, _state) )
14694     {
14695 
14696         /*
14697          * Overflow or underflow during evaluation of S; fallback low-precision code
14698          */
14699         *r = (double)(0);
14700         *rerr = mx*ae_machineepsilon;
14701         for(i=0; i<=n-1; i++)
14702         {
14703             *r = *r+w->ptr.p_double[i];
14704         }
14705         return;
14706     }
14707     while(ae_fp_greater_eq(s*mx,(double)(1)))
14708     {
14709         s = 0.5*s;
14710     }
14711     while(ae_fp_less(s*mx,0.5))
14712     {
14713         s = 2*s;
14714     }
14715     ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
14716     s = 1/s;
14717 
14718     /*
14719      * find Chunk=2^M such that N*Chunk<2^29
14720      *
14721      * we have chosen upper limit (2^29) with enough space left
14722      * to tolerate possible problems with rounding and N's close
14723      * to the limit, so we don't want to be very strict here.
14724      */
14725     k = ae_trunc(ae_log((double)536870912/(double)n, _state)/ln2, _state);
14726     chunk = xblas_xfastpow((double)(2), k, _state);
14727     if( ae_fp_less(chunk,(double)(2)) )
14728     {
14729         chunk = (double)(2);
14730     }
14731     invchunk = 1/chunk;
14732 
14733     /*
14734      * calculate result
14735      */
14736     *r = (double)(0);
14737     ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), chunk);
14738     for(;;)
14739     {
14740         s = s*invchunk;
14741         allzeros = ae_true;
14742         ks = 0;
14743         for(i=0; i<=n-1; i++)
14744         {
14745             v = w->ptr.p_double[i];
14746             k = ae_trunc(v, _state);
14747             if( ae_fp_neq(v,(double)(k)) )
14748             {
14749                 allzeros = ae_false;
14750             }
14751             w->ptr.p_double[i] = chunk*(v-k);
14752             ks = ks+k;
14753         }
14754         *r = *r+s*ks;
14755         v = ae_fabs(*r, _state);
14756         if( allzeros||ae_fp_eq(s*n+mx,mx) )
14757         {
14758             break;
14759         }
14760     }
14761 
14762     /*
14763      * correct error
14764      */
14765     *rerr = ae_maxreal(*rerr, ae_fabs(*r, _state)*ae_machineepsilon, _state);
14766 }
14767 
14768 
14769 /*************************************************************************
14770 Fast Pow
14771 
14772   -- ALGLIB --
14773      Copyright 24.08.2009 by Bochkanov Sergey
14774 *************************************************************************/
xblas_xfastpow(double r,ae_int_t n,ae_state * _state)14775 static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state)
14776 {
14777     double result;
14778 
14779 
14780     result = (double)(0);
14781     if( n>0 )
14782     {
14783         if( n%2==0 )
14784         {
14785             result = ae_sqr(xblas_xfastpow(r, n/2, _state), _state);
14786         }
14787         else
14788         {
14789             result = r*xblas_xfastpow(r, n-1, _state);
14790         }
14791         return result;
14792     }
14793     if( n==0 )
14794     {
14795         result = (double)(1);
14796     }
14797     if( n<0 )
14798     {
14799         result = xblas_xfastpow(1/r, -n, _state);
14800     }
14801     return result;
14802 }
14803 
14804 
14805 #endif
14806 #if defined(AE_COMPILE_LINMIN) || !defined(AE_PARTIAL_BUILD)
14807 
14808 
14809 /*************************************************************************
14810 Normalizes direction/step pair: makes |D|=1, scales Stp.
14811 If |D|=0, it returns, leavind D/Stp unchanged.
14812 
14813   -- ALGLIB --
14814      Copyright 01.04.2010 by Bochkanov Sergey
14815 *************************************************************************/
linminnormalized(ae_vector * d,double * stp,ae_int_t n,ae_state * _state)14816 void linminnormalized(/* Real    */ ae_vector* d,
14817      double* stp,
14818      ae_int_t n,
14819      ae_state *_state)
14820 {
14821     double mx;
14822     double s;
14823     ae_int_t i;
14824 
14825 
14826 
14827     /*
14828      * first, scale D to avoid underflow/overflow durng squaring
14829      */
14830     mx = (double)(0);
14831     for(i=0; i<=n-1; i++)
14832     {
14833         mx = ae_maxreal(mx, ae_fabs(d->ptr.p_double[i], _state), _state);
14834     }
14835     if( ae_fp_eq(mx,(double)(0)) )
14836     {
14837         return;
14838     }
14839     s = 1/mx;
14840     ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
14841     *stp = *stp/s;
14842 
14843     /*
14844      * normalize D
14845      */
14846     s = ae_v_dotproduct(&d->ptr.p_double[0], 1, &d->ptr.p_double[0], 1, ae_v_len(0,n-1));
14847     s = 1/ae_sqrt(s, _state);
14848     ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
14849     *stp = *stp/s;
14850 }
14851 
14852 
14853 /*************************************************************************
14854 THE  PURPOSE  OF  MCSRCH  IS  TO  FIND A STEP WHICH SATISFIES A SUFFICIENT
14855 DECREASE CONDITION AND A CURVATURE CONDITION.
14856 
14857 AT EACH STAGE THE SUBROUTINE  UPDATES  AN  INTERVAL  OF  UNCERTAINTY  WITH
14858 ENDPOINTS  STX  AND  STY.  THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
14859 SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION
14860 
14861     F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).
14862 
14863 IF  A STEP  IS OBTAINED FOR  WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
14864 FUNCTION  VALUE  AND  NONNEGATIVE  DERIVATIVE,   THEN   THE   INTERVAL  OF
14865 UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).
14866 
14867 THE  ALGORITHM  IS  DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
14868 DECREASE CONDITION
14869 
14870     F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),
14871 
14872 AND THE CURVATURE CONDITION
14873 
14874     ABS(GRADF(X+STP*S)'S)) .LE. GTOL*ABS(GRADF(X)'S).
14875 
14876 IF  FTOL  IS  LESS  THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
14877 BELOW,  THEN  THERE  IS  ALWAYS  A  STEP  WHICH SATISFIES BOTH CONDITIONS.
14878 IF  NO  STEP  CAN BE FOUND  WHICH  SATISFIES  BOTH  CONDITIONS,  THEN  THE
14879 ALGORITHM  USUALLY STOPS  WHEN  ROUNDING ERRORS  PREVENT FURTHER PROGRESS.
14880 IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.
14881 
14882 
14883 :::::::::::::IMPORTANT NOTES:::::::::::::
14884 
14885 NOTE 1:
14886 
14887 This routine  guarantees that it will stop at the last point where function
14888 value was calculated. It won't make several additional function evaluations
14889 after finding good point. So if you store function evaluations requested by
14890 this routine, you can be sure that last one is the point where we've stopped.
14891 
14892 NOTE 2:
14893 
14894 when 0<StpMax<StpMin, algorithm will terminate with INFO=5 and Stp=StpMax
14895 
14896 NOTE 3:
14897 
14898 this algorithm guarantees that, if MCINFO=1 or MCINFO=5, then:
14899 * F(final_point)<F(initial_point) - strict inequality
14900 * final_point<>initial_point - after rounding to machine precision
14901 
14902 NOTE 4:
14903 
14904 when non-descent direction is specified, algorithm stops with MCINFO=0,
14905 Stp=0 and initial point at X[].
14906 :::::::::::::::::::::::::::::::::::::::::
14907 
14908 
14909 PARAMETERS DESCRIPRION
14910 
14911 STAGE IS ZERO ON FIRST CALL, ZERO ON FINAL EXIT
14912 
14913 N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.
14914 
14915 X IS  AN  ARRAY  OF  LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
14916 THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.
14917 
14918 F IS  A  VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
14919 IT CONTAINS THE VALUE OF F AT X + STP*S.
14920 
14921 G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X.
14922 ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S.
14923 
14924 S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION.
14925 
14926 STP  IS  A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE
14927 OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE.
14928 
14929 FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE
14930 SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE
14931 SATISFIED.
14932 
14933 XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE
14934 WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL.
14935 
14936 STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER  AND
14937 UPPER BOUNDS FOR THE STEP.
14938 
14939 MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE
14940 NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION.
14941 
14942 INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS:
14943     INFO = 0  IMPROPER INPUT PARAMETERS.
14944 
14945     INFO = 1  THE SUFFICIENT DECREASE CONDITION AND THE
14946               DIRECTIONAL DERIVATIVE CONDITION HOLD.
14947 
14948     INFO = 2  RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY
14949               IS AT MOST XTOL.
14950 
14951     INFO = 3  NUMBER OF CALLS TO FCN HAS REACHED MAXFEV.
14952 
14953     INFO = 4  THE STEP IS AT THE LOWER BOUND STPMIN.
14954 
14955     INFO = 5  THE STEP IS AT THE UPPER BOUND STPMAX.
14956 
14957     INFO = 6  ROUNDING ERRORS PREVENT FURTHER PROGRESS.
14958               THERE MAY NOT BE A STEP WHICH SATISFIES THE
14959               SUFFICIENT DECREASE AND CURVATURE CONDITIONS.
14960               TOLERANCES MAY BE TOO SMALL.
14961 
14962 NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN.
14963 
14964 WA IS A WORK ARRAY OF LENGTH N.
14965 
14966 ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983
14967 JORGE J. MORE', DAVID J. THUENTE
14968 *************************************************************************/
mcsrch(ae_int_t n,ae_vector * x,double * f,ae_vector * g,ae_vector * s,double * stp,double stpmax,double gtol,ae_int_t * info,ae_int_t * nfev,ae_vector * wa,linminstate * state,ae_int_t * stage,ae_state * _state)14969 void mcsrch(ae_int_t n,
14970      /* Real    */ ae_vector* x,
14971      double* f,
14972      /* Real    */ ae_vector* g,
14973      /* Real    */ ae_vector* s,
14974      double* stp,
14975      double stpmax,
14976      double gtol,
14977      ae_int_t* info,
14978      ae_int_t* nfev,
14979      /* Real    */ ae_vector* wa,
14980      linminstate* state,
14981      ae_int_t* stage,
14982      ae_state *_state)
14983 {
14984     ae_int_t i;
14985     double v;
14986     double p5;
14987     double p66;
14988     double zero;
14989 
14990 
14991 
14992     /*
14993      * init
14994      */
14995     p5 = 0.5;
14996     p66 = 0.66;
14997     state->xtrapf = 4.0;
14998     zero = (double)(0);
14999     if( ae_fp_eq(stpmax,(double)(0)) )
15000     {
15001         stpmax = linmin_defstpmax;
15002     }
15003     if( ae_fp_less(*stp,linmin_stpmin) )
15004     {
15005         *stp = linmin_stpmin;
15006     }
15007     if( ae_fp_greater(*stp,stpmax) )
15008     {
15009         *stp = stpmax;
15010     }
15011 
15012     /*
15013      * Main cycle
15014      */
15015     for(;;)
15016     {
15017         if( *stage==0 )
15018         {
15019 
15020             /*
15021              * NEXT
15022              */
15023             *stage = 2;
15024             continue;
15025         }
15026         if( *stage==2 )
15027         {
15028             state->infoc = 1;
15029             *info = 0;
15030 
15031             /*
15032              *     CHECK THE INPUT PARAMETERS FOR ERRORS.
15033              */
15034             if( ae_fp_less(stpmax,linmin_stpmin)&&ae_fp_greater(stpmax,(double)(0)) )
15035             {
15036                 *info = 5;
15037                 *stp = stpmax;
15038                 *stage = 0;
15039                 return;
15040             }
15041             if( ((((((n<=0||ae_fp_less_eq(*stp,(double)(0)))||ae_fp_less(linmin_ftol,(double)(0)))||ae_fp_less(gtol,zero))||ae_fp_less(linmin_xtol,zero))||ae_fp_less(linmin_stpmin,zero))||ae_fp_less(stpmax,linmin_stpmin))||linmin_maxfev<=0 )
15042             {
15043                 *stage = 0;
15044                 return;
15045             }
15046 
15047             /*
15048              *     COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION
15049              *     AND CHECK THAT S IS A DESCENT DIRECTION.
15050              */
15051             v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
15052             state->dginit = v;
15053             if( ae_fp_greater_eq(state->dginit,(double)(0)) )
15054             {
15055                 *stage = 0;
15056                 *stp = (double)(0);
15057                 return;
15058             }
15059 
15060             /*
15061              *     INITIALIZE LOCAL VARIABLES.
15062              */
15063             state->brackt = ae_false;
15064             state->stage1 = ae_true;
15065             *nfev = 0;
15066             state->finit = *f;
15067             state->dgtest = linmin_ftol*state->dginit;
15068             state->width = stpmax-linmin_stpmin;
15069             state->width1 = state->width/p5;
15070             ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
15071 
15072             /*
15073              *     THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP,
15074              *     FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP.
15075              *     THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP,
15076              *     FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF
15077              *     THE INTERVAL OF UNCERTAINTY.
15078              *     THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP,
15079              *     FUNCTION, AND DERIVATIVE AT THE CURRENT STEP.
15080              */
15081             state->stx = (double)(0);
15082             state->fx = state->finit;
15083             state->dgx = state->dginit;
15084             state->sty = (double)(0);
15085             state->fy = state->finit;
15086             state->dgy = state->dginit;
15087 
15088             /*
15089              * NEXT
15090              */
15091             *stage = 3;
15092             continue;
15093         }
15094         if( *stage==3 )
15095         {
15096 
15097             /*
15098              *     START OF ITERATION.
15099              *
15100              *     SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND
15101              *     TO THE PRESENT INTERVAL OF UNCERTAINTY.
15102              */
15103             if( state->brackt )
15104             {
15105                 if( ae_fp_less(state->stx,state->sty) )
15106                 {
15107                     state->stmin = state->stx;
15108                     state->stmax = state->sty;
15109                 }
15110                 else
15111                 {
15112                     state->stmin = state->sty;
15113                     state->stmax = state->stx;
15114                 }
15115             }
15116             else
15117             {
15118                 state->stmin = state->stx;
15119                 state->stmax = *stp+state->xtrapf*(*stp-state->stx);
15120             }
15121 
15122             /*
15123              *        FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN.
15124              */
15125             if( ae_fp_greater(*stp,stpmax) )
15126             {
15127                 *stp = stpmax;
15128             }
15129             if( ae_fp_less(*stp,linmin_stpmin) )
15130             {
15131                 *stp = linmin_stpmin;
15132             }
15133 
15134             /*
15135              *        IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET
15136              *        STP BE THE LOWEST POINT OBTAINED SO FAR.
15137              */
15138             if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=linmin_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax)) )
15139             {
15140                 *stp = state->stx;
15141             }
15142 
15143             /*
15144              *        EVALUATE THE FUNCTION AND GRADIENT AT STP
15145              *        AND COMPUTE THE DIRECTIONAL DERIVATIVE.
15146              */
15147             ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1));
15148             ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp);
15149 
15150             /*
15151              * NEXT
15152              */
15153             *stage = 4;
15154             return;
15155         }
15156         if( *stage==4 )
15157         {
15158             *info = 0;
15159             *nfev = *nfev+1;
15160             v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
15161             state->dg = v;
15162             state->ftest1 = state->finit+*stp*state->dgtest;
15163 
15164             /*
15165              *        TEST FOR CONVERGENCE.
15166              */
15167             if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 )
15168             {
15169                 *info = 6;
15170             }
15171             if( ((ae_fp_eq(*stp,stpmax)&&ae_fp_less(*f,state->finit))&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) )
15172             {
15173                 *info = 5;
15174             }
15175             if( ae_fp_eq(*stp,linmin_stpmin)&&((ae_fp_greater_eq(*f,state->finit)||ae_fp_greater(*f,state->ftest1))||ae_fp_greater_eq(state->dg,state->dgtest)) )
15176             {
15177                 *info = 4;
15178             }
15179             if( *nfev>=linmin_maxfev )
15180             {
15181                 *info = 3;
15182             }
15183             if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax) )
15184             {
15185                 *info = 2;
15186             }
15187             if( (ae_fp_less(*f,state->finit)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(ae_fabs(state->dg, _state),-gtol*state->dginit) )
15188             {
15189                 *info = 1;
15190             }
15191 
15192             /*
15193              *        CHECK FOR TERMINATION.
15194              */
15195             if( *info!=0 )
15196             {
15197 
15198                 /*
15199                  * Check guarantees provided by the function for INFO=1 or INFO=5
15200                  */
15201                 if( *info==1||*info==5 )
15202                 {
15203                     v = 0.0;
15204                     for(i=0; i<=n-1; i++)
15205                     {
15206                         v = v+(wa->ptr.p_double[i]-x->ptr.p_double[i])*(wa->ptr.p_double[i]-x->ptr.p_double[i]);
15207                     }
15208                     if( ae_fp_greater_eq(*f,state->finit)||ae_fp_eq(v,0.0) )
15209                     {
15210                         *info = 6;
15211                     }
15212                 }
15213                 *stage = 0;
15214                 return;
15215             }
15216 
15217             /*
15218              *        IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED
15219              *        FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE.
15220              */
15221             if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(linmin_ftol, gtol, _state)*state->dginit) )
15222             {
15223                 state->stage1 = ae_false;
15224             }
15225 
15226             /*
15227              *        A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF
15228              *        WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED
15229              *        FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE
15230              *        DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN
15231              *        OBTAINED BUT THE DECREASE IS NOT SUFFICIENT.
15232              */
15233             if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) )
15234             {
15235 
15236                 /*
15237                  *           DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES.
15238                  */
15239                 state->fm = *f-*stp*state->dgtest;
15240                 state->fxm = state->fx-state->stx*state->dgtest;
15241                 state->fym = state->fy-state->sty*state->dgtest;
15242                 state->dgm = state->dg-state->dgtest;
15243                 state->dgxm = state->dgx-state->dgtest;
15244                 state->dgym = state->dgy-state->dgtest;
15245 
15246                 /*
15247                  *           CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
15248                  *           AND TO COMPUTE THE NEW STEP.
15249                  */
15250                 linmin_mcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
15251 
15252                 /*
15253                  *           RESET THE FUNCTION AND GRADIENT VALUES FOR F.
15254                  */
15255                 state->fx = state->fxm+state->stx*state->dgtest;
15256                 state->fy = state->fym+state->sty*state->dgtest;
15257                 state->dgx = state->dgxm+state->dgtest;
15258                 state->dgy = state->dgym+state->dgtest;
15259             }
15260             else
15261             {
15262 
15263                 /*
15264                  *           CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
15265                  *           AND TO COMPUTE THE NEW STEP.
15266                  */
15267                 linmin_mcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
15268             }
15269 
15270             /*
15271              *        FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE
15272              *        INTERVAL OF UNCERTAINTY.
15273              */
15274             if( state->brackt )
15275             {
15276                 if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) )
15277                 {
15278                     *stp = state->stx+p5*(state->sty-state->stx);
15279                 }
15280                 state->width1 = state->width;
15281                 state->width = ae_fabs(state->sty-state->stx, _state);
15282             }
15283 
15284             /*
15285              *  NEXT.
15286              */
15287             *stage = 3;
15288             continue;
15289         }
15290     }
15291 }
15292 
15293 
15294 /*************************************************************************
15295 These functions perform Armijo line search using  at  most  FMAX  function
15296 evaluations.  It  doesn't  enforce  some  kind  of  " sufficient decrease"
15297 criterion - it just tries different Armijo steps and returns optimum found
15298 so far.
15299 
15300 Optimization is done using F-rcomm interface:
15301 * ArmijoCreate initializes State structure
15302   (reusing previously allocated buffers)
15303 * ArmijoIteration is subsequently called
15304 * ArmijoResults returns results
15305 
15306 INPUT PARAMETERS:
15307     N       -   problem size
15308     X       -   array[N], starting point
15309     F       -   F(X+S*STP)
15310     S       -   step direction, S>0
15311     STP     -   step length
15312     STPMAX  -   maximum value for STP or zero (if no limit is imposed)
15313     FMAX    -   maximum number of function evaluations
15314     State   -   optimization state
15315 
15316   -- ALGLIB --
15317      Copyright 05.10.2010 by Bochkanov Sergey
15318 *************************************************************************/
armijocreate(ae_int_t n,ae_vector * x,double f,ae_vector * s,double stp,double stpmax,ae_int_t fmax,armijostate * state,ae_state * _state)15319 void armijocreate(ae_int_t n,
15320      /* Real    */ ae_vector* x,
15321      double f,
15322      /* Real    */ ae_vector* s,
15323      double stp,
15324      double stpmax,
15325      ae_int_t fmax,
15326      armijostate* state,
15327      ae_state *_state)
15328 {
15329 
15330 
15331     if( state->x.cnt<n )
15332     {
15333         ae_vector_set_length(&state->x, n, _state);
15334     }
15335     if( state->xbase.cnt<n )
15336     {
15337         ae_vector_set_length(&state->xbase, n, _state);
15338     }
15339     if( state->s.cnt<n )
15340     {
15341         ae_vector_set_length(&state->s, n, _state);
15342     }
15343     state->stpmax = stpmax;
15344     state->fmax = fmax;
15345     state->stplen = stp;
15346     state->fcur = f;
15347     state->n = n;
15348     ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
15349     ae_v_move(&state->s.ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
15350     ae_vector_set_length(&state->rstate.ia, 0+1, _state);
15351     ae_vector_set_length(&state->rstate.ra, 0+1, _state);
15352     state->rstate.stage = -1;
15353 }
15354 
15355 
15356 /*************************************************************************
15357 This is rcomm-based search function
15358 
15359   -- ALGLIB --
15360      Copyright 05.10.2010 by Bochkanov Sergey
15361 *************************************************************************/
armijoiteration(armijostate * state,ae_state * _state)15362 ae_bool armijoiteration(armijostate* state, ae_state *_state)
15363 {
15364     double v;
15365     ae_int_t n;
15366     ae_bool result;
15367 
15368 
15369 
15370     /*
15371      * Reverse communication preparations
15372      * I know it looks ugly, but it works the same way
15373      * anywhere from C++ to Python.
15374      *
15375      * This code initializes locals by:
15376      * * random values determined during code
15377      *   generation - on first subroutine call
15378      * * values from previous call - on subsequent calls
15379      */
15380     if( state->rstate.stage>=0 )
15381     {
15382         n = state->rstate.ia.ptr.p_int[0];
15383         v = state->rstate.ra.ptr.p_double[0];
15384     }
15385     else
15386     {
15387         n = 359;
15388         v = -58;
15389     }
15390     if( state->rstate.stage==0 )
15391     {
15392         goto lbl_0;
15393     }
15394     if( state->rstate.stage==1 )
15395     {
15396         goto lbl_1;
15397     }
15398     if( state->rstate.stage==2 )
15399     {
15400         goto lbl_2;
15401     }
15402     if( state->rstate.stage==3 )
15403     {
15404         goto lbl_3;
15405     }
15406 
15407     /*
15408      * Routine body
15409      */
15410     if( (ae_fp_less_eq(state->stplen,(double)(0))||ae_fp_less(state->stpmax,(double)(0)))||state->fmax<2 )
15411     {
15412         state->info = 0;
15413         result = ae_false;
15414         return result;
15415     }
15416     if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
15417     {
15418         state->info = 4;
15419         result = ae_false;
15420         return result;
15421     }
15422     n = state->n;
15423     state->nfev = 0;
15424 
15425     /*
15426      * We always need F
15427      */
15428     state->needf = ae_true;
15429 
15430     /*
15431      * Bound StpLen
15432      */
15433     if( ae_fp_greater(state->stplen,state->stpmax)&&ae_fp_neq(state->stpmax,(double)(0)) )
15434     {
15435         state->stplen = state->stpmax;
15436     }
15437 
15438     /*
15439      * Increase length
15440      */
15441     v = state->stplen*linmin_armijofactor;
15442     if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,(double)(0)) )
15443     {
15444         v = state->stpmax;
15445     }
15446     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15447     ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15448     state->rstate.stage = 0;
15449     goto lbl_rcomm;
15450 lbl_0:
15451     state->nfev = state->nfev+1;
15452     if( ae_fp_greater_eq(state->f,state->fcur) )
15453     {
15454         goto lbl_4;
15455     }
15456     state->stplen = v;
15457     state->fcur = state->f;
15458 lbl_6:
15459     if( ae_false )
15460     {
15461         goto lbl_7;
15462     }
15463 
15464     /*
15465      * test stopping conditions
15466      */
15467     if( state->nfev>=state->fmax )
15468     {
15469         state->info = 3;
15470         result = ae_false;
15471         return result;
15472     }
15473     if( ae_fp_greater_eq(state->stplen,state->stpmax) )
15474     {
15475         state->info = 5;
15476         result = ae_false;
15477         return result;
15478     }
15479 
15480     /*
15481      * evaluate F
15482      */
15483     v = state->stplen*linmin_armijofactor;
15484     if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,(double)(0)) )
15485     {
15486         v = state->stpmax;
15487     }
15488     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15489     ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15490     state->rstate.stage = 1;
15491     goto lbl_rcomm;
15492 lbl_1:
15493     state->nfev = state->nfev+1;
15494 
15495     /*
15496      * make decision
15497      */
15498     if( ae_fp_less(state->f,state->fcur) )
15499     {
15500         state->stplen = v;
15501         state->fcur = state->f;
15502     }
15503     else
15504     {
15505         state->info = 1;
15506         result = ae_false;
15507         return result;
15508     }
15509     goto lbl_6;
15510 lbl_7:
15511 lbl_4:
15512 
15513     /*
15514      * Decrease length
15515      */
15516     v = state->stplen/linmin_armijofactor;
15517     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15518     ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15519     state->rstate.stage = 2;
15520     goto lbl_rcomm;
15521 lbl_2:
15522     state->nfev = state->nfev+1;
15523     if( ae_fp_greater_eq(state->f,state->fcur) )
15524     {
15525         goto lbl_8;
15526     }
15527     state->stplen = state->stplen/linmin_armijofactor;
15528     state->fcur = state->f;
15529 lbl_10:
15530     if( ae_false )
15531     {
15532         goto lbl_11;
15533     }
15534 
15535     /*
15536      * test stopping conditions
15537      */
15538     if( state->nfev>=state->fmax )
15539     {
15540         state->info = 3;
15541         result = ae_false;
15542         return result;
15543     }
15544     if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
15545     {
15546         state->info = 4;
15547         result = ae_false;
15548         return result;
15549     }
15550 
15551     /*
15552      * evaluate F
15553      */
15554     v = state->stplen/linmin_armijofactor;
15555     ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
15556     ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
15557     state->rstate.stage = 3;
15558     goto lbl_rcomm;
15559 lbl_3:
15560     state->nfev = state->nfev+1;
15561 
15562     /*
15563      * make decision
15564      */
15565     if( ae_fp_less(state->f,state->fcur) )
15566     {
15567         state->stplen = state->stplen/linmin_armijofactor;
15568         state->fcur = state->f;
15569     }
15570     else
15571     {
15572         state->info = 1;
15573         result = ae_false;
15574         return result;
15575     }
15576     goto lbl_10;
15577 lbl_11:
15578 lbl_8:
15579 
15580     /*
15581      * Nothing to be done
15582      */
15583     state->info = 1;
15584     result = ae_false;
15585     return result;
15586 
15587     /*
15588      * Saving state
15589      */
15590 lbl_rcomm:
15591     result = ae_true;
15592     state->rstate.ia.ptr.p_int[0] = n;
15593     state->rstate.ra.ptr.p_double[0] = v;
15594     return result;
15595 }
15596 
15597 
15598 /*************************************************************************
15599 Results of Armijo search
15600 
15601 OUTPUT PARAMETERS:
15602     INFO    -   on output it is set to one of the return codes:
15603                 * 0     improper input params
15604                 * 1     optimum step is found with at most FMAX evaluations
15605                 * 3     FMAX evaluations were used,
15606                         X contains optimum found so far
15607                 * 4     step is at lower bound STPMIN
15608                 * 5     step is at upper bound
15609     STP     -   step length (in case of failure it is still returned)
15610     F       -   function value (in case of failure it is still returned)
15611 
15612   -- ALGLIB --
15613      Copyright 05.10.2010 by Bochkanov Sergey
15614 *************************************************************************/
armijoresults(armijostate * state,ae_int_t * info,double * stp,double * f,ae_state * _state)15615 void armijoresults(armijostate* state,
15616      ae_int_t* info,
15617      double* stp,
15618      double* f,
15619      ae_state *_state)
15620 {
15621 
15622 
15623     *info = state->info;
15624     *stp = state->stplen;
15625     *f = state->fcur;
15626 }
15627 
15628 
linmin_mcstep(double * stx,double * fx,double * dx,double * sty,double * fy,double * dy,double * stp,double fp,double dp,ae_bool * brackt,double stmin,double stmax,ae_int_t * info,ae_state * _state)15629 static void linmin_mcstep(double* stx,
15630      double* fx,
15631      double* dx,
15632      double* sty,
15633      double* fy,
15634      double* dy,
15635      double* stp,
15636      double fp,
15637      double dp,
15638      ae_bool* brackt,
15639      double stmin,
15640      double stmax,
15641      ae_int_t* info,
15642      ae_state *_state)
15643 {
15644     ae_bool bound;
15645     double gamma;
15646     double p;
15647     double q;
15648     double r;
15649     double s;
15650     double sgnd;
15651     double stpc;
15652     double stpf;
15653     double stpq;
15654     double theta;
15655 
15656 
15657     *info = 0;
15658 
15659     /*
15660      *     CHECK THE INPUT PARAMETERS FOR ERRORS.
15661      */
15662     if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),(double)(0)))||ae_fp_less(stmax,stmin) )
15663     {
15664         return;
15665     }
15666 
15667     /*
15668      *     DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN.
15669      */
15670     sgnd = dp*(*dx/ae_fabs(*dx, _state));
15671 
15672     /*
15673      *     FIRST CASE. A HIGHER FUNCTION VALUE.
15674      *     THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER
15675      *     TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN,
15676      *     ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN.
15677      */
15678     if( ae_fp_greater(fp,*fx) )
15679     {
15680         *info = 1;
15681         bound = ae_true;
15682         theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
15683         s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
15684         gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
15685         if( ae_fp_less(*stp,*stx) )
15686         {
15687             gamma = -gamma;
15688         }
15689         p = gamma-(*dx)+theta;
15690         q = gamma-(*dx)+gamma+dp;
15691         r = p/q;
15692         stpc = *stx+r*(*stp-(*stx));
15693         stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
15694         if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) )
15695         {
15696             stpf = stpc;
15697         }
15698         else
15699         {
15700             stpf = stpc+(stpq-stpc)/2;
15701         }
15702         *brackt = ae_true;
15703     }
15704     else
15705     {
15706         if( ae_fp_less(sgnd,(double)(0)) )
15707         {
15708 
15709             /*
15710              *     SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF
15711              *     OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC
15712              *     STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP,
15713              *     THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN.
15714              */
15715             *info = 2;
15716             bound = ae_false;
15717             theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
15718             s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
15719             gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
15720             if( ae_fp_greater(*stp,*stx) )
15721             {
15722                 gamma = -gamma;
15723             }
15724             p = gamma-dp+theta;
15725             q = gamma-dp+gamma+(*dx);
15726             r = p/q;
15727             stpc = *stp+r*(*stx-(*stp));
15728             stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
15729             if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) )
15730             {
15731                 stpf = stpc;
15732             }
15733             else
15734             {
15735                 stpf = stpq;
15736             }
15737             *brackt = ae_true;
15738         }
15739         else
15740         {
15741             if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) )
15742             {
15743 
15744                 /*
15745                  *     THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
15746                  *     SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES.
15747                  *     THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY
15748                  *     IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC
15749                  *     IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE
15750                  *     EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO
15751                  *     COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE THE STEP
15752                  *     CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN.
15753                  */
15754                 *info = 3;
15755                 bound = ae_true;
15756                 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
15757                 s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
15758 
15759                 /*
15760                  *        THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND
15761                  *        TO INFINITY IN THE DIRECTION OF THE STEP.
15762                  */
15763                 gamma = s*ae_sqrt(ae_maxreal((double)(0), ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state);
15764                 if( ae_fp_greater(*stp,*stx) )
15765                 {
15766                     gamma = -gamma;
15767                 }
15768                 p = gamma-dp+theta;
15769                 q = gamma+(*dx-dp)+gamma;
15770                 r = p/q;
15771                 if( ae_fp_less(r,(double)(0))&&ae_fp_neq(gamma,(double)(0)) )
15772                 {
15773                     stpc = *stp+r*(*stx-(*stp));
15774                 }
15775                 else
15776                 {
15777                     if( ae_fp_greater(*stp,*stx) )
15778                     {
15779                         stpc = stmax;
15780                     }
15781                     else
15782                     {
15783                         stpc = stmin;
15784                     }
15785                 }
15786                 stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
15787                 if( *brackt )
15788                 {
15789                     if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
15790                     {
15791                         stpf = stpc;
15792                     }
15793                     else
15794                     {
15795                         stpf = stpq;
15796                     }
15797                 }
15798                 else
15799                 {
15800                     if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
15801                     {
15802                         stpf = stpc;
15803                     }
15804                     else
15805                     {
15806                         stpf = stpq;
15807                     }
15808                 }
15809             }
15810             else
15811             {
15812 
15813                 /*
15814                  *     FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
15815                  *     SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES
15816                  *     NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP
15817                  *     IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN.
15818                  */
15819                 *info = 4;
15820                 bound = ae_false;
15821                 if( *brackt )
15822                 {
15823                     theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
15824                     s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state);
15825                     gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
15826                     if( ae_fp_greater(*stp,*sty) )
15827                     {
15828                         gamma = -gamma;
15829                     }
15830                     p = gamma-dp+theta;
15831                     q = gamma-dp+gamma+(*dy);
15832                     r = p/q;
15833                     stpc = *stp+r*(*sty-(*stp));
15834                     stpf = stpc;
15835                 }
15836                 else
15837                 {
15838                     if( ae_fp_greater(*stp,*stx) )
15839                     {
15840                         stpf = stmax;
15841                     }
15842                     else
15843                     {
15844                         stpf = stmin;
15845                     }
15846                 }
15847             }
15848         }
15849     }
15850 
15851     /*
15852      *     UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT
15853      *     DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE.
15854      */
15855     if( ae_fp_greater(fp,*fx) )
15856     {
15857         *sty = *stp;
15858         *fy = fp;
15859         *dy = dp;
15860     }
15861     else
15862     {
15863         if( ae_fp_less(sgnd,0.0) )
15864         {
15865             *sty = *stx;
15866             *fy = *fx;
15867             *dy = *dx;
15868         }
15869         *stx = *stp;
15870         *fx = fp;
15871         *dx = dp;
15872     }
15873 
15874     /*
15875      *     COMPUTE THE NEW STEP AND SAFEGUARD IT.
15876      */
15877     stpf = ae_minreal(stmax, stpf, _state);
15878     stpf = ae_maxreal(stmin, stpf, _state);
15879     *stp = stpf;
15880     if( *brackt&&bound )
15881     {
15882         if( ae_fp_greater(*sty,*stx) )
15883         {
15884             *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
15885         }
15886         else
15887         {
15888             *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
15889         }
15890     }
15891 }
15892 
15893 
_linminstate_init(void * _p,ae_state * _state,ae_bool make_automatic)15894 void _linminstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
15895 {
15896     linminstate *p = (linminstate*)_p;
15897     ae_touch_ptr((void*)p);
15898 }
15899 
15900 
_linminstate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)15901 void _linminstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
15902 {
15903     linminstate *dst = (linminstate*)_dst;
15904     linminstate *src = (linminstate*)_src;
15905     dst->brackt = src->brackt;
15906     dst->stage1 = src->stage1;
15907     dst->infoc = src->infoc;
15908     dst->dg = src->dg;
15909     dst->dgm = src->dgm;
15910     dst->dginit = src->dginit;
15911     dst->dgtest = src->dgtest;
15912     dst->dgx = src->dgx;
15913     dst->dgxm = src->dgxm;
15914     dst->dgy = src->dgy;
15915     dst->dgym = src->dgym;
15916     dst->finit = src->finit;
15917     dst->ftest1 = src->ftest1;
15918     dst->fm = src->fm;
15919     dst->fx = src->fx;
15920     dst->fxm = src->fxm;
15921     dst->fy = src->fy;
15922     dst->fym = src->fym;
15923     dst->stx = src->stx;
15924     dst->sty = src->sty;
15925     dst->stmin = src->stmin;
15926     dst->stmax = src->stmax;
15927     dst->width = src->width;
15928     dst->width1 = src->width1;
15929     dst->xtrapf = src->xtrapf;
15930 }
15931 
15932 
_linminstate_clear(void * _p)15933 void _linminstate_clear(void* _p)
15934 {
15935     linminstate *p = (linminstate*)_p;
15936     ae_touch_ptr((void*)p);
15937 }
15938 
15939 
_linminstate_destroy(void * _p)15940 void _linminstate_destroy(void* _p)
15941 {
15942     linminstate *p = (linminstate*)_p;
15943     ae_touch_ptr((void*)p);
15944 }
15945 
15946 
_armijostate_init(void * _p,ae_state * _state,ae_bool make_automatic)15947 void _armijostate_init(void* _p, ae_state *_state, ae_bool make_automatic)
15948 {
15949     armijostate *p = (armijostate*)_p;
15950     ae_touch_ptr((void*)p);
15951     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
15952     ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic);
15953     ae_vector_init(&p->s, 0, DT_REAL, _state, make_automatic);
15954     _rcommstate_init(&p->rstate, _state, make_automatic);
15955 }
15956 
15957 
_armijostate_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)15958 void _armijostate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
15959 {
15960     armijostate *dst = (armijostate*)_dst;
15961     armijostate *src = (armijostate*)_src;
15962     dst->needf = src->needf;
15963     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
15964     dst->f = src->f;
15965     dst->n = src->n;
15966     ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic);
15967     ae_vector_init_copy(&dst->s, &src->s, _state, make_automatic);
15968     dst->stplen = src->stplen;
15969     dst->fcur = src->fcur;
15970     dst->stpmax = src->stpmax;
15971     dst->fmax = src->fmax;
15972     dst->nfev = src->nfev;
15973     dst->info = src->info;
15974     _rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic);
15975 }
15976 
15977 
_armijostate_clear(void * _p)15978 void _armijostate_clear(void* _p)
15979 {
15980     armijostate *p = (armijostate*)_p;
15981     ae_touch_ptr((void*)p);
15982     ae_vector_clear(&p->x);
15983     ae_vector_clear(&p->xbase);
15984     ae_vector_clear(&p->s);
15985     _rcommstate_clear(&p->rstate);
15986 }
15987 
15988 
_armijostate_destroy(void * _p)15989 void _armijostate_destroy(void* _p)
15990 {
15991     armijostate *p = (armijostate*)_p;
15992     ae_touch_ptr((void*)p);
15993     ae_vector_destroy(&p->x);
15994     ae_vector_destroy(&p->xbase);
15995     ae_vector_destroy(&p->s);
15996     _rcommstate_destroy(&p->rstate);
15997 }
15998 
15999 
16000 #endif
16001 #if defined(AE_COMPILE_NEARUNITYUNIT) || !defined(AE_PARTIAL_BUILD)
16002 
16003 
nulog1p(double x,ae_state * _state)16004 double nulog1p(double x, ae_state *_state)
16005 {
16006     double z;
16007     double lp;
16008     double lq;
16009     double result;
16010 
16011 
16012     z = 1.0+x;
16013     if( ae_fp_less(z,0.70710678118654752440)||ae_fp_greater(z,1.41421356237309504880) )
16014     {
16015         result = ae_log(z, _state);
16016         return result;
16017     }
16018     z = x*x;
16019     lp = 4.5270000862445199635215E-5;
16020     lp = lp*x+4.9854102823193375972212E-1;
16021     lp = lp*x+6.5787325942061044846969E0;
16022     lp = lp*x+2.9911919328553073277375E1;
16023     lp = lp*x+6.0949667980987787057556E1;
16024     lp = lp*x+5.7112963590585538103336E1;
16025     lp = lp*x+2.0039553499201281259648E1;
16026     lq = 1.0000000000000000000000E0;
16027     lq = lq*x+1.5062909083469192043167E1;
16028     lq = lq*x+8.3047565967967209469434E1;
16029     lq = lq*x+2.2176239823732856465394E2;
16030     lq = lq*x+3.0909872225312059774938E2;
16031     lq = lq*x+2.1642788614495947685003E2;
16032     lq = lq*x+6.0118660497603843919306E1;
16033     z = -0.5*z+x*(z*lp/lq);
16034     result = x+z;
16035     return result;
16036 }
16037 
16038 
nuexpm1(double x,ae_state * _state)16039 double nuexpm1(double x, ae_state *_state)
16040 {
16041     double r;
16042     double xx;
16043     double ep;
16044     double eq;
16045     double result;
16046 
16047 
16048     if( ae_fp_less(x,-0.5)||ae_fp_greater(x,0.5) )
16049     {
16050         result = ae_exp(x, _state)-1.0;
16051         return result;
16052     }
16053     xx = x*x;
16054     ep = 1.2617719307481059087798E-4;
16055     ep = ep*xx+3.0299440770744196129956E-2;
16056     ep = ep*xx+9.9999999999999999991025E-1;
16057     eq = 3.0019850513866445504159E-6;
16058     eq = eq*xx+2.5244834034968410419224E-3;
16059     eq = eq*xx+2.2726554820815502876593E-1;
16060     eq = eq*xx+2.0000000000000000000897E0;
16061     r = x*ep;
16062     r = r/(eq-r);
16063     result = r+r;
16064     return result;
16065 }
16066 
16067 
nucosm1(double x,ae_state * _state)16068 double nucosm1(double x, ae_state *_state)
16069 {
16070     double xx;
16071     double c;
16072     double result;
16073 
16074 
16075     if( ae_fp_less(x,-0.25*ae_pi)||ae_fp_greater(x,0.25*ae_pi) )
16076     {
16077         result = ae_cos(x, _state)-1;
16078         return result;
16079     }
16080     xx = x*x;
16081     c = 4.7377507964246204691685E-14;
16082     c = c*xx-1.1470284843425359765671E-11;
16083     c = c*xx+2.0876754287081521758361E-9;
16084     c = c*xx-2.7557319214999787979814E-7;
16085     c = c*xx+2.4801587301570552304991E-5;
16086     c = c*xx-1.3888888888888872993737E-3;
16087     c = c*xx+4.1666666666666666609054E-2;
16088     result = -0.5*xx+xx*xx*c;
16089     return result;
16090 }
16091 
16092 
16093 #endif
16094 #if defined(AE_COMPILE_NTHEORY) || !defined(AE_PARTIAL_BUILD)
16095 
16096 
findprimitiverootandinverse(ae_int_t n,ae_int_t * proot,ae_int_t * invproot,ae_state * _state)16097 void findprimitiverootandinverse(ae_int_t n,
16098      ae_int_t* proot,
16099      ae_int_t* invproot,
16100      ae_state *_state)
16101 {
16102     ae_int_t candroot;
16103     ae_int_t phin;
16104     ae_int_t q;
16105     ae_int_t f;
16106     ae_bool allnonone;
16107     ae_int_t x;
16108     ae_int_t lastx;
16109     ae_int_t y;
16110     ae_int_t lasty;
16111     ae_int_t a;
16112     ae_int_t b;
16113     ae_int_t t;
16114     ae_int_t n2;
16115 
16116     *proot = 0;
16117     *invproot = 0;
16118 
16119     ae_assert(n>=3, "FindPrimitiveRootAndInverse: N<3", _state);
16120     *proot = 0;
16121     *invproot = 0;
16122 
16123     /*
16124      * check that N is prime
16125      */
16126     ae_assert(ntheory_isprime(n, _state), "FindPrimitiveRoot: N is not prime", _state);
16127 
16128     /*
16129      * Because N is prime, Euler totient function is equal to N-1
16130      */
16131     phin = n-1;
16132 
16133     /*
16134      * Test different values of PRoot - from 2 to N-1.
16135      * One of these values MUST be primitive root.
16136      *
16137      * For testing we use algorithm from Wiki (Primitive root modulo n):
16138      * * compute phi(N)
16139      * * determine the different prime factors of phi(N), say p1, ..., pk
16140      * * for every element m of Zn*, compute m^(phi(N)/pi) mod N for i=1..k
16141      *   using a fast algorithm for modular exponentiation.
16142      * * a number m for which these k results are all different from 1 is a
16143      *   primitive root.
16144      */
16145     for(candroot=2; candroot<=n-1; candroot++)
16146     {
16147 
16148         /*
16149          * We have current candidate root in CandRoot.
16150          *
16151          * Scan different prime factors of PhiN. Here:
16152          * * F is a current candidate factor
16153          * * Q is a current quotient - amount which was left after dividing PhiN
16154          *   by all previous factors
16155          *
16156          * For each factor, perform test mentioned above.
16157          */
16158         q = phin;
16159         f = 2;
16160         allnonone = ae_true;
16161         while(q>1)
16162         {
16163             if( q%f==0 )
16164             {
16165                 t = ntheory_modexp(candroot, phin/f, n, _state);
16166                 if( t==1 )
16167                 {
16168                     allnonone = ae_false;
16169                     break;
16170                 }
16171                 while(q%f==0)
16172                 {
16173                     q = q/f;
16174                 }
16175             }
16176             f = f+1;
16177         }
16178         if( allnonone )
16179         {
16180             *proot = candroot;
16181             break;
16182         }
16183     }
16184     ae_assert(*proot>=2, "FindPrimitiveRoot: internal error (root not found)", _state);
16185 
16186     /*
16187      * Use extended Euclidean algorithm to find multiplicative inverse of primitive root
16188      */
16189     x = 0;
16190     lastx = 1;
16191     y = 1;
16192     lasty = 0;
16193     a = *proot;
16194     b = n;
16195     while(b!=0)
16196     {
16197         q = a/b;
16198         t = a%b;
16199         a = b;
16200         b = t;
16201         t = lastx-q*x;
16202         lastx = x;
16203         x = t;
16204         t = lasty-q*y;
16205         lasty = y;
16206         y = t;
16207     }
16208     while(lastx<0)
16209     {
16210         lastx = lastx+n;
16211     }
16212     *invproot = lastx;
16213 
16214     /*
16215      * Check that it is safe to perform multiplication modulo N.
16216      * Check results for consistency.
16217      */
16218     n2 = (n-1)*(n-1);
16219     ae_assert(n2/(n-1)==n-1, "FindPrimitiveRoot: internal error", _state);
16220     ae_assert(*proot*(*invproot)/(*proot)==(*invproot), "FindPrimitiveRoot: internal error", _state);
16221     ae_assert(*proot*(*invproot)/(*invproot)==(*proot), "FindPrimitiveRoot: internal error", _state);
16222     ae_assert(*proot*(*invproot)%n==1, "FindPrimitiveRoot: internal error", _state);
16223 }
16224 
16225 
ntheory_isprime(ae_int_t n,ae_state * _state)16226 static ae_bool ntheory_isprime(ae_int_t n, ae_state *_state)
16227 {
16228     ae_int_t p;
16229     ae_bool result;
16230 
16231 
16232     result = ae_false;
16233     p = 2;
16234     while(p*p<=n)
16235     {
16236         if( n%p==0 )
16237         {
16238             return result;
16239         }
16240         p = p+1;
16241     }
16242     result = ae_true;
16243     return result;
16244 }
16245 
16246 
ntheory_modmul(ae_int_t a,ae_int_t b,ae_int_t n,ae_state * _state)16247 static ae_int_t ntheory_modmul(ae_int_t a,
16248      ae_int_t b,
16249      ae_int_t n,
16250      ae_state *_state)
16251 {
16252     ae_int_t t;
16253     double ra;
16254     double rb;
16255     ae_int_t result;
16256 
16257 
16258     ae_assert(a>=0&&a<n, "ModMul: A<0 or A>=N", _state);
16259     ae_assert(b>=0&&b<n, "ModMul: B<0 or B>=N", _state);
16260 
16261     /*
16262      * Base cases
16263      */
16264     ra = (double)(a);
16265     rb = (double)(b);
16266     if( b==0||a==0 )
16267     {
16268         result = 0;
16269         return result;
16270     }
16271     if( b==1||a==1 )
16272     {
16273         result = a*b;
16274         return result;
16275     }
16276     if( ae_fp_eq(ra*rb,(double)(a*b)) )
16277     {
16278         result = a*b%n;
16279         return result;
16280     }
16281 
16282     /*
16283      * Non-base cases
16284      */
16285     if( b%2==0 )
16286     {
16287 
16288         /*
16289          * A*B = (A*(B/2)) * 2
16290          *
16291          * Product T=A*(B/2) is calculated recursively, product T*2 is
16292          * calculated as follows:
16293          * * result:=T-N
16294          * * result:=result+T
16295          * * if result<0 then result:=result+N
16296          *
16297          * In case integer result overflows, we generate exception
16298          */
16299         t = ntheory_modmul(a, b/2, n, _state);
16300         result = t-n;
16301         result = result+t;
16302         if( result<0 )
16303         {
16304             result = result+n;
16305         }
16306     }
16307     else
16308     {
16309 
16310         /*
16311          * A*B = (A*(B div 2)) * 2 + A
16312          *
16313          * Product T=A*(B/2) is calculated recursively, product T*2 is
16314          * calculated as follows:
16315          * * result:=T-N
16316          * * result:=result+T
16317          * * if result<0 then result:=result+N
16318          *
16319          * In case integer result overflows, we generate exception
16320          */
16321         t = ntheory_modmul(a, b/2, n, _state);
16322         result = t-n;
16323         result = result+t;
16324         if( result<0 )
16325         {
16326             result = result+n;
16327         }
16328         result = result-n;
16329         result = result+a;
16330         if( result<0 )
16331         {
16332             result = result+n;
16333         }
16334     }
16335     return result;
16336 }
16337 
16338 
ntheory_modexp(ae_int_t a,ae_int_t b,ae_int_t n,ae_state * _state)16339 static ae_int_t ntheory_modexp(ae_int_t a,
16340      ae_int_t b,
16341      ae_int_t n,
16342      ae_state *_state)
16343 {
16344     ae_int_t t;
16345     ae_int_t result;
16346 
16347 
16348     ae_assert(a>=0&&a<n, "ModExp: A<0 or A>=N", _state);
16349     ae_assert(b>=0, "ModExp: B<0", _state);
16350 
16351     /*
16352      * Base cases
16353      */
16354     if( b==0 )
16355     {
16356         result = 1;
16357         return result;
16358     }
16359     if( b==1 )
16360     {
16361         result = a;
16362         return result;
16363     }
16364 
16365     /*
16366      * Non-base cases
16367      */
16368     if( b%2==0 )
16369     {
16370         t = ntheory_modmul(a, a, n, _state);
16371         result = ntheory_modexp(t, b/2, n, _state);
16372     }
16373     else
16374     {
16375         t = ntheory_modmul(a, a, n, _state);
16376         result = ntheory_modexp(t, b/2, n, _state);
16377         result = ntheory_modmul(result, a, n, _state);
16378     }
16379     return result;
16380 }
16381 
16382 
16383 #endif
16384 #if defined(AE_COMPILE_FTBASE) || !defined(AE_PARTIAL_BUILD)
16385 
16386 
16387 /*************************************************************************
16388 This subroutine generates FFT plan for K complex FFT's with length N each.
16389 
16390 INPUT PARAMETERS:
16391     N           -   FFT length (in complex numbers), N>=1
16392     K           -   number of repetitions, K>=1
16393 
16394 OUTPUT PARAMETERS:
16395     Plan        -   plan
16396 
16397   -- ALGLIB --
16398      Copyright 05.04.2013 by Bochkanov Sergey
16399 *************************************************************************/
ftcomplexfftplan(ae_int_t n,ae_int_t k,fasttransformplan * plan,ae_state * _state)16400 void ftcomplexfftplan(ae_int_t n,
16401      ae_int_t k,
16402      fasttransformplan* plan,
16403      ae_state *_state)
16404 {
16405     ae_frame _frame_block;
16406     srealarray bluesteinbuf;
16407     ae_int_t rowptr;
16408     ae_int_t bluesteinsize;
16409     ae_int_t precrptr;
16410     ae_int_t preciptr;
16411     ae_int_t precrsize;
16412     ae_int_t precisize;
16413 
16414     ae_frame_make(_state, &_frame_block);
16415     memset(&bluesteinbuf, 0, sizeof(bluesteinbuf));
16416     _fasttransformplan_clear(plan);
16417     _srealarray_init(&bluesteinbuf, _state, ae_true);
16418 
16419 
16420     /*
16421      * Initial check for parameters
16422      */
16423     ae_assert(n>0, "FTComplexFFTPlan: N<=0", _state);
16424     ae_assert(k>0, "FTComplexFFTPlan: K<=0", _state);
16425 
16426     /*
16427      * Determine required sizes of precomputed real and integer
16428      * buffers. This stage of code is highly dependent on internals
16429      * of FTComplexFFTPlanRec() and must be kept synchronized with
16430      * possible changes in internals of plan generation function.
16431      *
16432      * Buffer size is determined as follows:
16433      * * N is factorized
16434      * * we factor out anything which is less or equal to MaxRadix
16435      * * prime factor F>RaderThreshold requires 4*FTBaseFindSmooth(2*F-1)
16436      *   real entries to store precomputed Quantities for Bluestein's
16437      *   transformation
16438      * * prime factor F<=RaderThreshold does NOT require
16439      *   precomputed storage
16440      */
16441     precrsize = 0;
16442     precisize = 0;
16443     ftbase_ftdeterminespacerequirements(n, &precrsize, &precisize, _state);
16444     if( precrsize>0 )
16445     {
16446         ae_vector_set_length(&plan->precr, precrsize, _state);
16447     }
16448     if( precisize>0 )
16449     {
16450         ae_vector_set_length(&plan->preci, precisize, _state);
16451     }
16452 
16453     /*
16454      * Generate plan
16455      */
16456     rowptr = 0;
16457     precrptr = 0;
16458     preciptr = 0;
16459     bluesteinsize = 1;
16460     ae_vector_set_length(&plan->buffer, 2*n*k, _state);
16461     ftbase_ftcomplexfftplanrec(n, k, ae_true, ae_true, &rowptr, &bluesteinsize, &precrptr, &preciptr, plan, _state);
16462     ae_vector_set_length(&bluesteinbuf.val, bluesteinsize, _state);
16463     ae_shared_pool_set_seed(&plan->bluesteinpool, &bluesteinbuf, sizeof(bluesteinbuf), _srealarray_init, _srealarray_init_copy, _srealarray_destroy, _state);
16464 
16465     /*
16466      * Check that actual amount of precomputed space used by transformation
16467      * plan is EXACTLY equal to amount of space allocated by us.
16468      */
16469     ae_assert(precrptr==precrsize, "FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
16470     ae_assert(preciptr==precisize, "FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
16471     ae_frame_leave(_state);
16472 }
16473 
16474 
16475 /*************************************************************************
16476 This subroutine applies transformation plan to input/output array A.
16477 
16478 INPUT PARAMETERS:
16479     Plan        -   transformation plan
16480     A           -   array, must be large enough for plan to work
16481     OffsA       -   offset of the subarray to process
16482     RepCnt      -   repetition count (transformation is repeatedly applied
16483                     to subsequent subarrays)
16484 
16485 OUTPUT PARAMETERS:
16486     Plan        -   plan (temporary buffers can be modified, plan itself
16487                     is unchanged and can be reused)
16488     A           -   transformed array
16489 
16490   -- ALGLIB --
16491      Copyright 05.04.2013 by Bochkanov Sergey
16492 *************************************************************************/
ftapplyplan(fasttransformplan * plan,ae_vector * a,ae_int_t offsa,ae_int_t repcnt,ae_state * _state)16493 void ftapplyplan(fasttransformplan* plan,
16494      /* Real    */ ae_vector* a,
16495      ae_int_t offsa,
16496      ae_int_t repcnt,
16497      ae_state *_state)
16498 {
16499     ae_int_t plansize;
16500     ae_int_t i;
16501 
16502 
16503     plansize = plan->entries.ptr.pp_int[0][ftbase_coloperandscnt]*plan->entries.ptr.pp_int[0][ftbase_coloperandsize]*plan->entries.ptr.pp_int[0][ftbase_colmicrovectorsize];
16504     for(i=0; i<=repcnt-1; i++)
16505     {
16506         ftbase_ftapplysubplan(plan, 0, a, offsa+plansize*i, 0, &plan->buffer, 1, _state);
16507     }
16508 }
16509 
16510 
16511 /*************************************************************************
16512 Returns good factorization N=N1*N2.
16513 
16514 Usually N1<=N2 (but not always - small N's may be exception).
16515 if N1<>1 then N2<>1.
16516 
16517 Factorization is chosen depending on task type and codelets we have.
16518 
16519   -- ALGLIB --
16520      Copyright 01.05.2009 by Bochkanov Sergey
16521 *************************************************************************/
ftbasefactorize(ae_int_t n,ae_int_t tasktype,ae_int_t * n1,ae_int_t * n2,ae_state * _state)16522 void ftbasefactorize(ae_int_t n,
16523      ae_int_t tasktype,
16524      ae_int_t* n1,
16525      ae_int_t* n2,
16526      ae_state *_state)
16527 {
16528     ae_int_t j;
16529 
16530     *n1 = 0;
16531     *n2 = 0;
16532 
16533     *n1 = 0;
16534     *n2 = 0;
16535 
16536     /*
16537      * try to find good codelet
16538      */
16539     if( *n1*(*n2)!=n )
16540     {
16541         for(j=ftbase_ftbasecodeletrecommended; j>=2; j--)
16542         {
16543             if( n%j==0 )
16544             {
16545                 *n1 = j;
16546                 *n2 = n/j;
16547                 break;
16548             }
16549         }
16550     }
16551 
16552     /*
16553      * try to factorize N
16554      */
16555     if( *n1*(*n2)!=n )
16556     {
16557         for(j=ftbase_ftbasecodeletrecommended+1; j<=n-1; j++)
16558         {
16559             if( n%j==0 )
16560             {
16561                 *n1 = j;
16562                 *n2 = n/j;
16563                 break;
16564             }
16565         }
16566     }
16567 
16568     /*
16569      * looks like N is prime :(
16570      */
16571     if( *n1*(*n2)!=n )
16572     {
16573         *n1 = 1;
16574         *n2 = n;
16575     }
16576 
16577     /*
16578      * normalize
16579      */
16580     if( *n2==1&&*n1!=1 )
16581     {
16582         *n2 = *n1;
16583         *n1 = 1;
16584     }
16585 }
16586 
16587 
16588 /*************************************************************************
16589 Is number smooth?
16590 
16591   -- ALGLIB --
16592      Copyright 01.05.2009 by Bochkanov Sergey
16593 *************************************************************************/
ftbaseissmooth(ae_int_t n,ae_state * _state)16594 ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state)
16595 {
16596     ae_int_t i;
16597     ae_bool result;
16598 
16599 
16600     for(i=2; i<=ftbase_ftbasemaxsmoothfactor; i++)
16601     {
16602         while(n%i==0)
16603         {
16604             n = n/i;
16605         }
16606     }
16607     result = n==1;
16608     return result;
16609 }
16610 
16611 
16612 /*************************************************************************
16613 Returns smallest smooth (divisible only by 2, 3, 5) number that is greater
16614 than or equal to max(N,2)
16615 
16616   -- ALGLIB --
16617      Copyright 01.05.2009 by Bochkanov Sergey
16618 *************************************************************************/
ftbasefindsmooth(ae_int_t n,ae_state * _state)16619 ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state)
16620 {
16621     ae_int_t best;
16622     ae_int_t result;
16623 
16624 
16625     best = 2;
16626     while(best<n)
16627     {
16628         best = 2*best;
16629     }
16630     ftbase_ftbasefindsmoothrec(n, 1, 2, &best, _state);
16631     result = best;
16632     return result;
16633 }
16634 
16635 
16636 /*************************************************************************
16637 Returns  smallest  smooth  (divisible only by 2, 3, 5) even number that is
16638 greater than or equal to max(N,2)
16639 
16640   -- ALGLIB --
16641      Copyright 01.05.2009 by Bochkanov Sergey
16642 *************************************************************************/
ftbasefindsmootheven(ae_int_t n,ae_state * _state)16643 ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state)
16644 {
16645     ae_int_t best;
16646     ae_int_t result;
16647 
16648 
16649     best = 2;
16650     while(best<n)
16651     {
16652         best = 2*best;
16653     }
16654     ftbase_ftbasefindsmoothrec(n, 2, 2, &best, _state);
16655     result = best;
16656     return result;
16657 }
16658 
16659 
16660 /*************************************************************************
16661 Returns estimate of FLOP count for the FFT.
16662 
16663 It is only an estimate based on operations count for the PERFECT FFT
16664 and relative inefficiency of the algorithm actually used.
16665 
16666 N should be power of 2, estimates are badly wrong for non-power-of-2 N's.
16667 
16668   -- ALGLIB --
16669      Copyright 01.05.2009 by Bochkanov Sergey
16670 *************************************************************************/
ftbasegetflopestimate(ae_int_t n,ae_state * _state)16671 double ftbasegetflopestimate(ae_int_t n, ae_state *_state)
16672 {
16673     double result;
16674 
16675 
16676     result = ftbase_ftbaseinefficiencyfactor*(4*n*ae_log((double)(n), _state)/ae_log((double)(2), _state)-6*n+8);
16677     return result;
16678 }
16679 
16680 
16681 /*************************************************************************
16682 This function returns EXACT estimate of the space requirements for N-point
16683 FFT. Internals of this function are highly dependent on details of different
16684 FFTs employed by this unit, so every time algorithm is changed this function
16685 has to be rewritten.
16686 
16687 INPUT PARAMETERS:
16688     N           -   transform length
16689     PrecRSize   -   must be set to zero
16690     PrecISize   -   must be set to zero
16691 
16692 OUTPUT PARAMETERS:
16693     PrecRSize   -   number of real temporaries required for transformation
16694     PrecISize   -   number of integer temporaries required for transformation
16695 
16696 
16697   -- ALGLIB --
16698      Copyright 05.04.2013 by Bochkanov Sergey
16699 *************************************************************************/
ftbase_ftdeterminespacerequirements(ae_int_t n,ae_int_t * precrsize,ae_int_t * precisize,ae_state * _state)16700 static void ftbase_ftdeterminespacerequirements(ae_int_t n,
16701      ae_int_t* precrsize,
16702      ae_int_t* precisize,
16703      ae_state *_state)
16704 {
16705     ae_int_t ncur;
16706     ae_int_t f;
16707     ae_int_t i;
16708 
16709 
16710 
16711     /*
16712      * Determine required sizes of precomputed real and integer
16713      * buffers. This stage of code is highly dependent on internals
16714      * of FTComplexFFTPlanRec() and must be kept synchronized with
16715      * possible changes in internals of plan generation function.
16716      *
16717      * Buffer size is determined as follows:
16718      * * N is factorized
16719      * * we factor out anything which is less or equal to MaxRadix
16720      * * prime factor F>RaderThreshold requires 4*FTBaseFindSmooth(2*F-1)
16721      *   real entries to store precomputed Quantities for Bluestein's
16722      *   transformation
16723      * * prime factor F<=RaderThreshold requires 2*(F-1)+ESTIMATE(F-1)
16724      *   precomputed storage
16725      */
16726     ncur = n;
16727     for(i=2; i<=ftbase_maxradix; i++)
16728     {
16729         while(ncur%i==0)
16730         {
16731             ncur = ncur/i;
16732         }
16733     }
16734     f = 2;
16735     while(f<=ncur)
16736     {
16737         while(ncur%f==0)
16738         {
16739             if( f>ftbase_raderthreshold )
16740             {
16741                 *precrsize = *precrsize+4*ftbasefindsmooth(2*f-1, _state);
16742             }
16743             else
16744             {
16745                 *precrsize = *precrsize+2*(f-1);
16746                 ftbase_ftdeterminespacerequirements(f-1, precrsize, precisize, _state);
16747             }
16748             ncur = ncur/f;
16749         }
16750         f = f+1;
16751     }
16752 }
16753 
16754 
16755 /*************************************************************************
16756 Recurrent function called by FTComplexFFTPlan() and other functions. It
16757 recursively builds transformation plan
16758 
16759 INPUT PARAMETERS:
16760     N           -   FFT length (in complex numbers), N>=1
16761     K           -   number of repetitions, K>=1
16762     ChildPlan   -   if True, plan generator inserts OpStart/opEnd in the
16763                     plan header/footer.
16764     TopmostPlan -   if True, plan generator assumes that it is topmost plan:
16765                     * it may use global buffer for transpositions
16766                     and there is no other plan which executes in parallel
16767     RowPtr      -   index which points to past-the-last entry generated so far
16768     BluesteinSize-  amount of storage (in real numbers) required for Bluestein buffer
16769     PrecRPtr    -   pointer to unused part of precomputed real buffer (Plan.PrecR):
16770                     * when this function stores some data to precomputed buffer,
16771                       it advances pointer.
16772                     * it is responsibility of the function to assert that
16773                       Plan.PrecR has enough space to store data before actually
16774                       writing to buffer.
16775                     * it is responsibility of the caller to allocate enough
16776                       space before calling this function
16777     PrecIPtr    -   pointer to unused part of precomputed integer buffer (Plan.PrecI):
16778                     * when this function stores some data to precomputed buffer,
16779                       it advances pointer.
16780                     * it is responsibility of the function to assert that
16781                       Plan.PrecR has enough space to store data before actually
16782                       writing to buffer.
16783                     * it is responsibility of the caller to allocate enough
16784                       space before calling this function
16785     Plan        -   plan (generated so far)
16786 
16787 OUTPUT PARAMETERS:
16788     RowPtr      -   updated pointer (advanced by number of entries generated
16789                     by function)
16790     BluesteinSize-  updated amount
16791                     (may be increased, but may never be decreased)
16792 
16793 NOTE: in case TopmostPlan is True, ChildPlan is also must be True.
16794 
16795   -- ALGLIB --
16796      Copyright 05.04.2013 by Bochkanov Sergey
16797 *************************************************************************/
ftbase_ftcomplexfftplanrec(ae_int_t n,ae_int_t k,ae_bool childplan,ae_bool topmostplan,ae_int_t * rowptr,ae_int_t * bluesteinsize,ae_int_t * precrptr,ae_int_t * preciptr,fasttransformplan * plan,ae_state * _state)16798 static void ftbase_ftcomplexfftplanrec(ae_int_t n,
16799      ae_int_t k,
16800      ae_bool childplan,
16801      ae_bool topmostplan,
16802      ae_int_t* rowptr,
16803      ae_int_t* bluesteinsize,
16804      ae_int_t* precrptr,
16805      ae_int_t* preciptr,
16806      fasttransformplan* plan,
16807      ae_state *_state)
16808 {
16809     ae_frame _frame_block;
16810     srealarray localbuf;
16811     ae_int_t m;
16812     ae_int_t n1;
16813     ae_int_t n2;
16814     ae_int_t gq;
16815     ae_int_t giq;
16816     ae_int_t row0;
16817     ae_int_t row1;
16818     ae_int_t row2;
16819     ae_int_t row3;
16820 
16821     ae_frame_make(_state, &_frame_block);
16822     memset(&localbuf, 0, sizeof(localbuf));
16823     _srealarray_init(&localbuf, _state, ae_true);
16824 
16825     ae_assert(n>0, "FTComplexFFTPlan: N<=0", _state);
16826     ae_assert(k>0, "FTComplexFFTPlan: K<=0", _state);
16827     ae_assert(!topmostplan||childplan, "FTComplexFFTPlan: ChildPlan is inconsistent with TopmostPlan", _state);
16828 
16829     /*
16830      * Try to generate "topmost" plan
16831      */
16832     if( topmostplan&&n>ftbase_recursivethreshold )
16833     {
16834         ftbase_ftfactorize(n, ae_false, &n1, &n2, _state);
16835         if( n1*n2==0 )
16836         {
16837 
16838             /*
16839              * Handle prime-factor FFT with Bluestein's FFT.
16840              * Determine size of Bluestein's buffer.
16841              */
16842             m = ftbasefindsmooth(2*n-1, _state);
16843             *bluesteinsize = ae_maxint(2*m, *bluesteinsize, _state);
16844 
16845             /*
16846              * Generate plan
16847              */
16848             ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16849             ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
16850             row0 = *rowptr;
16851             ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
16852             ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_true, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16853             row1 = *rowptr;
16854             plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16855             ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16856 
16857             /*
16858              * Fill precomputed buffer
16859              */
16860             ftbase_ftprecomputebluesteinsfft(n, m, &plan->precr, *precrptr, _state);
16861 
16862             /*
16863              * Update pointer to the precomputed area
16864              */
16865             *precrptr = *precrptr+4*m;
16866         }
16867         else
16868         {
16869 
16870             /*
16871              * Handle composite FFT with recursive Cooley-Tukey which
16872              * uses global buffer instead of local one.
16873              */
16874             ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16875             ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
16876             row0 = *rowptr;
16877             ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
16878             ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
16879             ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
16880             row2 = *rowptr;
16881             ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
16882             ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
16883             ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16884             row1 = *rowptr;
16885             ftbase_ftcomplexfftplanrec(n1, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16886             plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16887             row3 = *rowptr;
16888             ftbase_ftcomplexfftplanrec(n2, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16889             plan->entries.ptr.pp_int[row2][ftbase_colparam0] = row3-row2;
16890         }
16891         ae_frame_leave(_state);
16892         return;
16893     }
16894 
16895     /*
16896      * Prepare "non-topmost" plan:
16897      * * calculate factorization
16898      * * use local (shared) buffer
16899      * * update buffer size - ANY plan will need at least
16900      *   2*N temporaries, additional requirements can be
16901      *   applied later
16902      */
16903     ftbase_ftfactorize(n, ae_false, &n1, &n2, _state);
16904 
16905     /*
16906      * Handle FFT's with N1*N2=0: either small-N or prime-factor
16907      */
16908     if( n1*n2==0 )
16909     {
16910         if( n<=ftbase_maxradix )
16911         {
16912 
16913             /*
16914              * Small-N FFT
16915              */
16916             if( childplan )
16917             {
16918                 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16919             }
16920             ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodeletfft, k, n, 2, 0, _state);
16921             if( childplan )
16922             {
16923                 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16924             }
16925             ae_frame_leave(_state);
16926             return;
16927         }
16928         if( n<=ftbase_raderthreshold )
16929         {
16930 
16931             /*
16932              * Handle prime-factor FFT's with Rader's FFT
16933              */
16934             m = n-1;
16935             if( childplan )
16936             {
16937                 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16938             }
16939             findprimitiverootandinverse(n, &gq, &giq, _state);
16940             ftbase_ftpushentry4(plan, rowptr, ftbase_opradersfft, k, n, 2, 2, gq, giq, *precrptr, _state);
16941             ftbase_ftprecomputeradersfft(n, gq, giq, &plan->precr, *precrptr, _state);
16942             *precrptr = *precrptr+2*(n-1);
16943             row0 = *rowptr;
16944             ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
16945             ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16946             row1 = *rowptr;
16947             plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16948             if( childplan )
16949             {
16950                 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16951             }
16952         }
16953         else
16954         {
16955 
16956             /*
16957              * Handle prime-factor FFT's with Bluestein's FFT
16958              */
16959             m = ftbasefindsmooth(2*n-1, _state);
16960             *bluesteinsize = ae_maxint(2*m, *bluesteinsize, _state);
16961             if( childplan )
16962             {
16963                 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16964             }
16965             ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
16966             ftbase_ftprecomputebluesteinsfft(n, m, &plan->precr, *precrptr, _state);
16967             *precrptr = *precrptr+4*m;
16968             row0 = *rowptr;
16969             ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
16970             ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
16971             row1 = *rowptr;
16972             plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
16973             if( childplan )
16974             {
16975                 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
16976             }
16977         }
16978         ae_frame_leave(_state);
16979         return;
16980     }
16981 
16982     /*
16983      * Handle Cooley-Tukey FFT with small N1
16984      */
16985     if( n1<=ftbase_maxradix )
16986     {
16987 
16988         /*
16989          * Specialized transformation for small N1:
16990          * * N2 short inplace FFT's, each N1-point, with integrated twiddle factors
16991          * * N1 long FFT's
16992          * * final transposition
16993          */
16994         if( childplan )
16995         {
16996             ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
16997         }
16998         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodelettwfft, k, n1, 2*n2, 0, _state);
16999         ftbase_ftcomplexfftplanrec(n2, k*n1, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17000         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17001         if( childplan )
17002         {
17003             ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
17004         }
17005         ae_frame_leave(_state);
17006         return;
17007     }
17008 
17009     /*
17010      * Handle general Cooley-Tukey FFT, either "flat" or "recursive"
17011      */
17012     if( n<=ftbase_recursivethreshold )
17013     {
17014 
17015         /*
17016          * General code for large N1/N2, "flat" version without explicit recurrence
17017          * (nested subplans are inserted directly into the body of the plan)
17018          */
17019         if( childplan )
17020         {
17021             ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
17022         }
17023         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17024         ftbase_ftcomplexfftplanrec(n1, k*n2, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17025         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
17026         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
17027         ftbase_ftcomplexfftplanrec(n2, k*n1, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17028         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17029         if( childplan )
17030         {
17031             ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
17032         }
17033     }
17034     else
17035     {
17036 
17037         /*
17038          * General code for large N1/N2, "recursive" version - nested subplans
17039          * are separated from the plan body.
17040          *
17041          * Generate parent plan.
17042          */
17043         if( childplan )
17044         {
17045             ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
17046         }
17047         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17048         row0 = *rowptr;
17049         ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
17050         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
17051         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
17052         row2 = *rowptr;
17053         ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
17054         ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
17055         if( childplan )
17056         {
17057             ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
17058         }
17059 
17060         /*
17061          * Generate child subplans, insert refence to parent plans
17062          */
17063         row1 = *rowptr;
17064         ftbase_ftcomplexfftplanrec(n1, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17065         plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
17066         row3 = *rowptr;
17067         ftbase_ftcomplexfftplanrec(n2, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
17068         plan->entries.ptr.pp_int[row2][ftbase_colparam0] = row3-row2;
17069     }
17070     ae_frame_leave(_state);
17071 }
17072 
17073 
17074 /*************************************************************************
17075 This function pushes one more entry to the plan. It resizes Entries matrix
17076 if needed.
17077 
17078 INPUT PARAMETERS:
17079     Plan        -   plan (generated so far)
17080     RowPtr      -   index which points to past-the-last entry generated so far
17081     EType       -   entry type
17082     EOpCnt      -   operands count
17083     EOpSize     -   operand size
17084     EMcvSize    -   microvector size
17085     EParam0     -   parameter 0
17086 
17087 OUTPUT PARAMETERS:
17088     Plan        -   updated plan
17089     RowPtr      -   updated pointer
17090 
17091 NOTE: Param1 is set to -1.
17092 
17093   -- ALGLIB --
17094      Copyright 05.04.2013 by Bochkanov Sergey
17095 *************************************************************************/
ftbase_ftpushentry(fasttransformplan * plan,ae_int_t * rowptr,ae_int_t etype,ae_int_t eopcnt,ae_int_t eopsize,ae_int_t emcvsize,ae_int_t eparam0,ae_state * _state)17096 static void ftbase_ftpushentry(fasttransformplan* plan,
17097      ae_int_t* rowptr,
17098      ae_int_t etype,
17099      ae_int_t eopcnt,
17100      ae_int_t eopsize,
17101      ae_int_t emcvsize,
17102      ae_int_t eparam0,
17103      ae_state *_state)
17104 {
17105 
17106 
17107     ftbase_ftpushentry2(plan, rowptr, etype, eopcnt, eopsize, emcvsize, eparam0, -1, _state);
17108 }
17109 
17110 
17111 /*************************************************************************
17112 Same as FTPushEntry(), but sets Param0 AND Param1.
17113 This function pushes one more entry to the plan. It resized Entries matrix
17114 if needed.
17115 
17116 INPUT PARAMETERS:
17117     Plan        -   plan (generated so far)
17118     RowPtr      -   index which points to past-the-last entry generated so far
17119     EType       -   entry type
17120     EOpCnt      -   operands count
17121     EOpSize     -   operand size
17122     EMcvSize    -   microvector size
17123     EParam0     -   parameter 0
17124     EParam1     -   parameter 1
17125 
17126 OUTPUT PARAMETERS:
17127     Plan        -   updated plan
17128     RowPtr      -   updated pointer
17129 
17130   -- ALGLIB --
17131      Copyright 05.04.2013 by Bochkanov Sergey
17132 *************************************************************************/
ftbase_ftpushentry2(fasttransformplan * plan,ae_int_t * rowptr,ae_int_t etype,ae_int_t eopcnt,ae_int_t eopsize,ae_int_t emcvsize,ae_int_t eparam0,ae_int_t eparam1,ae_state * _state)17133 static void ftbase_ftpushentry2(fasttransformplan* plan,
17134      ae_int_t* rowptr,
17135      ae_int_t etype,
17136      ae_int_t eopcnt,
17137      ae_int_t eopsize,
17138      ae_int_t emcvsize,
17139      ae_int_t eparam0,
17140      ae_int_t eparam1,
17141      ae_state *_state)
17142 {
17143 
17144 
17145     if( *rowptr>=plan->entries.rows )
17146     {
17147         imatrixresize(&plan->entries, ae_maxint(2*plan->entries.rows, 1, _state), ftbase_colscnt, _state);
17148     }
17149     plan->entries.ptr.pp_int[*rowptr][ftbase_coltype] = etype;
17150     plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandscnt] = eopcnt;
17151     plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandsize] = eopsize;
17152     plan->entries.ptr.pp_int[*rowptr][ftbase_colmicrovectorsize] = emcvsize;
17153     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam0] = eparam0;
17154     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam1] = eparam1;
17155     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam2] = 0;
17156     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam3] = 0;
17157     *rowptr = *rowptr+1;
17158 }
17159 
17160 
17161 /*************************************************************************
17162 Same as FTPushEntry(), but sets Param0, Param1, Param2 and Param3.
17163 This function pushes one more entry to the plan. It resized Entries matrix
17164 if needed.
17165 
17166 INPUT PARAMETERS:
17167     Plan        -   plan (generated so far)
17168     RowPtr      -   index which points to past-the-last entry generated so far
17169     EType       -   entry type
17170     EOpCnt      -   operands count
17171     EOpSize     -   operand size
17172     EMcvSize    -   microvector size
17173     EParam0     -   parameter 0
17174     EParam1     -   parameter 1
17175     EParam2     -   parameter 2
17176     EParam3     -   parameter 3
17177 
17178 OUTPUT PARAMETERS:
17179     Plan        -   updated plan
17180     RowPtr      -   updated pointer
17181 
17182   -- ALGLIB --
17183      Copyright 05.04.2013 by Bochkanov Sergey
17184 *************************************************************************/
ftbase_ftpushentry4(fasttransformplan * plan,ae_int_t * rowptr,ae_int_t etype,ae_int_t eopcnt,ae_int_t eopsize,ae_int_t emcvsize,ae_int_t eparam0,ae_int_t eparam1,ae_int_t eparam2,ae_int_t eparam3,ae_state * _state)17185 static void ftbase_ftpushentry4(fasttransformplan* plan,
17186      ae_int_t* rowptr,
17187      ae_int_t etype,
17188      ae_int_t eopcnt,
17189      ae_int_t eopsize,
17190      ae_int_t emcvsize,
17191      ae_int_t eparam0,
17192      ae_int_t eparam1,
17193      ae_int_t eparam2,
17194      ae_int_t eparam3,
17195      ae_state *_state)
17196 {
17197 
17198 
17199     if( *rowptr>=plan->entries.rows )
17200     {
17201         imatrixresize(&plan->entries, ae_maxint(2*plan->entries.rows, 1, _state), ftbase_colscnt, _state);
17202     }
17203     plan->entries.ptr.pp_int[*rowptr][ftbase_coltype] = etype;
17204     plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandscnt] = eopcnt;
17205     plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandsize] = eopsize;
17206     plan->entries.ptr.pp_int[*rowptr][ftbase_colmicrovectorsize] = emcvsize;
17207     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam0] = eparam0;
17208     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam1] = eparam1;
17209     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam2] = eparam2;
17210     plan->entries.ptr.pp_int[*rowptr][ftbase_colparam3] = eparam3;
17211     *rowptr = *rowptr+1;
17212 }
17213 
17214 
17215 /*************************************************************************
17216 This subroutine applies subplan to input/output array A.
17217 
17218 INPUT PARAMETERS:
17219     Plan        -   transformation plan
17220     SubPlan     -   subplan index
17221     A           -   array, must be large enough for plan to work
17222     ABase       -   base offset in array A, this value points to start of
17223                     subarray whose length is equal to length of the plan
17224     AOffset     -   offset with respect to ABase, 0<=AOffset<PlanLength.
17225                     This is an offset within large PlanLength-subarray of
17226                     the chunk to process.
17227     Buf         -   temporary buffer whose length is equal to plan length
17228                     (without taking into account RepCnt) or larger.
17229     OffsBuf     -   offset in the buffer array
17230     RepCnt      -   repetition count (transformation is repeatedly applied
17231                     to subsequent subarrays)
17232 
17233 OUTPUT PARAMETERS:
17234     Plan        -   plan (temporary buffers can be modified, plan itself
17235                     is unchanged and can be reused)
17236     A           -   transformed array
17237 
17238   -- ALGLIB --
17239      Copyright 05.04.2013 by Bochkanov Sergey
17240 *************************************************************************/
ftbase_ftapplysubplan(fasttransformplan * plan,ae_int_t subplan,ae_vector * a,ae_int_t abase,ae_int_t aoffset,ae_vector * buf,ae_int_t repcnt,ae_state * _state)17241 static void ftbase_ftapplysubplan(fasttransformplan* plan,
17242      ae_int_t subplan,
17243      /* Real    */ ae_vector* a,
17244      ae_int_t abase,
17245      ae_int_t aoffset,
17246      /* Real    */ ae_vector* buf,
17247      ae_int_t repcnt,
17248      ae_state *_state)
17249 {
17250     ae_frame _frame_block;
17251     ae_int_t rowidx;
17252     ae_int_t i;
17253     ae_int_t n1;
17254     ae_int_t n2;
17255     ae_int_t operation;
17256     ae_int_t operandscnt;
17257     ae_int_t operandsize;
17258     ae_int_t microvectorsize;
17259     ae_int_t param0;
17260     ae_int_t param1;
17261     ae_int_t parentsize;
17262     ae_int_t childsize;
17263     ae_int_t chunksize;
17264     ae_int_t lastchunksize;
17265     srealarray *bufa;
17266     ae_smart_ptr _bufa;
17267     srealarray *bufb;
17268     ae_smart_ptr _bufb;
17269     srealarray *bufc;
17270     ae_smart_ptr _bufc;
17271     srealarray *bufd;
17272     ae_smart_ptr _bufd;
17273 
17274     ae_frame_make(_state, &_frame_block);
17275     memset(&_bufa, 0, sizeof(_bufa));
17276     memset(&_bufb, 0, sizeof(_bufb));
17277     memset(&_bufc, 0, sizeof(_bufc));
17278     memset(&_bufd, 0, sizeof(_bufd));
17279     ae_smart_ptr_init(&_bufa, (void**)&bufa, _state, ae_true);
17280     ae_smart_ptr_init(&_bufb, (void**)&bufb, _state, ae_true);
17281     ae_smart_ptr_init(&_bufc, (void**)&bufc, _state, ae_true);
17282     ae_smart_ptr_init(&_bufd, (void**)&bufd, _state, ae_true);
17283 
17284     ae_assert(plan->entries.ptr.pp_int[subplan][ftbase_coltype]==ftbase_opstart, "FTApplySubPlan: incorrect subplan header", _state);
17285     rowidx = subplan+1;
17286     while(plan->entries.ptr.pp_int[rowidx][ftbase_coltype]!=ftbase_opend)
17287     {
17288         operation = plan->entries.ptr.pp_int[rowidx][ftbase_coltype];
17289         operandscnt = repcnt*plan->entries.ptr.pp_int[rowidx][ftbase_coloperandscnt];
17290         operandsize = plan->entries.ptr.pp_int[rowidx][ftbase_coloperandsize];
17291         microvectorsize = plan->entries.ptr.pp_int[rowidx][ftbase_colmicrovectorsize];
17292         param0 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17293         param1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam1];
17294         touchint(&param1, _state);
17295 
17296         /*
17297          * Process "jump" operation
17298          */
17299         if( operation==ftbase_opjmp )
17300         {
17301             rowidx = rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17302             continue;
17303         }
17304 
17305         /*
17306          * Process "parallel call" operation:
17307          * * we perform initial check for consistency between parent and child plans
17308          * * we call FTSplitAndApplyParallelPlan(), which splits parallel plan into
17309          *   several parallel tasks
17310          */
17311         if( operation==ftbase_opparallelcall )
17312         {
17313             parentsize = operandsize*microvectorsize;
17314             childsize = plan->entries.ptr.pp_int[rowidx+param0][ftbase_coloperandscnt]*plan->entries.ptr.pp_int[rowidx+param0][ftbase_coloperandsize]*plan->entries.ptr.pp_int[rowidx+param0][ftbase_colmicrovectorsize];
17315             ae_assert(plan->entries.ptr.pp_int[rowidx+param0][ftbase_coltype]==ftbase_opstart, "FTApplySubPlan: incorrect child subplan header", _state);
17316             ae_assert(parentsize==childsize, "FTApplySubPlan: incorrect child subplan header", _state);
17317             chunksize = ae_maxint(ftbase_recursivethreshold/childsize, 1, _state);
17318             lastchunksize = operandscnt%chunksize;
17319             if( lastchunksize==0 )
17320             {
17321                 lastchunksize = chunksize;
17322             }
17323             i = 0;
17324             while(i<operandscnt)
17325             {
17326                 chunksize = ae_minint(chunksize, operandscnt-i, _state);
17327                 ftbase_ftapplysubplan(plan, rowidx+param0, a, abase, aoffset+i*childsize, buf, chunksize, _state);
17328                 i = i+chunksize;
17329             }
17330             rowidx = rowidx+1;
17331             continue;
17332         }
17333 
17334         /*
17335          * Process "reference complex FFT" operation
17336          */
17337         if( operation==ftbase_opcomplexreffft )
17338         {
17339             ftbase_ftapplycomplexreffft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, buf, _state);
17340             rowidx = rowidx+1;
17341             continue;
17342         }
17343 
17344         /*
17345          * Process "codelet FFT" operation
17346          */
17347         if( operation==ftbase_opcomplexcodeletfft )
17348         {
17349             ftbase_ftapplycomplexcodeletfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
17350             rowidx = rowidx+1;
17351             continue;
17352         }
17353 
17354         /*
17355          * Process "integrated codelet FFT" operation
17356          */
17357         if( operation==ftbase_opcomplexcodelettwfft )
17358         {
17359             ftbase_ftapplycomplexcodelettwfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
17360             rowidx = rowidx+1;
17361             continue;
17362         }
17363 
17364         /*
17365          * Process Bluestein's FFT operation
17366          */
17367         if( operation==ftbase_opbluesteinsfft )
17368         {
17369             ae_assert(microvectorsize==2, "FTApplySubPlan: microvectorsize!=2 for Bluesteins FFT", _state);
17370             ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufa, _state);
17371             ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufb, _state);
17372             ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufc, _state);
17373             ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufd, _state);
17374             ftbase_ftbluesteinsfft(plan, a, abase, aoffset, operandscnt, operandsize, plan->entries.ptr.pp_int[rowidx][ftbase_colparam0], plan->entries.ptr.pp_int[rowidx][ftbase_colparam2], rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam1], &bufa->val, &bufb->val, &bufc->val, &bufd->val, _state);
17375             ae_shared_pool_recycle(&plan->bluesteinpool, &_bufa, _state);
17376             ae_shared_pool_recycle(&plan->bluesteinpool, &_bufb, _state);
17377             ae_shared_pool_recycle(&plan->bluesteinpool, &_bufc, _state);
17378             ae_shared_pool_recycle(&plan->bluesteinpool, &_bufd, _state);
17379             rowidx = rowidx+1;
17380             continue;
17381         }
17382 
17383         /*
17384          * Process Rader's FFT
17385          */
17386         if( operation==ftbase_opradersfft )
17387         {
17388             ftbase_ftradersfft(plan, a, abase, aoffset, operandscnt, operandsize, rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam0], plan->entries.ptr.pp_int[rowidx][ftbase_colparam1], plan->entries.ptr.pp_int[rowidx][ftbase_colparam2], plan->entries.ptr.pp_int[rowidx][ftbase_colparam3], buf, _state);
17389             rowidx = rowidx+1;
17390             continue;
17391         }
17392 
17393         /*
17394          * Process "complex twiddle factors" operation
17395          */
17396         if( operation==ftbase_opcomplexfftfactors )
17397         {
17398             ae_assert(microvectorsize==2, "FTApplySubPlan: MicrovectorSize<>1", _state);
17399             n1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17400             n2 = operandsize/n1;
17401             for(i=0; i<=operandscnt-1; i++)
17402             {
17403                 ftbase_ffttwcalc(a, abase+aoffset+i*operandsize*2, n1, n2, _state);
17404             }
17405             rowidx = rowidx+1;
17406             continue;
17407         }
17408 
17409         /*
17410          * Process "complex transposition" operation
17411          */
17412         if( operation==ftbase_opcomplextranspose )
17413         {
17414             ae_assert(microvectorsize==2, "FTApplySubPlan: MicrovectorSize<>1", _state);
17415             n1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
17416             n2 = operandsize/n1;
17417             for(i=0; i<=operandscnt-1; i++)
17418             {
17419                 ftbase_internalcomplexlintranspose(a, n1, n2, abase+aoffset+i*operandsize*2, buf, _state);
17420             }
17421             rowidx = rowidx+1;
17422             continue;
17423         }
17424 
17425         /*
17426          * Error
17427          */
17428         ae_assert(ae_false, "FTApplySubPlan: unexpected plan type", _state);
17429     }
17430     ae_frame_leave(_state);
17431 }
17432 
17433 
17434 /*************************************************************************
17435 This subroutine applies complex reference FFT to input/output array A.
17436 
17437 VERY SLOW OPERATION, do not use it in real life plans :)
17438 
17439 INPUT PARAMETERS:
17440     A           -   array, must be large enough for plan to work
17441     Offs        -   offset of the subarray to process
17442     OperandsCnt -   operands count (see description of FastTransformPlan)
17443     OperandSize -   operand size (see description of FastTransformPlan)
17444     MicrovectorSize-microvector size (see description of FastTransformPlan)
17445     Buf         -   temporary array, must be at least OperandsCnt*OperandSize*MicrovectorSize
17446 
17447 OUTPUT PARAMETERS:
17448     A           -   transformed array
17449 
17450   -- ALGLIB --
17451      Copyright 05.04.2013 by Bochkanov Sergey
17452 *************************************************************************/
ftbase_ftapplycomplexreffft(ae_vector * a,ae_int_t offs,ae_int_t operandscnt,ae_int_t operandsize,ae_int_t microvectorsize,ae_vector * buf,ae_state * _state)17453 static void ftbase_ftapplycomplexreffft(/* Real    */ ae_vector* a,
17454      ae_int_t offs,
17455      ae_int_t operandscnt,
17456      ae_int_t operandsize,
17457      ae_int_t microvectorsize,
17458      /* Real    */ ae_vector* buf,
17459      ae_state *_state)
17460 {
17461     ae_int_t opidx;
17462     ae_int_t i;
17463     ae_int_t k;
17464     double hre;
17465     double him;
17466     double c;
17467     double s;
17468     double re;
17469     double im;
17470     ae_int_t n;
17471 
17472 
17473     ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1", _state);
17474     ae_assert(operandsize>=1, "FTApplyComplexRefFFT: OperandSize<1", _state);
17475     ae_assert(microvectorsize==2, "FTApplyComplexRefFFT: MicrovectorSize<>2", _state);
17476     n = operandsize;
17477     for(opidx=0; opidx<=operandscnt-1; opidx++)
17478     {
17479         for(i=0; i<=n-1; i++)
17480         {
17481             hre = (double)(0);
17482             him = (double)(0);
17483             for(k=0; k<=n-1; k++)
17484             {
17485                 re = a->ptr.p_double[offs+opidx*operandsize*2+2*k+0];
17486                 im = a->ptr.p_double[offs+opidx*operandsize*2+2*k+1];
17487                 c = ae_cos(-2*ae_pi*k*i/n, _state);
17488                 s = ae_sin(-2*ae_pi*k*i/n, _state);
17489                 hre = hre+c*re-s*im;
17490                 him = him+c*im+s*re;
17491             }
17492             buf->ptr.p_double[2*i+0] = hre;
17493             buf->ptr.p_double[2*i+1] = him;
17494         }
17495         for(i=0; i<=operandsize*2-1; i++)
17496         {
17497             a->ptr.p_double[offs+opidx*operandsize*2+i] = buf->ptr.p_double[i];
17498         }
17499     }
17500 }
17501 
17502 
17503 /*************************************************************************
17504 This subroutine applies complex codelet FFT to input/output array A.
17505 
17506 INPUT PARAMETERS:
17507     A           -   array, must be large enough for plan to work
17508     Offs        -   offset of the subarray to process
17509     OperandsCnt -   operands count (see description of FastTransformPlan)
17510     OperandSize -   operand size (see description of FastTransformPlan)
17511     MicrovectorSize-microvector size, must be 2
17512 
17513 OUTPUT PARAMETERS:
17514     A           -   transformed array
17515 
17516   -- ALGLIB --
17517      Copyright 05.04.2013 by Bochkanov Sergey
17518 *************************************************************************/
ftbase_ftapplycomplexcodeletfft(ae_vector * a,ae_int_t offs,ae_int_t operandscnt,ae_int_t operandsize,ae_int_t microvectorsize,ae_state * _state)17519 static void ftbase_ftapplycomplexcodeletfft(/* Real    */ ae_vector* a,
17520      ae_int_t offs,
17521      ae_int_t operandscnt,
17522      ae_int_t operandsize,
17523      ae_int_t microvectorsize,
17524      ae_state *_state)
17525 {
17526     ae_int_t opidx;
17527     ae_int_t n;
17528     ae_int_t aoffset;
17529     double a0x;
17530     double a0y;
17531     double a1x;
17532     double a1y;
17533     double a2x;
17534     double a2y;
17535     double a3x;
17536     double a3y;
17537     double a4x;
17538     double a4y;
17539     double a5x;
17540     double a5y;
17541     double v0;
17542     double v1;
17543     double v2;
17544     double v3;
17545     double t1x;
17546     double t1y;
17547     double t2x;
17548     double t2y;
17549     double t3x;
17550     double t3y;
17551     double t4x;
17552     double t4y;
17553     double t5x;
17554     double t5y;
17555     double m1x;
17556     double m1y;
17557     double m2x;
17558     double m2y;
17559     double m3x;
17560     double m3y;
17561     double m4x;
17562     double m4y;
17563     double m5x;
17564     double m5y;
17565     double s1x;
17566     double s1y;
17567     double s2x;
17568     double s2y;
17569     double s3x;
17570     double s3y;
17571     double s4x;
17572     double s4y;
17573     double s5x;
17574     double s5y;
17575     double c1;
17576     double c2;
17577     double c3;
17578     double c4;
17579     double c5;
17580     double v;
17581 
17582 
17583     ae_assert(operandscnt>=1, "FTApplyComplexCodeletFFT: OperandsCnt<1", _state);
17584     ae_assert(operandsize>=1, "FTApplyComplexCodeletFFT: OperandSize<1", _state);
17585     ae_assert(microvectorsize==2, "FTApplyComplexCodeletFFT: MicrovectorSize<>2", _state);
17586     n = operandsize;
17587 
17588     /*
17589      * Hard-coded transforms for different N's
17590      */
17591     ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletFFT: N>MaxRadix", _state);
17592     if( n==2 )
17593     {
17594         for(opidx=0; opidx<=operandscnt-1; opidx++)
17595         {
17596             aoffset = offs+opidx*operandsize*2;
17597             a0x = a->ptr.p_double[aoffset+0];
17598             a0y = a->ptr.p_double[aoffset+1];
17599             a1x = a->ptr.p_double[aoffset+2];
17600             a1y = a->ptr.p_double[aoffset+3];
17601             v0 = a0x+a1x;
17602             v1 = a0y+a1y;
17603             v2 = a0x-a1x;
17604             v3 = a0y-a1y;
17605             a->ptr.p_double[aoffset+0] = v0;
17606             a->ptr.p_double[aoffset+1] = v1;
17607             a->ptr.p_double[aoffset+2] = v2;
17608             a->ptr.p_double[aoffset+3] = v3;
17609         }
17610         return;
17611     }
17612     if( n==3 )
17613     {
17614         c1 = ae_cos(2*ae_pi/3, _state)-1;
17615         c2 = ae_sin(2*ae_pi/3, _state);
17616         for(opidx=0; opidx<=operandscnt-1; opidx++)
17617         {
17618             aoffset = offs+opidx*operandsize*2;
17619             a0x = a->ptr.p_double[aoffset+0];
17620             a0y = a->ptr.p_double[aoffset+1];
17621             a1x = a->ptr.p_double[aoffset+2];
17622             a1y = a->ptr.p_double[aoffset+3];
17623             a2x = a->ptr.p_double[aoffset+4];
17624             a2y = a->ptr.p_double[aoffset+5];
17625             t1x = a1x+a2x;
17626             t1y = a1y+a2y;
17627             a0x = a0x+t1x;
17628             a0y = a0y+t1y;
17629             m1x = c1*t1x;
17630             m1y = c1*t1y;
17631             m2x = c2*(a1y-a2y);
17632             m2y = c2*(a2x-a1x);
17633             s1x = a0x+m1x;
17634             s1y = a0y+m1y;
17635             a1x = s1x+m2x;
17636             a1y = s1y+m2y;
17637             a2x = s1x-m2x;
17638             a2y = s1y-m2y;
17639             a->ptr.p_double[aoffset+0] = a0x;
17640             a->ptr.p_double[aoffset+1] = a0y;
17641             a->ptr.p_double[aoffset+2] = a1x;
17642             a->ptr.p_double[aoffset+3] = a1y;
17643             a->ptr.p_double[aoffset+4] = a2x;
17644             a->ptr.p_double[aoffset+5] = a2y;
17645         }
17646         return;
17647     }
17648     if( n==4 )
17649     {
17650         for(opidx=0; opidx<=operandscnt-1; opidx++)
17651         {
17652             aoffset = offs+opidx*operandsize*2;
17653             a0x = a->ptr.p_double[aoffset+0];
17654             a0y = a->ptr.p_double[aoffset+1];
17655             a1x = a->ptr.p_double[aoffset+2];
17656             a1y = a->ptr.p_double[aoffset+3];
17657             a2x = a->ptr.p_double[aoffset+4];
17658             a2y = a->ptr.p_double[aoffset+5];
17659             a3x = a->ptr.p_double[aoffset+6];
17660             a3y = a->ptr.p_double[aoffset+7];
17661             t1x = a0x+a2x;
17662             t1y = a0y+a2y;
17663             t2x = a1x+a3x;
17664             t2y = a1y+a3y;
17665             m2x = a0x-a2x;
17666             m2y = a0y-a2y;
17667             m3x = a1y-a3y;
17668             m3y = a3x-a1x;
17669             a->ptr.p_double[aoffset+0] = t1x+t2x;
17670             a->ptr.p_double[aoffset+1] = t1y+t2y;
17671             a->ptr.p_double[aoffset+4] = t1x-t2x;
17672             a->ptr.p_double[aoffset+5] = t1y-t2y;
17673             a->ptr.p_double[aoffset+2] = m2x+m3x;
17674             a->ptr.p_double[aoffset+3] = m2y+m3y;
17675             a->ptr.p_double[aoffset+6] = m2x-m3x;
17676             a->ptr.p_double[aoffset+7] = m2y-m3y;
17677         }
17678         return;
17679     }
17680     if( n==5 )
17681     {
17682         v = 2*ae_pi/5;
17683         c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
17684         c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
17685         c3 = -ae_sin(v, _state);
17686         c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state));
17687         c5 = ae_sin(v, _state)-ae_sin(2*v, _state);
17688         for(opidx=0; opidx<=operandscnt-1; opidx++)
17689         {
17690             aoffset = offs+opidx*operandsize*2;
17691             t1x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+8];
17692             t1y = a->ptr.p_double[aoffset+3]+a->ptr.p_double[aoffset+9];
17693             t2x = a->ptr.p_double[aoffset+4]+a->ptr.p_double[aoffset+6];
17694             t2y = a->ptr.p_double[aoffset+5]+a->ptr.p_double[aoffset+7];
17695             t3x = a->ptr.p_double[aoffset+2]-a->ptr.p_double[aoffset+8];
17696             t3y = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+9];
17697             t4x = a->ptr.p_double[aoffset+6]-a->ptr.p_double[aoffset+4];
17698             t4y = a->ptr.p_double[aoffset+7]-a->ptr.p_double[aoffset+5];
17699             t5x = t1x+t2x;
17700             t5y = t1y+t2y;
17701             a->ptr.p_double[aoffset+0] = a->ptr.p_double[aoffset+0]+t5x;
17702             a->ptr.p_double[aoffset+1] = a->ptr.p_double[aoffset+1]+t5y;
17703             m1x = c1*t5x;
17704             m1y = c1*t5y;
17705             m2x = c2*(t1x-t2x);
17706             m2y = c2*(t1y-t2y);
17707             m3x = -c3*(t3y+t4y);
17708             m3y = c3*(t3x+t4x);
17709             m4x = -c4*t4y;
17710             m4y = c4*t4x;
17711             m5x = -c5*t3y;
17712             m5y = c5*t3x;
17713             s3x = m3x-m4x;
17714             s3y = m3y-m4y;
17715             s5x = m3x+m5x;
17716             s5y = m3y+m5y;
17717             s1x = a->ptr.p_double[aoffset+0]+m1x;
17718             s1y = a->ptr.p_double[aoffset+1]+m1y;
17719             s2x = s1x+m2x;
17720             s2y = s1y+m2y;
17721             s4x = s1x-m2x;
17722             s4y = s1y-m2y;
17723             a->ptr.p_double[aoffset+2] = s2x+s3x;
17724             a->ptr.p_double[aoffset+3] = s2y+s3y;
17725             a->ptr.p_double[aoffset+4] = s4x+s5x;
17726             a->ptr.p_double[aoffset+5] = s4y+s5y;
17727             a->ptr.p_double[aoffset+6] = s4x-s5x;
17728             a->ptr.p_double[aoffset+7] = s4y-s5y;
17729             a->ptr.p_double[aoffset+8] = s2x-s3x;
17730             a->ptr.p_double[aoffset+9] = s2y-s3y;
17731         }
17732         return;
17733     }
17734     if( n==6 )
17735     {
17736         c1 = ae_cos(2*ae_pi/3, _state)-1;
17737         c2 = ae_sin(2*ae_pi/3, _state);
17738         c3 = ae_cos(-ae_pi/3, _state);
17739         c4 = ae_sin(-ae_pi/3, _state);
17740         for(opidx=0; opidx<=operandscnt-1; opidx++)
17741         {
17742             aoffset = offs+opidx*operandsize*2;
17743             a0x = a->ptr.p_double[aoffset+0];
17744             a0y = a->ptr.p_double[aoffset+1];
17745             a1x = a->ptr.p_double[aoffset+2];
17746             a1y = a->ptr.p_double[aoffset+3];
17747             a2x = a->ptr.p_double[aoffset+4];
17748             a2y = a->ptr.p_double[aoffset+5];
17749             a3x = a->ptr.p_double[aoffset+6];
17750             a3y = a->ptr.p_double[aoffset+7];
17751             a4x = a->ptr.p_double[aoffset+8];
17752             a4y = a->ptr.p_double[aoffset+9];
17753             a5x = a->ptr.p_double[aoffset+10];
17754             a5y = a->ptr.p_double[aoffset+11];
17755             v0 = a0x;
17756             v1 = a0y;
17757             a0x = a0x+a3x;
17758             a0y = a0y+a3y;
17759             a3x = v0-a3x;
17760             a3y = v1-a3y;
17761             v0 = a1x;
17762             v1 = a1y;
17763             a1x = a1x+a4x;
17764             a1y = a1y+a4y;
17765             a4x = v0-a4x;
17766             a4y = v1-a4y;
17767             v0 = a2x;
17768             v1 = a2y;
17769             a2x = a2x+a5x;
17770             a2y = a2y+a5y;
17771             a5x = v0-a5x;
17772             a5y = v1-a5y;
17773             t4x = a4x*c3-a4y*c4;
17774             t4y = a4x*c4+a4y*c3;
17775             a4x = t4x;
17776             a4y = t4y;
17777             t5x = -a5x*c3-a5y*c4;
17778             t5y = a5x*c4-a5y*c3;
17779             a5x = t5x;
17780             a5y = t5y;
17781             t1x = a1x+a2x;
17782             t1y = a1y+a2y;
17783             a0x = a0x+t1x;
17784             a0y = a0y+t1y;
17785             m1x = c1*t1x;
17786             m1y = c1*t1y;
17787             m2x = c2*(a1y-a2y);
17788             m2y = c2*(a2x-a1x);
17789             s1x = a0x+m1x;
17790             s1y = a0y+m1y;
17791             a1x = s1x+m2x;
17792             a1y = s1y+m2y;
17793             a2x = s1x-m2x;
17794             a2y = s1y-m2y;
17795             t1x = a4x+a5x;
17796             t1y = a4y+a5y;
17797             a3x = a3x+t1x;
17798             a3y = a3y+t1y;
17799             m1x = c1*t1x;
17800             m1y = c1*t1y;
17801             m2x = c2*(a4y-a5y);
17802             m2y = c2*(a5x-a4x);
17803             s1x = a3x+m1x;
17804             s1y = a3y+m1y;
17805             a4x = s1x+m2x;
17806             a4y = s1y+m2y;
17807             a5x = s1x-m2x;
17808             a5y = s1y-m2y;
17809             a->ptr.p_double[aoffset+0] = a0x;
17810             a->ptr.p_double[aoffset+1] = a0y;
17811             a->ptr.p_double[aoffset+2] = a3x;
17812             a->ptr.p_double[aoffset+3] = a3y;
17813             a->ptr.p_double[aoffset+4] = a1x;
17814             a->ptr.p_double[aoffset+5] = a1y;
17815             a->ptr.p_double[aoffset+6] = a4x;
17816             a->ptr.p_double[aoffset+7] = a4y;
17817             a->ptr.p_double[aoffset+8] = a2x;
17818             a->ptr.p_double[aoffset+9] = a2y;
17819             a->ptr.p_double[aoffset+10] = a5x;
17820             a->ptr.p_double[aoffset+11] = a5y;
17821         }
17822         return;
17823     }
17824 }
17825 
17826 
17827 /*************************************************************************
17828 This subroutine applies complex "integrated" codelet FFT  to  input/output
17829 array A. "Integrated" codelet differs from "normal" one in following ways:
17830 * it can work with MicrovectorSize>1
17831 * hence, it can be used in Cooley-Tukey FFT without transpositions
17832 * it performs inlined multiplication by twiddle factors of Cooley-Tukey
17833   FFT with N2=MicrovectorSize/2.
17834 
17835 INPUT PARAMETERS:
17836     A           -   array, must be large enough for plan to work
17837     Offs        -   offset of the subarray to process
17838     OperandsCnt -   operands count (see description of FastTransformPlan)
17839     OperandSize -   operand size (see description of FastTransformPlan)
17840     MicrovectorSize-microvector size, must be 1
17841 
17842 OUTPUT PARAMETERS:
17843     A           -   transformed array
17844 
17845   -- ALGLIB --
17846      Copyright 05.04.2013 by Bochkanov Sergey
17847 *************************************************************************/
ftbase_ftapplycomplexcodelettwfft(ae_vector * a,ae_int_t offs,ae_int_t operandscnt,ae_int_t operandsize,ae_int_t microvectorsize,ae_state * _state)17848 static void ftbase_ftapplycomplexcodelettwfft(/* Real    */ ae_vector* a,
17849      ae_int_t offs,
17850      ae_int_t operandscnt,
17851      ae_int_t operandsize,
17852      ae_int_t microvectorsize,
17853      ae_state *_state)
17854 {
17855     ae_int_t opidx;
17856     ae_int_t mvidx;
17857     ae_int_t n;
17858     ae_int_t m;
17859     ae_int_t aoffset0;
17860     ae_int_t aoffset2;
17861     ae_int_t aoffset4;
17862     ae_int_t aoffset6;
17863     ae_int_t aoffset8;
17864     ae_int_t aoffset10;
17865     double a0x;
17866     double a0y;
17867     double a1x;
17868     double a1y;
17869     double a2x;
17870     double a2y;
17871     double a3x;
17872     double a3y;
17873     double a4x;
17874     double a4y;
17875     double a5x;
17876     double a5y;
17877     double v0;
17878     double v1;
17879     double v2;
17880     double v3;
17881     double q0x;
17882     double q0y;
17883     double t1x;
17884     double t1y;
17885     double t2x;
17886     double t2y;
17887     double t3x;
17888     double t3y;
17889     double t4x;
17890     double t4y;
17891     double t5x;
17892     double t5y;
17893     double m1x;
17894     double m1y;
17895     double m2x;
17896     double m2y;
17897     double m3x;
17898     double m3y;
17899     double m4x;
17900     double m4y;
17901     double m5x;
17902     double m5y;
17903     double s1x;
17904     double s1y;
17905     double s2x;
17906     double s2y;
17907     double s3x;
17908     double s3y;
17909     double s4x;
17910     double s4y;
17911     double s5x;
17912     double s5y;
17913     double c1;
17914     double c2;
17915     double c3;
17916     double c4;
17917     double c5;
17918     double v;
17919     double tw0;
17920     double tw1;
17921     double twx;
17922     double twxm1;
17923     double twy;
17924     double tw2x;
17925     double tw2y;
17926     double tw3x;
17927     double tw3y;
17928     double tw4x;
17929     double tw4y;
17930     double tw5x;
17931     double tw5y;
17932 
17933 
17934     ae_assert(operandscnt>=1, "FTApplyComplexCodeletFFT: OperandsCnt<1", _state);
17935     ae_assert(operandsize>=1, "FTApplyComplexCodeletFFT: OperandSize<1", _state);
17936     ae_assert(microvectorsize>=1, "FTApplyComplexCodeletFFT: MicrovectorSize<>1", _state);
17937     ae_assert(microvectorsize%2==0, "FTApplyComplexCodeletFFT: MicrovectorSize is not even", _state);
17938     n = operandsize;
17939     m = microvectorsize/2;
17940 
17941     /*
17942      * Hard-coded transforms for different N's
17943      */
17944     ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletTwFFT: N>MaxRadix", _state);
17945     if( n==2 )
17946     {
17947         v = -2*ae_pi/(n*m);
17948         tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
17949         tw1 = ae_sin(v, _state);
17950         for(opidx=0; opidx<=operandscnt-1; opidx++)
17951         {
17952             aoffset0 = offs+opidx*operandsize*microvectorsize;
17953             aoffset2 = aoffset0+microvectorsize;
17954             twxm1 = 0.0;
17955             twy = 0.0;
17956             for(mvidx=0; mvidx<=m-1; mvidx++)
17957             {
17958                 a0x = a->ptr.p_double[aoffset0];
17959                 a0y = a->ptr.p_double[aoffset0+1];
17960                 a1x = a->ptr.p_double[aoffset2];
17961                 a1y = a->ptr.p_double[aoffset2+1];
17962                 v0 = a0x+a1x;
17963                 v1 = a0y+a1y;
17964                 v2 = a0x-a1x;
17965                 v3 = a0y-a1y;
17966                 a->ptr.p_double[aoffset0] = v0;
17967                 a->ptr.p_double[aoffset0+1] = v1;
17968                 a->ptr.p_double[aoffset2] = v2*(1+twxm1)-v3*twy;
17969                 a->ptr.p_double[aoffset2+1] = v3*(1+twxm1)+v2*twy;
17970                 aoffset0 = aoffset0+2;
17971                 aoffset2 = aoffset2+2;
17972                 if( (mvidx+1)%ftbase_updatetw==0 )
17973                 {
17974                     v = -2*ae_pi*(mvidx+1)/(n*m);
17975                     twxm1 = ae_sin(0.5*v, _state);
17976                     twxm1 = -2*twxm1*twxm1;
17977                     twy = ae_sin(v, _state);
17978                 }
17979                 else
17980                 {
17981                     v = twxm1+tw0+twxm1*tw0-twy*tw1;
17982                     twy = twy+tw1+twxm1*tw1+twy*tw0;
17983                     twxm1 = v;
17984                 }
17985             }
17986         }
17987         return;
17988     }
17989     if( n==3 )
17990     {
17991         v = -2*ae_pi/(n*m);
17992         tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
17993         tw1 = ae_sin(v, _state);
17994         c1 = ae_cos(2*ae_pi/3, _state)-1;
17995         c2 = ae_sin(2*ae_pi/3, _state);
17996         for(opidx=0; opidx<=operandscnt-1; opidx++)
17997         {
17998             aoffset0 = offs+opidx*operandsize*microvectorsize;
17999             aoffset2 = aoffset0+microvectorsize;
18000             aoffset4 = aoffset2+microvectorsize;
18001             twx = 1.0;
18002             twxm1 = 0.0;
18003             twy = 0.0;
18004             for(mvidx=0; mvidx<=m-1; mvidx++)
18005             {
18006                 a0x = a->ptr.p_double[aoffset0];
18007                 a0y = a->ptr.p_double[aoffset0+1];
18008                 a1x = a->ptr.p_double[aoffset2];
18009                 a1y = a->ptr.p_double[aoffset2+1];
18010                 a2x = a->ptr.p_double[aoffset4];
18011                 a2y = a->ptr.p_double[aoffset4+1];
18012                 t1x = a1x+a2x;
18013                 t1y = a1y+a2y;
18014                 a0x = a0x+t1x;
18015                 a0y = a0y+t1y;
18016                 m1x = c1*t1x;
18017                 m1y = c1*t1y;
18018                 m2x = c2*(a1y-a2y);
18019                 m2y = c2*(a2x-a1x);
18020                 s1x = a0x+m1x;
18021                 s1y = a0y+m1y;
18022                 a1x = s1x+m2x;
18023                 a1y = s1y+m2y;
18024                 a2x = s1x-m2x;
18025                 a2y = s1y-m2y;
18026                 tw2x = twx*twx-twy*twy;
18027                 tw2y = 2*twx*twy;
18028                 a->ptr.p_double[aoffset0] = a0x;
18029                 a->ptr.p_double[aoffset0+1] = a0y;
18030                 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
18031                 a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy;
18032                 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
18033                 a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y;
18034                 aoffset0 = aoffset0+2;
18035                 aoffset2 = aoffset2+2;
18036                 aoffset4 = aoffset4+2;
18037                 if( (mvidx+1)%ftbase_updatetw==0 )
18038                 {
18039                     v = -2*ae_pi*(mvidx+1)/(n*m);
18040                     twxm1 = ae_sin(0.5*v, _state);
18041                     twxm1 = -2*twxm1*twxm1;
18042                     twy = ae_sin(v, _state);
18043                     twx = twxm1+1;
18044                 }
18045                 else
18046                 {
18047                     v = twxm1+tw0+twxm1*tw0-twy*tw1;
18048                     twy = twy+tw1+twxm1*tw1+twy*tw0;
18049                     twxm1 = v;
18050                     twx = v+1;
18051                 }
18052             }
18053         }
18054         return;
18055     }
18056     if( n==4 )
18057     {
18058         v = -2*ae_pi/(n*m);
18059         tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18060         tw1 = ae_sin(v, _state);
18061         for(opidx=0; opidx<=operandscnt-1; opidx++)
18062         {
18063             aoffset0 = offs+opidx*operandsize*microvectorsize;
18064             aoffset2 = aoffset0+microvectorsize;
18065             aoffset4 = aoffset2+microvectorsize;
18066             aoffset6 = aoffset4+microvectorsize;
18067             twx = 1.0;
18068             twxm1 = 0.0;
18069             twy = 0.0;
18070             for(mvidx=0; mvidx<=m-1; mvidx++)
18071             {
18072                 a0x = a->ptr.p_double[aoffset0];
18073                 a0y = a->ptr.p_double[aoffset0+1];
18074                 a1x = a->ptr.p_double[aoffset2];
18075                 a1y = a->ptr.p_double[aoffset2+1];
18076                 a2x = a->ptr.p_double[aoffset4];
18077                 a2y = a->ptr.p_double[aoffset4+1];
18078                 a3x = a->ptr.p_double[aoffset6];
18079                 a3y = a->ptr.p_double[aoffset6+1];
18080                 t1x = a0x+a2x;
18081                 t1y = a0y+a2y;
18082                 t2x = a1x+a3x;
18083                 t2y = a1y+a3y;
18084                 m2x = a0x-a2x;
18085                 m2y = a0y-a2y;
18086                 m3x = a1y-a3y;
18087                 m3y = a3x-a1x;
18088                 tw2x = twx*twx-twy*twy;
18089                 tw2y = 2*twx*twy;
18090                 tw3x = twx*tw2x-twy*tw2y;
18091                 tw3y = twx*tw2y+twy*tw2x;
18092                 a1x = m2x+m3x;
18093                 a1y = m2y+m3y;
18094                 a2x = t1x-t2x;
18095                 a2y = t1y-t2y;
18096                 a3x = m2x-m3x;
18097                 a3y = m2y-m3y;
18098                 a->ptr.p_double[aoffset0] = t1x+t2x;
18099                 a->ptr.p_double[aoffset0+1] = t1y+t2y;
18100                 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
18101                 a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy;
18102                 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
18103                 a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y;
18104                 a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y;
18105                 a->ptr.p_double[aoffset6+1] = a3y*tw3x+a3x*tw3y;
18106                 aoffset0 = aoffset0+2;
18107                 aoffset2 = aoffset2+2;
18108                 aoffset4 = aoffset4+2;
18109                 aoffset6 = aoffset6+2;
18110                 if( (mvidx+1)%ftbase_updatetw==0 )
18111                 {
18112                     v = -2*ae_pi*(mvidx+1)/(n*m);
18113                     twxm1 = ae_sin(0.5*v, _state);
18114                     twxm1 = -2*twxm1*twxm1;
18115                     twy = ae_sin(v, _state);
18116                     twx = twxm1+1;
18117                 }
18118                 else
18119                 {
18120                     v = twxm1+tw0+twxm1*tw0-twy*tw1;
18121                     twy = twy+tw1+twxm1*tw1+twy*tw0;
18122                     twxm1 = v;
18123                     twx = v+1;
18124                 }
18125             }
18126         }
18127         return;
18128     }
18129     if( n==5 )
18130     {
18131         v = -2*ae_pi/(n*m);
18132         tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18133         tw1 = ae_sin(v, _state);
18134         v = 2*ae_pi/5;
18135         c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
18136         c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
18137         c3 = -ae_sin(v, _state);
18138         c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state));
18139         c5 = ae_sin(v, _state)-ae_sin(2*v, _state);
18140         for(opidx=0; opidx<=operandscnt-1; opidx++)
18141         {
18142             aoffset0 = offs+opidx*operandsize*microvectorsize;
18143             aoffset2 = aoffset0+microvectorsize;
18144             aoffset4 = aoffset2+microvectorsize;
18145             aoffset6 = aoffset4+microvectorsize;
18146             aoffset8 = aoffset6+microvectorsize;
18147             twx = 1.0;
18148             twxm1 = 0.0;
18149             twy = 0.0;
18150             for(mvidx=0; mvidx<=m-1; mvidx++)
18151             {
18152                 a0x = a->ptr.p_double[aoffset0];
18153                 a0y = a->ptr.p_double[aoffset0+1];
18154                 a1x = a->ptr.p_double[aoffset2];
18155                 a1y = a->ptr.p_double[aoffset2+1];
18156                 a2x = a->ptr.p_double[aoffset4];
18157                 a2y = a->ptr.p_double[aoffset4+1];
18158                 a3x = a->ptr.p_double[aoffset6];
18159                 a3y = a->ptr.p_double[aoffset6+1];
18160                 a4x = a->ptr.p_double[aoffset8];
18161                 a4y = a->ptr.p_double[aoffset8+1];
18162                 t1x = a1x+a4x;
18163                 t1y = a1y+a4y;
18164                 t2x = a2x+a3x;
18165                 t2y = a2y+a3y;
18166                 t3x = a1x-a4x;
18167                 t3y = a1y-a4y;
18168                 t4x = a3x-a2x;
18169                 t4y = a3y-a2y;
18170                 t5x = t1x+t2x;
18171                 t5y = t1y+t2y;
18172                 q0x = a0x+t5x;
18173                 q0y = a0y+t5y;
18174                 m1x = c1*t5x;
18175                 m1y = c1*t5y;
18176                 m2x = c2*(t1x-t2x);
18177                 m2y = c2*(t1y-t2y);
18178                 m3x = -c3*(t3y+t4y);
18179                 m3y = c3*(t3x+t4x);
18180                 m4x = -c4*t4y;
18181                 m4y = c4*t4x;
18182                 m5x = -c5*t3y;
18183                 m5y = c5*t3x;
18184                 s3x = m3x-m4x;
18185                 s3y = m3y-m4y;
18186                 s5x = m3x+m5x;
18187                 s5y = m3y+m5y;
18188                 s1x = q0x+m1x;
18189                 s1y = q0y+m1y;
18190                 s2x = s1x+m2x;
18191                 s2y = s1y+m2y;
18192                 s4x = s1x-m2x;
18193                 s4y = s1y-m2y;
18194                 tw2x = twx*twx-twy*twy;
18195                 tw2y = 2*twx*twy;
18196                 tw3x = twx*tw2x-twy*tw2y;
18197                 tw3y = twx*tw2y+twy*tw2x;
18198                 tw4x = tw2x*tw2x-tw2y*tw2y;
18199                 tw4y = tw2x*tw2y+tw2y*tw2x;
18200                 a1x = s2x+s3x;
18201                 a1y = s2y+s3y;
18202                 a2x = s4x+s5x;
18203                 a2y = s4y+s5y;
18204                 a3x = s4x-s5x;
18205                 a3y = s4y-s5y;
18206                 a4x = s2x-s3x;
18207                 a4y = s2y-s3y;
18208                 a->ptr.p_double[aoffset0] = q0x;
18209                 a->ptr.p_double[aoffset0+1] = q0y;
18210                 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
18211                 a->ptr.p_double[aoffset2+1] = a1x*twy+a1y*twx;
18212                 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
18213                 a->ptr.p_double[aoffset4+1] = a2x*tw2y+a2y*tw2x;
18214                 a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y;
18215                 a->ptr.p_double[aoffset6+1] = a3x*tw3y+a3y*tw3x;
18216                 a->ptr.p_double[aoffset8] = a4x*tw4x-a4y*tw4y;
18217                 a->ptr.p_double[aoffset8+1] = a4x*tw4y+a4y*tw4x;
18218                 aoffset0 = aoffset0+2;
18219                 aoffset2 = aoffset2+2;
18220                 aoffset4 = aoffset4+2;
18221                 aoffset6 = aoffset6+2;
18222                 aoffset8 = aoffset8+2;
18223                 if( (mvidx+1)%ftbase_updatetw==0 )
18224                 {
18225                     v = -2*ae_pi*(mvidx+1)/(n*m);
18226                     twxm1 = ae_sin(0.5*v, _state);
18227                     twxm1 = -2*twxm1*twxm1;
18228                     twy = ae_sin(v, _state);
18229                     twx = twxm1+1;
18230                 }
18231                 else
18232                 {
18233                     v = twxm1+tw0+twxm1*tw0-twy*tw1;
18234                     twy = twy+tw1+twxm1*tw1+twy*tw0;
18235                     twxm1 = v;
18236                     twx = v+1;
18237                 }
18238             }
18239         }
18240         return;
18241     }
18242     if( n==6 )
18243     {
18244         c1 = ae_cos(2*ae_pi/3, _state)-1;
18245         c2 = ae_sin(2*ae_pi/3, _state);
18246         c3 = ae_cos(-ae_pi/3, _state);
18247         c4 = ae_sin(-ae_pi/3, _state);
18248         v = -2*ae_pi/(n*m);
18249         tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18250         tw1 = ae_sin(v, _state);
18251         for(opidx=0; opidx<=operandscnt-1; opidx++)
18252         {
18253             aoffset0 = offs+opidx*operandsize*microvectorsize;
18254             aoffset2 = aoffset0+microvectorsize;
18255             aoffset4 = aoffset2+microvectorsize;
18256             aoffset6 = aoffset4+microvectorsize;
18257             aoffset8 = aoffset6+microvectorsize;
18258             aoffset10 = aoffset8+microvectorsize;
18259             twx = 1.0;
18260             twxm1 = 0.0;
18261             twy = 0.0;
18262             for(mvidx=0; mvidx<=m-1; mvidx++)
18263             {
18264                 a0x = a->ptr.p_double[aoffset0+0];
18265                 a0y = a->ptr.p_double[aoffset0+1];
18266                 a1x = a->ptr.p_double[aoffset2+0];
18267                 a1y = a->ptr.p_double[aoffset2+1];
18268                 a2x = a->ptr.p_double[aoffset4+0];
18269                 a2y = a->ptr.p_double[aoffset4+1];
18270                 a3x = a->ptr.p_double[aoffset6+0];
18271                 a3y = a->ptr.p_double[aoffset6+1];
18272                 a4x = a->ptr.p_double[aoffset8+0];
18273                 a4y = a->ptr.p_double[aoffset8+1];
18274                 a5x = a->ptr.p_double[aoffset10+0];
18275                 a5y = a->ptr.p_double[aoffset10+1];
18276                 v0 = a0x;
18277                 v1 = a0y;
18278                 a0x = a0x+a3x;
18279                 a0y = a0y+a3y;
18280                 a3x = v0-a3x;
18281                 a3y = v1-a3y;
18282                 v0 = a1x;
18283                 v1 = a1y;
18284                 a1x = a1x+a4x;
18285                 a1y = a1y+a4y;
18286                 a4x = v0-a4x;
18287                 a4y = v1-a4y;
18288                 v0 = a2x;
18289                 v1 = a2y;
18290                 a2x = a2x+a5x;
18291                 a2y = a2y+a5y;
18292                 a5x = v0-a5x;
18293                 a5y = v1-a5y;
18294                 t4x = a4x*c3-a4y*c4;
18295                 t4y = a4x*c4+a4y*c3;
18296                 a4x = t4x;
18297                 a4y = t4y;
18298                 t5x = -a5x*c3-a5y*c4;
18299                 t5y = a5x*c4-a5y*c3;
18300                 a5x = t5x;
18301                 a5y = t5y;
18302                 t1x = a1x+a2x;
18303                 t1y = a1y+a2y;
18304                 a0x = a0x+t1x;
18305                 a0y = a0y+t1y;
18306                 m1x = c1*t1x;
18307                 m1y = c1*t1y;
18308                 m2x = c2*(a1y-a2y);
18309                 m2y = c2*(a2x-a1x);
18310                 s1x = a0x+m1x;
18311                 s1y = a0y+m1y;
18312                 a1x = s1x+m2x;
18313                 a1y = s1y+m2y;
18314                 a2x = s1x-m2x;
18315                 a2y = s1y-m2y;
18316                 t1x = a4x+a5x;
18317                 t1y = a4y+a5y;
18318                 a3x = a3x+t1x;
18319                 a3y = a3y+t1y;
18320                 m1x = c1*t1x;
18321                 m1y = c1*t1y;
18322                 m2x = c2*(a4y-a5y);
18323                 m2y = c2*(a5x-a4x);
18324                 s1x = a3x+m1x;
18325                 s1y = a3y+m1y;
18326                 a4x = s1x+m2x;
18327                 a4y = s1y+m2y;
18328                 a5x = s1x-m2x;
18329                 a5y = s1y-m2y;
18330                 tw2x = twx*twx-twy*twy;
18331                 tw2y = 2*twx*twy;
18332                 tw3x = twx*tw2x-twy*tw2y;
18333                 tw3y = twx*tw2y+twy*tw2x;
18334                 tw4x = tw2x*tw2x-tw2y*tw2y;
18335                 tw4y = 2*tw2x*tw2y;
18336                 tw5x = tw3x*tw2x-tw3y*tw2y;
18337                 tw5y = tw3x*tw2y+tw3y*tw2x;
18338                 a->ptr.p_double[aoffset0+0] = a0x;
18339                 a->ptr.p_double[aoffset0+1] = a0y;
18340                 a->ptr.p_double[aoffset2+0] = a3x*twx-a3y*twy;
18341                 a->ptr.p_double[aoffset2+1] = a3y*twx+a3x*twy;
18342                 a->ptr.p_double[aoffset4+0] = a1x*tw2x-a1y*tw2y;
18343                 a->ptr.p_double[aoffset4+1] = a1y*tw2x+a1x*tw2y;
18344                 a->ptr.p_double[aoffset6+0] = a4x*tw3x-a4y*tw3y;
18345                 a->ptr.p_double[aoffset6+1] = a4y*tw3x+a4x*tw3y;
18346                 a->ptr.p_double[aoffset8+0] = a2x*tw4x-a2y*tw4y;
18347                 a->ptr.p_double[aoffset8+1] = a2y*tw4x+a2x*tw4y;
18348                 a->ptr.p_double[aoffset10+0] = a5x*tw5x-a5y*tw5y;
18349                 a->ptr.p_double[aoffset10+1] = a5y*tw5x+a5x*tw5y;
18350                 aoffset0 = aoffset0+2;
18351                 aoffset2 = aoffset2+2;
18352                 aoffset4 = aoffset4+2;
18353                 aoffset6 = aoffset6+2;
18354                 aoffset8 = aoffset8+2;
18355                 aoffset10 = aoffset10+2;
18356                 if( (mvidx+1)%ftbase_updatetw==0 )
18357                 {
18358                     v = -2*ae_pi*(mvidx+1)/(n*m);
18359                     twxm1 = ae_sin(0.5*v, _state);
18360                     twxm1 = -2*twxm1*twxm1;
18361                     twy = ae_sin(v, _state);
18362                     twx = twxm1+1;
18363                 }
18364                 else
18365                 {
18366                     v = twxm1+tw0+twxm1*tw0-twy*tw1;
18367                     twy = twy+tw1+twxm1*tw1+twy*tw0;
18368                     twxm1 = v;
18369                     twx = v+1;
18370                 }
18371             }
18372         }
18373         return;
18374     }
18375 }
18376 
18377 
18378 /*************************************************************************
18379 This subroutine precomputes data for complex Bluestein's  FFT  and  writes
18380 them to array PrecR[] at specified offset. It  is  responsibility  of  the
18381 caller to make sure that PrecR[] is large enough.
18382 
18383 INPUT PARAMETERS:
18384     N           -   original size of the transform
18385     M           -   size of the "padded" Bluestein's transform
18386     PrecR       -   preallocated array
18387     Offs        -   offset
18388 
18389 OUTPUT PARAMETERS:
18390     PrecR       -   data at Offs:Offs+4*M-1 are modified:
18391                     * PrecR[Offs:Offs+2*M-1] stores Z[k]=exp(i*pi*k^2/N)
18392                     * PrecR[Offs+2*M:Offs+4*M-1] stores FFT of the Z
18393                     Other parts of PrecR are unchanged.
18394 
18395 NOTE: this function performs internal M-point FFT. It allocates temporary
18396       plan which is destroyed after leaving this function.
18397 
18398   -- ALGLIB --
18399      Copyright 08.05.2013 by Bochkanov Sergey
18400 *************************************************************************/
ftbase_ftprecomputebluesteinsfft(ae_int_t n,ae_int_t m,ae_vector * precr,ae_int_t offs,ae_state * _state)18401 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n,
18402      ae_int_t m,
18403      /* Real    */ ae_vector* precr,
18404      ae_int_t offs,
18405      ae_state *_state)
18406 {
18407     ae_frame _frame_block;
18408     ae_int_t i;
18409     double bx;
18410     double by;
18411     fasttransformplan plan;
18412 
18413     ae_frame_make(_state, &_frame_block);
18414     memset(&plan, 0, sizeof(plan));
18415     _fasttransformplan_init(&plan, _state, ae_true);
18416 
18417 
18418     /*
18419      * Fill first half of PrecR with b[k] = exp(i*pi*k^2/N)
18420      */
18421     for(i=0; i<=2*m-1; i++)
18422     {
18423         precr->ptr.p_double[offs+i] = (double)(0);
18424     }
18425     for(i=0; i<=n-1; i++)
18426     {
18427         bx = ae_cos(ae_pi/n*i*i, _state);
18428         by = ae_sin(ae_pi/n*i*i, _state);
18429         precr->ptr.p_double[offs+2*i+0] = bx;
18430         precr->ptr.p_double[offs+2*i+1] = by;
18431         precr->ptr.p_double[offs+2*((m-i)%m)+0] = bx;
18432         precr->ptr.p_double[offs+2*((m-i)%m)+1] = by;
18433     }
18434 
18435     /*
18436      * Precomputed FFT
18437      */
18438     ftcomplexfftplan(m, 1, &plan, _state);
18439     for(i=0; i<=2*m-1; i++)
18440     {
18441         precr->ptr.p_double[offs+2*m+i] = precr->ptr.p_double[offs+i];
18442     }
18443     ftbase_ftapplysubplan(&plan, 0, precr, offs+2*m, 0, &plan.buffer, 1, _state);
18444     ae_frame_leave(_state);
18445 }
18446 
18447 
18448 /*************************************************************************
18449 This subroutine applies complex Bluestein's FFT to input/output array A.
18450 
18451 INPUT PARAMETERS:
18452     Plan        -   transformation plan
18453     A           -   array, must be large enough for plan to work
18454     ABase       -   base offset in array A, this value points to start of
18455                     subarray whose length is equal to length of the plan
18456     AOffset     -   offset with respect to ABase, 0<=AOffset<PlanLength.
18457                     This is an offset within large PlanLength-subarray of
18458                     the chunk to process.
18459     OperandsCnt -   number of repeated operands (length N each)
18460     N           -   original data length (measured in complex numbers)
18461     M           -   padded data length (measured in complex numbers)
18462     PrecOffs    -   offset of the precomputed data for the plan
18463     SubPlan     -   position of the length-M FFT subplan which is used by
18464                     transformation
18465     BufA        -   temporary buffer, at least 2*M elements
18466     BufB        -   temporary buffer, at least 2*M elements
18467     BufC        -   temporary buffer, at least 2*M elements
18468     BufD        -   temporary buffer, at least 2*M elements
18469 
18470 OUTPUT PARAMETERS:
18471     A           -   transformed array
18472 
18473   -- ALGLIB --
18474      Copyright 05.04.2013 by Bochkanov Sergey
18475 *************************************************************************/
ftbase_ftbluesteinsfft(fasttransformplan * plan,ae_vector * a,ae_int_t abase,ae_int_t aoffset,ae_int_t operandscnt,ae_int_t n,ae_int_t m,ae_int_t precoffs,ae_int_t subplan,ae_vector * bufa,ae_vector * bufb,ae_vector * bufc,ae_vector * bufd,ae_state * _state)18476 static void ftbase_ftbluesteinsfft(fasttransformplan* plan,
18477      /* Real    */ ae_vector* a,
18478      ae_int_t abase,
18479      ae_int_t aoffset,
18480      ae_int_t operandscnt,
18481      ae_int_t n,
18482      ae_int_t m,
18483      ae_int_t precoffs,
18484      ae_int_t subplan,
18485      /* Real    */ ae_vector* bufa,
18486      /* Real    */ ae_vector* bufb,
18487      /* Real    */ ae_vector* bufc,
18488      /* Real    */ ae_vector* bufd,
18489      ae_state *_state)
18490 {
18491     ae_int_t op;
18492     ae_int_t i;
18493     double x;
18494     double y;
18495     double bx;
18496     double by;
18497     double ax;
18498     double ay;
18499     double rx;
18500     double ry;
18501     ae_int_t p0;
18502     ae_int_t p1;
18503     ae_int_t p2;
18504 
18505 
18506     for(op=0; op<=operandscnt-1; op++)
18507     {
18508 
18509         /*
18510          * Multiply A by conj(Z), store to buffer.
18511          * Pad A by zeros.
18512          *
18513          * NOTE: Z[k]=exp(i*pi*k^2/N)
18514          */
18515         p0 = abase+aoffset+op*2*n;
18516         p1 = precoffs;
18517         for(i=0; i<=n-1; i++)
18518         {
18519             x = a->ptr.p_double[p0+0];
18520             y = a->ptr.p_double[p0+1];
18521             bx = plan->precr.ptr.p_double[p1+0];
18522             by = -plan->precr.ptr.p_double[p1+1];
18523             bufa->ptr.p_double[2*i+0] = x*bx-y*by;
18524             bufa->ptr.p_double[2*i+1] = x*by+y*bx;
18525             p0 = p0+2;
18526             p1 = p1+2;
18527         }
18528         for(i=2*n; i<=2*m-1; i++)
18529         {
18530             bufa->ptr.p_double[i] = (double)(0);
18531         }
18532 
18533         /*
18534          * Perform convolution of A and Z (using precomputed
18535          * FFT of Z stored in Plan structure).
18536          */
18537         ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state);
18538         p0 = 0;
18539         p1 = precoffs+2*m;
18540         for(i=0; i<=m-1; i++)
18541         {
18542             ax = bufa->ptr.p_double[p0+0];
18543             ay = bufa->ptr.p_double[p0+1];
18544             bx = plan->precr.ptr.p_double[p1+0];
18545             by = plan->precr.ptr.p_double[p1+1];
18546             bufa->ptr.p_double[p0+0] = ax*bx-ay*by;
18547             bufa->ptr.p_double[p0+1] = -(ax*by+ay*bx);
18548             p0 = p0+2;
18549             p1 = p1+2;
18550         }
18551         ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state);
18552 
18553         /*
18554          * Post processing:
18555          *     A:=conj(Z)*conj(A)/M
18556          * Here conj(A)/M corresponds to last stage of inverse DFT,
18557          * and conj(Z) comes from Bluestein's FFT algorithm.
18558          */
18559         p0 = precoffs;
18560         p1 = 0;
18561         p2 = abase+aoffset+op*2*n;
18562         for(i=0; i<=n-1; i++)
18563         {
18564             bx = plan->precr.ptr.p_double[p0+0];
18565             by = plan->precr.ptr.p_double[p0+1];
18566             rx = bufa->ptr.p_double[p1+0]/m;
18567             ry = -bufa->ptr.p_double[p1+1]/m;
18568             a->ptr.p_double[p2+0] = rx*bx-ry*(-by);
18569             a->ptr.p_double[p2+1] = rx*(-by)+ry*bx;
18570             p0 = p0+2;
18571             p1 = p1+2;
18572             p2 = p2+2;
18573         }
18574     }
18575 }
18576 
18577 
18578 /*************************************************************************
18579 This subroutine precomputes data for complex Rader's FFT and  writes  them
18580 to array PrecR[] at specified offset. It  is  responsibility of the caller
18581 to make sure that PrecR[] is large enough.
18582 
18583 INPUT PARAMETERS:
18584     N           -   original size of the transform (before reduction to N-1)
18585     RQ          -   primitive root modulo N
18586     RIQ         -   inverse of primitive root modulo N
18587     PrecR       -   preallocated array
18588     Offs        -   offset
18589 
18590 OUTPUT PARAMETERS:
18591     PrecR       -   data at Offs:Offs+2*(N-1)-1 store FFT of Rader's factors,
18592                     other parts of PrecR are unchanged.
18593 
18594 NOTE: this function performs internal (N-1)-point FFT. It allocates temporary
18595       plan which is destroyed after leaving this function.
18596 
18597   -- ALGLIB --
18598      Copyright 08.05.2013 by Bochkanov Sergey
18599 *************************************************************************/
ftbase_ftprecomputeradersfft(ae_int_t n,ae_int_t rq,ae_int_t riq,ae_vector * precr,ae_int_t offs,ae_state * _state)18600 static void ftbase_ftprecomputeradersfft(ae_int_t n,
18601      ae_int_t rq,
18602      ae_int_t riq,
18603      /* Real    */ ae_vector* precr,
18604      ae_int_t offs,
18605      ae_state *_state)
18606 {
18607     ae_frame _frame_block;
18608     ae_int_t q;
18609     fasttransformplan plan;
18610     ae_int_t kiq;
18611     double v;
18612 
18613     ae_frame_make(_state, &_frame_block);
18614     memset(&plan, 0, sizeof(plan));
18615     _fasttransformplan_init(&plan, _state, ae_true);
18616 
18617 
18618     /*
18619      * Fill PrecR with Rader factors, perform FFT
18620      */
18621     kiq = 1;
18622     for(q=0; q<=n-2; q++)
18623     {
18624         v = -2*ae_pi*kiq/n;
18625         precr->ptr.p_double[offs+2*q+0] = ae_cos(v, _state);
18626         precr->ptr.p_double[offs+2*q+1] = ae_sin(v, _state);
18627         kiq = kiq*riq%n;
18628     }
18629     ftcomplexfftplan(n-1, 1, &plan, _state);
18630     ftbase_ftapplysubplan(&plan, 0, precr, offs, 0, &plan.buffer, 1, _state);
18631     ae_frame_leave(_state);
18632 }
18633 
18634 
18635 /*************************************************************************
18636 This subroutine applies complex Rader's FFT to input/output array A.
18637 
18638 INPUT PARAMETERS:
18639     A           -   array, must be large enough for plan to work
18640     ABase       -   base offset in array A, this value points to start of
18641                     subarray whose length is equal to length of the plan
18642     AOffset     -   offset with respect to ABase, 0<=AOffset<PlanLength.
18643                     This is an offset within large PlanLength-subarray of
18644                     the chunk to process.
18645     OperandsCnt -   number of repeated operands (length N each)
18646     N           -   original data length (measured in complex numbers)
18647     SubPlan     -   position of the (N-1)-point FFT subplan which is used
18648                     by transformation
18649     RQ          -   primitive root modulo N
18650     RIQ         -   inverse of primitive root modulo N
18651     PrecOffs    -   offset of the precomputed data for the plan
18652     Buf         -   temporary array
18653 
18654 OUTPUT PARAMETERS:
18655     A           -   transformed array
18656 
18657   -- ALGLIB --
18658      Copyright 05.04.2013 by Bochkanov Sergey
18659 *************************************************************************/
ftbase_ftradersfft(fasttransformplan * plan,ae_vector * a,ae_int_t abase,ae_int_t aoffset,ae_int_t operandscnt,ae_int_t n,ae_int_t subplan,ae_int_t rq,ae_int_t riq,ae_int_t precoffs,ae_vector * buf,ae_state * _state)18660 static void ftbase_ftradersfft(fasttransformplan* plan,
18661      /* Real    */ ae_vector* a,
18662      ae_int_t abase,
18663      ae_int_t aoffset,
18664      ae_int_t operandscnt,
18665      ae_int_t n,
18666      ae_int_t subplan,
18667      ae_int_t rq,
18668      ae_int_t riq,
18669      ae_int_t precoffs,
18670      /* Real    */ ae_vector* buf,
18671      ae_state *_state)
18672 {
18673     ae_int_t opidx;
18674     ae_int_t i;
18675     ae_int_t q;
18676     ae_int_t kq;
18677     ae_int_t kiq;
18678     double x0;
18679     double y0;
18680     ae_int_t p0;
18681     ae_int_t p1;
18682     double ax;
18683     double ay;
18684     double bx;
18685     double by;
18686     double rx;
18687     double ry;
18688 
18689 
18690     ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1", _state);
18691 
18692     /*
18693      * Process operands
18694      */
18695     for(opidx=0; opidx<=operandscnt-1; opidx++)
18696     {
18697 
18698         /*
18699          * fill QA
18700          */
18701         kq = 1;
18702         p0 = abase+aoffset+opidx*n*2;
18703         p1 = aoffset+opidx*n*2;
18704         rx = a->ptr.p_double[p0+0];
18705         ry = a->ptr.p_double[p0+1];
18706         x0 = rx;
18707         y0 = ry;
18708         for(q=0; q<=n-2; q++)
18709         {
18710             ax = a->ptr.p_double[p0+2*kq+0];
18711             ay = a->ptr.p_double[p0+2*kq+1];
18712             buf->ptr.p_double[p1+0] = ax;
18713             buf->ptr.p_double[p1+1] = ay;
18714             rx = rx+ax;
18715             ry = ry+ay;
18716             kq = kq*rq%n;
18717             p1 = p1+2;
18718         }
18719         p0 = abase+aoffset+opidx*n*2;
18720         p1 = aoffset+opidx*n*2;
18721         for(q=0; q<=n-2; q++)
18722         {
18723             a->ptr.p_double[p0] = buf->ptr.p_double[p1];
18724             a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1];
18725             p0 = p0+2;
18726             p1 = p1+2;
18727         }
18728 
18729         /*
18730          * Convolution
18731          */
18732         ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state);
18733         p0 = abase+aoffset+opidx*n*2;
18734         p1 = precoffs;
18735         for(i=0; i<=n-2; i++)
18736         {
18737             ax = a->ptr.p_double[p0+0];
18738             ay = a->ptr.p_double[p0+1];
18739             bx = plan->precr.ptr.p_double[p1+0];
18740             by = plan->precr.ptr.p_double[p1+1];
18741             a->ptr.p_double[p0+0] = ax*bx-ay*by;
18742             a->ptr.p_double[p0+1] = -(ax*by+ay*bx);
18743             p0 = p0+2;
18744             p1 = p1+2;
18745         }
18746         ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state);
18747         p0 = abase+aoffset+opidx*n*2;
18748         for(i=0; i<=n-2; i++)
18749         {
18750             a->ptr.p_double[p0+0] = a->ptr.p_double[p0+0]/(n-1);
18751             a->ptr.p_double[p0+1] = -a->ptr.p_double[p0+1]/(n-1);
18752             p0 = p0+2;
18753         }
18754 
18755         /*
18756          * Result
18757          */
18758         buf->ptr.p_double[aoffset+opidx*n*2+0] = rx;
18759         buf->ptr.p_double[aoffset+opidx*n*2+1] = ry;
18760         kiq = 1;
18761         p0 = aoffset+opidx*n*2;
18762         p1 = abase+aoffset+opidx*n*2;
18763         for(q=0; q<=n-2; q++)
18764         {
18765             buf->ptr.p_double[p0+2*kiq+0] = x0+a->ptr.p_double[p1+0];
18766             buf->ptr.p_double[p0+2*kiq+1] = y0+a->ptr.p_double[p1+1];
18767             kiq = kiq*riq%n;
18768             p1 = p1+2;
18769         }
18770         p0 = abase+aoffset+opidx*n*2;
18771         p1 = aoffset+opidx*n*2;
18772         for(q=0; q<=n-1; q++)
18773         {
18774             a->ptr.p_double[p0] = buf->ptr.p_double[p1];
18775             a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1];
18776             p0 = p0+2;
18777             p1 = p1+2;
18778         }
18779     }
18780 }
18781 
18782 
18783 /*************************************************************************
18784 Factorizes task size N into product of two smaller sizes N1 and N2
18785 
18786 INPUT PARAMETERS:
18787     N       -   task size, N>0
18788     IsRoot  -   whether taks is root task (first one in a sequence)
18789 
18790 OUTPUT PARAMETERS:
18791     N1, N2  -   such numbers that:
18792                 * for prime N:                  N1=N2=0
18793                 * for composite N<=MaxRadix:    N1=N2=0
18794                 * for composite N>MaxRadix:     1<=N1<=N2, N1*N2=N
18795 
18796   -- ALGLIB --
18797      Copyright 08.04.2013 by Bochkanov Sergey
18798 *************************************************************************/
ftbase_ftfactorize(ae_int_t n,ae_bool isroot,ae_int_t * n1,ae_int_t * n2,ae_state * _state)18799 static void ftbase_ftfactorize(ae_int_t n,
18800      ae_bool isroot,
18801      ae_int_t* n1,
18802      ae_int_t* n2,
18803      ae_state *_state)
18804 {
18805     ae_int_t j;
18806     ae_int_t k;
18807 
18808     *n1 = 0;
18809     *n2 = 0;
18810 
18811     ae_assert(n>0, "FTFactorize: N<=0", _state);
18812     *n1 = 0;
18813     *n2 = 0;
18814 
18815     /*
18816      * Small N
18817      */
18818     if( n<=ftbase_maxradix )
18819     {
18820         return;
18821     }
18822 
18823     /*
18824      * Large N, recursive split
18825      */
18826     if( n>ftbase_recursivethreshold )
18827     {
18828         k = ae_iceil(ae_sqrt((double)(n), _state), _state)+1;
18829         ae_assert(k*k>=n, "FTFactorize: internal error during recursive factorization", _state);
18830         for(j=k; j>=2; j--)
18831         {
18832             if( n%j==0 )
18833             {
18834                 *n1 = ae_minint(n/j, j, _state);
18835                 *n2 = ae_maxint(n/j, j, _state);
18836                 return;
18837             }
18838         }
18839     }
18840 
18841     /*
18842      * N>MaxRadix, try to find good codelet
18843      */
18844     for(j=ftbase_maxradix; j>=2; j--)
18845     {
18846         if( n%j==0 )
18847         {
18848             *n1 = j;
18849             *n2 = n/j;
18850             break;
18851         }
18852     }
18853 
18854     /*
18855      * In case no good codelet was found,
18856      * try to factorize N into product of ANY primes.
18857      */
18858     if( *n1*(*n2)!=n )
18859     {
18860         for(j=2; j<=n-1; j++)
18861         {
18862             if( n%j==0 )
18863             {
18864                 *n1 = j;
18865                 *n2 = n/j;
18866                 break;
18867             }
18868             if( j*j>n )
18869             {
18870                 break;
18871             }
18872         }
18873     }
18874 
18875     /*
18876      * normalize
18877      */
18878     if( *n1>(*n2) )
18879     {
18880         j = *n1;
18881         *n1 = *n2;
18882         *n2 = j;
18883     }
18884 }
18885 
18886 
18887 /*************************************************************************
18888 Returns optimistic estimate of the FFT cost, in UNITs (1 UNIT = 100 KFLOPs)
18889 
18890 INPUT PARAMETERS:
18891     N       -   task size, N>0
18892 
18893 RESULU:
18894     cost in UNITs, rounded down to nearest integer
18895 
18896 NOTE: If FFT cost is less than 1 UNIT, it will return 0 as result.
18897 
18898   -- ALGLIB --
18899      Copyright 08.04.2013 by Bochkanov Sergey
18900 *************************************************************************/
ftbase_ftoptimisticestimate(ae_int_t n,ae_state * _state)18901 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state)
18902 {
18903     ae_int_t result;
18904 
18905 
18906     ae_assert(n>0, "FTOptimisticEstimate: N<=0", _state);
18907     result = ae_ifloor(1.0E-5*5*n*ae_log((double)(n), _state)/ae_log((double)(2), _state), _state);
18908     return result;
18909 }
18910 
18911 
18912 /*************************************************************************
18913 Twiddle factors calculation
18914 
18915   -- ALGLIB --
18916      Copyright 01.05.2009 by Bochkanov Sergey
18917 *************************************************************************/
ftbase_ffttwcalc(ae_vector * a,ae_int_t aoffset,ae_int_t n1,ae_int_t n2,ae_state * _state)18918 static void ftbase_ffttwcalc(/* Real    */ ae_vector* a,
18919      ae_int_t aoffset,
18920      ae_int_t n1,
18921      ae_int_t n2,
18922      ae_state *_state)
18923 {
18924     ae_int_t i;
18925     ae_int_t j2;
18926     ae_int_t n;
18927     ae_int_t halfn1;
18928     ae_int_t offs;
18929     double x;
18930     double y;
18931     double twxm1;
18932     double twy;
18933     double twbasexm1;
18934     double twbasey;
18935     double twrowxm1;
18936     double twrowy;
18937     double tmpx;
18938     double tmpy;
18939     double v;
18940     ae_int_t updatetw2;
18941 
18942 
18943 
18944     /*
18945      * Multiplication by twiddle factors for complex Cooley-Tukey FFT
18946      * with N factorized as N1*N2.
18947      *
18948      * Naive solution to this problem is given below:
18949      *
18950      *     > for K:=1 to N2-1 do
18951      *     >     for J:=1 to N1-1 do
18952      *     >     begin
18953      *     >         Idx:=K*N1+J;
18954      *     >         X:=A[AOffset+2*Idx+0];
18955      *     >         Y:=A[AOffset+2*Idx+1];
18956      *     >         TwX:=Cos(-2*Pi()*K*J/(N1*N2));
18957      *     >         TwY:=Sin(-2*Pi()*K*J/(N1*N2));
18958      *     >         A[AOffset+2*Idx+0]:=X*TwX-Y*TwY;
18959      *     >         A[AOffset+2*Idx+1]:=X*TwY+Y*TwX;
18960      *     >     end;
18961      *
18962      * However, there are exist more efficient solutions.
18963      *
18964      * Each pass of the inner cycle corresponds to multiplication of one
18965      * entry of A by W[k,j]=exp(-I*2*pi*k*j/N). This factor can be rewritten
18966      * as exp(-I*2*pi*k/N)^j. So we can replace costly exponentiation by
18967      * repeated multiplication: W[k,j+1]=W[k,j]*exp(-I*2*pi*k/N), with
18968      * second factor being computed once in the beginning of the iteration.
18969      *
18970      * Also, exp(-I*2*pi*k/N) can be represented as exp(-I*2*pi/N)^k, i.e.
18971      * we have W[K+1,1]=W[K,1]*W[1,1].
18972      *
18973      * In our loop we use following variables:
18974      * * [TwBaseXM1,TwBaseY] =   [cos(2*pi/N)-1,     sin(2*pi/N)]
18975      * * [TwRowXM1, TwRowY]  =   [cos(2*pi*I/N)-1,   sin(2*pi*I/N)]
18976      * * [TwXM1,    TwY]     =   [cos(2*pi*I*J/N)-1, sin(2*pi*I*J/N)]
18977      *
18978      * Meaning of the variables:
18979      * * [TwXM1,TwY] is current twiddle factor W[I,J]
18980      * * [TwRowXM1, TwRowY] is W[I,1]
18981      * * [TwBaseXM1,TwBaseY] is W[1,1]
18982      *
18983      * During inner loop we multiply current twiddle factor by W[I,1],
18984      * during outer loop we update W[I,1].
18985      *
18986      */
18987     ae_assert(ftbase_updatetw>=2, "FFTTwCalc: internal error - UpdateTw<2", _state);
18988     updatetw2 = ftbase_updatetw/2;
18989     halfn1 = n1/2;
18990     n = n1*n2;
18991     v = -2*ae_pi/n;
18992     twbasexm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
18993     twbasey = ae_sin(v, _state);
18994     twrowxm1 = (double)(0);
18995     twrowy = (double)(0);
18996     offs = aoffset;
18997     for(i=0; i<=n2-1; i++)
18998     {
18999 
19000         /*
19001          * Initialize twiddle factor for current row
19002          */
19003         twxm1 = (double)(0);
19004         twy = (double)(0);
19005 
19006         /*
19007          * N1-point block is separated into 2-point chunks and residual 1-point chunk
19008          * (in case N1 is odd). Unrolled loop is several times faster.
19009          */
19010         for(j2=0; j2<=halfn1-1; j2++)
19011         {
19012 
19013             /*
19014              * Processing:
19015              * * process first element in a chunk.
19016              * * update twiddle factor (unconditional update)
19017              * * process second element
19018              * * conditional update of the twiddle factor
19019              */
19020             x = a->ptr.p_double[offs+0];
19021             y = a->ptr.p_double[offs+1];
19022             tmpx = x*(1+twxm1)-y*twy;
19023             tmpy = x*twy+y*(1+twxm1);
19024             a->ptr.p_double[offs+0] = tmpx;
19025             a->ptr.p_double[offs+1] = tmpy;
19026             tmpx = (1+twxm1)*twrowxm1-twy*twrowy;
19027             twy = twy+(1+twxm1)*twrowy+twy*twrowxm1;
19028             twxm1 = twxm1+tmpx;
19029             x = a->ptr.p_double[offs+2];
19030             y = a->ptr.p_double[offs+3];
19031             tmpx = x*(1+twxm1)-y*twy;
19032             tmpy = x*twy+y*(1+twxm1);
19033             a->ptr.p_double[offs+2] = tmpx;
19034             a->ptr.p_double[offs+3] = tmpy;
19035             offs = offs+4;
19036             if( (j2+1)%updatetw2==0&&j2<halfn1-1 )
19037             {
19038 
19039                 /*
19040                  * Recalculate twiddle factor
19041                  */
19042                 v = -2*ae_pi*i*2*(j2+1)/n;
19043                 twxm1 = ae_sin(0.5*v, _state);
19044                 twxm1 = -2*twxm1*twxm1;
19045                 twy = ae_sin(v, _state);
19046             }
19047             else
19048             {
19049 
19050                 /*
19051                  * Update twiddle factor
19052                  */
19053                 tmpx = (1+twxm1)*twrowxm1-twy*twrowy;
19054                 twy = twy+(1+twxm1)*twrowy+twy*twrowxm1;
19055                 twxm1 = twxm1+tmpx;
19056             }
19057         }
19058         if( n1%2==1 )
19059         {
19060 
19061             /*
19062              * Handle residual chunk
19063              */
19064             x = a->ptr.p_double[offs+0];
19065             y = a->ptr.p_double[offs+1];
19066             tmpx = x*(1+twxm1)-y*twy;
19067             tmpy = x*twy+y*(1+twxm1);
19068             a->ptr.p_double[offs+0] = tmpx;
19069             a->ptr.p_double[offs+1] = tmpy;
19070             offs = offs+2;
19071         }
19072 
19073         /*
19074          * update TwRow: TwRow(new) = TwRow(old)*TwBase
19075          */
19076         if( i<n2-1 )
19077         {
19078             if( (i+1)%ftbase_updatetw==0 )
19079             {
19080                 v = -2*ae_pi*(i+1)/n;
19081                 twrowxm1 = ae_sin(0.5*v, _state);
19082                 twrowxm1 = -2*twrowxm1*twrowxm1;
19083                 twrowy = ae_sin(v, _state);
19084             }
19085             else
19086             {
19087                 tmpx = twbasexm1+twrowxm1*twbasexm1-twrowy*twbasey;
19088                 tmpy = twbasey+twrowxm1*twbasey+twrowy*twbasexm1;
19089                 twrowxm1 = twrowxm1+tmpx;
19090                 twrowy = twrowy+tmpy;
19091             }
19092         }
19093     }
19094 }
19095 
19096 
19097 /*************************************************************************
19098 Linear transpose: transpose complex matrix stored in 1-dimensional array
19099 
19100   -- ALGLIB --
19101      Copyright 01.05.2009 by Bochkanov Sergey
19102 *************************************************************************/
ftbase_internalcomplexlintranspose(ae_vector * a,ae_int_t m,ae_int_t n,ae_int_t astart,ae_vector * buf,ae_state * _state)19103 static void ftbase_internalcomplexlintranspose(/* Real    */ ae_vector* a,
19104      ae_int_t m,
19105      ae_int_t n,
19106      ae_int_t astart,
19107      /* Real    */ ae_vector* buf,
19108      ae_state *_state)
19109 {
19110 
19111 
19112     ftbase_ffticltrec(a, astart, n, buf, 0, m, m, n, _state);
19113     ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+2*m*n-1));
19114 }
19115 
19116 
19117 /*************************************************************************
19118 Recurrent subroutine for a InternalComplexLinTranspose
19119 
19120 Write A^T to B, where:
19121 * A is m*n complex matrix stored in array A as pairs of real/image values,
19122   beginning from AStart position, with AStride stride
19123 * B is n*m complex matrix stored in array B as pairs of real/image values,
19124   beginning from BStart position, with BStride stride
19125 stride is measured in complex numbers, i.e. in real/image pairs.
19126 
19127   -- ALGLIB --
19128      Copyright 01.05.2009 by Bochkanov Sergey
19129 *************************************************************************/
ftbase_ffticltrec(ae_vector * a,ae_int_t astart,ae_int_t astride,ae_vector * b,ae_int_t bstart,ae_int_t bstride,ae_int_t m,ae_int_t n,ae_state * _state)19130 static void ftbase_ffticltrec(/* Real    */ ae_vector* a,
19131      ae_int_t astart,
19132      ae_int_t astride,
19133      /* Real    */ ae_vector* b,
19134      ae_int_t bstart,
19135      ae_int_t bstride,
19136      ae_int_t m,
19137      ae_int_t n,
19138      ae_state *_state)
19139 {
19140     ae_int_t i;
19141     ae_int_t j;
19142     ae_int_t idx1;
19143     ae_int_t idx2;
19144     ae_int_t m2;
19145     ae_int_t m1;
19146     ae_int_t n1;
19147 
19148 
19149     if( m==0||n==0 )
19150     {
19151         return;
19152     }
19153     if( ae_maxint(m, n, _state)<=8 )
19154     {
19155         m2 = 2*bstride;
19156         for(i=0; i<=m-1; i++)
19157         {
19158             idx1 = bstart+2*i;
19159             idx2 = astart+2*i*astride;
19160             for(j=0; j<=n-1; j++)
19161             {
19162                 b->ptr.p_double[idx1+0] = a->ptr.p_double[idx2+0];
19163                 b->ptr.p_double[idx1+1] = a->ptr.p_double[idx2+1];
19164                 idx1 = idx1+m2;
19165                 idx2 = idx2+2;
19166             }
19167         }
19168         return;
19169     }
19170     if( n>m )
19171     {
19172 
19173         /*
19174          * New partition:
19175          *
19176          * "A^T -> B" becomes "(A1 A2)^T -> ( B1 )
19177          *                                  ( B2 )
19178          */
19179         n1 = n/2;
19180         if( n-n1>=8&&n1%8!=0 )
19181         {
19182             n1 = n1+(8-n1%8);
19183         }
19184         ae_assert(n-n1>0, "Assertion failed", _state);
19185         ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
19186         ftbase_ffticltrec(a, astart+2*n1, astride, b, bstart+2*n1*bstride, bstride, m, n-n1, _state);
19187     }
19188     else
19189     {
19190 
19191         /*
19192          * New partition:
19193          *
19194          * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 )
19195          *                     ( A2 )
19196          */
19197         m1 = m/2;
19198         if( m-m1>=8&&m1%8!=0 )
19199         {
19200             m1 = m1+(8-m1%8);
19201         }
19202         ae_assert(m-m1>0, "Assertion failed", _state);
19203         ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
19204         ftbase_ffticltrec(a, astart+2*m1*astride, astride, b, bstart+2*m1, bstride, m-m1, n, _state);
19205     }
19206 }
19207 
19208 
19209 /*************************************************************************
19210 recurrent subroutine for FFTFindSmoothRec
19211 
19212   -- ALGLIB --
19213      Copyright 01.05.2009 by Bochkanov Sergey
19214 *************************************************************************/
ftbase_ftbasefindsmoothrec(ae_int_t n,ae_int_t seed,ae_int_t leastfactor,ae_int_t * best,ae_state * _state)19215 static void ftbase_ftbasefindsmoothrec(ae_int_t n,
19216      ae_int_t seed,
19217      ae_int_t leastfactor,
19218      ae_int_t* best,
19219      ae_state *_state)
19220 {
19221 
19222 
19223     ae_assert(ftbase_ftbasemaxsmoothfactor<=5, "FTBaseFindSmoothRec: internal error!", _state);
19224     if( seed>=n )
19225     {
19226         *best = ae_minint(*best, seed, _state);
19227         return;
19228     }
19229     if( leastfactor<=2 )
19230     {
19231         ftbase_ftbasefindsmoothrec(n, seed*2, 2, best, _state);
19232     }
19233     if( leastfactor<=3 )
19234     {
19235         ftbase_ftbasefindsmoothrec(n, seed*3, 3, best, _state);
19236     }
19237     if( leastfactor<=5 )
19238     {
19239         ftbase_ftbasefindsmoothrec(n, seed*5, 5, best, _state);
19240     }
19241 }
19242 
19243 
_fasttransformplan_init(void * _p,ae_state * _state,ae_bool make_automatic)19244 void _fasttransformplan_init(void* _p, ae_state *_state, ae_bool make_automatic)
19245 {
19246     fasttransformplan *p = (fasttransformplan*)_p;
19247     ae_touch_ptr((void*)p);
19248     ae_matrix_init(&p->entries, 0, 0, DT_INT, _state, make_automatic);
19249     ae_vector_init(&p->buffer, 0, DT_REAL, _state, make_automatic);
19250     ae_vector_init(&p->precr, 0, DT_REAL, _state, make_automatic);
19251     ae_vector_init(&p->preci, 0, DT_REAL, _state, make_automatic);
19252     ae_shared_pool_init(&p->bluesteinpool, _state, make_automatic);
19253 }
19254 
19255 
_fasttransformplan_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)19256 void _fasttransformplan_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
19257 {
19258     fasttransformplan *dst = (fasttransformplan*)_dst;
19259     fasttransformplan *src = (fasttransformplan*)_src;
19260     ae_matrix_init_copy(&dst->entries, &src->entries, _state, make_automatic);
19261     ae_vector_init_copy(&dst->buffer, &src->buffer, _state, make_automatic);
19262     ae_vector_init_copy(&dst->precr, &src->precr, _state, make_automatic);
19263     ae_vector_init_copy(&dst->preci, &src->preci, _state, make_automatic);
19264     ae_shared_pool_init_copy(&dst->bluesteinpool, &src->bluesteinpool, _state, make_automatic);
19265 }
19266 
19267 
_fasttransformplan_clear(void * _p)19268 void _fasttransformplan_clear(void* _p)
19269 {
19270     fasttransformplan *p = (fasttransformplan*)_p;
19271     ae_touch_ptr((void*)p);
19272     ae_matrix_clear(&p->entries);
19273     ae_vector_clear(&p->buffer);
19274     ae_vector_clear(&p->precr);
19275     ae_vector_clear(&p->preci);
19276     ae_shared_pool_clear(&p->bluesteinpool);
19277 }
19278 
19279 
_fasttransformplan_destroy(void * _p)19280 void _fasttransformplan_destroy(void* _p)
19281 {
19282     fasttransformplan *p = (fasttransformplan*)_p;
19283     ae_touch_ptr((void*)p);
19284     ae_matrix_destroy(&p->entries);
19285     ae_vector_destroy(&p->buffer);
19286     ae_vector_destroy(&p->precr);
19287     ae_vector_destroy(&p->preci);
19288     ae_shared_pool_destroy(&p->bluesteinpool);
19289 }
19290 
19291 
19292 #endif
19293 #if defined(AE_COMPILE_HPCCORES) || !defined(AE_PARTIAL_BUILD)
19294 
19295 
19296 /*************************************************************************
19297 Prepares HPC compuations  of  chunked  gradient with HPCChunkedGradient().
19298 You  have to call this function  before  calling  HPCChunkedGradient() for
19299 a new set of weights. You have to call it only once, see example below:
19300 
19301 HOW TO PROCESS DATASET WITH THIS FUNCTION:
19302     Grad:=0
19303     HPCPrepareChunkedGradient(Weights, WCount, NTotal, NOut, Buf)
19304     foreach chunk-of-dataset do
19305         HPCChunkedGradient(...)
19306     HPCFinalizeChunkedGradient(Buf, Grad)
19307 
19308 *************************************************************************/
hpcpreparechunkedgradient(ae_vector * weights,ae_int_t wcount,ae_int_t ntotal,ae_int_t nin,ae_int_t nout,mlpbuffers * buf,ae_state * _state)19309 void hpcpreparechunkedgradient(/* Real    */ ae_vector* weights,
19310      ae_int_t wcount,
19311      ae_int_t ntotal,
19312      ae_int_t nin,
19313      ae_int_t nout,
19314      mlpbuffers* buf,
19315      ae_state *_state)
19316 {
19317     ae_int_t i;
19318     ae_int_t batch4size;
19319     ae_int_t chunksize;
19320 
19321 
19322     chunksize = 4;
19323     batch4size = 3*chunksize*ntotal+chunksize*(2*nout+1);
19324     if( buf->xy.rows<chunksize||buf->xy.cols<nin+nout )
19325     {
19326         ae_matrix_set_length(&buf->xy, chunksize, nin+nout, _state);
19327     }
19328     if( buf->xy2.rows<chunksize||buf->xy2.cols<nin+nout )
19329     {
19330         ae_matrix_set_length(&buf->xy2, chunksize, nin+nout, _state);
19331     }
19332     if( buf->xyrow.cnt<nin+nout )
19333     {
19334         ae_vector_set_length(&buf->xyrow, nin+nout, _state);
19335     }
19336     if( buf->x.cnt<nin )
19337     {
19338         ae_vector_set_length(&buf->x, nin, _state);
19339     }
19340     if( buf->y.cnt<nout )
19341     {
19342         ae_vector_set_length(&buf->y, nout, _state);
19343     }
19344     if( buf->desiredy.cnt<nout )
19345     {
19346         ae_vector_set_length(&buf->desiredy, nout, _state);
19347     }
19348     if( buf->batch4buf.cnt<batch4size )
19349     {
19350         ae_vector_set_length(&buf->batch4buf, batch4size, _state);
19351     }
19352     if( buf->hpcbuf.cnt<wcount )
19353     {
19354         ae_vector_set_length(&buf->hpcbuf, wcount, _state);
19355     }
19356     if( buf->g.cnt<wcount )
19357     {
19358         ae_vector_set_length(&buf->g, wcount, _state);
19359     }
19360     if( !hpccores_hpcpreparechunkedgradientx(weights, wcount, &buf->hpcbuf, _state) )
19361     {
19362         for(i=0; i<=wcount-1; i++)
19363         {
19364             buf->hpcbuf.ptr.p_double[i] = 0.0;
19365         }
19366     }
19367     buf->wcount = wcount;
19368     buf->ntotal = ntotal;
19369     buf->nin = nin;
19370     buf->nout = nout;
19371     buf->chunksize = chunksize;
19372 }
19373 
19374 
19375 /*************************************************************************
19376 Finalizes HPC compuations  of  chunked gradient with HPCChunkedGradient().
19377 You  have to call this function  after  calling  HPCChunkedGradient()  for
19378 a new set of weights. You have to call it only once, see example below:
19379 
19380 HOW TO PROCESS DATASET WITH THIS FUNCTION:
19381     Grad:=0
19382     HPCPrepareChunkedGradient(Weights, WCount, NTotal, NOut, Buf)
19383     foreach chunk-of-dataset do
19384         HPCChunkedGradient(...)
19385     HPCFinalizeChunkedGradient(Buf, Grad)
19386 
19387 *************************************************************************/
hpcfinalizechunkedgradient(mlpbuffers * buf,ae_vector * grad,ae_state * _state)19388 void hpcfinalizechunkedgradient(mlpbuffers* buf,
19389      /* Real    */ ae_vector* grad,
19390      ae_state *_state)
19391 {
19392     ae_int_t i;
19393 
19394 
19395     if( !hpccores_hpcfinalizechunkedgradientx(&buf->hpcbuf, buf->wcount, grad, _state) )
19396     {
19397         for(i=0; i<=buf->wcount-1; i++)
19398         {
19399             grad->ptr.p_double[i] = grad->ptr.p_double[i]+buf->hpcbuf.ptr.p_double[i];
19400         }
19401     }
19402 }
19403 
19404 
19405 /*************************************************************************
19406 Fast kernel for chunked gradient.
19407 
19408 *************************************************************************/
hpcchunkedgradient(ae_vector * weights,ae_vector * structinfo,ae_vector * columnmeans,ae_vector * columnsigmas,ae_matrix * xy,ae_int_t cstart,ae_int_t csize,ae_vector * batch4buf,ae_vector * hpcbuf,double * e,ae_bool naturalerrorfunc,ae_state * _state)19409 ae_bool hpcchunkedgradient(/* Real    */ ae_vector* weights,
19410      /* Integer */ ae_vector* structinfo,
19411      /* Real    */ ae_vector* columnmeans,
19412      /* Real    */ ae_vector* columnsigmas,
19413      /* Real    */ ae_matrix* xy,
19414      ae_int_t cstart,
19415      ae_int_t csize,
19416      /* Real    */ ae_vector* batch4buf,
19417      /* Real    */ ae_vector* hpcbuf,
19418      double* e,
19419      ae_bool naturalerrorfunc,
19420      ae_state *_state)
19421 {
19422 #ifndef ALGLIB_INTERCEPTS_SSE2
19423     ae_bool result;
19424 
19425 
19426     result = ae_false;
19427     return result;
19428 #else
19429     return _ialglib_i_hpcchunkedgradient(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, e, naturalerrorfunc);
19430 #endif
19431 }
19432 
19433 
19434 /*************************************************************************
19435 Fast kernel for chunked processing.
19436 
19437 *************************************************************************/
hpcchunkedprocess(ae_vector * weights,ae_vector * structinfo,ae_vector * columnmeans,ae_vector * columnsigmas,ae_matrix * xy,ae_int_t cstart,ae_int_t csize,ae_vector * batch4buf,ae_vector * hpcbuf,ae_state * _state)19438 ae_bool hpcchunkedprocess(/* Real    */ ae_vector* weights,
19439      /* Integer */ ae_vector* structinfo,
19440      /* Real    */ ae_vector* columnmeans,
19441      /* Real    */ ae_vector* columnsigmas,
19442      /* Real    */ ae_matrix* xy,
19443      ae_int_t cstart,
19444      ae_int_t csize,
19445      /* Real    */ ae_vector* batch4buf,
19446      /* Real    */ ae_vector* hpcbuf,
19447      ae_state *_state)
19448 {
19449 #ifndef ALGLIB_INTERCEPTS_SSE2
19450     ae_bool result;
19451 
19452 
19453     result = ae_false;
19454     return result;
19455 #else
19456     return _ialglib_i_hpcchunkedprocess(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf);
19457 #endif
19458 }
19459 
19460 
19461 /*************************************************************************
19462 Stub function.
19463 
19464   -- ALGLIB routine --
19465      14.06.2013
19466      Bochkanov Sergey
19467 *************************************************************************/
hpccores_hpcpreparechunkedgradientx(ae_vector * weights,ae_int_t wcount,ae_vector * hpcbuf,ae_state * _state)19468 static ae_bool hpccores_hpcpreparechunkedgradientx(/* Real    */ ae_vector* weights,
19469      ae_int_t wcount,
19470      /* Real    */ ae_vector* hpcbuf,
19471      ae_state *_state)
19472 {
19473 #ifndef ALGLIB_INTERCEPTS_SSE2
19474     ae_bool result;
19475 
19476 
19477     result = ae_false;
19478     return result;
19479 #else
19480     return _ialglib_i_hpcpreparechunkedgradientx(weights, wcount, hpcbuf);
19481 #endif
19482 }
19483 
19484 
19485 /*************************************************************************
19486 Stub function.
19487 
19488   -- ALGLIB routine --
19489      14.06.2013
19490      Bochkanov Sergey
19491 *************************************************************************/
hpccores_hpcfinalizechunkedgradientx(ae_vector * buf,ae_int_t wcount,ae_vector * grad,ae_state * _state)19492 static ae_bool hpccores_hpcfinalizechunkedgradientx(/* Real    */ ae_vector* buf,
19493      ae_int_t wcount,
19494      /* Real    */ ae_vector* grad,
19495      ae_state *_state)
19496 {
19497 #ifndef ALGLIB_INTERCEPTS_SSE2
19498     ae_bool result;
19499 
19500 
19501     result = ae_false;
19502     return result;
19503 #else
19504     return _ialglib_i_hpcfinalizechunkedgradientx(buf, wcount, grad);
19505 #endif
19506 }
19507 
19508 
_mlpbuffers_init(void * _p,ae_state * _state,ae_bool make_automatic)19509 void _mlpbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
19510 {
19511     mlpbuffers *p = (mlpbuffers*)_p;
19512     ae_touch_ptr((void*)p);
19513     ae_vector_init(&p->batch4buf, 0, DT_REAL, _state, make_automatic);
19514     ae_vector_init(&p->hpcbuf, 0, DT_REAL, _state, make_automatic);
19515     ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic);
19516     ae_matrix_init(&p->xy2, 0, 0, DT_REAL, _state, make_automatic);
19517     ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic);
19518     ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic);
19519     ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic);
19520     ae_vector_init(&p->desiredy, 0, DT_REAL, _state, make_automatic);
19521     ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic);
19522     ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic);
19523 }
19524 
19525 
_mlpbuffers_init_copy(void * _dst,void * _src,ae_state * _state,ae_bool make_automatic)19526 void _mlpbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
19527 {
19528     mlpbuffers *dst = (mlpbuffers*)_dst;
19529     mlpbuffers *src = (mlpbuffers*)_src;
19530     dst->chunksize = src->chunksize;
19531     dst->ntotal = src->ntotal;
19532     dst->nin = src->nin;
19533     dst->nout = src->nout;
19534     dst->wcount = src->wcount;
19535     ae_vector_init_copy(&dst->batch4buf, &src->batch4buf, _state, make_automatic);
19536     ae_vector_init_copy(&dst->hpcbuf, &src->hpcbuf, _state, make_automatic);
19537     ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic);
19538     ae_matrix_init_copy(&dst->xy2, &src->xy2, _state, make_automatic);
19539     ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic);
19540     ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic);
19541     ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic);
19542     ae_vector_init_copy(&dst->desiredy, &src->desiredy, _state, make_automatic);
19543     dst->e = src->e;
19544     ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic);
19545     ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic);
19546 }
19547 
19548 
_mlpbuffers_clear(void * _p)19549 void _mlpbuffers_clear(void* _p)
19550 {
19551     mlpbuffers *p = (mlpbuffers*)_p;
19552     ae_touch_ptr((void*)p);
19553     ae_vector_clear(&p->batch4buf);
19554     ae_vector_clear(&p->hpcbuf);
19555     ae_matrix_clear(&p->xy);
19556     ae_matrix_clear(&p->xy2);
19557     ae_vector_clear(&p->xyrow);
19558     ae_vector_clear(&p->x);
19559     ae_vector_clear(&p->y);
19560     ae_vector_clear(&p->desiredy);
19561     ae_vector_clear(&p->g);
19562     ae_vector_clear(&p->tmp0);
19563 }
19564 
19565 
_mlpbuffers_destroy(void * _p)19566 void _mlpbuffers_destroy(void* _p)
19567 {
19568     mlpbuffers *p = (mlpbuffers*)_p;
19569     ae_touch_ptr((void*)p);
19570     ae_vector_destroy(&p->batch4buf);
19571     ae_vector_destroy(&p->hpcbuf);
19572     ae_matrix_destroy(&p->xy);
19573     ae_matrix_destroy(&p->xy2);
19574     ae_vector_destroy(&p->xyrow);
19575     ae_vector_destroy(&p->x);
19576     ae_vector_destroy(&p->y);
19577     ae_vector_destroy(&p->desiredy);
19578     ae_vector_destroy(&p->g);
19579     ae_vector_destroy(&p->tmp0);
19580 }
19581 
19582 
19583 #endif
19584 #if defined(AE_COMPILE_ALGLIBBASICS) || !defined(AE_PARTIAL_BUILD)
19585 
19586 
19587 #endif
19588 
19589 }
19590 
19591