1 /*************************************************************************
2 ALGLIB 3.18.0 (source code generated 2021-10-25)
3 Copyright (c) Sergey Bochkanov (ALGLIB project).
4 
5 >>> SOURCE LICENSE >>>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation (www.fsf.org); either version 2 of the
9 License, or (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 A copy of the GNU General Public License is available at
17 http://www.fsf.org/licensing/licenses
18 >>> END OF LICENSE >>>
19 *************************************************************************/
20 #ifdef _MSC_VER
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 #include "stdafx.h"
24 
25 //
26 // if AE_OS==AE_LINUX (will be redefined to AE_POSIX in ap.h),
27 // set _GNU_SOURCE flag BEFORE any #includes to get affinity
28 // management functions
29 //
30 #if (AE_OS==AE_LINUX) && !defined(_GNU_SOURCE)
31 #define _GNU_SOURCE
32 #endif
33 
34 //
35 // Must be defined before we include ap.h
36 //
37 #define _ALGLIB_IMPL_DEFINES
38 #define _ALGLIB_INTEGRITY_CHECKS_ONCE
39 
40 #include "ap.h"
41 #include <limits>
42 #include <locale.h>
43 #include <ctype.h>
44 
45 #if defined(AE_CPU)
46 #if (AE_CPU==AE_INTEL)
47 
48 #if AE_COMPILER==AE_MSVC
49 #include <intrin.h>
50 #endif
51 
52 #endif
53 #endif
54 
55 // disable some irrelevant warnings
56 #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
57 #pragma warning(disable:4100)
58 #pragma warning(disable:4127)
59 #pragma warning(disable:4611)
60 #pragma warning(disable:4702)
61 #pragma warning(disable:4996)
62 #endif
63 
64 /////////////////////////////////////////////////////////////////////////
65 //
66 // THIS SECTION IMPLEMENTS BASIC FUNCTIONALITY LIKE
67 // MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS
68 // SHARED BETWEEN C++ AND PURE C LIBRARIES
69 //
70 /////////////////////////////////////////////////////////////////////////
71 namespace alglib_impl
72 {
73 /*
74  * OS-specific includes
75  */
76 #ifdef AE_USE_CPP
77 }
78 #endif
79 #if AE_OS==AE_WINDOWS || defined(AE_DEBUG4WINDOWS)
80 #ifndef _WIN32_WINNT
81 #define _WIN32_WINNT 0x0501
82 #endif
83 #include <windows.h>
84 #include <process.h>
85 #elif AE_OS==AE_POSIX || defined(AE_DEBUG4POSIX)
86 #include <time.h>
87 #include <unistd.h>
88 #include <pthread.h>
89 #include <sched.h>
90 #include <sys/time.h>
91 #endif
92 /* Debugging helpers for Windows */
93 #ifdef AE_DEBUG4WINDOWS
94 #include <windows.h>
95 #include <stdio.h>
96 #endif
97 #ifdef AE_USE_CPP
98 namespace alglib_impl
99 {
100 #endif
101 
102 /*
103  * local definitions
104  */
105 #define x_nb 16
106 #define AE_DATA_ALIGN 64
107 #define AE_PTR_ALIGN sizeof(void*)
108 #define DYN_BOTTOM ((void*)1)
109 #define DYN_FRAME  ((void*)2)
110 #define AE_LITTLE_ENDIAN 1
111 #define AE_BIG_ENDIAN 2
112 #define AE_MIXED_ENDIAN 3
113 #define AE_SER_ENTRY_LENGTH 11
114 #define AE_SER_ENTRIES_PER_ROW 5
115 
116 #define AE_SM_DEFAULT 0
117 #define AE_SM_ALLOC 1
118 #define AE_SM_READY2S 2
119 #define AE_SM_TO_STRING    10
120 #define AE_SM_TO_CPPSTRING 11
121 #define AE_SM_TO_STREAM    12
122 #define AE_SM_FROM_STRING  20
123 #define AE_SM_FROM_STREAM  22
124 
125 #define AE_LOCK_CYCLES 512
126 #define AE_LOCK_TESTS_BEFORE_YIELD 16
127 #define AE_CRITICAL_ASSERT(x) if( !(x) ) abort()
128 
129 /* IDs for set_dbg_value */
130 #define _ALGLIB_USE_ALLOC_COUNTER             0
131 #define _ALGLIB_USE_DBG_COUNTERS              1
132 #define _ALGLIB_USE_VENDOR_KERNELS          100
133 #define _ALGLIB_VENDOR_MEMSTAT              101
134 
135 #define _ALGLIB_DEBUG_WORKSTEALING          200
136 #define _ALGLIB_WSDBG_NCORES                201
137 #define _ALGLIB_WSDBG_PUSHROOT_OK           202
138 #define _ALGLIB_WSDBG_PUSHROOT_FAILED       203
139 
140 #define _ALGLIB_SET_GLOBAL_THREADING       1001
141 #define _ALGLIB_SET_NWORKERS               1002
142 
143 /* IDs for get_dbg_value */
144 #define _ALGLIB_GET_ALLOC_COUNTER             0
145 #define _ALGLIB_GET_CUMULATIVE_ALLOC_SIZE     1
146 #define _ALGLIB_GET_CUMULATIVE_ALLOC_COUNT    2
147 
148 #define _ALGLIB_GET_CORES_COUNT            1000
149 #define _ALGLIB_GET_GLOBAL_THREADING       1001
150 #define _ALGLIB_GET_NWORKERS               1002
151 
152 /*************************************************************************
153 Lock.
154 
This is an internal structure which implements lock functionality.
156 *************************************************************************/
157 typedef struct
158 {
159 #if AE_OS==AE_WINDOWS
160     volatile ae_int_t * volatile p_lock;
161     char buf[sizeof(ae_int_t)+AE_LOCK_ALIGNMENT];
162 #elif AE_OS==AE_POSIX
163     pthread_mutex_t mutex;
164 #else
165     ae_bool is_locked;
166 #endif
167 } _lock;
168 
169 
170 
171 
172 /*
 * Error tracking facilities; these fields are modified every time ae_set_error_flag()
 * is called with non-zero cond. Access is not thread-safe, but in practice it does not matter.
175  */
176 static const char * sef_file  = "";
177 static int          sef_line  = 0;
178 static const char * sef_xdesc = "";
179 
180 /*
181  * Global flags, split into several char-sized variables in order
 * to avoid problems with non-atomic reads/writes (single-byte ops
 * are atomic on all modern architectures);
 *
 * The following variables are included:
186  * * threading-related settings
187  */
188 unsigned char _alglib_global_threading_flags = _ALGLIB_FLG_THREADING_SERIAL>>_ALGLIB_FLG_THREADING_SHIFT;
189 
190 /*
191  * DESCRIPTION: recommended number of active workers:
192  *              * positive value >=1 is used to specify exact number of active workers
193  *              * 0 means that ALL available cores are used
194  *              * negative value means that all cores EXCEPT for cores_to_use will be used
195  *                (say, -1 means that all cores except for one will be used). At least one
196  *                core will be used in this case, even if you assign -9999999 to this field.
197  *
198  *              Default value =  0 (fully parallel execution) when AE_NWORKERS is not defined
199  *                            =  0 for manually defined number of cores (AE_NWORKERS is defined)
200  * PROTECTION:  not needed; runtime modification is possible, but we do not need exact
201  *              synchronization.
202  */
203 #if defined(AE_NWORKERS) && (AE_NWORKERS<=0)
#error AE_NWORKERS must be a positive number or not defined at all.
205 #endif
206 #if defined(AE_NWORKERS)
207 ae_int_t _alglib_cores_to_use = 0;
208 #else
209 ae_int_t _alglib_cores_to_use = 0;
210 #endif
211 
212 /*
213  * Debug counters
214  */
215 ae_int_t   _alloc_counter = 0;
216 ae_int_t   _alloc_counter_total = 0;
217 ae_bool    _use_alloc_counter = ae_false;
218 
219 ae_int_t   _dbg_alloc_total = 0;
220 ae_bool    _use_dbg_counters  = ae_false;
221 
222 ae_bool    _use_vendor_kernels          = ae_true;
223 
224 ae_bool    debug_workstealing           = ae_false; /* debug workstealing environment? False by default */
225 ae_int_t   dbgws_pushroot_ok            = 0;
226 ae_int_t   dbgws_pushroot_failed        = 0;
227 
228 #ifdef AE_SMP_DEBUGCOUNTERS
229 __declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_acquisitions = 0;
230 __declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_spinwaits = 0;
231 __declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_yields = 0;
232 #endif
233 
234 /*
235  * Allocation debugging
236  */
237 ae_bool     _force_malloc_failure = ae_false;
238 ae_int_t    _malloc_failure_after = 0;
239 
240 
241 /*
242  * Trace-related declarations:
243  * alglib_trace_type    -   trace output type
 * alglib_trace_file    -   file descriptor (to be used by ALGLIB code which
 *                          sends messages to the trace log)
246  * alglib_fclose_trace  -   whether we have to call fclose() when disabling or
247  *                          changing trace output
248  * alglib_trace_tags    -   string buffer used to store tags + two additional
249  *                          characters (leading and trailing commas) + null
250  *                          terminator
251  */
252 #define ALGLIB_TRACE_NONE 0
253 #define ALGLIB_TRACE_FILE 1
254 #define ALGLIB_TRACE_TAGS_LEN 2048
255 #define ALGLIB_TRACE_BUFFER_LEN (ALGLIB_TRACE_TAGS_LEN+2+1)
256 static ae_int_t  alglib_trace_type = ALGLIB_TRACE_NONE;
257 FILE            *alglib_trace_file = NULL;
258 static ae_bool   alglib_fclose_trace = ae_false;
259 static char      alglib_trace_tags[ALGLIB_TRACE_BUFFER_LEN];
260 
261 /*
262  * Fields for memory allocation over static array
263  */
264 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
265 #if AE_THREADING!=AE_SERIAL_UNSAFE
#error Basic static malloc is thread-unsafe; define AE_THREADING=AE_SERIAL_UNSAFE to prove that you know it
267 #endif
268 static ae_int_t         sm_page_size = 0;
269 static ae_int_t         sm_page_cnt  = 0;
270 static ae_int_t         *sm_page_tbl = NULL;
271 static unsigned char    *sm_mem      = NULL;
272 #endif
273 
274 /*
275  * These declarations are used to ensure that
 * sizeof(ae_bool)==1, sizeof(ae_int32_t)==4, sizeof(ae_int64_t)==8, sizeof(ae_int_t)==sizeof(void*).
 * They will lead to a syntax error otherwise (the array size will be negative).
 *
 * You can remove them if you want - they are not used anywhere.
280  *
281  */
282 static char     _ae_bool_must_be_8_bits_wide [1-2*((int)(sizeof(ae_bool))-1)*((int)(sizeof(ae_bool))-1)];
283 static char  _ae_int32_t_must_be_32_bits_wide[1-2*((int)(sizeof(ae_int32_t))-4)*((int)(sizeof(ae_int32_t))-4)];
284 static char  _ae_int64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_int64_t))-8)*((int)(sizeof(ae_int64_t))-8)];
285 static char _ae_uint64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_uint64_t))-8)*((int)(sizeof(ae_uint64_t))-8)];
286 static char  _ae_int_t_must_be_pointer_sized [1-2*((int)(sizeof(ae_int_t))-(int)sizeof(void*))*((int)(sizeof(ae_int_t))-(int)(sizeof(void*)))];
287 
288 /*
289  * This variable is used to prevent some tricky optimizations which may degrade multithreaded performance.
290  * It is touched once in the ae_init_pool() function from smp.c in order to prevent optimizations.
291  *
292  */
293 static volatile ae_int_t ae_never_change_it = 1;
294 
295 /*************************************************************************
296 This function should never  be  called.  It is  here  to  prevent spurious
297 compiler warnings about unused variables (in fact: used).
298 *************************************************************************/
void ae_never_call_it()
300 {
301     ae_touch_ptr((void*)_ae_bool_must_be_8_bits_wide);
302     ae_touch_ptr((void*)_ae_int32_t_must_be_32_bits_wide);
303     ae_touch_ptr((void*)_ae_int64_t_must_be_64_bits_wide);
304     ae_touch_ptr((void*)_ae_uint64_t_must_be_64_bits_wide);
305     ae_touch_ptr((void*)_ae_int_t_must_be_pointer_sized);
306 }
307 
308 /*************************************************************************
309 Standard function wrappers for better GLIBC portability
310 *************************************************************************/
311 #if defined(X_FOR_LINUX)
312 __asm__(".symver exp,exp@GLIBC_2.2.5");
313 __asm__(".symver log,log@GLIBC_2.2.5");
314 __asm__(".symver pow,pow@GLIBC_2.2.5");
315 
double __wrap_exp(double x)
317 {
318     return exp(x);
319 }
320 
double __wrap_log(double x)
322 {
323     return log(x);
324 }
325 
double __wrap_pow(double x, double y)
327 {
328     return pow(x, y);
329 }
330 #endif
331 
void ae_set_dbg_flag(ae_int64_t flag_id, ae_int64_t flag_val)
333 {
334     if( flag_id==_ALGLIB_USE_ALLOC_COUNTER )
335     {
336         _use_alloc_counter = flag_val!=0;
337         return;
338     }
339     if( flag_id==_ALGLIB_USE_DBG_COUNTERS )
340     {
341         _use_dbg_counters  = flag_val!=0;
342         return;
343     }
344     if( flag_id==_ALGLIB_USE_VENDOR_KERNELS )
345     {
346         _use_vendor_kernels = flag_val!=0;
347         return;
348     }
349     if( flag_id==_ALGLIB_DEBUG_WORKSTEALING )
350     {
351         debug_workstealing = flag_val!=0;
352         return;
353     }
354     if( flag_id==_ALGLIB_SET_GLOBAL_THREADING )
355     {
356         ae_set_global_threading((ae_uint64_t)flag_val);
357         return;
358     }
359     if( flag_id==_ALGLIB_SET_NWORKERS )
360     {
361         _alglib_cores_to_use = (ae_int_t)flag_val;
362         return;
363     }
364 }
365 
ae_int64_t ae_get_dbg_value(ae_int64_t id)
367 {
368     if( id==_ALGLIB_GET_ALLOC_COUNTER )
369         return _alloc_counter;
370     if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_SIZE )
371         return _dbg_alloc_total;
372     if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_COUNT )
373         return _alloc_counter_total;
374 
375     if( id==_ALGLIB_VENDOR_MEMSTAT )
376     {
377 #if defined(AE_MKL)
378         return ae_mkl_memstat();
379 #else
380         return 0;
381 #endif
382     }
383 
384     /* workstealing counters */
385     if( id==_ALGLIB_WSDBG_NCORES )
386 #if defined(AE_SMP)
387         return ae_cores_count();
388 #else
389         return 0;
390 #endif
391     if( id==_ALGLIB_WSDBG_PUSHROOT_OK )
392         return dbgws_pushroot_ok;
393     if( id==_ALGLIB_WSDBG_PUSHROOT_FAILED )
394         return dbgws_pushroot_failed;
395 
396     if( id==_ALGLIB_GET_CORES_COUNT )
397 #if defined(AE_SMP)
398         return ae_cores_count();
399 #else
400         return 0;
401 #endif
402     if( id==_ALGLIB_GET_GLOBAL_THREADING )
403         return (ae_int64_t)ae_get_global_threading();
404     if( id==_ALGLIB_GET_NWORKERS )
405         return (ae_int64_t)_alglib_cores_to_use;
406 
407     /* unknown value */
408     return 0;
409 }
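
/*
 * Minimal usage sketch for the debug flag interface above (illustrative only,
 * not part of the documented ALGLIB API surface):
 *
 *     ae_set_dbg_flag(_ALGLIB_USE_ALLOC_COUNTER, 1);       // start maintaining the allocation counter
 *     ... run some ALGLIB code ...
 *     ae_int64_t live = ae_get_dbg_value(_ALGLIB_GET_ALLOC_COUNTER);  // blocks currently allocated
 *     ae_set_dbg_flag(_ALGLIB_SET_NWORKERS, 2);            // request at most two worker threads
 */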
410 
411 /************************************************************************
412 This function sets default (global) threading model:
413 * serial execution
414 * multithreading, if cores_to_use allows it
415 
416 ************************************************************************/
void ae_set_global_threading(ae_uint64_t flg_value)
418 {
419     flg_value = flg_value&_ALGLIB_FLG_THREADING_MASK;
420     AE_CRITICAL_ASSERT(flg_value==_ALGLIB_FLG_THREADING_SERIAL || flg_value==_ALGLIB_FLG_THREADING_PARALLEL);
421     _alglib_global_threading_flags = (unsigned char)(flg_value>>_ALGLIB_FLG_THREADING_SHIFT);
422 }
423 
424 /************************************************************************
425 This function gets default (global) threading model:
426 * serial execution
427 * multithreading, if cores_to_use allows it
428 
429 ************************************************************************/
ae_uint64_t ae_get_global_threading()
431 {
432     return ((ae_uint64_t)_alglib_global_threading_flags)<<_ALGLIB_FLG_THREADING_SHIFT;
433 }
434 
void ae_set_error_flag(ae_bool *p_flag, ae_bool cond, const char *filename, int lineno, const char *xdesc)
436 {
437     if( cond )
438     {
439         *p_flag = ae_true;
440         sef_file = filename;
441         sef_line = lineno;
442         sef_xdesc= xdesc;
443 #ifdef ALGLIB_ABORT_ON_ERROR_FLAG
444         printf("[ALGLIB] aborting on ae_set_error_flag(cond=true)\n");
445         printf("[ALGLIB] %s:%d\n", filename, lineno);
446         printf("[ALGLIB] %s\n", xdesc);
447         fflush(stdout);
448         if( alglib_trace_file!=NULL ) fflush(alglib_trace_file);
449         abort();
450 #endif
451     }
452 }
453 
454 /************************************************************************
455 This function returns file name for the last call of ae_set_error_flag()
456 with non-zero cond parameter.
457 ************************************************************************/
const char * ae_get_last_error_file()
459 {
460     return sef_file;
461 }
462 
463 /************************************************************************
464 This function returns line number for the last call of ae_set_error_flag()
465 with non-zero cond parameter.
466 ************************************************************************/
int ae_get_last_error_line()
468 {
469     return sef_line;
470 }
471 
472 /************************************************************************
473 This function returns extra description for the last call of ae_set_error_flag()
474 with non-zero cond parameter.
475 ************************************************************************/
const char * ae_get_last_error_xdesc()
477 {
478     return sef_xdesc;
479 }
480 
ae_int_t ae_misalignment(const void *ptr, size_t alignment)
482 {
483     union _u
484     {
485         const void *ptr;
486         ae_int_t iptr;
487     } u;
488     u.ptr = ptr;
489     return (ae_int_t)(u.iptr%alignment);
490 }
491 
void* ae_align(void *ptr, size_t alignment)
493 {
494     char *result = (char*)ptr;
495     if( (result-(char*)0)%alignment!=0 )
496         result += alignment - (result-(char*)0)%alignment;
497     return result;
498 }
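
/*
 * Worked example of the alignment arithmetic above (illustrative only):
 * aligning the address 0x1003 to an 8-byte boundary yields 0x1008, because
 * 0x1003%8==3 and 0x1003+(8-3)==0x1008; an already aligned address such as
 * 0x1008 is returned unchanged.
 */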
499 
500 /************************************************************************
501 This function maps nworkers  number  (which  can  be  positive,  zero  or
502 negative with 0 meaning "all cores", -1 meaning "all cores -1" and so on)
503 to "effective", strictly positive workers count.
504 
505 This  function  is  intended  to  be used by debugging/testing code which
506 tests different number of worker threads. It is NOT aligned  in  any  way
507 with ALGLIB multithreading framework (i.e. it can return  non-zero worker
508 count even for single-threaded GPLed ALGLIB).
509 ************************************************************************/
ae_int_t ae_get_effective_workers(ae_int_t nworkers)
511 {
512     ae_int_t ncores;
513 
514     /* determine cores count */
515 #if defined(AE_NWORKERS)
516     ncores = AE_NWORKERS;
517 #elif AE_OS==AE_WINDOWS
518     SYSTEM_INFO sysInfo;
519     GetSystemInfo(&sysInfo);
520     ncores = (ae_int_t)(sysInfo.dwNumberOfProcessors);
521 #elif AE_OS==AE_POSIX
522     {
523         long r = sysconf(_SC_NPROCESSORS_ONLN);
524         ncores = r<=0 ? 1 : r;
525     }
526 #else
527     ncores = 1;
528 #endif
529     AE_CRITICAL_ASSERT(ncores>=1);
530 
531     /* map nworkers to its effective value */
532     if( nworkers>=1 )
533         return nworkers>ncores ? ncores : nworkers;
534     return ncores+nworkers>=1 ? ncores+nworkers : 1;
535 }
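
/*
 * Worked examples of the mapping above (illustrative, assuming ncores=8):
 * nworkers=3 maps to 3, nworkers=100 maps to 8 (clamped to ncores),
 * nworkers=0 maps to 8 (all cores), nworkers=-2 maps to 6, and
 * nworkers=-100 maps to 1 (at least one worker is always used).
 */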
536 
537 /*************************************************************************
538 This function belongs to the family of  "optional  atomics",  i.e.  atomic
539 functions which either perform atomic changes - or do nothing at  all,  if
540 current compiler settings do not allow us to generate atomic code.
541 
542 All "optional atomics" are synchronized, i.e. either all of them work - or
none of them work.
544 
545 This particular function performs atomic addition on pointer-sized  value,
546 which must be pointer-size aligned.
547 
NOTE: this function is not intended to be an extremely high-performance one,
549       so use it only when necessary.
550 *************************************************************************/
void  ae_optional_atomic_add_i(ae_int_t *p, ae_int_t v)
552 {
553     AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
554 #if AE_OS==AE_WINDOWS
555     for(;;)
556     {
557         /* perform conversion between ae_int_t* and void**
558            without compiler warnings about indirection levels */
559         union _u
560         {
561             PVOID volatile * volatile ptr;
562             volatile ae_int_t * volatile iptr;
563         } u;
564         u.iptr = p;
565 
566         /* atomic read for initial value */
567         PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
568 
569         /* increment cached value and store */
570         if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)+v), v0)==v0 )
571             break;
572     }
573 #elif defined(__clang__) && (AE_CPU==AE_INTEL)
574     __atomic_fetch_add(p, v, __ATOMIC_RELAXED);
575 #elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC__>=470)
576     __atomic_add_fetch(p, v, __ATOMIC_RELAXED);
577 #else
578 #endif
579 }
580 
581 /*************************************************************************
582 This function belongs to the family of  "optional  atomics",  i.e.  atomic
583 functions which either perform atomic changes - or do nothing at  all,  if
584 current compiler settings do not allow us to generate atomic code.
585 
586 All "optional atomics" are synchronized, i.e. either all of them work - or
none of them work.
588 
589 This  particular  function  performs  atomic  subtraction on pointer-sized
590 value, which must be pointer-size aligned.
591 
NOTE: this function is not intended to be an extremely high-performance one,
593       so use it only when necessary.
594 *************************************************************************/
void  ae_optional_atomic_sub_i(ae_int_t *p, ae_int_t v)
596 {
597     AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
598 #if AE_OS==AE_WINDOWS
599     for(;;)
600     {
601         /* perform conversion between ae_int_t* and void**
602            without compiler warnings about indirection levels */
603         union _u
604         {
605             PVOID volatile * volatile ptr;
606             volatile ae_int_t * volatile iptr;
607         } u;
608         u.iptr = p;
609 
610         /* atomic read for initial value, convert it to 1-byte pointer */
611         PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
612 
        /* decrement cached value and store */
614         if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)-v), v0)==v0 )
615             break;
616     }
617 #elif defined(__clang__) && (AE_CPU==AE_INTEL)
618     __atomic_fetch_sub(p, v, __ATOMIC_RELAXED);
619 #elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC__>=470)
620     __atomic_sub_fetch(p, v, __ATOMIC_RELAXED);
621 #else
622 #endif
623 }
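
/*
 * Minimal usage sketch for the optional atomics above (illustrative only;
 * my_counter is a hypothetical pointer-aligned global, not part of ALGLIB):
 *
 *     static ae_int_t my_counter = 0;
 *     ae_optional_atomic_add_i(&my_counter, 1);   // thread-safe increment when atomics are available
 *     ae_optional_atomic_sub_i(&my_counter, 1);   // matching decrement
 *
 * When no atomic primitive is available for the current compiler/OS, both
 * calls silently do nothing - which is how the debug counters in this file
 * (_alloc_counter and friends) use them.
 */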
624 
625 
626 /*************************************************************************
This function cleans up automatically managed memory before the caller terminates
ALGLIB execution by calling ae_break() or by simply ceasing to call the callback.

For state!=NULL it calls thread_exception_handler() and then ae_state_clear().
631 For state==NULL it does nothing.
632 *************************************************************************/
void ae_clean_up_before_breaking(ae_state *state)
634 {
635     if( state!=NULL )
636     {
637         if( state->thread_exception_handler!=NULL )
638             state->thread_exception_handler(state);
639         ae_state_clear(state);
640     }
641 }
642 
643 /*************************************************************************
This function abnormally aborts the program, using one of the following ways:

* for state!=NULL and state->break_jump initialized with a call to
  ae_state_set_break_jump() - it performs longjmp() to the return site.
* otherwise, abort() is called

In all cases, for state!=NULL the function sets the state->last_error and
state->error_msg fields. It also clears state with ae_state_clear().
652 
653 If state is not NULL and state->thread_exception_handler  is  set,  it  is
654 called prior to handling error and clearing state.
655 *************************************************************************/
void ae_break(ae_state *state, ae_error_type error_type, const char *msg)
657 {
658     if( state!=NULL )
659     {
660         if( alglib_trace_type!=ALGLIB_TRACE_NONE )
661             ae_trace("---!!! CRITICAL ERROR !!!--- exception with message '%s' was generated\n", msg!=NULL ? msg : "");
662         ae_clean_up_before_breaking(state);
663         state->last_error = error_type;
664         state->error_msg = msg;
665         if( state->break_jump!=NULL )
666             longjmp(*(state->break_jump), 1);
667         else
668             abort();
669     }
670     else
671         abort();
672 }
673 
674 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
void set_memory_pool(void *ptr, size_t size)
676 {
677     /*
678      * Integrity checks
679      */
680     AE_CRITICAL_ASSERT(sm_page_size==0);
681     AE_CRITICAL_ASSERT(sm_page_cnt==0);
682     AE_CRITICAL_ASSERT(sm_page_tbl==NULL);
683     AE_CRITICAL_ASSERT(sm_mem==NULL);
684     AE_CRITICAL_ASSERT(size>0);
685 
686     /*
687      * Align pointer
688      */
689     size -= ae_misalignment(ptr, sizeof(ae_int_t));
690     ptr   = ae_align(ptr, sizeof(ae_int_t));
691 
692     /*
693      * Calculate page size and page count, prepare pointers to page table and memory
694      */
695     sm_page_size = 256;
696     AE_CRITICAL_ASSERT(size>=(sm_page_size+sizeof(ae_int_t))+sm_page_size); /* we expect to have memory for at least one page + table entry + alignment */
697     sm_page_cnt = (size-sm_page_size)/(sm_page_size+sizeof(ae_int_t));
698     AE_CRITICAL_ASSERT(sm_page_cnt>0);
699     sm_page_tbl = (ae_int_t*)ptr;
700     sm_mem      = (unsigned char*)ae_align(sm_page_tbl+sm_page_cnt, sm_page_size);
701 
702     /*
703      * Mark all pages as free
704      */
705     memset(sm_page_tbl, 0, sm_page_cnt*sizeof(ae_int_t));
706 }
707 
void* ae_static_malloc(size_t size, size_t alignment)
709 {
710     int rq_pages, i, j, cur_len;
711 
712     AE_CRITICAL_ASSERT(size>=0);
713     AE_CRITICAL_ASSERT(sm_page_size>0);
714     AE_CRITICAL_ASSERT(sm_page_cnt>0);
715     AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
716     AE_CRITICAL_ASSERT(sm_mem!=NULL);
717 
718     if( size==0 )
719         return NULL;
720     if( _force_malloc_failure )
721         return NULL;
722 
723     /* check that page alignment and requested alignment match each other */
724     AE_CRITICAL_ASSERT(alignment<=sm_page_size);
725     AE_CRITICAL_ASSERT((sm_page_size%alignment)==0);
726 
727     /* search long enough sequence of pages */
728     rq_pages = size/sm_page_size;
729     if( size%sm_page_size )
730         rq_pages++;
731     cur_len = 0;
732     for(i=0; i<sm_page_cnt;)
733     {
734         /* determine length of the sequence of free pages */
735         if( sm_page_tbl[i]==0 )
736             cur_len++;
737         else
738         {
739             AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
740             cur_len=0;
741             i += sm_page_tbl[i];
742             continue;
743         }
744 
745         /* found it? */
746         if( cur_len>=rq_pages )
747         {
748             /* update counters (if flag is set) */
749             if( _use_alloc_counter )
750             {
751                 ae_optional_atomic_add_i(&_alloc_counter, 1);
752                 ae_optional_atomic_add_i(&_alloc_counter_total, 1);
753             }
754             if( _use_dbg_counters )
755                 ae_optional_atomic_add_i(&_dbg_alloc_total, size);
756 
757             /* mark pages and return */
758             for(j=0; j<rq_pages; j++)
759                 sm_page_tbl[i-j] = -1;
760             sm_page_tbl[i-(rq_pages-1)] = rq_pages;
761             return sm_mem+(i-(rq_pages-1))*sm_page_size;
762         }
763 
764         /* next element */
765         i++;
766     }
767     return NULL;
768 }
769 
void ae_static_free(void *block)
771 {
772     ae_int_t page_idx, page_cnt, i;
773     if( block==NULL )
774         return;
775     page_idx = (unsigned char*)block-sm_mem;
776     AE_CRITICAL_ASSERT(page_idx>=0);
777     AE_CRITICAL_ASSERT((page_idx%sm_page_size)==0);
778     page_idx = page_idx/sm_page_size;
779     AE_CRITICAL_ASSERT(page_idx<sm_page_cnt);
780     page_cnt = sm_page_tbl[page_idx];
781     AE_CRITICAL_ASSERT(page_cnt>=1);
782     for(i=0; i<page_cnt; i++)
783         sm_page_tbl[page_idx+i] = 0;
784 
785     /* update counters (if flag is set) */
786     if( _use_alloc_counter )
787         ae_optional_atomic_sub_i(&_alloc_counter, 1);
788 }
789 
void memory_pool_stats(ae_int_t *bytes_used, ae_int_t *bytes_free)
791 {
792     int i;
793 
794     AE_CRITICAL_ASSERT(sm_page_size>0);
795     AE_CRITICAL_ASSERT(sm_page_cnt>0);
796     AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
797     AE_CRITICAL_ASSERT(sm_mem!=NULL);
798 
799     /* scan page table */
800     *bytes_used = 0;
801     *bytes_free = 0;
802     for(i=0; i<sm_page_cnt;)
803     {
804         if( sm_page_tbl[i]==0 )
805         {
806             (*bytes_free)++;
807             i++;
808         }
809         else
810         {
811             AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
812             *bytes_used += sm_page_tbl[i];
813             i += sm_page_tbl[i];
814         }
815     }
816     *bytes_used *= sm_page_size;
817     *bytes_free *= sm_page_size;
818 }
819 #endif
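
/*
 * Minimal usage sketch for the static memory pool above (illustrative only;
 * applies when ALGLIB is built with AE_MALLOC==AE_BASIC_STATIC_MALLOC and
 * AE_THREADING==AE_SERIAL_UNSAFE; the buffer name and size are arbitrary):
 *
 *     static unsigned char pool_buf[1024*1024];
 *     set_memory_pool(pool_buf, sizeof(pool_buf)); // call once, before any allocation
 *     void *p = ae_static_malloc(1000, 64);        // rounded up to whole 256-byte pages
 *     ae_static_free(p);                           // returns the pages to the pool
 */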
820 
void* aligned_malloc(size_t size, size_t alignment)
822 {
823 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
824     return ae_static_malloc(size, alignment);
825 #else
826     char *result = NULL;
827 
828     if( size==0 )
829         return NULL;
830     if( _force_malloc_failure )
831         return NULL;
832     if( _malloc_failure_after>0 && _alloc_counter_total>=_malloc_failure_after )
833         return NULL;
834 
835     /* allocate */
836     if( alignment<=1 )
837     {
838         /* no alignment, just call alloc */
839         void *block;
        void **p;
841         block = malloc(sizeof(void*)+size);
842         if( block==NULL )
843             return NULL;
844         p = (void**)block;
845         *p = block;
846         result = (char*)((char*)block+sizeof(void*));
847     }
848     else
849     {
850         /* align */
851         void *block;
852         block = malloc(alignment-1+sizeof(void*)+size);
853         if( block==NULL )
854             return NULL;
855         result = (char*)block+sizeof(void*);
856         /*if( (result-(char*)0)%alignment!=0 )
857             result += alignment - (result-(char*)0)%alignment;*/
858         result = (char*)ae_align(result, alignment);
859         *((void**)(result-sizeof(void*))) = block;
860     }
861 
862     /* update counters (if flag is set) */
863     if( _use_alloc_counter )
864     {
865         ae_optional_atomic_add_i(&_alloc_counter, 1);
866         ae_optional_atomic_add_i(&_alloc_counter_total, 1);
867     }
868     if( _use_dbg_counters )
869         ae_optional_atomic_add_i(&_dbg_alloc_total, (ae_int64_t)size);
870 
871     /* return */
872     return (void*)result;
873 #endif
874 }
875 
void* aligned_extract_ptr(void *block)
877 {
878 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
879     return NULL;
880 #else
881     if( block==NULL )
882         return NULL;
883     return *((void**)((char*)block-sizeof(void*)));
884 #endif
885 }
886 
void aligned_free(void *block)
888 {
889 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
890     ae_static_free(block);
891 #else
892     void *p;
893     if( block==NULL )
894         return;
895     p = aligned_extract_ptr(block);
896     free(p);
897     if( _use_alloc_counter )
898         ae_optional_atomic_sub_i(&_alloc_counter, 1);
899 #endif
900 }
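
/*
 * Memory layout produced by aligned_malloc() above for alignment>1
 * (illustrative): the function over-allocates alignment-1+sizeof(void*)+size
 * bytes, skips a pointer-sized slot, rounds the result up to the requested
 * alignment and stores the address returned by malloc() in the slot
 * immediately preceding the aligned block:
 *
 *     [ padding ][ original malloc() pointer ][ aligned user data ... ]
 *                                             ^-- value returned to the caller
 *
 * aligned_extract_ptr() and aligned_free() read that hidden slot to recover
 * the original pointer and hand it back to free().
 */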
901 
void* eternal_malloc(size_t size)
903 {
904     if( size==0 )
905         return NULL;
906     if( _force_malloc_failure )
907         return NULL;
908     return malloc(size);
909 }
910 
911 /************************************************************************
912 Allocate memory with automatic alignment.
913 
914 Returns NULL when zero size is specified.
915 
916 Error handling:
917 * if state is NULL, returns NULL on allocation error
918 * if state is not NULL, calls ae_break() on allocation error
919 ************************************************************************/
void* ae_malloc(size_t size, ae_state *state)
921 {
922     void *result;
923     if( size==0 )
924         return NULL;
925     result = aligned_malloc(size,AE_DATA_ALIGN);
926     if( result==NULL && state!=NULL)
927         ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
928     return result;
929 }
930 
void ae_free(void *p)
932 {
933     if( p!=NULL )
934         aligned_free(p);
935 }
936 
937 /************************************************************************
938 Sets pointers to the matrix rows.
939 
940 * dst must be correctly initialized matrix
941 * dst->data.ptr points to the beginning of memory block  allocated  for
942   row pointers.
943 * dst->ptr - undefined (initialized during algorithm processing)
944 * storage parameter points to the beginning of actual storage
945 ************************************************************************/
void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage)
947 {
948     char *p_base;
949     void **pp_ptr;
950     ae_int_t i;
951     if( dst->rows>0 && dst->cols>0 )
952     {
953         p_base = (char*)storage;
954         pp_ptr = (void**)dst->data.ptr;
955         dst->ptr.pp_void = pp_ptr;
956         for(i=0; i<dst->rows; i++, p_base+=dst->stride*ae_sizeof(dst->datatype))
957             pp_ptr[i] = p_base;
958     }
959     else
960         dst->ptr.pp_void = NULL;
961 }
962 
963 /************************************************************************
964 Returns size of datatype.
965 Zero for dynamic types like strings or multiple precision types.
966 ************************************************************************/
ae_int_t ae_sizeof(ae_datatype datatype)
968 {
969     switch(datatype)
970     {
971         case DT_BOOL:       return (ae_int_t)sizeof(ae_bool);
972         case DT_INT:        return (ae_int_t)sizeof(ae_int_t);
973         case DT_REAL:       return (ae_int_t)sizeof(double);
974         case DT_COMPLEX:    return 2*(ae_int_t)sizeof(double);
975         default:            return 0;
976     }
977 }
978 
979 /************************************************************************
980 Checks that n bytes pointed by ptr are zero.
981 
982 This function is used in the constructors to check that  instance  fields
983 on entry are correctly initialized by zeros.
984 ************************************************************************/
ae_bool ae_check_zeros(const void *ptr, ae_int_t n)
986 {
987     ae_int_t nu, nr, i;
988     unsigned long long c = 0x0;
989 
990     /*
991      * determine leading and trailing lengths
992      */
993     nu = n/sizeof(unsigned long long);
994     nr = n%sizeof(unsigned long long);
995 
996     /*
997      * handle leading nu long long elements
998      */
999     if( nu>0 )
1000     {
1001         const unsigned long long *p_ull;
1002         p_ull = (const unsigned long long *)ptr;
1003         for(i=0; i<nu; i++)
1004             c |= p_ull[i];
1005     }
1006 
1007     /*
1008      * handle trailing nr char elements
1009      */
1010     if( nr>0 )
1011     {
1012         const unsigned char *p_uc;
1013         p_uc  = ((const unsigned char *)ptr)+nu*sizeof(unsigned long long);
1014         for(i=0; i<nr; i++)
1015             c |= p_uc[i];
1016     }
1017 
1018     /*
1019      * done
1020      */
1021     return c==0x0;
1022 }
1023 
1024 
1025 /************************************************************************
1026 This  dummy  function  is  used to prevent compiler messages about unused
1027 locals in automatically generated code.
1028 
It does nothing - it just accepts a pointer, "touches" it - and that is all.
It performs several tricky operations without side effects which confuse the
compiler so that it does not complain about unused locals in THIS function.
1032 ************************************************************************/
void ae_touch_ptr(void *p)
1034 {
1035     void * volatile fake_variable0 = p;
1036     void * volatile fake_variable1 = fake_variable0;
1037     fake_variable0 = fake_variable1;
1038 }
1039 
1040 /************************************************************************
1041 This function initializes ALGLIB environment state.
1042 
1043 NOTES:
* stacks contain no frames, so ae_frame_make() must be called before
  attaching dynamic blocks. Without it ae_frame_leave() will cycle
  forever (which is the intended behavior).
1047 ************************************************************************/
void ae_state_init(ae_state *state)
1049 {
1050     ae_int32_t *vp;
1051 
1052     /*
1053      * Set flags
1054      */
1055     state->flags = 0x0;
1056 
1057     /*
1058      * p_next points to itself because:
1059      * * correct program should be able to detect end of the list
1060      *   by looking at the ptr field.
1061      * * NULL p_next may be used to distinguish automatic blocks
1062      *   (in the list) from non-automatic (not in the list)
1063      */
1064     state->last_block.p_next = &(state->last_block);
1065     state->last_block.deallocator = NULL;
1066     state->last_block.ptr = DYN_BOTTOM;
1067     state->p_top_block = &(state->last_block);
1068     state->break_jump = NULL;
1069     state->error_msg = "";
1070 
1071     /*
1072      * determine endianness and initialize precomputed IEEE special quantities.
1073      */
1074     state->endianness = ae_get_endianness();
1075     if( state->endianness==AE_LITTLE_ENDIAN )
1076     {
1077         vp = (ae_int32_t*)(&state->v_nan);
1078         vp[0] = 0;
1079         vp[1] = (ae_int32_t)0x7FF80000;
1080         vp = (ae_int32_t*)(&state->v_posinf);
1081         vp[0] = 0;
1082         vp[1] = (ae_int32_t)0x7FF00000;
1083         vp = (ae_int32_t*)(&state->v_neginf);
1084         vp[0] = 0;
1085         vp[1] = (ae_int32_t)0xFFF00000;
1086     }
1087     else if( state->endianness==AE_BIG_ENDIAN )
1088     {
1089         vp = (ae_int32_t*)(&state->v_nan);
1090         vp[1] = 0;
1091         vp[0] = (ae_int32_t)0x7FF80000;
1092         vp = (ae_int32_t*)(&state->v_posinf);
1093         vp[1] = 0;
1094         vp[0] = (ae_int32_t)0x7FF00000;
1095         vp = (ae_int32_t*)(&state->v_neginf);
1096         vp[1] = 0;
1097         vp[0] = (ae_int32_t)0xFFF00000;
1098     }
1099     else
1100         abort();
1101 
1102     /*
1103      * set threading information
1104      */
1105     state->worker_thread = NULL;
1106     state->parent_task = NULL;
1107     state->thread_exception_handler = NULL;
1108 }
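
/*
 * For reference (illustrative note): the 32-bit halves written above assemble
 * the standard IEEE 754 double precision bit patterns 0x7FF8000000000000
 * (quiet NaN), 0x7FF0000000000000 (+INF) and 0xFFF0000000000000 (-INF); the
 * endianness check only decides which half of the double holds the high word.
 */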
1109 
1110 
1111 /************************************************************************
1112 This function clears ALGLIB environment state.
1113 All dynamic data controlled by state are freed.
1114 ************************************************************************/
void ae_state_clear(ae_state *state)
1116 {
1117     while( state->p_top_block->ptr!=DYN_BOTTOM )
1118         ae_frame_leave(state);
1119 }
1120 
1121 
1122 /************************************************************************
1123 This function sets jump buffer for error handling.
1124 
1125 buf may be NULL.
1126 ************************************************************************/
void ae_state_set_break_jump(ae_state *state, jmp_buf *buf)
1128 {
1129     state->break_jump = buf;
1130 }
1131 
1132 
1133 /************************************************************************
1134 This function sets flags member of the ae_state structure
1135 
1136 buf may be NULL.
1137 ************************************************************************/
void ae_state_set_flags(ae_state *state, ae_uint64_t flags)
1139 {
1140     state->flags = flags;
1141 }
1142 
1143 
1144 /************************************************************************
1145 This function makes new stack frame.
1146 
1147 This function takes two parameters: environment state and pointer to  the
1148 dynamic block which will be used as indicator  of  the  frame  beginning.
This dynamic block must be initialized by the caller and mustn't be changed/
deallocated/reused until ae_frame_leave() is called. It may be a global or a
local variable (local is even better).
1152 ************************************************************************/
void ae_frame_make(ae_state *state, ae_frame *tmp)
1154 {
1155     tmp->db_marker.p_next = state->p_top_block;
1156     tmp->db_marker.deallocator = NULL;
1157     tmp->db_marker.ptr = DYN_FRAME;
1158     state->p_top_block = &tmp->db_marker;
1159 }
1160 
1161 
1162 /************************************************************************
1163 This function leaves current stack frame and deallocates all automatic
1164 dynamic blocks which were attached to this frame.
1165 ************************************************************************/
void ae_frame_leave(ae_state *state)
1167 {
1168     while( state->p_top_block->ptr!=DYN_FRAME && state->p_top_block->ptr!=DYN_BOTTOM)
1169     {
1170         if( state->p_top_block->ptr!=NULL && state->p_top_block->deallocator!=NULL)
1171             ((ae_deallocator)(state->p_top_block->deallocator))(state->p_top_block->ptr);
1172         state->p_top_block = state->p_top_block->p_next;
1173     }
1174     state->p_top_block = state->p_top_block->p_next;
1175 }
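
/*
 * Minimal usage sketch of the frame mechanism (illustrative only; error
 * handling via ae_state_set_break_jump() is omitted):
 *
 *     ae_state st;
 *     ae_frame frame;
 *     ae_vector v;
 *     memset(&v, 0, sizeof(v));         // constructors expect zero-filled structures
 *     ae_state_init(&st);
 *     ae_frame_make(&st, &frame);       // open a frame
 *     ae_vector_init(&v, 100, DT_REAL, &st, ae_true);  // automatic: attached to the frame
 *     ...
 *     ae_frame_leave(&st);              // frees everything attached to the frame
 *     ae_state_clear(&st);
 */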
1176 
1177 
1178 /************************************************************************
1179 This function attaches block to the dynamic block list
1180 
1181 block               block
1182 state               ALGLIB environment state
1183 
1184 This function does NOT generate exceptions.
1185 
1186 NOTES:
* never call it for special blocks which mark frame boundaries!
1188 ************************************************************************/
void ae_db_attach(ae_dyn_block *block, ae_state *state)
1190 {
1191     block->p_next = state->p_top_block;
1192     state->p_top_block = block;
1193 }
1194 
1195 
1196 /************************************************************************
1197 This function initializes dynamic block:
1198 
1199 block               destination block, MUST be zero-filled on entry
1200 size                size (in bytes), >=0.
1201 state               ALGLIB environment state, non-NULL
make_automatic      if true, block is added to the dynamic block list
1203 
1204 block is assumed to be uninitialized, its fields are ignored. You may
1205 call this function with zero size in order to register block in the
1206 dynamic list.
1207 
1208 Error handling: calls ae_break() on allocation error. Block is left in
1209 valid state (empty, but valid).
1210 
1211 NOTES:
1212 * never call it for blocks which are already in the list; use ae_db_realloc
1213   for already allocated blocks.
1214 
1215 NOTE: no memory allocation is performed for initialization with size=0
1216 ************************************************************************/
void ae_db_init(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic)
1218 {
1219     AE_CRITICAL_ASSERT(state!=NULL);
1220     AE_CRITICAL_ASSERT(ae_check_zeros(block,sizeof(*block)));
1221 
1222     /*
1223      * NOTE: these strange dances around block->ptr are necessary
1224      *       in order to correctly handle possible exceptions during
1225      *       memory allocation.
1226      */
1227     ae_assert(size>=0, "ae_db_init(): negative size", state);
1228     block->ptr = NULL;
1229     block->valgrind_hint = NULL;
1230     ae_touch_ptr(block->ptr);
1231     ae_touch_ptr(block->valgrind_hint);
1232     if( make_automatic )
1233         ae_db_attach(block, state);
1234     else
1235         block->p_next = NULL;
1236     if( size!=0 )
1237     {
1238         block->ptr = ae_malloc((size_t)size, state);
1239         block->valgrind_hint = aligned_extract_ptr(block->ptr);
1240     }
1241     block->deallocator = ae_free;
1242 }
1243 
1244 
1245 /************************************************************************
1246 This function realloc's dynamic block:
1247 
1248 block               destination block (initialized)
1249 size                new size (in bytes)
1250 state               ALGLIB environment state
1251 
1252 block is assumed to be initialized.
1253 
1254 This function:
1255 * deletes old contents
1256 * preserves automatic state
1257 
1258 Error handling: calls ae_break() on allocation error. Block is left in
1259 valid state - empty, but valid.
1260 
1261 NOTES:
1262 * never call it for special blocks which mark frame boundaries!
1263 ************************************************************************/
void ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state)
1265 {
1266     AE_CRITICAL_ASSERT(state!=NULL);
1267 
1268     /*
1269      * NOTE: these strange dances around block->ptr are necessary
1270      *       in order to correctly handle possible exceptions during
1271      *       memory allocation.
1272      */
1273     ae_assert(size>=0, "ae_db_realloc(): negative size", state);
1274     if( block->ptr!=NULL )
1275     {
1276         ((ae_deallocator)block->deallocator)(block->ptr);
1277         block->ptr = NULL;
1278         block->valgrind_hint = NULL;
1279     }
1280     block->ptr = ae_malloc((size_t)size, state);
1281     block->valgrind_hint = aligned_extract_ptr(block->ptr);
1282     block->deallocator = ae_free;
1283 }
1284 
1285 
1286 /************************************************************************
1287 This function clears dynamic block (releases  all  dynamically  allocated
1288 memory). Dynamic block may be in automatic management list - in this case
1289 it will NOT be removed from list.
1290 
1291 block               destination block (initialized)
1292 
1293 NOTES:
* never call it for special blocks which mark frame boundaries!
1295 ************************************************************************/
void ae_db_free(ae_dyn_block *block)
1297 {
1298     if( block->ptr!=NULL )
1299         ((ae_deallocator)block->deallocator)(block->ptr);
1300     block->ptr = NULL;
1301     block->valgrind_hint = NULL;
1302     block->deallocator = ae_free;
1303 }
1304 
1305 /************************************************************************
1306 This function swaps contents of two dynamic blocks (pointers and
1307 deallocators) leaving other parameters (automatic management settings,
1308 etc.) unchanged.
1309 
1310 NOTES:
* never call it for special blocks which mark frame boundaries!
1312 ************************************************************************/
void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2)
1314 {
1315     void (*deallocator)(void*) = NULL;
1316     void * volatile ptr;
1317     void * valgrind_hint;
1318 
1319     ptr = block1->ptr;
1320     valgrind_hint = block1->valgrind_hint;
1321     deallocator = block1->deallocator;
1322 
1323     block1->ptr = block2->ptr;
1324     block1->valgrind_hint = block2->valgrind_hint;
1325     block1->deallocator = block2->deallocator;
1326 
1327     block2->ptr = ptr;
1328     block2->valgrind_hint = valgrind_hint;
1329     block2->deallocator = deallocator;
1330 }
1331 
1332 /*************************************************************************
1333 This function creates ae_vector.
Vector size may be zero. Vector contents are uninitialized.
1335 
1336 dst                 destination vector, MUST be zero-filled (we  check  it
1337                     and call abort() if *dst is non-zero; the rationale is
1338                     that we can not correctly handle errors in constructors
1339                     without zero-filling).
1340 size                vector size, may be zero
1341 datatype            guess what...
1342 state               pointer to current state structure. Can not be NULL.
1343                     used for exception handling (say, allocation error results
1344                     in longjmp call).
1345 make_automatic      if true, vector will be registered in the current frame
1346                     of the state structure;
1347 
1348 NOTE: no memory allocation is performed for initialization with size=0
1349 *************************************************************************/
void ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
1351 {
1352     /*
1353      * Integrity checks
1354      */
1355     AE_CRITICAL_ASSERT(state!=NULL);
1356     AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1357     ae_assert(size>=0, "ae_vector_init(): negative size", state);
1358 
1359     /* prepare for possible errors during allocation */
1360     dst->cnt = 0;
1361     dst->ptr.p_ptr = NULL;
1362 
1363     /* init */
1364     ae_db_init(&dst->data, size*ae_sizeof(datatype), state, make_automatic);
1365     dst->cnt = size;
1366     dst->datatype = datatype;
1367     dst->ptr.p_ptr = dst->data.ptr;
1368     dst->is_attached = ae_false;
1369 }
1370 
1371 
1372 /************************************************************************
1373 This function creates copy of ae_vector. New copy of the data is created,
1374 which is managed and owned by newly initialized vector.
1375 
1376 dst                 destination vector, MUST be zero-filled (we  check  it
1377                     and call abort() if *dst is non-zero; the rationale is
1378                     that we can not correctly handle errors in constructors
1379                     without zero-filling).
1380 src                 well, it is source
1381 state               pointer to current state structure. Can not be NULL.
1382                     used for exception handling (say, allocation error results
1383                     in longjmp call).
1384 make_automatic      if true, vector will be registered in the current frame
1385                     of the state structure;
1386 
1387 dst is assumed to be uninitialized, its fields are ignored.
1388 ************************************************************************/
void ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic)
1390 {
1391     AE_CRITICAL_ASSERT(state!=NULL);
1392 
1393     ae_vector_init(dst, src->cnt, src->datatype, state, make_automatic);
1394     if( src->cnt!=0 )
1395         memmove(dst->ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
1396 }
1397 
1398 /************************************************************************
1399 This function initializes ae_vector using X-structure as source. New copy
1400 of data is created, which is owned/managed by ae_vector  structure.  Both
1401 structures (source and destination) remain completely  independent  after
1402 this call.
1403 
1404 dst                 destination vector, MUST be zero-filled (we  check  it
1405                     and call abort() if *dst is non-zero; the rationale is
1406                     that we can not correctly handle errors in constructors
1407                     without zero-filling).
1408 src                 well, it is source
1409 state               pointer to current state structure. Can not be NULL.
1410                     used for exception handling (say, allocation error results
1411                     in longjmp call).
1412 make_automatic      if true, vector will be registered in the current frame
1413                     of the state structure;
1414 
1415 dst is assumed to be uninitialized, its fields are ignored.
1416 ************************************************************************/
void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
1418 {
1419     AE_CRITICAL_ASSERT(state!=NULL);
1420 
1421     ae_vector_init(dst, (ae_int_t)src->cnt, (ae_datatype)src->datatype, state, make_automatic);
1422     if( src->cnt>0 )
1423         memmove(dst->ptr.p_ptr, src->x_ptr.p_ptr, (size_t)(((ae_int_t)src->cnt)*ae_sizeof((ae_datatype)src->datatype)));
1424 }
1425 
1426 /************************************************************************
1427 This function initializes ae_vector using X-structure as source.
1428 
1429 New vector is attached to source:
1430 * DST shares memory with SRC
1431 * both DST and SRC are writable - all writes to DST  change  elements  of
1432   SRC and vice versa.
1433 * DST can be reallocated with ae_vector_set_length(), in  this  case  SRC
1434   remains untouched
1435 * SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS
1436 
1437 NOTE: is_attached field is set  to  ae_true  in  order  to  indicate  that
1438       vector does not own its memory.
1439 
1440 dst                 destination vector
1441 src                 well, it is source
1442 state               pointer to current state structure. Can not be NULL.
1443                     used for exception handling (say, allocation error results
1444                     in longjmp call).
1445 make_automatic      if true, vector will be registered in the current frame
1446                     of the state structure;
1447 
1448 dst is assumed to be uninitialized, its fields are ignored.
1449 ************************************************************************/
void ae_vector_init_attach_to_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
1451 {
1452     volatile ae_int_t cnt;
1453 
1454     AE_CRITICAL_ASSERT(state!=NULL);
1455     AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1456 
1457     cnt = (ae_int_t)src->cnt;
1458 
1459     /* ensure that size is correct */
1460     ae_assert(cnt==src->cnt,  "ae_vector_init_attach_to_x(): 32/64 overflow", state);
1461     ae_assert(cnt>=0,         "ae_vector_init_attach_to_x(): negative length", state);
1462 
1463     /* prepare for possible errors during allocation */
1464     dst->cnt = 0;
1465     dst->ptr.p_ptr = NULL;
1466     dst->datatype = (ae_datatype)src->datatype;
1467 
1468     /* zero-size init in order to correctly register in the frame */
1469     ae_db_init(&dst->data, 0, state, make_automatic);
1470 
1471     /* init */
1472     dst->cnt = cnt;
1473     dst->ptr.p_ptr = src->x_ptr.p_ptr;
1474     dst->is_attached = ae_true;
1475 }
1476 
1477 /************************************************************************
1478 This function changes length of ae_vector.
1479 
1480 dst                 destination vector
1481 newsize             vector size, may be zero
1482 state               ALGLIB environment state, can not be NULL
1483 
1484 Error handling: calls ae_break() on allocation error
1485 
1486 NOTES:
1487 * vector must be initialized
* all contents are destroyed during the setlength() call
1489 * new size may be zero.
1490 ************************************************************************/
1491 void ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state)
1492 {
1493     AE_CRITICAL_ASSERT(state!=NULL);
1494     ae_assert(newsize>=0, "ae_vector_set_length(): negative size", state);
1495     if( dst->cnt==newsize )
1496         return;
1497 
1498     /* realloc, being ready for exception during reallocation (cnt=ptr=0 on entry) */
1499     dst->cnt = 0;
1500     dst->ptr.p_ptr = NULL;
1501     ae_db_realloc(&dst->data, newsize*ae_sizeof(dst->datatype), state);
1502     dst->cnt = newsize;
1503     dst->ptr.p_ptr = dst->data.ptr;
1504 }
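
/*
 * Illustrative lifecycle sketch (hypothetical names, not part of the
 * library): create, resize and clear a dynamically managed ae_vector.
 *
 *     ae_state _state;
 *     ae_vector v;
 *     ae_state_init(&_state);
 *     memset(&v, 0, sizeof(v));
 *     ae_vector_init(&v, 5, DT_REAL, &_state, ae_false);
 *     ae_vector_set_length(&v, 10, &_state);   // previous contents destroyed
 *     ae_vector_clear(&v);                     // memory released, v reusable
 *     ae_state_clear(&_state);
 */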
1505 
1506 /************************************************************************
1507 This function resizes ae_vector, preserving previously existing elements.
1508 Values of elements added during vector growth are undefined.
1509 
1510 dst                 destination vector
1511 newsize             vector size, may be zero
1512 state               ALGLIB environment state, can not be NULL
1513 
1514 Error handling: calls ae_break() on allocation error
1515 
1516 NOTES:
1517 * vector must be initialized
1518 * new size may be zero.
1519 ************************************************************************/
1520 void ae_vector_resize(ae_vector *dst, ae_int_t newsize, ae_state *state)
1521 {
1522     ae_vector tmp;
1523     ae_int_t bytes_total;
1524 
1525     memset(&tmp, 0, sizeof(tmp));
1526     ae_vector_init(&tmp, newsize, dst->datatype, state, ae_false);
1527     bytes_total = (dst->cnt<newsize ? dst->cnt : newsize)*ae_sizeof(dst->datatype);
1528     if( bytes_total>0 )
1529         memmove(tmp.ptr.p_ptr, dst->ptr.p_ptr, bytes_total);
1530     ae_swap_vectors(dst, &tmp);
1531     ae_vector_clear(&tmp);
1532 }
1533 
1534 
1535 /************************************************************************
1536 This  function  provides  "CLEAR"  functionality  for vector (contents is
1537 cleared, but structure still left in valid state).
1538 
1539 The  function clears vector contents (releases all dynamically  allocated
1540 memory). Vector may be in automatic management list  -  in this  case  it
1541 will NOT be removed from list.
1542 
1543 IMPORTANT: this function does NOT invalidate dst; it  just  releases  all
1544 dynamically allocated storage, but dst may still be used  after a call to
1545 ae_vector_set_length().
1546 
1547 dst                 destination vector
1548 ************************************************************************/
1549 void ae_vector_clear(ae_vector *dst)
1550 {
1551     dst->cnt = 0;
1552     ae_db_free(&dst->data);
1553     dst->ptr.p_ptr = 0;
1554     dst->is_attached = ae_false;
1555 }
1556 
1557 
1558 /************************************************************************
1559 This  function  provides "DESTROY"  functionality for vector (contents is
1560 cleared, all internal structures are destroyed). For vectors it  is  same
1561 as CLEAR.
1562 
1563 dst                 destination vector
1564 ************************************************************************/
1565 void ae_vector_destroy(ae_vector *dst)
1566 {
1567     ae_vector_clear(dst);
1568 }
1569 
1570 
1571 /************************************************************************
1572 This function efficiently swaps contents of two vectors, leaving other
1573 parameters (automatic management, etc.) unchanged.
1574 ************************************************************************/
1575 void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2)
1576 {
1577     ae_int_t cnt;
1578     ae_datatype datatype;
1579     void *p_ptr;
1580 
1581     ae_assert(!vec1->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
1582     ae_assert(!vec2->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
1583 
1584     ae_db_swap(&vec1->data, &vec2->data);
1585 
1586     cnt = vec1->cnt;
1587     datatype = vec1->datatype;
1588     p_ptr = vec1->ptr.p_ptr;
1589     vec1->cnt = vec2->cnt;
1590     vec1->datatype = vec2->datatype;
1591     vec1->ptr.p_ptr = vec2->ptr.p_ptr;
1592     vec2->cnt = cnt;
1593     vec2->datatype = datatype;
1594     vec2->ptr.p_ptr = p_ptr;
1595 }
1596 
1597 /************************************************************************
1598 This function creates ae_matrix.
1599 
1600 Matrix size may be zero, in such cases both rows and cols are zero.
1601 Matrix contents is uninitialized.
1602 
1603 dst                 destination matrix, must be zero-filled
1604 rows                rows count
1605 cols                cols count
1606 datatype            element type
1607 state               pointer to current state structure. Can not be NULL.
1608                     used for exception handling (say, allocation error results
1609                     in longjmp call).
1610 make_automatic      if true, matrix will be registered in the current frame
1611                     of the state structure;
1612 
1613 dst is assumed to be uninitialized, its fields are ignored.
1614 
1615 NOTE: no memory allocation is performed for initialization with rows=cols=0
1616 ************************************************************************/
1617 void ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
1618 {
1619     AE_CRITICAL_ASSERT(state!=NULL);
1620     AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1621 
1622     ae_assert(rows>=0 && cols>=0, "ae_matrix_init(): negative length", state);
1623 
1624     /* if one of rows/cols is zero, another MUST be too; perform quick exit */
1625     if( rows==0 || cols==0 )
1626     {
1627         dst->rows = 0;
1628         dst->cols = 0;
1629         dst->is_attached = ae_false;
1630         dst->ptr.pp_void = NULL;
1631         dst->stride = 0;
1632         dst->datatype = datatype;
1633         ae_db_init(&dst->data, 0, state, make_automatic);
1634         return;
1635     }
1636 
1637     /* init, being ready for exception during allocation (rows=cols=ptr=NULL on entry) */
1638     dst->is_attached = ae_false;
1639     dst->rows = 0;
1640     dst->cols = 0;
1641     dst->ptr.pp_void = NULL;
1642     dst->stride = cols;
1643     while( dst->stride*ae_sizeof(datatype)%AE_DATA_ALIGN!=0 )
1644         dst->stride++;
1645     dst->datatype = datatype;
1646     ae_db_init(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(datatype))+AE_DATA_ALIGN-1, state, make_automatic);
1647     dst->rows = rows;
1648     dst->cols = cols;
1649     ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+rows*sizeof(void*),AE_DATA_ALIGN));
1650 }
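
/*
 * Worked example of the stride computation above (illustrative): the loop
 * rounds stride up until one row occupies a multiple of AE_DATA_ALIGN (64)
 * bytes. For DT_REAL elements (8 bytes each) and cols=5, stride grows
 * 5 -> 6 -> 7 -> 8, since 8*8=64 is the first multiple of 64; together with
 * the ae_align() call on the first row, every row therefore starts on a
 * 64-byte boundary.
 */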
1651 
1652 
1653 /************************************************************************
1654 This function creates a copy of ae_matrix; a new copy of the data is created.
1655 
1656 dst                 destination matrix, must be zero-filled
1657 src                 well, it is source
1658 state               pointer to current state structure. Can not be NULL.
1659                     used for exception handling (say, allocation error results
1660                     in longjmp call).
1661 make_automatic      if true, matrix will be registered in the current frame
1662                     of the state structure;
1663 
1664 dst is assumed to be uninitialized, its fields are ignored.
1665 ************************************************************************/
1666 void ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic)
1667 {
1668     ae_int_t i;
1669     ae_matrix_init(dst, src->rows, src->cols, src->datatype, state, make_automatic);
1670     if( src->rows!=0 && src->cols!=0 )
1671     {
1672         if( dst->stride==src->stride )
1673             memmove(dst->ptr.pp_void[0], src->ptr.pp_void[0], (size_t)(src->rows*src->stride*ae_sizeof(src->datatype)));
1674         else
1675             for(i=0; i<dst->rows; i++)
1676                 memmove(dst->ptr.pp_void[i], src->ptr.pp_void[i], (size_t)(dst->cols*ae_sizeof(dst->datatype)));
1677     }
1678 }
1679 
1680 
1681 /************************************************************************
1682 This function initializes ae_matrix using X-structure as source. New copy
1683 of data is created, which is owned/managed by ae_matrix  structure.  Both
1684 structures (source and destination) remain completely  independent  after
1685 this call.
1686 
1687 dst                 destination matrix, must be zero-filled
1688 src                 well, it is source
1689 state               pointer to current state structure. Can not be NULL.
1690                     used for exception handling (say, allocation error results
1691                     in longjmp call).
1692 make_automatic      if true, matrix will be registered in the current frame
1693                     of the state structure;
1694 
1695 dst is assumed to be uninitialized, its fields are ignored.
1696 ************************************************************************/
1697 void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
1698 {
1699     char *p_src_row;
1700     char *p_dst_row;
1701     ae_int_t row_size;
1702     ae_int_t i;
1703     AE_CRITICAL_ASSERT(state!=NULL);
1704     ae_matrix_init(dst, (ae_int_t)src->rows, (ae_int_t)src->cols, (ae_datatype)src->datatype, state, make_automatic);
1705     if( src->rows!=0 && src->cols!=0 )
1706     {
1707         p_src_row = (char*)src->x_ptr.p_ptr;
1708         p_dst_row = (char*)(dst->ptr.pp_void[0]);
1709         row_size = ae_sizeof((ae_datatype)src->datatype)*(ae_int_t)src->cols;
1710         for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof((ae_datatype)src->datatype), p_dst_row+=dst->stride*ae_sizeof((ae_datatype)src->datatype))
1711             memmove(p_dst_row, p_src_row, (size_t)(row_size));
1712     }
1713 }
1714 
1715 
1716 /************************************************************************
1717 This function initializes ae_matrix using X-structure as source.
1718 
1719 New matrix is attached to source:
1720 * DST shares memory with SRC
1721 * both DST and SRC are writable - all writes to DST  change  elements  of
1722   SRC and vice versa.
1723 * DST can be reallocated with ae_matrix_set_length(), in  this  case  SRC
1724   remains untouched
1725 * SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS
1726 
1727 dst                 destination matrix, must be zero-filled
1728 src                 well, it is source
1729 state               pointer to current state structure. Can not be NULL.
1730                     used for exception handling (say, allocation error results
1731                     in longjmp call).
1732 make_automatic      if true, matrix will be registered in the current frame
1733                     of the state structure;
1734 
1735 dst is assumed to be uninitialized, its fields are ignored.
1736 ************************************************************************/
1737 void ae_matrix_init_attach_to_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
1738 {
1739     ae_int_t rows, cols;
1740 
1741     AE_CRITICAL_ASSERT(state!=NULL);
1742     AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1743 
1744     rows = (ae_int_t)src->rows;
1745     cols = (ae_int_t)src->cols;
1746 
1747     /* check that X-source is densely packed */
1748     ae_assert(src->cols==src->stride, "ae_matrix_init_attach_to_x(): unsupported stride", state);
1749 
1750     /* ensure that size is correct */
1751     ae_assert(rows==src->rows,      "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
1752     ae_assert(cols==src->cols,      "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
1753     ae_assert(rows>=0 && cols>=0,   "ae_matrix_init_attach_to_x(): negative length", state);
1754 
1755     /* if one of rows/cols is zero, another MUST be too */
1756     if( rows==0 || cols==0 )
1757     {
1758         rows = 0;
1759         cols = 0;
1760     }
1761 
1762     /* init, being ready for allocation error */
1763     dst->is_attached = ae_true;
1764     dst->rows = 0;
1765     dst->cols = 0;
1766     dst->stride = cols;
1767     dst->datatype = (ae_datatype)src->datatype;
1768     dst->ptr.pp_void = NULL;
1769     ae_db_init(&dst->data, rows*(ae_int_t)sizeof(void*), state, make_automatic);
1770     dst->rows = rows;
1771     dst->cols = cols;
1772     if( dst->rows>0 && dst->cols>0 )
1773     {
1774         ae_int_t i, rowsize;
1775         char *p_row;
1776         void **pp_ptr;
1777 
1778         p_row = (char*)src->x_ptr.p_ptr;
1779         rowsize = dst->stride*ae_sizeof(dst->datatype);
1780         pp_ptr  = (void**)dst->data.ptr;
1781         dst->ptr.pp_void = pp_ptr;
1782         for(i=0; i<dst->rows; i++, p_row+=rowsize)
1783             pp_ptr[i] = p_row;
1784     }
1785 }
1786 
1787 
1788 /************************************************************************
1789 This function changes length of ae_matrix.
1790 
1791 dst                 destination matrix
1792 rows                size, may be zero
1793 cols                size, may be zero
1794 state               ALGLIB environment state, can not be NULL
1795 
1796 Error handling: calls ae_break() on allocation error
1800 
1801 NOTES:
1802 * matrix must be initialized
1803 * all contents is destroyed during setlength() call
1804 * new size may be zero.
1805 ************************************************************************/
1806 void ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state)
1807 {
1808     AE_CRITICAL_ASSERT(state!=NULL);
1809     ae_assert(rows>=0 && cols>=0, "ae_matrix_set_length(): negative length", state);
1810     if( dst->rows==rows && dst->cols==cols )
1811         return;
1812 
1813     /* prepare stride */
1814     dst->stride = cols;
1815     while( dst->stride*ae_sizeof(dst->datatype)%AE_DATA_ALIGN!=0 )
1816         dst->stride++;
1817 
1818     /* realloc, being ready for an exception during reallocation (rows=cols=0 on entry) */
1819     dst->rows = 0;
1820     dst->cols = 0;
1821     dst->ptr.pp_void = NULL;
1822     ae_db_realloc(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(dst->datatype))+AE_DATA_ALIGN-1, state);
1823     dst->rows = rows;
1824     dst->cols = cols;
1825 
1826     /* update pointers to rows */
1827     ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN));
1828 }
1829 
1830 
1831 /************************************************************************
1832 This  function  provides  "CLEAR"  functionality  for matrix (contents is
1833 cleared, but structure still left in valid state).
1834 
1835 The  function clears matrix contents (releases all dynamically  allocated
1836 memory). Matrix may be in automatic management list  -  in this  case  it
1837 will NOT be removed from list.
1838 
1839 IMPORTANT: this function does NOT invalidate dst; it  just  releases  all
1840 dynamically allocated storage, but dst may still be used  after a call to
1841 ae_matrix_set_length().
1842 
1843 dst                 destination matrix
1844 ************************************************************************/
1845 void ae_matrix_clear(ae_matrix *dst)
1846 {
1847     dst->rows = 0;
1848     dst->cols = 0;
1849     dst->stride = 0;
1850     ae_db_free(&dst->data);
1851     dst->ptr.p_ptr = 0;
1852     dst->is_attached = ae_false;
1853 }
1854 
1855 
1856 /************************************************************************
1857 This  function  provides  "DESTROY" functionality for matrix (contents is
1858 cleared, but structure still left in valid state).
1859 
1860 For matrices it is same as CLEAR.
1861 
1862 dst                 destination matrix
1863 ************************************************************************/
1864 void ae_matrix_destroy(ae_matrix *dst)
1865 {
1866     ae_matrix_clear(dst);
1867 }
1868 
1869 
1870 /************************************************************************
1871 This function efficiently swaps contents of two matrices, leaving other
1872 parameters (automatic management, etc.) unchanged.
1873 ************************************************************************/
1874 void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2)
1875 {
1876     ae_int_t rows;
1877     ae_int_t cols;
1878     ae_int_t stride;
1879     ae_datatype datatype;
1880     void *p_ptr;
1881 
1882     ae_assert(!mat1->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
1883     ae_assert(!mat2->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
1884 
1885     ae_db_swap(&mat1->data, &mat2->data);
1886 
1887     rows = mat1->rows;
1888     cols = mat1->cols;
1889     stride = mat1->stride;
1890     datatype = mat1->datatype;
1891     p_ptr = mat1->ptr.p_ptr;
1892 
1893     mat1->rows = mat2->rows;
1894     mat1->cols = mat2->cols;
1895     mat1->stride = mat2->stride;
1896     mat1->datatype = mat2->datatype;
1897     mat1->ptr.p_ptr = mat2->ptr.p_ptr;
1898 
1899     mat2->rows = rows;
1900     mat2->cols = cols;
1901     mat2->stride = stride;
1902     mat2->datatype = datatype;
1903     mat2->ptr.p_ptr = p_ptr;
1904 }
1905 
1906 
1907 /************************************************************************
1908 This function creates smart pointer structure.
1909 
1910 dst                 destination smart pointer, must be zero-filled
1911 subscriber          pointer to pointer which receives updates in the
1912                     internal object stored in ae_smart_ptr. Any update to
1913                     dst->ptr is translated to subscriber. Can be NULL.
1914 state               pointer to current state structure. Can not be NULL.
1915                     used for exception handling (say, allocation error results
1916                     in longjmp call).
1917 make_automatic      if true, pointer will be registered in the current frame
1918                     of the state structure;
1919 
1920 Error handling:
1921 * on failure calls ae_break() with NULL state pointer. Usually it  results
1922   in abort() call.
1923 
1924 After initialization, smart pointer stores NULL pointer.
1925 ************************************************************************/
1926 void ae_smart_ptr_init(ae_smart_ptr *dst, void **subscriber, ae_state *state, ae_bool make_automatic)
1927 {
1928     AE_CRITICAL_ASSERT(state!=NULL);
1929     AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
1930     dst->subscriber = subscriber;
1931     dst->ptr = NULL;
1932     if( dst->subscriber!=NULL )
1933         *(dst->subscriber) = dst->ptr;
1934     dst->is_owner = ae_false;
1935     dst->is_dynamic = ae_false;
1936     dst->frame_entry.deallocator = ae_smart_ptr_destroy;
1937     dst->frame_entry.ptr = dst;
1938     if( make_automatic )
1939         ae_db_attach(&dst->frame_entry, state);
1940 }
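
/*
 * Illustrative sketch (hypothetical names): the subscriber pointer passed to
 * ae_smart_ptr_init() mirrors the payload stored in the smart pointer, and
 * '_state' is assumed to be an ae_state initialized elsewhere.
 *
 *     some_object *obj = NULL;                 // subscriber target
 *     ae_smart_ptr _ptr;
 *     memset(&_ptr, 0, sizeof(_ptr));
 *     ae_smart_ptr_init(&_ptr, (void**)&obj, &_state, ae_true);
 *     // after ae_smart_ptr_assign(&_ptr, p, ...) we have obj==p;
 *     // after ae_smart_ptr_clear()/ae_smart_ptr_release() obj becomes NULL.
 */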
1941 
1942 
1943 /************************************************************************
1944 This function clears smart pointer structure.
1945 
1946 dst                 destination smart pointer.
1947 
1948 After call to this function smart pointer contains NULL reference,  which
1949 is  propagated  to  its  subscriber  (in  case  a  non-NULL  subscriber was
1950 specified during pointer creation).
1951 ************************************************************************/
1952 void ae_smart_ptr_clear(void *_dst)
1953 {
1954     ae_smart_ptr *dst = (ae_smart_ptr*)_dst;
1955     if( dst->is_owner && dst->ptr!=NULL )
1956     {
1957         dst->destroy(dst->ptr);
1958         if( dst->is_dynamic )
1959             ae_free(dst->ptr);
1960     }
1961     dst->is_owner = ae_false;
1962     dst->is_dynamic = ae_false;
1963     dst->ptr = NULL;
1964     dst->destroy = NULL;
1965     if( dst->subscriber!=NULL )
1966         *(dst->subscriber) = NULL;
1967 }
1968 
1969 
1970 /************************************************************************
1971 This function destroys smart pointer structure (same as clearing it).
1972 
1973 dst                 destination smart pointer.
1974 ************************************************************************/
1975 void ae_smart_ptr_destroy(void *_dst)
1976 {
1977     ae_smart_ptr_clear(_dst);
1978 }
1979 
1980 
1981 /************************************************************************
1982 This function assigns pointer to ae_smart_ptr structure.
1983 
1984 dst                 destination smart pointer.
1985 new_ptr             new pointer to assign
1986 is_owner            whether smart pointer owns new_ptr
1987 is_dynamic          whether object is dynamic - clearing such object
1988                     requires BOTH calling destructor function AND calling
1989                     ae_free() for memory occupied by object.
1990 destroy             destructor function
1991 
1992 In case smart pointer already contains non-NULL value and owns this value,
1993 it is freed before assigning new pointer.
1994 
1995 Changes in pointer are propagated to its  subscriber  (in  case  non-NULL
1996 subscriber was specified during pointer creation).
1997 
1998 You can specify NULL new_ptr, in which case is_owner/destroy are ignored.
1999 ************************************************************************/
2000 void ae_smart_ptr_assign(ae_smart_ptr *dst, void *new_ptr, ae_bool is_owner, ae_bool is_dynamic, void (*destroy)(void*))
2001 {
2002     if( dst->is_owner && dst->ptr!=NULL )
2003     {
2004         dst->destroy(dst->ptr);
2005         if( dst->is_dynamic )
2006             ae_free(dst->ptr);
2007     }
2008     if( new_ptr!=NULL )
2009     {
2010         dst->ptr = new_ptr;
2011         dst->is_owner = is_owner;
2012         dst->is_dynamic = is_dynamic;
2013         dst->destroy = destroy;
2014     }
2015     else
2016     {
2017         dst->ptr = NULL;
2018         dst->is_owner = ae_false;
2019         dst->is_dynamic = ae_false;
2020         dst->destroy = NULL;
2021     }
2022     if( dst->subscriber!=NULL )
2023         *(dst->subscriber) = dst->ptr;
2024 }
2025 
2026 
2027 /************************************************************************
2028 This function releases pointer owned by ae_smart_ptr structure:
2029 * all internal fields are set to NULL
2030 * destructor function for internal pointer is NOT called even when we own
2031   this pointer. After this call ae_smart_ptr releases  ownership  of  its
2032   pointer and passes it to caller.
2033 * changes in pointer are propagated to its subscriber (in  case  non-NULL
2034   subscriber was specified during pointer creation).
2035 
2036 dst                 destination smart pointer.
2037 ************************************************************************/
2038 void ae_smart_ptr_release(ae_smart_ptr *dst)
2039 {
2040     dst->is_owner = ae_false;
2041     dst->is_dynamic = ae_false;
2042     dst->ptr = NULL;
2043     dst->destroy = NULL;
2044     if( dst->subscriber!=NULL )
2045         *(dst->subscriber) = NULL;
2046 }
2047 
2048 /************************************************************************
2049 This function copies contents of ae_vector (SRC) to x_vector (DST).
2050 
2051 This function should not be called for  DST  which  is  attached  to  SRC
2052 (opposite situation, when SRC is attached to DST, is possible).
2053 
2054 Depending on situation, following actions are performed
2055 * for SRC attached to DST, this function performs no actions (no need  to
2056   do anything)
2057 * for independent vectors of different sizes it allocates storage in  DST
2058   and copies contents of SRC to DST.  DST->last_action  field  is  set  to
2059   ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
2060 * for  independent  vectors   of  same  sizes  it does not perform memory
2061   (re)allocation.  It  just  copies  SRC  to  already   existing   place.
2062   DST->last_action   is   set   to    ACT_SAME_LOCATION  (unless  it  was
2063   ACT_NEW_LOCATION), DST->owner is unmodified.
2064 
2065 dst                 destination vector
2066 src                 source, vector in x-format
2067 state               ALGLIB environment state
2068 
2069 NOTES:
2070 * dst is assumed to be initialized. Its contents is freed before  copying
2071   data  from src  (if  size / type  are  different)  or  overwritten  (if
2072   possible given destination size).
2073 ************************************************************************/
2074 void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state)
2075 {
2076     if( src->ptr.p_ptr == dst->x_ptr.p_ptr )
2077     {
2078         /* src->ptr points to the beginning of dst, attached vectors, no need to copy */
2079         return;
2080     }
2081     if( dst->cnt!=src->cnt || dst->datatype!=src->datatype )
2082     {
2083         if( dst->owner==OWN_AE )
2084             ae_free(dst->x_ptr.p_ptr);
2085         dst->x_ptr.p_ptr = ae_malloc((size_t)(src->cnt*ae_sizeof(src->datatype)), state);
2086         if( src->cnt!=0 && dst->x_ptr.p_ptr==NULL )
2087             ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
2088         dst->last_action = ACT_NEW_LOCATION;
2089         dst->cnt = src->cnt;
2090         dst->datatype = src->datatype;
2091         dst->owner = OWN_AE;
2092     }
2093     else
2094     {
2095         if( dst->last_action==ACT_UNCHANGED )
2096             dst->last_action = ACT_SAME_LOCATION;
2097         else if( dst->last_action==ACT_SAME_LOCATION )
2098             dst->last_action = ACT_SAME_LOCATION;
2099         else if( dst->last_action==ACT_NEW_LOCATION )
2100             dst->last_action = ACT_NEW_LOCATION;
2101         else
2102             ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_vector()", state);
2103     }
2104     if( src->cnt )
2105         memmove(dst->x_ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
2106 }
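
/*
 * Summary of the last_action transitions performed above when the existing
 * storage is reused (size and datatype match):
 *
 *     ACT_UNCHANGED     -> ACT_SAME_LOCATION
 *     ACT_SAME_LOCATION -> ACT_SAME_LOCATION
 *     ACT_NEW_LOCATION  -> ACT_NEW_LOCATION
 *     anything else     -> internal error
 */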
2107 
2108 /************************************************************************
2109 This function copies contents of ae_matrix to x_matrix.
2110 
2111 This function should not be called for  DST  which  is  attached  to  SRC
2112 (opposite situation, when SRC is attached to DST, is possible).
2113 
2114 Depending on situation, following actions are performed
2115 * for SRC attached to DST, this function performs no actions (no need  to
2116   do anything)
2117 * for independent matrices of different sizes it allocates storage in DST
2118   and copies contents of SRC to DST.  DST->last_action  field  is  set  to
2119   ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
2120 * for  independent  matrices  of  same  sizes  it does not perform memory
2121   (re)allocation.  It  just  copies  SRC  to  already   existing   place.
2122   DST->last_action   is   set   to    ACT_SAME_LOCATION  (unless  it  was
2123   ACT_NEW_LOCATION), DST->owner is unmodified.
2124 
2125 dst                 destination matrix
2126 src                 source, matrix in x-format
2127 state               ALGLIB environment state
2128 
2129 NOTES:
2130 * dst is assumed to be initialized. Its contents is freed before  copying
2131   data  from src  (if  size / type  are  different)  or  overwritten  (if
2132   possible given destination size).
2133 ************************************************************************/
2134 void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state)
2135 {
2136     char *p_src_row;
2137     char *p_dst_row;
2138     ae_int_t i;
2139     ae_int_t row_size;
2140     if( src->ptr.pp_void!=NULL && src->ptr.pp_void[0] == dst->x_ptr.p_ptr )
2141     {
2142         /* src->ptr points to the beginning of dst, attached matrices, no need to copy */
2143         return;
2144     }
2145     if( dst->rows!=src->rows || dst->cols!=src->cols || dst->datatype!=src->datatype )
2146     {
2147         if( dst->owner==OWN_AE )
2148             ae_free(dst->x_ptr.p_ptr);
2149         dst->rows = src->rows;
2150         dst->cols = src->cols;
2151         dst->stride = src->cols;
2152         dst->datatype = src->datatype;
2153         dst->x_ptr.p_ptr = ae_malloc((size_t)(dst->rows*((ae_int_t)dst->stride)*ae_sizeof(src->datatype)), state);
2154         if( dst->rows!=0 && dst->stride!=0 && dst->x_ptr.p_ptr==NULL )
2155             ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
2156         dst->last_action = ACT_NEW_LOCATION;
2157         dst->owner = OWN_AE;
2158     }
2159     else
2160     {
2161         if( dst->last_action==ACT_UNCHANGED )
2162             dst->last_action = ACT_SAME_LOCATION;
2163         else if( dst->last_action==ACT_SAME_LOCATION )
2164             dst->last_action = ACT_SAME_LOCATION;
2165         else if( dst->last_action==ACT_NEW_LOCATION )
2166             dst->last_action = ACT_NEW_LOCATION;
2167         else
2168             ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_matrix()", state);
2169     }
2170     if( src->rows!=0 && src->cols!=0 )
2171     {
2172         p_src_row = (char*)(src->ptr.pp_void[0]);
2173         p_dst_row = (char*)dst->x_ptr.p_ptr;
2174         row_size = ae_sizeof(src->datatype)*src->cols;
2175         for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof(src->datatype), p_dst_row+=dst->stride*ae_sizeof(src->datatype))
2176             memmove(p_dst_row, p_src_row, (size_t)(row_size));
2177     }
2178 }
2179 
2180 /************************************************************************
2181 This function attaches x_vector to ae_vector's contents.
2182 Ownership of memory allocated is not changed (it is still managed by
2183 ae_vector).
2184 
2185 dst                 destination vector
2186 src                 source, vector in x-format
2187 state               ALGLIB environment state
2188 
2189 NOTES:
2190 * dst is assumed to be initialized. Its contents is freed before
2191   attaching to src.
2192 * this function doesn't need ae_state parameter because it can't fail
2193   (assuming correctly initialized src)
2194 ************************************************************************/
2195 void ae_x_attach_to_vector(x_vector *dst, ae_vector *src)
2196 {
2197     if( dst->owner==OWN_AE )
2198         ae_free(dst->x_ptr.p_ptr);
2199     dst->x_ptr.p_ptr = src->ptr.p_ptr;
2200     dst->last_action = ACT_NEW_LOCATION;
2201     dst->cnt = src->cnt;
2202     dst->datatype = src->datatype;
2203     dst->owner = OWN_CALLER;
2204 }
2205 
2206 /************************************************************************
2207 This function attaches x_matrix to ae_matrix's contents.
2208 Ownership of memory allocated is not changed (it is still managed by
2209 ae_matrix).
2210 
2211 dst                 destination matrix
2212 src                 source, matrix in x-format
2213 state               ALGLIB environment state
2214 
2215 NOTES:
2216 * dst is assumed to be initialized. Its contents is freed before
2217   attaching to src.
2218 * this function doesn't need ae_state parameter because it can't fail
2219   (assuming correctly initialized src)
2220 ************************************************************************/
2221 void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src)
2222 {
2223     if( dst->owner==OWN_AE )
2224             ae_free(dst->x_ptr.p_ptr);
2225     dst->rows = src->rows;
2226     dst->cols = src->cols;
2227     dst->stride = src->stride;
2228     dst->datatype = src->datatype;
2229     dst->x_ptr.p_ptr = &(src->ptr.pp_double[0][0]);
2230     dst->last_action = ACT_NEW_LOCATION;
2231     dst->owner = OWN_CALLER;
2232 }
2233 
2234 /************************************************************************
2235 This function clears x_vector. Dynamically allocated memory is freed only
2236 if the vector is owned by the ALGLIB environment; fields are reset anyway.
2237 
2238 dst                 vector
2239 ************************************************************************/
2240 void x_vector_clear(x_vector *dst)
2241 {
2242     if( dst->owner==OWN_AE )
2243         aligned_free(dst->x_ptr.p_ptr);
2244     dst->x_ptr.p_ptr = NULL;
2245     dst->cnt = 0;
2246 }
2247 
2248 /************************************************************************
2249 Assertion
2250 
2251 For  non-NULL  state  it allows one to gracefully leave the ALGLIB session,
2252 removing all frames and deallocating registered dynamic data structures.
2253 
2254 For NULL state it just abort()'s the program.
2255 
2256 IMPORTANT: this function ALWAYS evaluates its argument.  It  can  not  be
2257            replaced by macro which does nothing. So, you may place actual
2258            function calls at cond, and these will always be performed.
2259 ************************************************************************/
2260 void ae_assert(ae_bool cond, const char *msg, ae_state *state)
2261 {
2262     if( !cond )
2263         ae_break(state, ERR_ASSERTION_FAILED, msg);
2264 }
2265 
2266 /************************************************************************
2267 CPUID
2268 
2269 Returns information about the features supported by the CPU and the compiler.
2270 
2271 You must tell ALGLIB what CPU family is used by defining AE_CPU symbol
2272 (without this hint zero will be returned).
2273 
2274 Note: results of this function depend on both CPU and compiler;
2275 if compiler doesn't support SSE intrinsics, function won't set
2276 corresponding flag.
2277 ************************************************************************/
2278 static volatile ae_bool _ae_cpuid_initialized = ae_false;
2279 static volatile ae_bool _ae_cpuid_has_sse2 = ae_false;
2280 static volatile ae_bool _ae_cpuid_has_avx2 = ae_false;
2281 static volatile ae_bool _ae_cpuid_has_fma  = ae_false;
2282 ae_int_t ae_cpuid()
2283 {
2284     /*
2285      * to speed up CPU detection we cache results from previous attempts
2286      * there is no synchronization, but it is still thread safe.
2287      *
2288      * thread safety is guaranteed on all modern architectures which
2289      * have following property: simultaneous writes by different cores
2290      * to the same location will be executed in serial manner.
2291      *
2292      */
2293     ae_int_t result;
2294 
2295     /*
2296      * if not initialized, determine system properties
2297      */
2298     if( !_ae_cpuid_initialized )
2299     {
2300         /*
2301          * SSE2
2302          */
2303 #if defined(AE_CPU)
2304 #if (AE_CPU==AE_INTEL)
2305 #if AE_COMPILER==AE_MSVC
2306         {
2307             /* SSE2 support */
2308             #if defined(_ALGLIB_HAS_SSE2_INTRINSICS)
2309             int CPUInfo[4];
2310             __cpuid(CPUInfo, 1);
2311             if( (CPUInfo[3]&0x04000000)!=0 )
2312                 _ae_cpuid_has_sse2 = ae_true;
2313             #endif
2314 
2315             /* check OS support for XSAVE XGETBV */
2316            #if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
2317             __cpuid(CPUInfo, 1);
2318             if( (CPUInfo[2]&(0x1<<27))!=0 )
2319                 if( (_xgetbv(0)&0x6)==0x6 )
2320                 {
2321                     /* AVX2 support */
2322                     #if defined(_ALGLIB_HAS_AVX2_INTRINSICS) && (_MSC_VER>=1600)
2323                     if( _ae_cpuid_has_sse2 )
2324                     {
2325                         __cpuidex(CPUInfo, 7, 0);
2326                         if( (CPUInfo[1]&(0x1<<5))!=0 )
2327                             _ae_cpuid_has_avx2 = ae_true;
2328                     }
2329                     #endif
2330 
2331                     /* FMA support */
2332                     #if defined(_ALGLIB_HAS_FMA_INTRINSICS) && (_MSC_VER>=1600)
2333                     if( _ae_cpuid_has_avx2 )
2334                     {
2335                         __cpuid(CPUInfo, 1);
2336                         if( (CPUInfo[2]&(0x1<<12))!=0 )
2337                             _ae_cpuid_has_fma = ae_true;
2338                     }
2339                     #endif
2340                 }
2341             #endif
2342         }
2343 #elif AE_COMPILER==AE_GNUC
2344         {
2345             ae_int_t a,b,c,d;
2346 
2347             /* SSE2 support */
2348             #if defined(_ALGLIB_HAS_SSE2_INTRINSICS)
2349             __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
2350             if( (d&0x04000000)!=0 )
2351                 _ae_cpuid_has_sse2 = ae_true;
2352             #endif
2353 
2354             /* check OS support for XSAVE XGETBV */
2355            #if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
2356             __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
2357             if( (c&(0x1<<27))!=0 )
2358             {
2359                 __asm__ volatile ("xgetbv" : "=a" (a), "=d" (d) : "c" (0));
2360                 if( (a&0x6)==0x6 )
2361                 {
2362                     /* AVX2 support */
2363                     #if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
2364                     if( _ae_cpuid_has_sse2 )
2365                     {
2366                         __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (7), "c" (0) );
2367                         if( (b&(0x1<<5))!=0 )
2368                             _ae_cpuid_has_avx2 = ae_true;
2369                     }
2370                     #endif
2371 
2372                     /* FMA support */
2373                     #if defined(_ALGLIB_HAS_FMA_INTRINSICS)
2374                     if( _ae_cpuid_has_avx2 )
2375                     {
2376                         __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1) );
2377                         if( (c&(0x1<<12))!=0 )
2378                             _ae_cpuid_has_fma = ae_true;
2379                     }
2380                     #endif
2381                 }
2382             }
2383            #endif
2384         }
2385 #elif AE_COMPILER==AE_SUNC
2386         {
2387             ae_int_t a,b,c,d;
2388             __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
2389             if( (d&0x04000000)!=0 )
2390                 _ae_cpuid_has_sse2 = ae_true;
2391         }
2392 #else
2393 #endif
2394 #endif
2395 #endif
2396         /*
2397          * Perform one more CPUID call to generate memory fence
2398          */
2399 #if AE_CPU==AE_INTEL
2400 #if AE_COMPILER==AE_MSVC
2401         { int CPUInfo[4]; __cpuid(CPUInfo, 1); }
2402 #elif AE_COMPILER==AE_GNUC
2403         { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
2404 #elif AE_COMPILER==AE_SUNC
2405         { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
2406 #else
2407 #endif
2408 #endif
2409 
2410         /*
2411          * set initialization flag
2412          */
2413         _ae_cpuid_initialized = ae_true;
2414     }
2415 
2416     /*
2417      * return
2418      */
2419     result = 0;
2420     if( _ae_cpuid_has_sse2 )
2421         result = result|CPU_SSE2;
2422     if( _ae_cpuid_has_avx2 )
2423         result = result|CPU_AVX2;
2424     if( _ae_cpuid_has_fma )
2425         result = result|CPU_FMA;
2426     return result;
2427 }
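
/*
 * Illustrative usage (not part of the library): querying the cached CPU
 * feature flags returned by ae_cpuid(). The dispatch functions below are
 * hypothetical.
 *
 *     ae_int_t features = ae_cpuid();
 *     if( features&CPU_SSE2 )
 *         use_sse2_kernel();      // hypothetical SSE2 code path
 *     if( features&CPU_AVX2 )
 *         use_avx2_kernel();      // hypothetical AVX2 code path
 *     if( features&CPU_FMA )
 *         use_fma_kernel();       // hypothetical FMA code path
 */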
2428 
2429 /************************************************************************
2430 Activates tracing to file
2431 
2432 IMPORTANT: this function is NOT thread-safe!  Calling  it  from  multiple
2433            threads will result in undefined  behavior.  Calling  it  when
2434            some thread calls ALGLIB functions  may  result  in  undefined
2435            behavior.
2436 ************************************************************************/
2437 void ae_trace_file(const char *tags, const char *filename)
2438 {
2439     /*
2440      * clean up previous call
2441      */
2442     if( alglib_fclose_trace )
2443     {
2444         if( alglib_trace_file!=NULL )
2445             fclose(alglib_trace_file);
2446         alglib_trace_file = NULL;
2447         alglib_fclose_trace = ae_false;
2448     }
2449 
2450     /*
2451      * store ",tags," to buffer. Leading and trailing commas allow us
2452      * to perform checks for various tags by simply calling strstr().
2453      */
2454     memset(alglib_trace_tags, 0, ALGLIB_TRACE_BUFFER_LEN);
2455     strcat(alglib_trace_tags, ",");
2456     strncat(alglib_trace_tags, tags, ALGLIB_TRACE_TAGS_LEN);
2457     strcat(alglib_trace_tags, ",");
2458     for(int i=0; alglib_trace_tags[i]!=0; i++)
2459         alglib_trace_tags[i] = tolower(alglib_trace_tags[i]);
2460 
2461     /*
2462      * set up trace
2463      */
2464     alglib_trace_type = ALGLIB_TRACE_FILE;
2465     alglib_trace_file = fopen(filename, "ab");
2466     alglib_fclose_trace = ae_true;
2467 }
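
/*
 * Illustrative usage (hypothetical tag and file names): enabling tracing
 * before calling ALGLIB code, then disabling it.
 *
 *     ae_trace_file("rbf,optimizer", "/tmp/alglib_trace.log");
 *     if( ae_is_trace_enabled("rbf") )
 *         ae_trace("rbf: starting model construction\n");
 *     ae_trace_disable();
 */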
2468 
2469 /************************************************************************
2470 Disables tracing
2471 ************************************************************************/
2472 void ae_trace_disable()
2473 {
2474     alglib_trace_type = ALGLIB_TRACE_NONE;
2475     if( alglib_fclose_trace )
2476         fclose(alglib_trace_file);
2477     alglib_trace_file = NULL;
2478     alglib_fclose_trace = ae_false;
2479 }
2480 
2481 /************************************************************************
2482 Checks whether specific kind of tracing is enabled
2483 ************************************************************************/
2484 ae_bool ae_is_trace_enabled(const char *tag)
2485 {
2486     char buf[ALGLIB_TRACE_BUFFER_LEN];
2487 
2488     /* check global trace status */
2489     if( alglib_trace_type==ALGLIB_TRACE_NONE || alglib_trace_file==NULL )
2490         return ae_false;
2491 
2492     /* copy tag to buffer, lowercase it */
2493     memset(buf, 0, ALGLIB_TRACE_BUFFER_LEN);
2494     strcat(buf, ",");
2495     strncat(buf, tag, ALGLIB_TRACE_TAGS_LEN);
2496     strcat(buf, "?");
2497     for(int i=0; buf[i]!=0; i++)
2498         buf[i] = tolower(buf[i]);
2499 
2500     /* contains tag (followed by comma, which means exact match) */
2501     buf[strlen(buf)-1] = ',';
2502     if( strstr(alglib_trace_tags,buf)!=NULL )
2503         return ae_true;
2504 
2505     /* contains tag (followed by dot, which means match with child) */
2506     buf[strlen(buf)-1] = '.';
2507     if( strstr(alglib_trace_tags,buf)!=NULL )
2508         return ae_true;
2509 
2510     /* nothing */
2511     return ae_false;
2512 }
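
/*
 * Matching examples for ae_is_trace_enabled() above (illustrative): with
 * tracing enabled via ae_trace_file("rbf.detailed,optimizer", ...), the
 * internal tag buffer is ",rbf.detailed,optimizer,", so:
 *
 *     ae_is_trace_enabled("optimizer")    -> ae_true   (",optimizer," found)
 *     ae_is_trace_enabled("rbf")          -> ae_true   (",rbf." found - child match)
 *     ae_is_trace_enabled("rbf.detailed") -> ae_true   (",rbf.detailed," found)
 *     ae_is_trace_enabled("lsfit")        -> ae_false
 */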
2513 
2514 void ae_trace(const char * printf_fmt, ...)
2515 {
2516     /* check global trace status */
2517     if( alglib_trace_type==ALGLIB_TRACE_FILE && alglib_trace_file!=NULL )
2518     {
2519         va_list args;
2520 
2521         /* fprintf() */
2522         va_start(args, printf_fmt);
2523         vfprintf(alglib_trace_file, printf_fmt, args);
2524         va_end(args);
2525 
2526         /* flush output */
2527         fflush(alglib_trace_file);
2528     }
2529 }
2530 
2531 int ae_tickcount()
2532 {
2533 #if AE_OS==AE_WINDOWS || defined(AE_DEBUG4WINDOWS)
2534     return (int)GetTickCount();
2535 #elif AE_OS==AE_POSIX || defined(AE_DEBUG4POSIX)
2536     struct timeval now;
2537     ae_int64_t r, v;
2538     gettimeofday(&now, NULL);
2539     v = now.tv_sec;
2540     r = v*1000;
2541     v = now.tv_usec/1000;
2542     r = r+v;
2543     return r;
2544     /*struct timespec now;
2545     if (clock_gettime(CLOCK_MONOTONIC, &now) )
2546         return 0;
2547     return now.tv_sec * 1000.0 + now.tv_nsec / 1000000.0;*/
2548 #else
2549     return 0;
2550 #endif
2551 }
2552 
2553 
2554 /************************************************************************
2555 Real math functions
2556 ************************************************************************/
2557 ae_bool ae_fp_eq(double v1, double v2)
2558 {
2559     /* IEEE-strict floating point comparison */
2560     volatile double x = v1;
2561     volatile double y = v2;
2562     return x==y;
2563 }
2564 
2565 ae_bool ae_fp_neq(double v1, double v2)
2566 {
2567     /* IEEE-strict floating point comparison */
2568     return !ae_fp_eq(v1,v2);
2569 }
2570 
2571 ae_bool ae_fp_less(double v1, double v2)
2572 {
2573     /* IEEE-strict floating point comparison */
2574     volatile double x = v1;
2575     volatile double y = v2;
2576     return x<y;
2577 }
2578 
2579 ae_bool ae_fp_less_eq(double v1, double v2)
2580 {
2581     /* IEEE-strict floating point comparison */
2582     volatile double x = v1;
2583     volatile double y = v2;
2584     return x<=y;
2585 }
2586 
2587 ae_bool ae_fp_greater(double v1, double v2)
2588 {
2589     /* IEEE-strict floating point comparison */
2590     volatile double x = v1;
2591     volatile double y = v2;
2592     return x>y;
2593 }
2594 
2595 ae_bool ae_fp_greater_eq(double v1, double v2)
2596 {
2597     /* IEEE-strict floating point comparison */
2598     volatile double x = v1;
2599     volatile double y = v2;
2600     return x>=y;
2601 }
2602 
2603 ae_bool ae_isfinite_stateless(double x, ae_int_t endianness)
2604 {
2605     union _u
2606     {
2607         double a;
2608         ae_int32_t p[2];
2609     } u;
2610     ae_int32_t high;
2611     u.a = x;
2612     if( endianness==AE_LITTLE_ENDIAN )
2613         high = u.p[1];
2614     else
2615         high = u.p[0];
2616     return (high & (ae_int32_t)0x7FF00000)!=(ae_int32_t)0x7FF00000;
2617 }
2618 
2619 ae_bool ae_isnan_stateless(double x,    ae_int_t endianness)
2620 {
2621     union _u
2622     {
2623         double a;
2624         ae_int32_t p[2];
2625     } u;
2626     ae_int32_t high, low;
2627     u.a = x;
2628     if( endianness==AE_LITTLE_ENDIAN )
2629     {
2630         high = u.p[1];
2631         low =  u.p[0];
2632     }
2633     else
2634     {
2635         high = u.p[0];
2636         low =  u.p[1];
2637     }
2638     return ((high &0x7FF00000)==0x7FF00000) && (((high &0x000FFFFF)!=0) || (low!=0));
2639 }
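
/*
 * Background for the bit tests above and below (IEEE 754 binary64 layout):
 * the high 32-bit word contains the sign bit, the 11 exponent bits (mask
 * 0x7FF00000) and the top 20 mantissa bits. A value is finite iff the
 * exponent field is not all-ones; it is NaN iff the exponent field is
 * all-ones and the mantissa (top 20 bits plus the entire low word) is
 * non-zero; it is +/-Inf iff the exponent field is all-ones and the
 * mantissa is zero.
 */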
2640 
2641 ae_bool ae_isinf_stateless(double x,    ae_int_t endianness)
2642 {
2643     union _u
2644     {
2645         double a;
2646         ae_int32_t p[2];
2647     } u;
2648     ae_int32_t high, low;
2649     u.a = x;
2650     if( endianness==AE_LITTLE_ENDIAN )
2651     {
2652         high = u.p[1];
2653         low  = u.p[0];
2654     }
2655     else
2656     {
2657         high = u.p[0];
2658         low  = u.p[1];
2659     }
2660 
2661     /* 31 least significant bits of high are compared */
2662     return ((high&0x7FFFFFFF)==0x7FF00000) && (low==0);
2663 }
2664 
2665 ae_bool ae_isposinf_stateless(double x, ae_int_t endianness)
2666 {
2667     union _u
2668     {
2669         double a;
2670         ae_int32_t p[2];
2671     } u;
2672     ae_int32_t high, low;
2673     u.a = x;
2674     if( endianness==AE_LITTLE_ENDIAN )
2675     {
2676         high = u.p[1];
2677         low  = u.p[0];
2678     }
2679     else
2680     {
2681         high = u.p[0];
2682         low  = u.p[1];
2683     }
2684 
2685     /* all 32 bits of high are compared */
2686     return (high==(ae_int32_t)0x7FF00000) && (low==0);
2687 }
2688 
2689 ae_bool ae_isneginf_stateless(double x, ae_int_t endianness)
2690 {
2691     union _u
2692     {
2693         double a;
2694         ae_int32_t p[2];
2695     } u;
2696     ae_int32_t high, low;
2697     u.a = x;
2698     if( endianness==AE_LITTLE_ENDIAN )
2699     {
2700         high = u.p[1];
2701         low  = u.p[0];
2702     }
2703     else
2704     {
2705         high = u.p[0];
2706         low  = u.p[1];
2707     }
2708 
2709     /* this code is a bit tricky to avoid comparison of high with 0xFFF00000, which may be unsafe with some buggy compilers */
2710     return ((high&0x7FFFFFFF)==0x7FF00000) && (high!=(ae_int32_t)0x7FF00000) && (low==0);
2711 }
2712 
2713 ae_int_t ae_get_endianness()
2714 {
2715     union
2716     {
2717         double a;
2718         ae_int32_t p[2];
2719     } u;
2720 
2721     /*
2722      * determine endianness
2723      * two types are supported: big-endian and little-endian.
2724      * mixed-endian hardware is NOT supported.
2725      *
2726      * 1983 is used as magic number because its non-periodic double
2727      * representation allows us to easily distinguish between upper
2728      * and lower halves and to detect mixed-endian hardware.
2729      *
2730      */
2731     u.a = 1.0/1983.0;
2732     if( u.p[1]==(ae_int32_t)0x3f408642 )
2733         return AE_LITTLE_ENDIAN;
2734     if( u.p[0]==(ae_int32_t)0x3f408642 )
2735         return AE_BIG_ENDIAN;
2736     return AE_MIXED_ENDIAN;
2737 }
2738 
2739 ae_bool ae_isfinite(double x,ae_state *state)
2740 {
2741     return ae_isfinite_stateless(x, state->endianness);
2742 }
2743 
2744 ae_bool ae_isnan(double x,   ae_state *state)
2745 {
2746     return ae_isnan_stateless(x, state->endianness);
2747 }
2748 
2749 ae_bool ae_isinf(double x,   ae_state *state)
2750 {
2751     return ae_isinf_stateless(x, state->endianness);
2752 }
2753 
2754 ae_bool ae_isposinf(double x,ae_state *state)
2755 {
2756     return ae_isposinf_stateless(x, state->endianness);
2757 }
2758 
2759 ae_bool ae_isneginf(double x,ae_state *state)
2760 {
2761     return ae_isneginf_stateless(x, state->endianness);
2762 }
2763 
2764 double ae_fabs(double x,  ae_state *state)
2765 {
2766     return fabs(x);
2767 }
2768 
2769 ae_int_t ae_iabs(ae_int_t x, ae_state *state)
2770 {
2771     return x>=0 ? x : -x;
2772 }
2773 
2774 double ae_sqr(double x,  ae_state *state)
2775 {
2776     return x*x;
2777 }
2778 
2779 double ae_sqrt(double x, ae_state *state)
2780 {
2781     return sqrt(x);
2782 }
2783 
2784 ae_int_t ae_sign(double x, ae_state *state)
2785 {
2786     if( x>0 ) return  1;
2787     if( x<0 ) return -1;
2788     return 0;
2789 }
2790 
2791 ae_int_t ae_round(double x, ae_state *state)
2792 {
2793     return (ae_int_t)(ae_ifloor(x+0.5,state));
2794 }
2795 
2796 ae_int_t ae_trunc(double x, ae_state *state)
2797 {
2798     return (ae_int_t)(x>0 ? ae_ifloor(x,state) : ae_iceil(x,state));
2799 }
2800 
2801 ae_int_t ae_ifloor(double x, ae_state *state)
2802 {
2803     return (ae_int_t)(floor(x));
2804 }
2805 
2806 ae_int_t ae_iceil(double x,  ae_state *state)
2807 {
2808     return (ae_int_t)(ceil(x));
2809 }
2810 
2811 ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state)
2812 {
2813     return m1>m2 ? m1 : m2;
2814 }
2815 
2816 ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state)
2817 {
2818     return m1>m2 ? m2 : m1;
2819 }
2820 
2821 double ae_maxreal(double m1, double m2, ae_state *state)
2822 {
2823     return m1>m2 ? m1 : m2;
2824 }
2825 
2826 double ae_minreal(double m1, double m2, ae_state *state)
2827 {
2828     return m1>m2 ? m2 : m1;
2829 }
2830 
2831 double ae_randomreal(ae_state *state)
2832 {
2833     int i1 = rand();
2834     int i2 = rand();
2835     double mx = (double)(RAND_MAX)+1.0;
2836     volatile double tmp0 = i2/mx;
2837     volatile double tmp1 = i1+tmp0;
2838     return tmp1/mx;
2839 }
2840 
2841 ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state)
2842 {
2843     return rand()%maxv;
2844 }
2845 
2846 double   ae_sin(double x, ae_state *state)
2847 {
2848     return sin(x);
2849 }
2850 
2851 double   ae_cos(double x, ae_state *state)
2852 {
2853     return cos(x);
2854 }
2855 
2856 double   ae_tan(double x, ae_state *state)
2857 {
2858     return tan(x);
2859 }
2860 
2861 double   ae_sinh(double x, ae_state *state)
2862 {
2863     return sinh(x);
2864 }
2865 
2866 double   ae_cosh(double x, ae_state *state)
2867 {
2868     return cosh(x);
2869 }
2870 double   ae_tanh(double x, ae_state *state)
2871 {
2872     return tanh(x);
2873 }
2874 
2875 double   ae_asin(double x, ae_state *state)
2876 {
2877     return asin(x);
2878 }
2879 
2880 double   ae_acos(double x, ae_state *state)
2881 {
2882     return acos(x);
2883 }
2884 
2885 double   ae_atan(double x, ae_state *state)
2886 {
2887     return atan(x);
2888 }
2889 
2890 double   ae_atan2(double y, double x, ae_state *state)
2891 {
2892     return atan2(y,x);
2893 }
2894 
2895 double   ae_log(double x, ae_state *state)
2896 {
2897     return log(x);
2898 }
2899 
2900 double   ae_pow(double x, double y, ae_state *state)
2901 {
2902     return pow(x,y);
2903 }
2904 
2905 double   ae_exp(double x, ae_state *state)
2906 {
2907     return exp(x);
2908 }
2909 
2910 /************************************************************************
2911 Symmetric/Hermitian properties: check and force
2912 ************************************************************************/
2913 static void x_split_length(ae_int_t n, ae_int_t nb, ae_int_t* n1, ae_int_t* n2)
2914 {
2915     ae_int_t r;
2916     if( n<=nb )
2917     {
2918         *n1 = n;
2919         *n2 = 0;
2920     }
2921     else
2922     {
2923         if( n%nb!=0 )
2924         {
2925             *n2 = n%nb;
2926             *n1 = n-(*n2);
2927         }
2928         else
2929         {
2930             *n2 = n/2;
2931             *n1 = n-(*n2);
2932             if( *n1%nb==0 )
2933             {
2934                 return;
2935             }
2936             r = nb-*n1%nb;
2937             *n1 = *n1+r;
2938             *n2 = *n2-r;
2939         }
2940     }
2941 }
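
/*
 * Worked examples for x_split_length() above with nb=x_nb=16 (illustrative):
 *     n=10  -> n1=10, n2=0    (fits into a single block)
 *     n=100 -> n1=96, n2=4    (n1 is the largest multiple of nb below n)
 *     n=48  -> n1=32, n2=16   (even split 24/24, then n1 rounded up to a multiple of nb)
 */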
2942 static double x_safepythag2(double x, double y)
2943 {
2944     double w;
2945     double xabs;
2946     double yabs;
2947     double z;
2948     xabs = fabs(x);
2949     yabs = fabs(y);
2950     w = xabs>yabs ? xabs : yabs;
2951     z = xabs<yabs ? xabs : yabs;
2952     if( z==0 )
2953         return w;
2954     else
2955     {
2956         double t;
2957         t = z/w;
2958         return w*sqrt(1+t*t);
2959     }
2960 }
2961 /*
2962  * this function checks difference between offdiagonal blocks BL and BU
2963  * (see below). Block BL is specified by offsets (offset0,offset1)  and
2964  * sizes (len0,len1).
2965  *
2966  *     [ .          ]
2967  *     [   A0  BU   ]
2968  * A = [   BL  A1   ]
2969  *     [          . ]
2970  *
2971  *  this subroutine updates current values of:
2972  *  a) mx       maximum value of A[i,j] found so far
2973  *  b) err      componentwise difference between elements of BL and BU^T
2974  *
2975  */
is_symmetric_rec_off_stat(x_matrix * a,ae_int_t offset0,ae_int_t offset1,ae_int_t len0,ae_int_t len1,ae_bool * nonfinite,double * mx,double * err,ae_state * _state)2976 static void is_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
2977 {
2978     /* try to split problem into two smaller ones */
2979     if( len0>x_nb || len1>x_nb )
2980     {
2981         ae_int_t n1, n2;
2982         if( len0>len1 )
2983         {
2984             x_split_length(len0, x_nb, &n1, &n2);
2985             is_symmetric_rec_off_stat(a, offset0,    offset1, n1, len1, nonfinite, mx, err, _state);
2986             is_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
2987         }
2988         else
2989         {
2990             x_split_length(len1, x_nb, &n1, &n2);
2991             is_symmetric_rec_off_stat(a, offset0, offset1,    len0, n1, nonfinite, mx, err, _state);
2992             is_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
2993         }
2994         return;
2995     }
2996     else
2997     {
2998         /* base case */
2999         double *p1, *p2, *prow, *pcol;
3000         double v;
3001         ae_int_t i, j;
3002 
3003         p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
3004         p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
3005         for(i=0; i<len0; i++)
3006         {
3007             pcol = p2+i;
3008             prow = p1+i*a->stride;
3009             for(j=0; j<len1; j++)
3010             {
3011                 if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
3012                 {
3013                     *nonfinite = ae_true;
3014                 }
3015                 else
3016                 {
3017                     v = fabs(*pcol);
3018                     *mx =  *mx>v ? *mx : v;
3019                     v = fabs(*prow);
3020                     *mx =  *mx>v ? *mx : v;
3021                     v = fabs(*pcol-*prow);
3022                     *err = *err>v ? *err : v;
3023                 }
3024                 pcol += a->stride;
3025                 prow++;
3026             }
3027         }
3028     }
3029 }
/*
 * this function checks that diagonal block A0 is symmetric.
 * Block A0 is specified by its offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [       .    ]
 *     [          . ]
 *
 *  this subroutine updates current values of:
 *  a) mx       maximum value of A[i,j] found so far
 *  b) err      componentwise difference between A0 and A0^T
 *
 */
static void is_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    double *p, *prow, *pcol;
    double v;
    ae_int_t i, j;

    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        is_symmetric_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
        is_symmetric_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
        is_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
        return;
    }

    /* base case */
    p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
        {
            if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
            {
                *nonfinite = ae_true;
            }
            else
            {
                v = fabs(*pcol);
                *mx =  *mx>v ? *mx : v;
                v = fabs(*prow);
                *mx =  *mx>v ? *mx : v;
                v = fabs(*pcol-*prow);
                *err = *err>v ? *err : v;
            }
        }
        v = fabs(p[i+i*a->stride]);
        *mx =  *mx>v ? *mx : v;
    }
}
/*
 * this function checks difference between offdiagonal blocks BL and BU
 * (see below). Block BL is specified by offsets (offset0,offset1)  and
 * sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 *
 *  this subroutine updates current values of:
 *  a) mx       maximum value of A[i,j] found so far
 *  b) err      componentwise difference between elements of BL and BU^H
 *
 */
static void is_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            is_hermitian_rec_off_stat(a, offset0,    offset1, n1, len1, nonfinite, mx, err, _state);
            is_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            is_hermitian_rec_off_stat(a, offset0, offset1,    len0, n1, nonfinite, mx, err, _state);
            is_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
        }
        return;
    }
    else
    {
        /* base case */
        ae_complex *p1, *p2, *prow, *pcol;
        double v;
        ae_int_t i, j;

        p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
                {
                    *nonfinite = ae_true;
                }
                else
                {
                    v = x_safepythag2(pcol->x, pcol->y);
                    *mx =  *mx>v ? *mx : v;
                    v = x_safepythag2(prow->x, prow->y);
                    *mx =  *mx>v ? *mx : v;
                    v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
                    *err = *err>v ? *err : v;
                }
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function checks that diagonal block A0 is Hermitian.
 * Block A0 is specified by its offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [       .    ]
 *     [          . ]
 *
 *  this subroutine updates current values of:
 *  a) mx       maximum value of A[i,j] found so far
 *  b) err      componentwise difference between A0 and A0^H
 *
 */
static void is_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    ae_complex *p, *prow, *pcol;
    double v;
    ae_int_t i, j;

    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        is_hermitian_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
        is_hermitian_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
        is_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
        return;
    }

    /* base case */
    p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
        {
            if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
            {
                *nonfinite = ae_true;
            }
            else
            {
                v = x_safepythag2(pcol->x, pcol->y);
                *mx =  *mx>v ? *mx : v;
                v = x_safepythag2(prow->x, prow->y);
                *mx =  *mx>v ? *mx : v;
                v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
                *err = *err>v ? *err : v;
            }
        }
        if( !ae_isfinite(p[i+i*a->stride].x, _state) || !ae_isfinite(p[i+i*a->stride].y, _state) )
        {
            *nonfinite = ae_true;
        }
        else
        {
            v = fabs(p[i+i*a->stride].x);
            *mx =  *mx>v ? *mx : v;
            v = fabs(p[i+i*a->stride].y);
            *err =  *err>v ? *err : v;
        }
    }
}
/*
 * this function copies offdiagonal block BL to its symmetric counterpart
 * BU (see below). Block BL is specified by offsets (offset0,offset1)
 * and sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 *
 */
static void force_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            force_symmetric_rec_off_stat(a, offset0,    offset1, n1, len1);
            force_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            force_symmetric_rec_off_stat(a, offset0, offset1,    len0, n1);
            force_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2);
        }
        return;
    }
    else
    {
        /* base case */
        double *p1, *p2, *prow, *pcol;
        ae_int_t i, j;

        p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                *pcol = *prow;
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function copies lower part of diagonal block A0 to its upper part.
 * Block is specified by offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [       .    ]
 *     [          . ]
 *
 */
static void force_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
{
    double *p, *prow, *pcol;
    ae_int_t i, j;

    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        force_symmetric_rec_diag_stat(a, offset, n1);
        force_symmetric_rec_diag_stat(a, offset+n1, n2);
        force_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1);
        return;
    }

    /* base case */
    p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
            *pcol = *prow;
    }
}
/*
 * this function copies Hermitian transpose of offdiagonal block BL to
 * its symmetric counterpart BU (see below). Block BL is specified by
 * offsets (offset0,offset1) and sizes (len0,len1).
 *
 *     [ .          ]
 *     [   A0  BU   ]
 * A = [   BL  A1   ]
 *     [          . ]
 */
static void force_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            x_split_length(len0, x_nb, &n1, &n2);
            force_hermitian_rec_off_stat(a, offset0,    offset1, n1, len1);
            force_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1);
        }
        else
        {
            x_split_length(len1, x_nb, &n1, &n2);
            force_hermitian_rec_off_stat(a, offset0, offset1,    len0, n1);
            force_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2);
        }
        return;
    }
    else
    {
        /* base case */
        ae_complex *p1, *p2, *prow, *pcol;
        ae_int_t i, j;

        p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
        p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;
            prow = p1+i*a->stride;
            for(j=0; j<len1; j++)
            {
                *pcol = *prow;
                pcol += a->stride;
                prow++;
            }
        }
    }
}
/*
 * this function copies Hermitian transpose of lower part of
 * diagonal block A0 to its upper part. Block is specified by offset and size.
 *
 *     [ .          ]
 *     [   A0       ]
 * A = [       .    ]
 *     [          . ]
 *
 */
static void force_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
{
    ae_complex *p, *prow, *pcol;
    ae_int_t i, j;

    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        force_hermitian_rec_diag_stat(a, offset, n1);
        force_hermitian_rec_diag_stat(a, offset+n1, n2);
        force_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1);
        return;
    }

    /* base case */
    p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;
        prow = p+i*a->stride;
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
            *pcol = *prow;
    }
}
ae_bool x_is_symmetric(x_matrix *a)
{
    double mx, err;
    ae_bool nonfinite;
    ae_state _alglib_env_state;
    if( a->datatype!=DT_REAL )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    ae_state_init(&_alglib_env_state);
    mx = 0;
    err = 0;
    nonfinite = ae_false;
    is_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
    if( nonfinite )
        return ae_false;
    if( mx==0 )
        return ae_true;
    return err/mx<=1.0E-14;
}
ae_bool x_is_hermitian(x_matrix *a)
{
    double mx, err;
    ae_bool nonfinite;
    ae_state _alglib_env_state;
    if( a->datatype!=DT_COMPLEX )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    ae_state_init(&_alglib_env_state);
    mx = 0;
    err = 0;
    nonfinite = ae_false;
    is_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
    if( nonfinite )
        return ae_false;
    if( mx==0 )
        return ae_true;
    return err/mx<=1.0E-14;
}
ae_bool x_force_symmetric(x_matrix *a)
{
    if( a->datatype!=DT_REAL )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    force_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows);
    return ae_true;
}
ae_bool x_force_hermitian(x_matrix *a)
{
    if( a->datatype!=DT_COMPLEX )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    force_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows);
    return ae_true;
}

ae_bool ae_is_symmetric(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_is_symmetric(&x);
}

ae_bool ae_is_hermitian(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_is_hermitian(&x);
}

ae_bool ae_force_symmetric(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_force_symmetric(&x);
}

ae_bool ae_force_hermitian(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_force_hermitian(&x);
}

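/************************************************************************
Usage sketch (added for clarity, not part of the original sources). Given
an already allocated square DT_REAL matrix wrapped in an ae_matrix m
(the variable m is hypothetical), the checks and fixups above are
typically combined as follows:

    if( !ae_is_symmetric(&m) )
        ae_force_symmetric(&m);    copies the lower triangle over the
                                   upper one, making the check pass

ae_is_symmetric() tolerates roundoff: it accepts a matrix whose maximum
componentwise asymmetry, divided by the largest element magnitude, does
not exceed 1.0E-14, and it rejects any matrix containing non-finite
values. ae_is_hermitian()/ae_force_hermitian() behave analogously for
DT_COMPLEX matrices.
************************************************************************/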
/************************************************************************
This function converts six-bit value (from 0 to 63)  to  character  (only
digits, lowercase and uppercase letters, minus and underscore are used).

If v is negative or greater than 63, this function returns '?'.
************************************************************************/
static char _sixbits2char_tbl[64] = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
        'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
        'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
        'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
        'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
        'u', 'v', 'w', 'x', 'y', 'z', '-', '_' };

char ae_sixbits2char(ae_int_t v)
{

    if( v<0 || v>63 )
        return '?';
    return _sixbits2char_tbl[v];

    /* v is correct, process it */
    /*if( v<10 )
        return '0'+v;
    v -= 10;
    if( v<26 )
        return 'A'+v;
    v -= 26;
    if( v<26 )
        return 'a'+v;
    v -= 26;
    return v==0 ? '-' : '_';*/
}

/************************************************************************
This function converts a character to a six-bit value (from 0 to 63).

This function is the inverse of ae_sixbits2char().
If c is not a valid character, this function returns -1.
************************************************************************/
static ae_int_t _ae_char2sixbits_tbl[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, 62, -1, -1,
     0,  1,  2,  3,  4,  5,  6,  7,
     8,  9, -1, -1, -1, -1, -1, -1,
    -1, 10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, -1, -1, -1, -1, 63,
    -1, 36, 37, 38, 39, 40, 41, 42,
    43, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58,
    59, 60, 61, -1, -1, -1, -1, -1 };
ae_int_t ae_char2sixbits(char c)
{
    return (c>=0 && c<127) ? _ae_char2sixbits_tbl[(int)c] : -1;
}

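/************************************************************************
Illustrative round trip (added sketch, not from the original sources):
the serializer encodes data in a base-64-like alphabet built from the
table above. Assuming the mapping defined by _sixbits2char_tbl:

    ae_sixbits2char(0)  == '0'      ae_char2sixbits('0') == 0
    ae_sixbits2char(10) == 'A'      ae_char2sixbits('A') == 10
    ae_sixbits2char(62) == '-'      ae_char2sixbits('-') == 62
    ae_sixbits2char(63) == '_'      ae_char2sixbits('_') == 63
    ae_sixbits2char(64) == '?'      ae_char2sixbits('?') == -1
************************************************************************/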
/************************************************************************
This function converts three bytes (24 bits) to four six-bit values
(24 bits again).

src     pointer to three bytes
dst     pointer to four ints
************************************************************************/
void ae_threebytes2foursixbits(const unsigned char *src, ae_int_t *dst)
{
    dst[0] = src[0] & 0x3F;
    dst[1] = (src[0]>>6) | ((src[1]&0x0F)<<2);
    dst[2] = (src[1]>>4) | ((src[2]&0x03)<<4);
    dst[3] = src[2]>>2;
}

/************************************************************************
This function converts four six-bit values (24 bits) to three bytes
(24 bits again).

src     pointer to four ints
dst     pointer to three bytes
************************************************************************/
void ae_foursixbits2threebytes(const ae_int_t *src, unsigned char *dst)
{
    dst[0] = (unsigned char)(     src[0] | ((src[1]&0x03)<<6));
    dst[1] = (unsigned char)((src[1]>>2) | ((src[2]&0x0F)<<4));
    dst[2] = (unsigned char)((src[2]>>4) | (src[3]<<2));
}

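/************************************************************************
Illustrative sketch (added, not part of the original sources): the two
helpers above are exact inverses on 24-bit groups, which is what lets the
serializer pack an 8-byte entry (plus one zero pad byte) into 11 printable
characters. Assuming the bit layout implemented above:

    unsigned char in[3] = { 0xAB, 0xCD, 0xEF }, out[3];
    ae_int_t six[4];
    ae_threebytes2foursixbits(in, six);    each six[i] is in [0,63]
    ae_foursixbits2threebytes(six, out);   out[] is again {0xAB,0xCD,0xEF}
************************************************************************/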
/************************************************************************
This function serializes boolean value into buffer

v           boolean value to be serialized
buf         buffer, at least 12 characters wide
            (11 chars for value, one for trailing zero)
state       ALGLIB environment state
************************************************************************/
void ae_bool2str(ae_bool v, char *buf, ae_state *state)
{
    char c = v ? '1' : '0';
    ae_int_t i;
    for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
        buf[i] = c;
    buf[AE_SER_ENTRY_LENGTH] = 0;
}

/************************************************************************
This function unserializes boolean value from buffer

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as the end
            of the boolean value.
state       ALGLIB environment state

This function raises an error in case an unexpected symbol is found
************************************************************************/
ae_bool ae_str2bool(const char *buf, ae_state *state, const char **pasttheend)
{
    ae_bool was0, was1;
    const char *emsg = "ALGLIB: unable to read boolean value from stream";

    was0 = ae_false;
    was1 = ae_false;
    while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
        buf++;
    while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
    {
        if( *buf=='0' )
        {
            was0 = ae_true;
            buf++;
            continue;
        }
        if( *buf=='1' )
        {
            was1 = ae_true;
            buf++;
            continue;
        }
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    }
    *pasttheend = buf;
    if( (!was0) && (!was1) )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    if( was0 && was1 )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    return was1 ? ae_true : ae_false;
}

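/************************************************************************
Usage sketch (added, not part of the original sources): booleans are
stored as 11 identical '0' or '1' characters, so a round trip looks like
this (buffer and variable names are hypothetical, state is an initialized
ae_state):

    char buf[AE_SER_ENTRY_LENGTH+1];
    const char *past;
    ae_bool2str(ae_true, buf, state);             buf holds "11111111111"
    ae_bool v = ae_str2bool(buf, state, &past);   v==ae_true, past points
                                                  just past the last '1'
************************************************************************/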
/************************************************************************
This function serializes integer value into buffer

v           integer value to be serialized
buf         buffer, at least 12 characters wide
            (11 chars for value, one for trailing zero)
state       ALGLIB environment state
************************************************************************/
void ae_int2str(ae_int_t v, char *buf, ae_state *state)
{
    union _u
    {
        ae_int_t ival;
        unsigned char bytes[9];
    } u;
    ae_int_t i;
    ae_int_t sixbits[12];
    unsigned char c;

    /*
     * copy v to array of chars, sign extending it and
     * converting to little endian order
     *
     * because we don't want to mention size of ae_int_t explicitly,
     * we do it as follows:
     * 1. we fill u.bytes by zeros or ones (depending on sign of v)
     * 2. we copy v to u.ival
     * 3. if we run on big endian architecture, we reorder u.bytes
     * 4. now we have signed 64-bit representation of v stored in u.bytes
     * 5. additionally, we set 9th byte of u.bytes to zero in order to
     *    simplify conversion to six-bit representation
     */
    c = v<0 ? (unsigned char)0xFF : (unsigned char)0x00;
    u.ival = v;
    for(i=sizeof(ae_int_t); i<=8; i++) /* <=8 is preferred because it avoids unnecessary compiler warnings*/
        u.bytes[i] = c;
    u.bytes[8] = 0;
    if( state->endianness==AE_BIG_ENDIAN )
    {
        for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
        {
            unsigned char tc;
            tc = u.bytes[i];
            u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
            u.bytes[sizeof(ae_int_t)-1-i] = tc;
        }
    }

    /*
     * convert to six-bit representation, output
     *
     * NOTE: last 12th element of sixbits is always zero, we do not output it
     */
    ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
    ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
    ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
    for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
        buf[i] = ae_sixbits2char(sixbits[i]);
    buf[AE_SER_ENTRY_LENGTH] = 0x00;
}

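/************************************************************************
Illustrative sketch (added, not part of the original sources): each entry
is written as 11 characters drawn from the six-bit alphabet and terminated
by a zero byte. A typical round trip through the functions in this file
(ae_str2int appears below; variable names are hypothetical):

    char buf[AE_SER_ENTRY_LENGTH+1];
    const char *past;
    ae_int2str(-42, buf, state);                 sign-extended to 64 bits,
                                                 then packed into 11 chars
    ae_int_t v = ae_str2int(buf, state, &past);  v==-42
************************************************************************/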
/************************************************************************
This function serializes 64-bit integer value into buffer

v           integer value to be serialized
buf         buffer, at least 12 characters wide
            (11 chars for value, one for trailing zero)
state       ALGLIB environment state
************************************************************************/
void ae_int642str(ae_int64_t v, char *buf, ae_state *state)
{
    unsigned char bytes[9];
    ae_int_t i;
    ae_int_t sixbits[12];

    /*
     * copy v to array of chars, sign extending it and
     * converting to little endian order
     *
     * because we don't want to mention size of ae_int_t explicitly,
     * we do it as follows:
     * 1. we fill bytes by zeros or ones (depending on sign of v)
     * 2. we memmove v to bytes
     * 3. if we run on big endian architecture, we reorder bytes
     * 4. now we have signed 64-bit representation of v stored in bytes
     * 5. additionally, we set 9th byte of bytes to zero in order to
     *    simplify conversion to six-bit representation
     */
    memset(bytes, v<0 ? 0xFF : 0x00, 8);
    memmove(bytes, &v, 8);
    bytes[8] = 0;
    if( state->endianness==AE_BIG_ENDIAN )
    {
        for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
        {
            unsigned char tc;
            tc = bytes[i];
            bytes[i] = bytes[sizeof(ae_int_t)-1-i];
            bytes[sizeof(ae_int_t)-1-i] = tc;
        }
    }

    /*
     * convert to six-bit representation, output
     *
     * NOTE: last 12th element of sixbits is always zero, we do not output it
     */
    ae_threebytes2foursixbits(bytes+0, sixbits+0);
    ae_threebytes2foursixbits(bytes+3, sixbits+4);
    ae_threebytes2foursixbits(bytes+6, sixbits+8);
    for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
        buf[i] = ae_sixbits2char(sixbits[i]);
    buf[AE_SER_ENTRY_LENGTH] = 0x00;
}

/************************************************************************
This function unserializes integer value from string

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as the end
            of the integer value.
state       ALGLIB environment state

This function raises an error in case an unexpected symbol is found
************************************************************************/
ae_int_t ae_str2int(const char *buf, ae_state *state, const char **pasttheend)
{
    const char *emsg = "ALGLIB: unable to read integer value from stream";
    ae_int_t sixbits[12];
    ae_int_t sixbitsread, i;
    union _u
    {
        ae_int_t ival;
        unsigned char bytes[9];
    } u;
    /*
     * 1. skip leading spaces
     * 2. read and decode six-bit digits
     * 3. set trailing digits to zeros
     * 4. convert to little endian 64-bit integer representation
     * 5. convert to big endian representation, if needed
     */
    while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
        buf++;
    sixbitsread = 0;
    while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
    {
        ae_int_t d;
        d = ae_char2sixbits(*buf);
        if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
            ae_break(state, ERR_ASSERTION_FAILED, emsg);
        sixbits[sixbitsread] = d;
        sixbitsread++;
        buf++;
    }
    *pasttheend = buf;
    if( sixbitsread==0 )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    for(i=sixbitsread; i<12; i++)
        sixbits[i] = 0;
    ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
    ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
    ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
    if( state->endianness==AE_BIG_ENDIAN )
    {
        for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
        {
            unsigned char tc;
            tc = u.bytes[i];
            u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
            u.bytes[sizeof(ae_int_t)-1-i] = tc;
        }
    }
    return u.ival;
}

/************************************************************************
This function unserializes 64-bit integer value from string

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as the end
            of the integer value.
state       ALGLIB environment state

This function raises an error in case an unexpected symbol is found
************************************************************************/
ae_int64_t ae_str2int64(const char *buf, ae_state *state, const char **pasttheend)
{
    const char *emsg = "ALGLIB: unable to read integer value from stream";
    ae_int_t sixbits[12];
    ae_int_t sixbitsread, i;
    unsigned char bytes[9];
    ae_int64_t result;

    /*
     * 1. skip leading spaces
     * 2. read and decode six-bit digits
     * 3. set trailing digits to zeros
     * 4. convert to little endian 64-bit integer representation
     * 5. convert to big endian representation, if needed
     */
    while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
        buf++;
    sixbitsread = 0;
    while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
    {
        ae_int_t d;
        d = ae_char2sixbits(*buf);
        if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
            ae_break(state, ERR_ASSERTION_FAILED, emsg);
        sixbits[sixbitsread] = d;
        sixbitsread++;
        buf++;
    }
    *pasttheend = buf;
    if( sixbitsread==0 )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    for(i=sixbitsread; i<12; i++)
        sixbits[i] = 0;
    ae_foursixbits2threebytes(sixbits+0, bytes+0);
    ae_foursixbits2threebytes(sixbits+4, bytes+3);
    ae_foursixbits2threebytes(sixbits+8, bytes+6);
    if( state->endianness==AE_BIG_ENDIAN )
    {
        for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
        {
            unsigned char tc;
            tc = bytes[i];
            bytes[i] = bytes[sizeof(ae_int_t)-1-i];
            bytes[sizeof(ae_int_t)-1-i] = tc;
        }
    }
    memmove(&result, bytes, sizeof(result));
    return result;
}


/************************************************************************
This function serializes double value into buffer

v           double value to be serialized
buf         buffer, at least 12 characters wide
            (11 chars for value, one for trailing zero)
state       ALGLIB environment state
************************************************************************/
void ae_double2str(double v, char *buf, ae_state *state)
{
    union _u
    {
        double dval;
        unsigned char bytes[9];
    } u;
    ae_int_t i;
    ae_int_t sixbits[12];

    /*
     * handle special quantities
     */
    if( ae_isnan(v, state) )
    {
        const char *s = ".nan_______";
        memmove(buf, s, strlen(s)+1);
        return;
    }
    if( ae_isposinf(v, state) )
    {
        const char *s = ".posinf____";
        memmove(buf, s, strlen(s)+1);
        return;
    }
    if( ae_isneginf(v, state) )
    {
        const char *s = ".neginf____";
        memmove(buf, s, strlen(s)+1);
        return;
    }

    /*
     * process general case:
     * 1. copy v to array of chars
     * 2. set 9th byte of u.bytes to zero in order to
     *    simplify conversion to six-bit representation
     * 3. convert to little endian (if needed)
     * 4. convert to six-bit representation
     *    (last 12th element of sixbits is always zero, we do not output it)
     */
    u.dval = v;
    u.bytes[8] = 0;
    if( state->endianness==AE_BIG_ENDIAN )
    {
        for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
        {
            unsigned char tc;
            tc = u.bytes[i];
            u.bytes[i] = u.bytes[sizeof(double)-1-i];
            u.bytes[sizeof(double)-1-i] = tc;
        }
    }
    ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
    ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
    ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
    for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
        buf[i] = ae_sixbits2char(sixbits[i]);
    buf[AE_SER_ENTRY_LENGTH] = 0x00;
}

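/************************************************************************
Illustrative note (added, not part of the original sources): doubles are
serialized bit-exactly (no decimal formatting), so round trips through
ae_str2double() below are lossless, and special values use fixed
11-character markers. A hypothetical round trip:

    char buf[AE_SER_ENTRY_LENGTH+1];
    const char *past;
    ae_double2str(0.1, buf, state);                11-char six-bit encoding
                                                   of the raw IEEE-754 bits
    double d = ae_str2double(buf, state, &past);   d is exactly 0.1 again

    ae_double2str(state->v_nan, buf, state);       buf holds ".nan_______"
************************************************************************/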
/************************************************************************
This function unserializes double value from string

buf         buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as the end
            of the double value.
state       ALGLIB environment state

This function raises an error in case an unexpected symbol is found
************************************************************************/
double ae_str2double(const char *buf, ae_state *state, const char **pasttheend)
{
    const char *emsg = "ALGLIB: unable to read double value from stream";
    ae_int_t sixbits[12];
    ae_int_t sixbitsread, i;
    union _u
    {
        double dval;
        unsigned char bytes[9];
    } u;


     /*
      * skip leading spaces
      */
    while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
        buf++;

    /*
     * Handle special cases
     */
    if( *buf=='.' )
    {
        const char *s_nan =    ".nan_______";
        const char *s_posinf = ".posinf____";
        const char *s_neginf = ".neginf____";
        if( strncmp(buf, s_nan, strlen(s_nan))==0 )
        {
            *pasttheend = buf+strlen(s_nan);
            return state->v_nan;
        }
        if( strncmp(buf, s_posinf, strlen(s_posinf))==0 )
        {
            *pasttheend = buf+strlen(s_posinf);
            return state->v_posinf;
        }
        if( strncmp(buf, s_neginf, strlen(s_neginf))==0 )
        {
            *pasttheend = buf+strlen(s_neginf);
            return state->v_neginf;
        }
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    }

    /*
     * General case:
     * 1. read and decode six-bit digits
     * 2. check that all 11 digits were read
     * 3. set last 12th digit to zero (needed for simplicity of conversion)
     * 4. convert to 8 bytes
     * 5. convert to big endian representation, if needed
     */
    sixbitsread = 0;
    while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
    {
        ae_int_t d;
        d = ae_char2sixbits(*buf);
        if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
            ae_break(state, ERR_ASSERTION_FAILED, emsg);
        sixbits[sixbitsread] = d;
        sixbitsread++;
        buf++;
    }
    *pasttheend = buf;
    if( sixbitsread!=AE_SER_ENTRY_LENGTH )
        ae_break(state, ERR_ASSERTION_FAILED, emsg);
    sixbits[AE_SER_ENTRY_LENGTH] = 0;
    ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
    ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
    ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
    if( state->endianness==AE_BIG_ENDIAN )
    {
        for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
        {
            unsigned char tc;
            tc = u.bytes[i];
            u.bytes[i] = u.bytes[sizeof(double)-1-i];
            u.bytes[sizeof(double)-1-i] = tc;
        }
    }
    return u.dval;
}


/************************************************************************
This function performs given number of spin-wait iterations
************************************************************************/
void ae_spin_wait(ae_int_t cnt)
{
    /*
     * these strange operations with ae_never_change_it are necessary to
     * prevent compiler optimization of the loop.
     */
    volatile ae_int_t i;

    /* very unlikely because no one will wait for such a large number of cycles */
    if( cnt>0x12345678 )
        ae_never_change_it = cnt%10;

    /* spin wait, test condition which will never be true */
    for(i=0; i<cnt; i++)
        if( ae_never_change_it>0 )
            ae_never_change_it--;
}


/************************************************************************
This function causes the calling thread to relinquish the CPU. The thread
is moved to the end of the queue and some other thread gets to run.

NOTE: this function should NOT be called when AE_OS is AE_UNKNOWN  -  the
      whole program will be abnormally terminated.
************************************************************************/
void ae_yield()
{
#if AE_OS==AE_WINDOWS
    if( !SwitchToThread() )
        Sleep(0);
#elif AE_OS==AE_POSIX
    sched_yield();
#else
    abort();
#endif
}

/************************************************************************
This function initializes _lock structure which  is  internally  used  by
ae_lock high-level structure.

_lock structure is statically allocated, no malloc() calls are  performed
during its allocation. However, you have to call  _ae_free_lock_raw()  in
order to deallocate this lock properly.
************************************************************************/
void _ae_init_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    p->p_lock = (ae_int_t*)ae_align((void*)(&p->buf),AE_LOCK_ALIGNMENT);
    p->p_lock[0] = 0;
#elif AE_OS==AE_POSIX
    pthread_mutex_init(&p->mutex, NULL);
#else
    p->is_locked = ae_false;
#endif
}


/************************************************************************
This function acquires _lock structure.

It is the low-level workhorse utilized by ae_acquire_lock().
************************************************************************/
void _ae_acquire_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    ae_int_t cnt = 0;
#ifdef AE_SMP_DEBUGCOUNTERS
    InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_acquisitions);
#endif
    for(;;)
    {
        if( InterlockedCompareExchange((LONG volatile *)p->p_lock, 1, 0)==0 )
            return;
        ae_spin_wait(AE_LOCK_CYCLES);
#ifdef AE_SMP_DEBUGCOUNTERS
        InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_spinwaits);
#endif
        cnt++;
        if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
        {
#ifdef AE_SMP_DEBUGCOUNTERS
            InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_yields);
#endif
            ae_yield();
        }
    }
#elif AE_OS==AE_POSIX
    ae_int_t cnt = 0;
    for(;;)
    {
        if( pthread_mutex_trylock(&p->mutex)==0 )
            return;
        ae_spin_wait(AE_LOCK_CYCLES);
        cnt++;
        if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
            ae_yield();
    }
#else
    AE_CRITICAL_ASSERT(!p->is_locked);
    p->is_locked = ae_true;
#endif
}


/************************************************************************
This function releases _lock structure.

It is the low-level function used by ae_release_lock().
************************************************************************/
void _ae_release_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    InterlockedExchange((LONG volatile *)p->p_lock, 0);
#elif AE_OS==AE_POSIX
    pthread_mutex_unlock(&p->mutex);
#else
    p->is_locked = ae_false;
#endif
}


/************************************************************************
This function frees _lock structure.
************************************************************************/
void _ae_free_lock_raw(_lock *p)
{
#if AE_OS==AE_POSIX
    pthread_mutex_destroy(&p->mutex);
#endif
}


/************************************************************************
This function initializes ae_lock structure.

INPUT PARAMETERS:
    lock                -   pointer to lock structure, must be zero-filled
    state               -   pointer to state structure, used for exception
                            handling and management of automatic objects.
    make_automatic      -   if true, lock object is added to automatic
                            memory management list.

NOTE: as a special exception, this function allows you  to  specify  NULL
      state pointer. In this case all exceptions arising during construction
      are handled as critical failures, with abort() being called.
      make_automatic must be false on such calls.
************************************************************************/
void ae_init_lock(ae_lock *lock, ae_state *state, ae_bool make_automatic)
{
    _lock *p;
    AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
    if(state==NULL)
    {
        ae_state _tmp_state;
        AE_CRITICAL_ASSERT(!make_automatic);
        ae_state_init(&_tmp_state);
        ae_init_lock(lock, &_tmp_state, ae_false);
        ae_state_clear(&_tmp_state);
        return;
    }
    lock->eternal = ae_false;
    ae_db_init(&lock->db, sizeof(_lock), state, make_automatic);
    lock->lock_ptr = lock->db.ptr;
    p = (_lock*)lock->lock_ptr;
    _ae_init_lock_raw(p);
}

/************************************************************************
This function initializes an "eternal" ae_lock structure which is expected
to persist until the end of the execution of the program.  Eternal  locks
can not be deallocated (cleared) and  do  not  increase debug  allocation
counters.  Errors  during  allocation  of eternal  locks  are  considered
critical exceptions and handled by calling abort().

INPUT PARAMETERS:
    lock                -   pointer to lock structure, must be zero-filled
************************************************************************/
void ae_init_lock_eternal(ae_lock *lock)
{
    _lock *p;
    AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
    lock->eternal = ae_true;
    lock->lock_ptr = eternal_malloc(sizeof(_lock));
    p = (_lock*)lock->lock_ptr;
    _ae_init_lock_raw(p);
}


/************************************************************************
This function acquires lock. In case lock is busy, we perform several
iterations inside tight loop before trying again.
************************************************************************/
void ae_acquire_lock(ae_lock *lock)
{
    _lock *p;
    p = (_lock*)lock->lock_ptr;
    _ae_acquire_lock_raw(p);
}


/************************************************************************
This function releases lock.
************************************************************************/
void ae_release_lock(ae_lock *lock)
{
    _lock *p;
    p = (_lock*)lock->lock_ptr;
    _ae_release_lock_raw(p);
}


/************************************************************************
This function frees ae_lock structure.
************************************************************************/
void ae_free_lock(ae_lock *lock)
{
    _lock *p;
    AE_CRITICAL_ASSERT(!lock->eternal);
    p = (_lock*)lock->lock_ptr;
    if( p!=NULL )
        _ae_free_lock_raw(p);
    ae_db_free(&lock->db);
}


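/************************************************************************
Usage sketch (added, not part of the original sources): a typical
lifecycle of a non-eternal lock protecting some shared data; names other
than the ae_* functions are hypothetical:

    ae_lock lck;
    memset(&lck, 0, sizeof(lck));        must be zero-filled before init
    ae_init_lock(&lck, NULL, ae_false);  NULL state: failures call abort()
    ...
    ae_acquire_lock(&lck);               spin, then yield while busy
    update_shared_data();
    ae_release_lock(&lck);
    ...
    ae_free_lock(&lck);
************************************************************************/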
/************************************************************************
This function creates ae_shared_pool structure.

dst                 destination shared pool, must be zero-filled
                    already allocated, but not initialized.
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, pool will be registered in the current frame
                    of the state structure;

Error handling:
* on failure calls ae_break() with NULL state pointer. Usually it  results
  in abort() call.

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_shared_pool_init(void *_dst, ae_state *state, ae_bool make_automatic)
{
    ae_shared_pool *dst;

    AE_CRITICAL_ASSERT(state!=NULL);
    dst = (ae_shared_pool*)_dst;
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));

    /* init */
    dst->seed_object = NULL;
    dst->recycled_objects = NULL;
    dst->recycled_entries = NULL;
    dst->enumeration_counter = NULL;
    dst->size_of_object = 0;
    dst->init = NULL;
    dst->init_copy = NULL;
    dst->destroy = NULL;
    dst->frame_entry.deallocator = ae_shared_pool_destroy;
    dst->frame_entry.ptr = dst;
    if( make_automatic )
        ae_db_attach(&dst->frame_entry, state);
    ae_init_lock(&dst->pool_lock, state, ae_false);
}


/************************************************************************
This function clears all dynamically allocated fields of the pool except
for the lock. It does NOT try to acquire pool_lock.

NOTE: this function is NOT thread-safe, it is not protected by lock.
************************************************************************/
static void ae_shared_pool_internalclear(ae_shared_pool *dst)
{
    ae_shared_pool_entry *ptr, *tmp;

    /* destroy seed */
    if( dst->seed_object!=NULL )
    {
        dst->destroy((void*)dst->seed_object);
        ae_free((void*)dst->seed_object);
        dst->seed_object = NULL;
    }

    /* destroy recycled objects */
    for(ptr=dst->recycled_objects; ptr!=NULL;)
    {
        tmp = (ae_shared_pool_entry*)ptr->next_entry;
        dst->destroy(ptr->obj);
        ae_free(ptr->obj);
        ae_free(ptr);
        ptr = tmp;
    }
    dst->recycled_objects = NULL;

    /* destroy recycled entries */
    for(ptr=dst->recycled_entries; ptr!=NULL;)
    {
        tmp = (ae_shared_pool_entry*)ptr->next_entry;
        ae_free(ptr);
        ptr = tmp;
    }
    dst->recycled_entries = NULL;
}


/************************************************************************
This function creates copy of ae_shared_pool.

dst                 destination pool, must be zero-filled
src                 source pool
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, pool will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.

NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
      you should NOT call it when lock can be used by another thread.
************************************************************************/
void ae_shared_pool_init_copy(void *_dst, void *_src, ae_state *state, ae_bool make_automatic)
{
    ae_shared_pool *dst, *src;
    ae_shared_pool_entry *ptr;

    /* state!=NULL, allocation errors result in exception */
    /* AE_CRITICAL_ASSERT(state!=NULL); */

    dst = (ae_shared_pool*)_dst;
    src = (ae_shared_pool*)_src;
    ae_shared_pool_init(dst, state, make_automatic);

    /* copy non-pointer fields */
    dst->size_of_object = src->size_of_object;
    dst->init = src->init;
    dst->init_copy = src->init_copy;
    dst->destroy = src->destroy;

    /* copy seed object */
    if( src->seed_object!=NULL )
    {
        dst->seed_object = ae_malloc(dst->size_of_object, state);
        memset(dst->seed_object, 0, dst->size_of_object);
        dst->init_copy(dst->seed_object, src->seed_object, state, ae_false);
    }

    /* copy recycled objects */
    dst->recycled_objects = NULL;
    for(ptr=src->recycled_objects; ptr!=NULL; ptr=(ae_shared_pool_entry*)ptr->next_entry)
    {
        ae_shared_pool_entry *tmp;

        /* allocate entry, immediately add to the recycled list
           (we do not want to lose it in case of future malloc failures) */
        tmp = (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
        memset(tmp, 0, sizeof(*tmp));
        tmp->next_entry = dst->recycled_objects;
        dst->recycled_objects = tmp;

        /* prepare place for object, init_copy() it */
        tmp->obj =  ae_malloc(dst->size_of_object, state);
        memset(tmp->obj, 0, dst->size_of_object);
        dst->init_copy(tmp->obj, ptr->obj, state, ae_false);
    }

    /* recycled entries are not copied because they do not store any information */
    dst->recycled_entries = NULL;

    /* enumeration counter is reset on copying */
    dst->enumeration_counter = NULL;

    /* initialize frame record */
    dst->frame_entry.deallocator = ae_shared_pool_destroy;
    dst->frame_entry.ptr = dst;
}
4438 
4439 
4440 /************************************************************************
4441 This function performs destruction of the pool object.
4442 
4443 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4444       you should NOT call it when pool can be used by another thread.
4445 ************************************************************************/
4446 void ae_shared_pool_clear(void *_dst)
4447 {
4448     ae_shared_pool *dst = (ae_shared_pool*)_dst;
4449 
4450     /* clear seed and lists */
4451     ae_shared_pool_internalclear(dst);
4452 
4453     /* clear fields */
4454     dst->seed_object = NULL;
4455     dst->recycled_objects = NULL;
4456     dst->recycled_entries = NULL;
4457     dst->enumeration_counter = NULL;
4458     dst->size_of_object = 0;
4459     dst->init = NULL;
4460     dst->init_copy = NULL;
4461     dst->destroy = NULL;
4462 }
4463 
4464 void ae_shared_pool_destroy(void *_dst)
4465 {
4466     ae_shared_pool *dst = (ae_shared_pool*)_dst;
4467     ae_shared_pool_clear(_dst);
4468     ae_free_lock(&dst->pool_lock);
4469 }
4470 
4471 
4472 /************************************************************************
4473 This function returns True if the internal seed object was set. It returns
4474 False for an unseeded pool.
4475 
4476 dst                 destination pool (initialized by constructor function)
4477 
4478 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4479       you should NOT call it when lock can be used by another thread.
4480 ************************************************************************/
4481 ae_bool ae_shared_pool_is_initialized(void *_dst)
4482 {
4483     ae_shared_pool *dst = (ae_shared_pool*)_dst;
4484     return dst->seed_object!=NULL;
4485 }
4486 
4487 
4488 /************************************************************************
4489 This function sets internal seed object. All objects owned by the pool
4490 (current seed object, recycled objects) are automatically freed.
4491 
4492 dst                 destination pool (initialized by constructor function)
4493 seed_object         new seed object
4494 size_of_object      sizeof(), used to allocate memory
4495 init                constructor function
4496 init_copy           copy constructor
4497 destroy             destructor function
4498 state               ALGLIB environment state
4499 
4500 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4501       you should NOT call it when lock can be used by another thread.
4502 ************************************************************************/
4503 void ae_shared_pool_set_seed(
4504     ae_shared_pool  *dst,
4505     void            *seed_object,
4506     ae_int_t        size_of_object,
4507     void            (*init)(void* dst, ae_state* state, ae_bool make_automatic),
4508     void            (*init_copy)(void* dst, void* src, ae_state* state, ae_bool make_automatic),
4509     void            (*destroy)(void* ptr),
4510     ae_state        *state)
4511 {
4512     /* state!=NULL, allocation errors result in exception */
4513     AE_CRITICAL_ASSERT(state!=NULL);
4514 
4515     /* destroy internal objects */
4516     ae_shared_pool_internalclear(dst);
4517 
4518     /* set non-pointer fields */
4519     dst->size_of_object = size_of_object;
4520     dst->init = init;
4521     dst->init_copy = init_copy;
4522     dst->destroy = destroy;
4523 
4524     /* set seed object */
4525     dst->seed_object = ae_malloc(size_of_object, state);
4526     memset(dst->seed_object, 0, size_of_object);
4527     init_copy(dst->seed_object, seed_object, state, ae_false);
4528 }
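
/************************************************************************
Illustrative seeding sketch (not part of the library). It shows a typical
way to seed a pool before parallel use, assuming a hypothetical user type
mytype_t with ALGLIB-convention functions _mytype_init(), _mytype_init_copy()
and _mytype_destroy() (these names are examples only):

    ae_state _state;
    ae_shared_pool pool;
    mytype_t seed;

    ae_state_init(&_state);
    ae_shared_pool_init(&pool, &_state, ae_false);
    // ... initialize and fill the fields of 'seed' here ...
    ae_shared_pool_set_seed(&pool, &seed, (ae_int_t)sizeof(seed),
        _mytype_init, _mytype_init_copy, _mytype_destroy, &_state);

After seeding, every ae_shared_pool_retrieve() hands out an independent
init_copy() of 'seed'.
************************************************************************/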
4529 
4530 
4531 /************************************************************************
4532 This  function  retrieves  a  copy  of  the seed object from the pool and
4533 stores it in the target smart pointer pptr.
4534 
4535 If the target pointer owns a non-NULL value, that value is  deallocated
4536 before the value retrieved from the pool is stored. The target  pointer
4537 becomes the owner of the retrieved value.
4538 
4539 pool                pool
4540 pptr                pointer to ae_smart_ptr structure
4541 state               ALGLIB environment state
4542 
4543 NOTE: this function IS thread-safe.  It  acquires  pool  lock  during its
4544       operation and can be used simultaneously from several threads.
4545 ************************************************************************/
4546 void ae_shared_pool_retrieve(
4547     ae_shared_pool  *pool,
4548     ae_smart_ptr    *pptr,
4549     ae_state        *state)
4550 {
4551     void *new_obj;
4552 
4553     /* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
4554     AE_CRITICAL_ASSERT(state!=NULL);
4555 
4556     /* assert that pool was seeded */
4557     ae_assert(
4558         pool->seed_object!=NULL,
4559         "ALGLIB: shared pool is not seeded, PoolRetrieve() failed",
4560         state);
4561 
4562     /* acquire lock */
4563     ae_acquire_lock(&pool->pool_lock);
4564 
4565     /* try to reuse recycled objects */
4566     if( pool->recycled_objects!=NULL )
4567     {
4568         ae_shared_pool_entry *result;
4569 
4570         /* retrieve entry/object from list of recycled objects */
4571         result = pool->recycled_objects;
4572         pool->recycled_objects = (ae_shared_pool_entry*)pool->recycled_objects->next_entry;
4573         new_obj = result->obj;
4574         result->obj = NULL;
4575 
4576         /* move entry to list of recycled entries */
4577         result->next_entry = pool->recycled_entries;
4578         pool->recycled_entries = result;
4579 
4580         /* release lock */
4581         ae_release_lock(&pool->pool_lock);
4582 
4583         /* assign object to smart pointer */
4584         ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
4585         return;
4586     }
4587 
4588     /* release lock; we do not need it anymore because copy constructor does not modify source variable */
4589     ae_release_lock(&pool->pool_lock);
4590 
4591     /* create new object from seed, immediately assign object to smart pointer
4592       (do not want to lose it in case of future failures) */
4593     new_obj = ae_malloc(pool->size_of_object, state);
4594     memset(new_obj, 0, pool->size_of_object);
4595     ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
4596 
4597     /* perform actual copying; before this line smartptr points to zero-filled instance */
4598     pool->init_copy(new_obj, pool->seed_object, state, ae_false);
4599 }
4600 
4601 
4602 /************************************************************************
4603 This function recycles the object owned by a smart pointer by moving  it
4604 to the internal storage of the shared pool.
4605 
4606 The source pointer must own the object. After the function returns,  the
4607 pointer owns NULL.
4608 
4609 pool                pool
4610 pptr                pointer to ae_smart_ptr structure
4611 state               ALGLIB environment state
4612 
4613 NOTE: this function IS thread-safe.  It  acquires  pool  lock  during its
4614       operation and can be used simultaneously from several threads.
4615 ************************************************************************/
4616 void ae_shared_pool_recycle(
4617     ae_shared_pool  *pool,
4618     ae_smart_ptr    *pptr,
4619     ae_state        *state)
4620 {
4621     ae_shared_pool_entry *new_entry;
4622 
4623     /* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
4624     AE_CRITICAL_ASSERT(state!=NULL);
4625 
4626     /* assert that pool was seeded */
4627     ae_assert(
4628         pool->seed_object!=NULL,
4629         "ALGLIB: shared pool is not seeded, PoolRecycle() failed",
4630         state);
4631 
4632     /* assert that pointer non-null and owns the object */
4633     ae_assert(pptr->is_owner,  "ALGLIB: pptr in ae_shared_pool_recycle() does not own its pointer", state);
4634     ae_assert(pptr->ptr!=NULL, "ALGLIB: pptr in ae_shared_pool_recycle() is NULL", state);
4635 
4636     /* acquire lock */
4637     ae_acquire_lock(&pool->pool_lock);
4638 
4639     /* acquire shared pool entry (reuse one from recycled_entries or allocate new one) */
4640     if( pool->recycled_entries!=NULL )
4641     {
4642         /* reuse previously allocated entry */
4643         new_entry = pool->recycled_entries;
4644         pool->recycled_entries = (ae_shared_pool_entry*)new_entry->next_entry;
4645     }
4646     else
4647     {
4648         /*
4649          * Allocate memory for new entry.
4650          *
4651          * NOTE: we release pool lock during allocation because ae_malloc() may raise
4652          *       exception and we do not want our pool to be left in the locked state.
4653          */
4654         ae_release_lock(&pool->pool_lock);
4655         new_entry =  (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
4656         ae_acquire_lock(&pool->pool_lock);
4657     }
4658 
4659     /* add object to the list of recycled objects */
4660     new_entry->obj = pptr->ptr;
4661     new_entry->next_entry = pool->recycled_objects;
4662     pool->recycled_objects = new_entry;
4663 
4664     /* release lock object */
4665     ae_release_lock(&pool->pool_lock);
4666 
4667     /* release source pointer */
4668     ae_smart_ptr_release(pptr);
4669 }
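
/************************************************************************
Illustrative retrieve/recycle sketch (not part of the library). A worker
typically borrows a private copy of the seed, uses it, and returns it so
other workers can reuse the allocation. The smart-pointer setup below is
schematic and assumes the usual ae_smart_ptr API; mytype_t is a
hypothetical seeded type:

    ae_smart_ptr ptr;
    mytype_t *obj;

    // ... ae_smart_ptr_init(&ptr, (void**)&obj, ...) or equivalent setup ...
    ae_shared_pool_retrieve(&pool, &ptr, &_state);   // obj now points to a private copy
    // ... use *obj ...
    ae_shared_pool_recycle(&pool, &ptr, &_state);    // copy returned to pool, ptr owns NULL

Both calls are thread-safe, so many workers may retrieve/recycle
concurrently from the same pool.
************************************************************************/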
4670 
4671 
4672 /************************************************************************
4673 This function clears the internal list of recycled objects, but does not
4674 change the seed object managed by the pool.
4675 
4676 pool                pool
4677 state               ALGLIB environment state
4678 
4679 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4680       you should NOT call it when lock can be used by another thread.
4681 ************************************************************************/
4682 void ae_shared_pool_clear_recycled(
4683     ae_shared_pool  *pool,
4684     ae_state        *state)
4685 {
4686     ae_shared_pool_entry *ptr, *tmp;
4687 
4688     /* clear recycled objects */
4689     for(ptr=pool->recycled_objects; ptr!=NULL;)
4690     {
4691         tmp = (ae_shared_pool_entry*)ptr->next_entry;
4692         pool->destroy(ptr->obj);
4693         ae_free(ptr->obj);
4694         ae_free(ptr);
4695         ptr = tmp;
4696     }
4697     pool->recycled_objects = NULL;
4698 }
4699 
4700 
4701 /************************************************************************
4702 This function allows enumeration of the recycled elements of the  shared
4703 pool. It stores a pointer to the first recycled object in the smart pointer.
4704 
4705 IMPORTANT:
4706 * if the target pointer owns a non-NULL value, it  is  deallocated before
4707   the value retrieved from the pool is stored
4708 * the recycled object IS NOT removed from the pool
4709 * the target pointer DOES NOT become owner of the new value
4710 * this function IS NOT thread-safe
4711 * you SHOULD NOT modify the shared pool during enumeration (although  you
4712   can modify the state of the objects retrieved from the pool)
4713 * if there are no recycled objects in the pool, NULL is stored to pptr
4714 * if the pool is not seeded, NULL is stored to pptr
4715 
4716 pool                pool
4717 pptr                pointer to ae_smart_ptr structure
4718 state               ALGLIB environment state
4719 ************************************************************************/
4720 void ae_shared_pool_first_recycled(
4721     ae_shared_pool  *pool,
4722     ae_smart_ptr    *pptr,
4723     ae_state        *state)
4724 {
4725     /* modify internal enumeration counter */
4726     pool->enumeration_counter = pool->recycled_objects;
4727 
4728     /* exit on empty list */
4729     if( pool->enumeration_counter==NULL )
4730     {
4731         ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4732         return;
4733     }
4734 
4735     /* assign object to smart pointer */
4736     ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
4737 }
4738 
4739 
4740 /************************************************************************
4741 This function allows enumeration of the recycled elements of the  shared
4742 pool. It stores a pointer to the next recycled object in the smart pointer.
4743 
4744 IMPORTANT:
4745 * if the target pointer owns a non-NULL value, it  is  deallocated before
4746   the value retrieved from the pool is stored
4747 * the recycled object IS NOT removed from the pool
4748 * the target pointer DOES NOT become owner of the new value
4749 * this function IS NOT thread-safe
4750 * you SHOULD NOT modify the shared pool during enumeration (although  you
4751   can modify the state of the objects retrieved from the pool)
4752 * if there are no recycled objects left in the pool, NULL is stored to pptr
4753 * if the pool is not seeded, NULL is stored to pptr
4754 
4755 pool                pool
4756 pptr                pointer to ae_smart_ptr structure
4757 state               ALGLIB environment state
4758 ************************************************************************/
4759 void ae_shared_pool_next_recycled(
4760     ae_shared_pool  *pool,
4761     ae_smart_ptr    *pptr,
4762     ae_state        *state)
4763 {
4764     /* exit on end of list */
4765     if( pool->enumeration_counter==NULL )
4766     {
4767         ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4768         return;
4769     }
4770 
4771     /* modify internal enumeration counter */
4772     pool->enumeration_counter = (ae_shared_pool_entry*)pool->enumeration_counter->next_entry;
4773 
4774     /* exit on empty list */
4775     if( pool->enumeration_counter==NULL )
4776     {
4777         ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4778         return;
4779     }
4780 
4781     /* assign object to smart pointer */
4782     ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
4783 }
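
/************************************************************************
Illustrative enumeration sketch (not part of the library). The pool is
traversed without transferring ownership; p.ptr becomes NULL once the list
is exhausted (mytype_t is a hypothetical seeded type, p is an already
initialized ae_smart_ptr):

    ae_shared_pool_first_recycled(&pool, &p, &_state);
    while( p.ptr!=NULL )
    {
        // ... inspect or update the recycled object via p.ptr,
        //     but do not modify the pool itself ...
        ae_shared_pool_next_recycled(&pool, &p, &_state);
    }
************************************************************************/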
4784 
4785 
4786 
4787 /************************************************************************
4788 This function clears the internal list of recycled objects and the  seed
4789 object. However, the pool can still be used (after initialization with another seed).
4790 
4791 pool                pool
4792 state               ALGLIB environment state
4793 
4794 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4795       you should NOT call it when lock can be used by another thread.
4796 ************************************************************************/
4797 void ae_shared_pool_reset(
4798     ae_shared_pool  *pool,
4799     ae_state        *state)
4800 {
4801     /* clear seed and lists */
4802     ae_shared_pool_internalclear(pool);
4803 
4804     /* clear fields */
4805     pool->seed_object = NULL;
4806     pool->recycled_objects = NULL;
4807     pool->recycled_entries = NULL;
4808     pool->enumeration_counter = NULL;
4809     pool->size_of_object = 0;
4810     pool->init = NULL;
4811     pool->init_copy = NULL;
4812     pool->destroy = NULL;
4813 }
4814 
4815 
4816 /************************************************************************
4817 This function initializes serializer
4818 ************************************************************************/
4819 void ae_serializer_init(ae_serializer *serializer)
4820 {
4821     serializer->mode = AE_SM_DEFAULT;
4822     serializer->entries_needed = 0;
4823     serializer->bytes_asked = 0;
4824 }
4825 
4826 void ae_serializer_clear(ae_serializer *serializer)
4827 {
4828 }
4829 
4830 void ae_serializer_alloc_start(ae_serializer *serializer)
4831 {
4832     serializer->entries_needed = 0;
4833     serializer->bytes_asked = 0;
4834     serializer->mode = AE_SM_ALLOC;
4835 }
4836 
4837 void ae_serializer_alloc_entry(ae_serializer *serializer)
4838 {
4839     serializer->entries_needed++;
4840 }
4841 
4842 void ae_serializer_alloc_byte_array(ae_serializer *serializer, ae_vector *bytes)
4843 {
4844     ae_int_t n;
4845     n = bytes->cnt;
4846     n = n/8 + (n%8>0 ? 1 : 0);
4847     serializer->entries_needed += 1+n;
4848 }
4849 
4850 /************************************************************************
4851 After the allocation phase is done, this function returns  the  required
4852 size of the output string buffer (including the trailing zero symbol). The
4853 actual size of the stored data can be a few characters smaller than requested.
4854 ************************************************************************/
4855 ae_int_t ae_serializer_get_alloc_size(ae_serializer *serializer)
4856 {
4857     ae_int_t rows, lastrowsize, result;
4858 
4859     serializer->mode = AE_SM_READY2S;
4860 
4861     /* if no entries needed (degenerate case) */
4862     if( serializer->entries_needed==0 )
4863     {
4864         serializer->bytes_asked = 4; /* a pair of chars for \r\n, one for dot, one for trailing zero */
4865         return serializer->bytes_asked;
4866     }
4867 
4868     /* non-degenerate case */
4869     rows = serializer->entries_needed/AE_SER_ENTRIES_PER_ROW;
4870     lastrowsize = AE_SER_ENTRIES_PER_ROW;
4871     if( serializer->entries_needed%AE_SER_ENTRIES_PER_ROW )
4872     {
4873         lastrowsize = serializer->entries_needed%AE_SER_ENTRIES_PER_ROW;
4874         rows++;
4875     }
4876 
4877     /* calculate result size */
4878     result  = ((rows-1)*AE_SER_ENTRIES_PER_ROW+lastrowsize)*AE_SER_ENTRY_LENGTH;    /* data size */
4879     result +=  (rows-1)*(AE_SER_ENTRIES_PER_ROW-1)+(lastrowsize-1);                 /* space symbols */
4880     result += rows*2;                                                               /* newline symbols */
4881     result += 1;                                                                    /* trailing dot */
4882     result += 1;                                                                    /* trailing zero */
4883     serializer->bytes_asked = result;
4884     return result;
4885 }
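
/************************************************************************
Worked example of the size formula above (illustrative). For, say, 7
entries with AE_SER_ENTRIES_PER_ROW=5 and AE_SER_ENTRY_LENGTH=11:

    rows        = 2 (5 entries in the first row, lastrowsize=2 in the last)
    data        = ((2-1)*5+2)*11 = 77 bytes
    spaces      = (2-1)*4+(2-1)  =  5 bytes
    newlines    = 2*2            =  4 bytes ("\r\n" per row)
    dot + zero  = 2 bytes
    total       = 88 bytes asked from the caller
************************************************************************/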
4886 
4887 #ifdef AE_USE_CPP_SERIALIZATION
4888 void ae_serializer_sstart_str(ae_serializer *serializer, std::string *buf)
4889 {
4890     serializer->mode = AE_SM_TO_CPPSTRING;
4891     serializer->out_cppstr = buf;
4892     serializer->entries_saved = 0;
4893     serializer->bytes_written = 0;
4894 }
4895 
4896 void ae_serializer_ustart_str(ae_serializer *serializer, const std::string *buf)
4897 {
4898     serializer->mode = AE_SM_FROM_STRING;
4899     serializer->in_str = buf->c_str();
4900 }
4901 
4902 static char cpp_writer(const char *p_string, ae_int_t aux)
4903 {
4904     std::ostream *stream = reinterpret_cast<std::ostream*>(aux);
4905     stream->write(p_string, strlen(p_string));
4906     return stream->bad() ? 1 : 0;
4907 }
4908 
4909 static char cpp_reader(ae_int_t aux, ae_int_t cnt, char *p_buf)
4910 {
4911     std::istream *stream = reinterpret_cast<std::istream*>(aux);
4912     int c;
4913     if( cnt<=0 )
4914         return 1; /* unexpected cnt */
4915     for(;;)
4916     {
4917         c = stream->get();
4918         if( c<0 || c>255 )
4919             return 1; /* failure! */
4920         if( c!=' ' && c!='\t' && c!='\n' && c!='\r' )
4921             break;
4922     }
4923     p_buf[0] = (char)c;
4924     for(int k=1; k<cnt; k++)
4925     {
4926         c = stream->get();
4927         if( c<0 || c>255 || c==' ' || c=='\t' || c=='\n' || c=='\r' )
4928             return 1; /* failure! */
4929         p_buf[k] = (char)c;
4930     }
4931     p_buf[cnt] = 0;
4932     return 0; /* success */
4933 }
4934 
4935 void ae_serializer_sstart_stream(ae_serializer *serializer, std::ostream *stream)
4936 {
4937     serializer->mode = AE_SM_TO_STREAM;
4938     serializer->stream_writer = cpp_writer;
4939     serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
4940     serializer->entries_saved = 0;
4941     serializer->bytes_written = 0;
4942 }
4943 
4944 void ae_serializer_ustart_stream(ae_serializer *serializer, const std::istream *stream)
4945 {
4946     serializer->mode = AE_SM_FROM_STREAM;
4947     serializer->stream_reader = cpp_reader;
4948     serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
4949 }
4950 #endif
4951 
4952 void ae_serializer_sstart_str(ae_serializer *serializer, char *buf)
4953 {
4954     serializer->mode = AE_SM_TO_STRING;
4955     serializer->out_str = buf;
4956     serializer->out_str[0] = 0;
4957     serializer->entries_saved = 0;
4958     serializer->bytes_written = 0;
4959 }
4960 
4961 void ae_serializer_ustart_str(ae_serializer *serializer, const char *buf)
4962 {
4963     serializer->mode = AE_SM_FROM_STRING;
4964     serializer->in_str = buf;
4965 }
4966 
4967 void ae_serializer_sstart_stream(ae_serializer *serializer, ae_stream_writer writer, ae_int_t aux)
4968 {
4969     serializer->mode = AE_SM_TO_STREAM;
4970     serializer->stream_writer = writer;
4971     serializer->stream_aux = aux;
4972     serializer->entries_saved = 0;
4973     serializer->bytes_written = 0;
4974 }
4975 
4976 void ae_serializer_ustart_stream(ae_serializer *serializer, ae_stream_reader reader, ae_int_t aux)
4977 {
4978     serializer->mode = AE_SM_FROM_STREAM;
4979     serializer->stream_reader = reader;
4980     serializer->stream_aux = aux;
4981 }
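
/************************************************************************
Illustrative two-phase usage of the serializer (not part of the library).
Pass 1 only counts entries, pass 2 writes them; both passes must visit the
fields in exactly the same order. _state is assumed to be an initialized
ae_state:

    ae_serializer s;
    ae_int_t bufsize;
    char *buf;

    ae_serializer_init(&s);
    ae_serializer_alloc_start(&s);
    ae_serializer_alloc_entry(&s);                  // one entry per value to be stored
    ae_serializer_alloc_entry(&s);
    bufsize = ae_serializer_get_alloc_size(&s);

    buf = (char*)ae_malloc((size_t)bufsize, &_state);
    ae_serializer_sstart_str(&s, buf);
    ae_serializer_serialize_int(&s, 42, &_state);
    ae_serializer_serialize_double(&s, 1.5, &_state);
    ae_serializer_stop(&s, &_state);

Unserialization mirrors this with ae_serializer_ustart_str() followed by
the matching ae_serializer_unserialize_*() calls.
************************************************************************/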
4982 
4983 void ae_serializer_serialize_bool(ae_serializer *serializer, ae_bool v, ae_state *state)
4984 {
4985     char buf[AE_SER_ENTRY_LENGTH+2+1];
4986     const char *emsg = "ALGLIB: serialization integrity error";
4987     ae_int_t bytes_appended;
4988 
4989     /* prepare serialization, check consistency */
4990     ae_bool2str(v, buf, state);
4991     serializer->entries_saved++;
4992     if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
4993         strcat(buf, " ");
4994     else
4995         strcat(buf, "\r\n");
4996     bytes_appended = (ae_int_t)strlen(buf);
4997     ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
4998     serializer->bytes_written += bytes_appended;
4999 
5000     /* append to buffer */
5001 #ifdef AE_USE_CPP_SERIALIZATION
5002     if( serializer->mode==AE_SM_TO_CPPSTRING )
5003     {
5004         *(serializer->out_cppstr) += buf;
5005         return;
5006     }
5007 #endif
5008     if( serializer->mode==AE_SM_TO_STRING )
5009     {
5010         strcat(serializer->out_str, buf);
5011         serializer->out_str += bytes_appended;
5012         return;
5013     }
5014     if( serializer->mode==AE_SM_TO_STREAM )
5015     {
5016         ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5017         return;
5018     }
5019     ae_break(state, ERR_ASSERTION_FAILED, emsg);
5020 }
5021 
5022 void ae_serializer_serialize_int(ae_serializer *serializer, ae_int_t v, ae_state *state)
5023 {
5024     char buf[AE_SER_ENTRY_LENGTH+2+1];
5025     const char *emsg = "ALGLIB: serialization integrity error";
5026     ae_int_t bytes_appended;
5027 
5028     /* prepare serialization, check consistency */
5029     ae_int2str(v, buf, state);
5030     serializer->entries_saved++;
5031     if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5032         strcat(buf, " ");
5033     else
5034         strcat(buf, "\r\n");
5035     bytes_appended = (ae_int_t)strlen(buf);
5036     ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5037     serializer->bytes_written += bytes_appended;
5038 
5039     /* append to buffer */
5040 #ifdef AE_USE_CPP_SERIALIZATION
5041     if( serializer->mode==AE_SM_TO_CPPSTRING )
5042     {
5043         *(serializer->out_cppstr) += buf;
5044         return;
5045     }
5046 #endif
5047     if( serializer->mode==AE_SM_TO_STRING )
5048     {
5049         strcat(serializer->out_str, buf);
5050         serializer->out_str += bytes_appended;
5051         return;
5052     }
5053     if( serializer->mode==AE_SM_TO_STREAM )
5054     {
5055         ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5056         return;
5057     }
5058     ae_break(state, ERR_ASSERTION_FAILED, emsg);
5059 }
5060 
5061 void ae_serializer_serialize_int64(ae_serializer *serializer, ae_int64_t v, ae_state *state)
5062 {
5063     char buf[AE_SER_ENTRY_LENGTH+2+1];
5064     const char *emsg = "ALGLIB: serialization integrity error";
5065     ae_int_t bytes_appended;
5066 
5067     /* prepare serialization, check consistency */
5068     ae_int642str(v, buf, state);
5069     serializer->entries_saved++;
5070     if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5071         strcat(buf, " ");
5072     else
5073         strcat(buf, "\r\n");
5074     bytes_appended = (ae_int_t)strlen(buf);
5075     ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5076     serializer->bytes_written += bytes_appended;
5077 
5078     /* append to buffer */
5079 #ifdef AE_USE_CPP_SERIALIZATION
5080     if( serializer->mode==AE_SM_TO_CPPSTRING )
5081     {
5082         *(serializer->out_cppstr) += buf;
5083         return;
5084     }
5085 #endif
5086     if( serializer->mode==AE_SM_TO_STRING )
5087     {
5088         strcat(serializer->out_str, buf);
5089         serializer->out_str += bytes_appended;
5090         return;
5091     }
5092     if( serializer->mode==AE_SM_TO_STREAM )
5093     {
5094         ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5095         return;
5096     }
5097     ae_break(state, ERR_ASSERTION_FAILED, emsg);
5098 }
5099 
5100 void ae_serializer_serialize_double(ae_serializer *serializer, double v, ae_state *state)
5101 {
5102     char buf[AE_SER_ENTRY_LENGTH+2+1];
5103     const char *emsg = "ALGLIB: serialization integrity error";
5104     ae_int_t bytes_appended;
5105 
5106     /* prepare serialization, check consistency */
5107     ae_double2str(v, buf, state);
5108     serializer->entries_saved++;
5109     if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5110         strcat(buf, " ");
5111     else
5112         strcat(buf, "\r\n");
5113     bytes_appended = (ae_int_t)strlen(buf);
5114     ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5115     serializer->bytes_written += bytes_appended;
5116 
5117     /* append to buffer */
5118 #ifdef AE_USE_CPP_SERIALIZATION
5119     if( serializer->mode==AE_SM_TO_CPPSTRING )
5120     {
5121         *(serializer->out_cppstr) += buf;
5122         return;
5123     }
5124 #endif
5125     if( serializer->mode==AE_SM_TO_STRING )
5126     {
5127         strcat(serializer->out_str, buf);
5128         serializer->out_str += bytes_appended;
5129         return;
5130     }
5131     if( serializer->mode==AE_SM_TO_STREAM )
5132     {
5133         ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5134         return;
5135     }
5136     ae_break(state, ERR_ASSERTION_FAILED, emsg);
5137 }
5138 
5139 void ae_serializer_serialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
5140 {
5141     ae_int_t chunk_size, entries_count;
5142 
5143     chunk_size = 8;
5144 
5145     /* save array length */
5146     ae_serializer_serialize_int(serializer, bytes->cnt, state);
5147 
5148     /* determine entries count */
5149     entries_count = bytes->cnt/chunk_size + (bytes->cnt%chunk_size>0 ? 1 : 0);
5150     for(ae_int_t eidx=0; eidx<entries_count; eidx++)
5151     {
5152         ae_int64_t tmpi;
5153         ae_int_t elen;
5154         elen = bytes->cnt - eidx*chunk_size;
5155         elen = elen>chunk_size ? chunk_size : elen;
5156         memset(&tmpi, 0, sizeof(tmpi));
5157         memmove(&tmpi, bytes->ptr.p_ubyte + eidx*chunk_size, elen);
5158         ae_serializer_serialize_int64(serializer, tmpi, state);
5159     }
5160 }
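
/************************************************************************
Note on the chunking above (illustrative). A byte array is stored as its
length followed by ceil(cnt/8) int64 entries, each packing up to 8 raw
bytes. For example, a 10-byte array produces 1+2=3 serializer entries,
which matches the count reserved by ae_serializer_alloc_byte_array().
The last chunk is zero-padded before being written, and unserialization
copies back only the 'elen' meaningful bytes.
************************************************************************/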
5161 
5162 void ae_serializer_unserialize_bool(ae_serializer *serializer, ae_bool *v, ae_state *state)
5163 {
5164     if( serializer->mode==AE_SM_FROM_STRING )
5165     {
5166         *v = ae_str2bool(serializer->in_str, state, &serializer->in_str);
5167         return;
5168     }
5169     if( serializer->mode==AE_SM_FROM_STREAM )
5170     {
5171         char buf[AE_SER_ENTRY_LENGTH+2+1];
5172         const char *p = buf;
5173         ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5174         *v = ae_str2bool(buf, state, &p);
5175         return;
5176     }
5177     ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5178 }
5179 
5180 void ae_serializer_unserialize_int(ae_serializer *serializer, ae_int_t *v, ae_state *state)
5181 {
5182     if( serializer->mode==AE_SM_FROM_STRING )
5183     {
5184         *v = ae_str2int(serializer->in_str, state, &serializer->in_str);
5185         return;
5186     }
5187     if( serializer->mode==AE_SM_FROM_STREAM )
5188     {
5189         char buf[AE_SER_ENTRY_LENGTH+2+1];
5190         const char *p = buf;
5191         ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5192         *v = ae_str2int(buf, state, &p);
5193         return;
5194     }
5195     ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5196 }
5197 
5198 void ae_serializer_unserialize_int64(ae_serializer *serializer, ae_int64_t *v, ae_state *state)
5199 {
5200     if( serializer->mode==AE_SM_FROM_STRING )
5201     {
5202         *v = ae_str2int64(serializer->in_str, state, &serializer->in_str);
5203         return;
5204     }
5205     if( serializer->mode==AE_SM_FROM_STREAM )
5206     {
5207         char buf[AE_SER_ENTRY_LENGTH+2+1];
5208         const char *p = buf;
5209         ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5210         *v = ae_str2int64(buf, state, &p);
5211         return;
5212     }
5213     ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5214 }
5215 
5216 void ae_serializer_unserialize_double(ae_serializer *serializer, double *v, ae_state *state)
5217 {
5218     if( serializer->mode==AE_SM_FROM_STRING )
5219     {
5220         *v = ae_str2double(serializer->in_str, state, &serializer->in_str);
5221         return;
5222     }
5223     if( serializer->mode==AE_SM_FROM_STREAM )
5224     {
5225         char buf[AE_SER_ENTRY_LENGTH+2+1];
5226         const char *p = buf;
5227         ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5228         *v = ae_str2double(buf, state, &p);
5229         return;
5230     }
5231     ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5232 }
5233 
5234 void ae_serializer_unserialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
5235 {
5236     ae_int_t chunk_size, n, entries_count;
5237 
5238     chunk_size = 8;
5239 
5240     /* read array length, allocate output */
5241     ae_serializer_unserialize_int(serializer, &n, state);
5242     ae_vector_set_length(bytes, n, state);
5243 
5244     /* determine entries count, read entries */
5245     entries_count = n/chunk_size + (n%chunk_size>0 ? 1 : 0);
5246     for(ae_int_t eidx=0; eidx<entries_count; eidx++)
5247     {
5248         ae_int_t elen;
5249         ae_int64_t tmp64;
5250 
5251         elen = n-eidx*chunk_size;
5252         elen = elen>chunk_size ? chunk_size : elen;
5253         ae_serializer_unserialize_int64(serializer, &tmp64, state);
5254         memmove(bytes->ptr.p_ubyte+eidx*chunk_size, &tmp64, elen);
5255     }
5256 }
5257 
5258 void ae_serializer_stop(ae_serializer *serializer, ae_state *state)
5259 {
5260 #ifdef AE_USE_CPP_SERIALIZATION
5261     if( serializer->mode==AE_SM_TO_CPPSTRING )
5262     {
5263         ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state);/* strict "less" because we need space for trailing zero */
5264         serializer->bytes_written++;
5265         *(serializer->out_cppstr) += ".";
5266         return;
5267     }
5268 #endif
5269     if( serializer->mode==AE_SM_TO_STRING )
5270     {
5271         ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
5272         serializer->bytes_written++;
5273         strcat(serializer->out_str, ".");
5274         serializer->out_str += 1;
5275         return;
5276     }
5277     if( serializer->mode==AE_SM_TO_STREAM )
5278     {
5279         ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
5280         serializer->bytes_written++;
5281         ae_assert(serializer->stream_writer(".", serializer->stream_aux)==0, "ae_serializer: error writing to stream", state);
5282         return;
5283     }
5284     if( serializer->mode==AE_SM_FROM_STRING )
5285     {
5286         /*
5287          * because the input string may come from a pre-3.11 serializer,
5288          * which does not include the trailing dot, we do not test the
5289          * string for the presence of the "." symbol. And because a string
5290          * is not a stream, we do not have to read ALL trailing symbols.
5291          */
5292         return;
5293     }
5294     if( serializer->mode==AE_SM_FROM_STREAM )
5295     {
5296         /*
5297          * Read trailing dot, perform integrity check
5298          */
5299         char buf[2];
5300         ae_assert(serializer->stream_reader(serializer->stream_aux, 1, buf)==0, "ae_serializer: error reading from stream", state);
5301         ae_assert(buf[0]=='.', "ae_serializer: trailing . is not found in the stream", state);
5302         return;
5303     }
5304     ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5305 }
5306 
5307 
5308 /************************************************************************
5309 Complex math functions
5310 ************************************************************************/
5311 ae_complex ae_complex_from_i(ae_int_t v)
5312 {
5313     ae_complex r;
5314     r.x = (double)v;
5315     r.y = 0.0;
5316     return r;
5317 }
5318 
5319 ae_complex ae_complex_from_d(double v)
5320 {
5321     ae_complex r;
5322     r.x = v;
5323     r.y = 0.0;
5324     return r;
5325 }
5326 
5327 ae_complex ae_c_neg(ae_complex lhs)
5328 {
5329     ae_complex result;
5330     result.x = -lhs.x;
5331     result.y = -lhs.y;
5332     return result;
5333 }
5334 
5335 ae_complex ae_c_conj(ae_complex lhs, ae_state *state)
5336 {
5337     ae_complex result;
5338     result.x = +lhs.x;
5339     result.y = -lhs.y;
5340     return result;
5341 }
5342 
5343 ae_complex ae_c_sqr(ae_complex lhs, ae_state *state)
5344 {
5345     ae_complex result;
5346     result.x = lhs.x*lhs.x-lhs.y*lhs.y;
5347     result.y = 2*lhs.x*lhs.y;
5348     return result;
5349 }
5350 
5351 double ae_c_abs(ae_complex z, ae_state *state)
5352 {
5353     double w;
5354     double xabs;
5355     double yabs;
5356     double v;
5357 
5358     xabs = fabs(z.x);
5359     yabs = fabs(z.y);
5360     w = xabs>yabs ? xabs : yabs;
5361     v = xabs<yabs ? xabs : yabs;
5362     if( v==0 )
5363         return w;
5364     else
5365     {
5366         double t = v/w;
5367         return w*sqrt(1+t*t);
5368     }
5369 }
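
/************************************************************************
The scaling above avoids overflow/underflow in the intermediate squares.
Worked example (illustrative): for z = 3+4i we get w=4, v=3, t=3/4, and
w*sqrt(1+t*t) = 4*sqrt(1.5625) = 5, whereas a naive sqrt(x*x+y*y) would
overflow for components near DBL_MAX even though |z| itself is representable.
************************************************************************/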
5370 
5371 ae_bool ae_c_eq(ae_complex lhs,   ae_complex rhs)
5372 {
5373     volatile double x1 = lhs.x;
5374     volatile double x2 = rhs.x;
5375     volatile double y1 = lhs.y;
5376     volatile double y2 = rhs.y;
5377     return x1==x2 && y1==y2;
5378 }
5379 
5380 ae_bool ae_c_neq(ae_complex lhs,  ae_complex rhs)
5381 {
5382     volatile double x1 = lhs.x;
5383     volatile double x2 = rhs.x;
5384     volatile double y1 = lhs.y;
5385     volatile double y2 = rhs.y;
5386     return x1!=x2 || y1!=y2;
5387 }
5388 
5389 ae_complex ae_c_add(ae_complex lhs,  ae_complex rhs)
5390 {
5391     ae_complex result;
5392     result.x = lhs.x+rhs.x;
5393     result.y = lhs.y+rhs.y;
5394     return result;
5395 }
5396 
5397 ae_complex ae_c_mul(ae_complex lhs,  ae_complex rhs)
5398 {
5399     ae_complex result;
5400     result.x = lhs.x*rhs.x-lhs.y*rhs.y;
5401     result.y = lhs.x*rhs.y+lhs.y*rhs.x;
5402     return result;
5403 }
5404 
5405 ae_complex ae_c_sub(ae_complex lhs,   ae_complex rhs)
5406 {
5407     ae_complex result;
5408     result.x = lhs.x-rhs.x;
5409     result.y = lhs.y-rhs.y;
5410     return result;
5411 }
5412 
5413 ae_complex ae_c_div(ae_complex lhs,   ae_complex rhs)
5414 {
5415     ae_complex result;
5416     double e;
5417     double f;
5418     if( fabs(rhs.y)<fabs(rhs.x) )
5419     {
5420         e = rhs.y/rhs.x;
5421         f = rhs.x+rhs.y*e;
5422         result.x = (lhs.x+lhs.y*e)/f;
5423         result.y = (lhs.y-lhs.x*e)/f;
5424     }
5425     else
5426     {
5427         e = rhs.x/rhs.y;
5428         f = rhs.y+rhs.x*e;
5429         result.x = (lhs.y+lhs.x*e)/f;
5430         result.y = (-lhs.x+lhs.y*e)/f;
5431     }
5432     return result;
5433 }
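
/************************************************************************
The branch on fabs(rhs.y)<fabs(rhs.x) above is Smith's algorithm for
complex division: scaling by the larger component keeps the intermediate
quotients bounded. Worked check (illustrative): (1+2i)/(3+4i) takes the
else-branch with e=3/4, f=4+3*0.75=6.25, giving x=(2+0.75)/6.25=0.44 and
y=(-1+1.5)/6.25=0.08, i.e. 0.44+0.08i = (11+2i)/25 as expected.
************************************************************************/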
5434 
5435 ae_bool ae_c_eq_d(ae_complex lhs,  double rhs)
5436 {
5437     volatile double x1 = lhs.x;
5438     volatile double x2 = rhs;
5439     volatile double y1 = lhs.y;
5440     volatile double y2 = 0;
5441     return x1==x2 && y1==y2;
5442 }
5443 
5444 ae_bool ae_c_neq_d(ae_complex lhs, double rhs)
5445 {
5446     volatile double x1 = lhs.x;
5447     volatile double x2 = rhs;
5448     volatile double y1 = lhs.y;
5449     volatile double y2 = 0;
5450     return x1!=x2 || y1!=y2;
5451 }
5452 
5453 ae_complex ae_c_add_d(ae_complex lhs, double rhs)
5454 {
5455     ae_complex result;
5456     result.x = lhs.x+rhs;
5457     result.y = lhs.y;
5458     return result;
5459 }
5460 
5461 ae_complex ae_c_mul_d(ae_complex lhs, double rhs)
5462 {
5463     ae_complex result;
5464     result.x = lhs.x*rhs;
5465     result.y = lhs.y*rhs;
5466     return result;
5467 }
5468 
5469 ae_complex ae_c_sub_d(ae_complex lhs, double rhs)
5470 {
5471     ae_complex result;
5472     result.x = lhs.x-rhs;
5473     result.y = lhs.y;
5474     return result;
5475 }
5476 
5477 ae_complex ae_c_d_sub(double lhs,     ae_complex rhs)
5478 {
5479     ae_complex result;
5480     result.x = lhs-rhs.x;
5481     result.y = -rhs.y;
5482     return result;
5483 }
5484 
5485 ae_complex ae_c_div_d(ae_complex lhs, double rhs)
5486 {
5487     ae_complex result;
5488     result.x = lhs.x/rhs;
5489     result.y = lhs.y/rhs;
5490     return result;
5491 }
5492 
5493 ae_complex ae_c_d_div(double lhs,   ae_complex rhs)
5494 {
5495     ae_complex result;
5496     double e;
5497     double f;
5498     if( fabs(rhs.y)<fabs(rhs.x) )
5499     {
5500         e = rhs.y/rhs.x;
5501         f = rhs.x+rhs.y*e;
5502         result.x = lhs/f;
5503         result.y = -lhs*e/f;
5504     }
5505     else
5506     {
5507         e = rhs.x/rhs.y;
5508         f = rhs.y+rhs.x*e;
5509         result.x = lhs*e/f;
5510         result.y = -lhs/f;
5511     }
5512     return result;
5513 }
5514 
5515 
5516 /************************************************************************
5517 Complex BLAS operations
5518 ************************************************************************/
5519 ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
5520 {
5521     double rx = 0, ry = 0;
5522     ae_int_t i;
5523     ae_bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
5524     ae_bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
5525     ae_complex result;
5526     if( bconj0 && bconj1 )
5527     {
5528         double v0x, v0y, v1x, v1y;
5529         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5530         {
5531             v0x = v0->x;
5532             v0y = -v0->y;
5533             v1x = v1->x;
5534             v1y = -v1->y;
5535             rx += v0x*v1x-v0y*v1y;
5536             ry += v0x*v1y+v0y*v1x;
5537         }
5538     }
5539     if( !bconj0 && bconj1 )
5540     {
5541         double v0x, v0y, v1x, v1y;
5542         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5543         {
5544             v0x = v0->x;
5545             v0y = v0->y;
5546             v1x = v1->x;
5547             v1y = -v1->y;
5548             rx += v0x*v1x-v0y*v1y;
5549             ry += v0x*v1y+v0y*v1x;
5550         }
5551     }
5552     if( bconj0 && !bconj1 )
5553     {
5554         double v0x, v0y, v1x, v1y;
5555         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5556         {
5557             v0x = v0->x;
5558             v0y = -v0->y;
5559             v1x = v1->x;
5560             v1y = v1->y;
5561             rx += v0x*v1x-v0y*v1y;
5562             ry += v0x*v1y+v0y*v1x;
5563         }
5564     }
5565     if( !bconj0 && !bconj1 )
5566     {
5567         double v0x, v0y, v1x, v1y;
5568         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
5569         {
5570             v0x = v0->x;
5571             v0y = v0->y;
5572             v1x = v1->x;
5573             v1y = v1->y;
5574             rx += v0x*v1x-v0y*v1y;
5575             ry += v0x*v1y+v0y*v1x;
5576         }
5577     }
5578     result.x = rx;
5579     result.y = ry;
5580     return result;
5581 }
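
/************************************************************************
Illustrative call (not part of the library): the conj0/conj1 flags select
conjugation per operand, and any string not starting with 'N'/'n' means
"conjugate". For unit strides over two ae_complex buffers x and y of
length n, the Hermitian inner product conj(x)'*y is

    ae_complex s = ae_v_cdotproduct(x, 1, "Conj", y, 1, "N", n);

while the plain (non-conjugated) dot product uses "N" for both operands.
************************************************************************/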
5582 
5583 void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5584 {
5585     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5586     ae_int_t i;
5587     if( stride_dst!=1 || stride_src!=1 )
5588     {
5589         /*
5590          * general unoptimized case
5591          */
5592         if( bconj )
5593         {
5594             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5595             {
5596                 vdst->x =  vsrc->x;
5597                 vdst->y = -vsrc->y;
5598             }
5599         }
5600         else
5601         {
5602             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5603                 *vdst = *vsrc;
5604         }
5605     }
5606     else
5607     {
5608         /*
5609          * optimized case
5610          */
5611         if( bconj )
5612         {
5613             for(i=0; i<n; i++, vdst++, vsrc++)
5614             {
5615                 vdst->x =  vsrc->x;
5616                 vdst->y = -vsrc->y;
5617             }
5618         }
5619         else
5620         {
5621             for(i=0; i<n; i++, vdst++, vsrc++)
5622                 *vdst = *vsrc;
5623         }
5624     }
5625 }
5626 
5627 void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5628 {
5629     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5630     ae_int_t i;
5631     if( stride_dst!=1 || stride_src!=1 )
5632     {
5633         /*
5634          * general unoptimized case
5635          */
5636         if( bconj )
5637         {
5638             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5639             {
5640                 vdst->x = -vsrc->x;
5641                 vdst->y =  vsrc->y;
5642             }
5643         }
5644         else
5645         {
5646             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5647             {
5648                 vdst->x = -vsrc->x;
5649                 vdst->y = -vsrc->y;
5650             }
5651         }
5652     }
5653     else
5654     {
5655         /*
5656          * optimized case
5657          */
5658         if( bconj )
5659         {
5660             for(i=0; i<n; i++, vdst++, vsrc++)
5661             {
5662                 vdst->x = -vsrc->x;
5663                 vdst->y =  vsrc->y;
5664             }
5665         }
5666         else
5667         {
5668             for(i=0; i<n; i++, vdst++, vsrc++)
5669             {
5670                 vdst->x = -vsrc->x;
5671                 vdst->y = -vsrc->y;
5672             }
5673         }
5674     }
5675 }
5676 
5677 void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5678 {
5679     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5680     ae_int_t i;
5681     if( stride_dst!=1 || stride_src!=1 )
5682     {
5683         /*
5684          * general unoptimized case
5685          */
5686         if( bconj )
5687         {
5688             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5689             {
5690                 vdst->x =  alpha*vsrc->x;
5691                 vdst->y = -alpha*vsrc->y;
5692             }
5693         }
5694         else
5695         {
5696             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5697             {
5698                 vdst->x = alpha*vsrc->x;
5699                 vdst->y = alpha*vsrc->y;
5700             }
5701         }
5702     }
5703     else
5704     {
5705         /*
5706          * optimized case
5707          */
5708         if( bconj )
5709         {
5710             for(i=0; i<n; i++, vdst++, vsrc++)
5711             {
5712                 vdst->x =  alpha*vsrc->x;
5713                 vdst->y = -alpha*vsrc->y;
5714             }
5715         }
5716         else
5717         {
5718             for(i=0; i<n; i++, vdst++, vsrc++)
5719             {
5720                 vdst->x = alpha*vsrc->x;
5721                 vdst->y = alpha*vsrc->y;
5722             }
5723         }
5724     }
5725 }
5726 
5727 void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5728 {
5729     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5730     ae_int_t i;
5731     if( stride_dst!=1 || stride_src!=1 )
5732     {
5733         /*
5734          * general unoptimized case
5735          */
5736         if( bconj )
5737         {
5738             double ax = alpha.x, ay = alpha.y;
5739             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5740             {
5741                 vdst->x =  ax*vsrc->x+ay*vsrc->y;
5742                 vdst->y = -ax*vsrc->y+ay*vsrc->x;
5743             }
5744         }
5745         else
5746         {
5747             double ax = alpha.x, ay = alpha.y;
5748             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5749             {
5750                 vdst->x = ax*vsrc->x-ay*vsrc->y;
5751                 vdst->y = ax*vsrc->y+ay*vsrc->x;
5752             }
5753         }
5754     }
5755     else
5756     {
5757         /*
5758          * highly optimized case
5759          */
5760         if( bconj )
5761         {
5762             double ax = alpha.x, ay = alpha.y;
5763             for(i=0; i<n; i++, vdst++, vsrc++)
5764             {
5765                 vdst->x =  ax*vsrc->x+ay*vsrc->y;
5766                 vdst->y = -ax*vsrc->y+ay*vsrc->x;
5767             }
5768         }
5769         else
5770         {
5771             double ax = alpha.x, ay = alpha.y;
5772             for(i=0; i<n; i++, vdst++, vsrc++)
5773             {
5774                 vdst->x = ax*vsrc->x-ay*vsrc->y;
5775                 vdst->y = ax*vsrc->y+ay*vsrc->x;
5776             }
5777         }
5778     }
5779 }
5780 
5781 void ae_v_cadd(ae_complex *vdst,     ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5782 {
5783     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5784     ae_int_t i;
5785     if( stride_dst!=1 || stride_src!=1 )
5786     {
5787         /*
5788          * general unoptimized case
5789          */
5790         if( bconj )
5791         {
5792             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5793             {
5794                 vdst->x += vsrc->x;
5795                 vdst->y -= vsrc->y;
5796             }
5797         }
5798         else
5799         {
5800             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5801             {
5802                 vdst->x += vsrc->x;
5803                 vdst->y += vsrc->y;
5804             }
5805         }
5806     }
5807     else
5808     {
5809         /*
5810          * optimized case
5811          */
5812         if( bconj )
5813         {
5814             for(i=0; i<n; i++, vdst++, vsrc++)
5815             {
5816                 vdst->x += vsrc->x;
5817                 vdst->y -= vsrc->y;
5818             }
5819         }
5820         else
5821         {
5822             for(i=0; i<n; i++, vdst++, vsrc++)
5823             {
5824                 vdst->x += vsrc->x;
5825                 vdst->y += vsrc->y;
5826             }
5827         }
5828     }
5829 }
5830 
5831 void ae_v_caddd(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5832 {
5833     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5834     ae_int_t i;
5835     if( stride_dst!=1 || stride_src!=1 )
5836     {
5837         /*
5838          * general unoptimized case
5839          */
5840         if( bconj )
5841         {
5842             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5843             {
5844                 vdst->x += alpha*vsrc->x;
5845                 vdst->y -= alpha*vsrc->y;
5846             }
5847         }
5848         else
5849         {
5850             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5851             {
5852                 vdst->x += alpha*vsrc->x;
5853                 vdst->y += alpha*vsrc->y;
5854             }
5855         }
5856     }
5857     else
5858     {
5859         /*
5860          * optimized case
5861          */
5862         if( bconj )
5863         {
5864             for(i=0; i<n; i++, vdst++, vsrc++)
5865             {
5866                 vdst->x += alpha*vsrc->x;
5867                 vdst->y -= alpha*vsrc->y;
5868             }
5869         }
5870         else
5871         {
5872             for(i=0; i<n; i++, vdst++, vsrc++)
5873             {
5874                 vdst->x += alpha*vsrc->x;
5875                 vdst->y += alpha*vsrc->y;
5876             }
5877         }
5878     }
5879 }
5880 
5881 void ae_v_caddc(ae_complex *vdst,    ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5882 {
5883     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5884     ae_int_t i;
5885     if( stride_dst!=1 || stride_src!=1 )
5886     {
5887         /*
5888          * general unoptimized case
5889          */
5890         double ax = alpha.x, ay = alpha.y;
5891         if( bconj )
5892         {
5893             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5894             {
5895                 vdst->x += ax*vsrc->x+ay*vsrc->y;
5896                 vdst->y -= ax*vsrc->y-ay*vsrc->x;
5897             }
5898         }
5899         else
5900         {
5901             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5902             {
5903                 vdst->x += ax*vsrc->x-ay*vsrc->y;
5904                 vdst->y += ax*vsrc->y+ay*vsrc->x;
5905             }
5906         }
5907     }
5908     else
5909     {
5910         /*
5911          * highly optimized case
5912          */
5913         double ax = alpha.x, ay = alpha.y;
5914         if( bconj )
5915         {
5916             for(i=0; i<n; i++, vdst++, vsrc++)
5917             {
5918                 vdst->x += ax*vsrc->x+ay*vsrc->y;
5919                 vdst->y -= ax*vsrc->y-ay*vsrc->x;
5920             }
5921         }
5922         else
5923         {
5924             for(i=0; i<n; i++, vdst++, vsrc++)
5925             {
5926                 vdst->x += ax*vsrc->x-ay*vsrc->y;
5927                 vdst->y += ax*vsrc->y+ay*vsrc->x;
5928             }
5929         }
5930     }
5931 }
5932 
void ae_v_csub(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5934 {
5935     ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5936     ae_int_t i;
5937     if( stride_dst!=1 || stride_src!=1 )
5938     {
5939         /*
5940          * general unoptimized case
5941          */
5942         if( bconj )
5943         {
5944             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5945             {
5946                 vdst->x -= vsrc->x;
5947                 vdst->y += vsrc->y;
5948             }
5949         }
5950         else
5951         {
5952             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5953             {
5954                 vdst->x -= vsrc->x;
5955                 vdst->y -= vsrc->y;
5956             }
5957         }
5958     }
5959     else
5960     {
5961         /*
5962          * highly optimized case
5963          */
5964         if( bconj )
5965         {
5966             for(i=0; i<n; i++, vdst++, vsrc++)
5967             {
5968                 vdst->x -= vsrc->x;
5969                 vdst->y += vsrc->y;
5970             }
5971         }
5972         else
5973         {
5974             for(i=0; i<n; i++, vdst++, vsrc++)
5975             {
5976                 vdst->x -= vsrc->x;
5977                 vdst->y -= vsrc->y;
5978             }
5979         }
5980     }
5981 }
5982 
void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5984 {
5985     ae_v_caddd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
5986 }
5987 
void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5989 {
5990     alpha.x = -alpha.x;
5991     alpha.y = -alpha.y;
5992     ae_v_caddc(vdst, stride_dst, vsrc, stride_src, conj_src, n, alpha);
5993 }
5994 
void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
5996 {
5997     ae_int_t i;
5998     if( stride_dst!=1 )
5999     {
6000         /*
6001          * general unoptimized case
6002          */
6003         for(i=0; i<n; i++, vdst+=stride_dst)
6004         {
6005             vdst->x *= alpha;
6006             vdst->y *= alpha;
6007         }
6008     }
6009     else
6010     {
6011         /*
6012          * optimized case
6013          */
6014         for(i=0; i<n; i++, vdst++)
6015         {
6016             vdst->x *= alpha;
6017             vdst->y *= alpha;
6018         }
6019     }
6020 }
6021 
void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha)
6023 {
6024     ae_int_t i;
6025     if( stride_dst!=1 )
6026     {
6027         /*
6028          * general unoptimized case
6029          */
6030         double ax = alpha.x, ay = alpha.y;
6031         for(i=0; i<n; i++, vdst+=stride_dst)
6032         {
6033             double  dstx = vdst->x, dsty = vdst->y;
6034             vdst->x = ax*dstx-ay*dsty;
6035             vdst->y = ax*dsty+ay*dstx;
6036         }
6037     }
6038     else
6039     {
6040         /*
6041          * highly optimized case
6042          */
6043         double ax = alpha.x, ay = alpha.y;
6044         for(i=0; i<n; i++, vdst++)
6045         {
6046             double  dstx = vdst->x, dsty = vdst->y;
6047             vdst->x = ax*dstx-ay*dsty;
6048             vdst->y = ax*dsty+ay*dstx;
6049         }
6050     }
6051 }
6052 
6053 /************************************************************************
6054 Real BLAS operations
6055 ************************************************************************/
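/*
 * ae_v_dotproduct(): returns the sum of v0[i*stride0]*v1[i*stride1] for i=0..n-1.
 * The unit-stride branch below is unrolled by 4, with a scalar tail loop for the
 * remaining elements.
 */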
double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
6057 {
6058     double result = 0;
6059     ae_int_t i;
6060     if( stride0!=1 || stride1!=1 )
6061     {
6062         /*
6063          * slow general code
6064          */
6065         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6066             result += (*v0)*(*v1);
6067     }
6068     else
6069     {
6070         /*
6071          * optimized code for stride=1
6072          */
6073         ae_int_t n4 = n/4;
6074         ae_int_t nleft = n%4;
6075         for(i=0; i<n4; i++, v0+=4, v1+=4)
6076             result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
6077         for(i=0; i<nleft; i++, v0++, v1++)
6078             result += v0[0]*v1[0];
6079     }
6080     return result;
6081 }
6082 
void ae_v_move(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6084 {
6085     ae_int_t i;
6086     if( stride_dst!=1 || stride_src!=1 )
6087     {
6088         /*
6089          * general unoptimized case
6090          */
6091         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6092             *vdst = *vsrc;
6093     }
6094     else
6095     {
6096         /*
6097          * optimized case
6098          */
6099         ae_int_t n2 = n/2;
6100         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6101         {
6102             vdst[0] = vsrc[0];
6103             vdst[1] = vsrc[1];
6104         }
6105         if( n%2!=0 )
6106             vdst[0] = vsrc[0];
6107     }
6108 }
6109 
void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6111 {
6112     ae_int_t i;
6113     if( stride_dst!=1 || stride_src!=1 )
6114     {
6115         /*
6116          * general unoptimized case
6117          */
6118         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6119             *vdst = -*vsrc;
6120     }
6121     else
6122     {
6123         /*
6124          * optimized case
6125          */
6126         ae_int_t n2 = n/2;
6127         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6128         {
6129             vdst[0] = -vsrc[0];
6130             vdst[1] = -vsrc[1];
6131         }
6132         if( n%2!=0 )
6133             vdst[0] = -vsrc[0];
6134     }
6135 }
6136 
void ae_v_moved(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6138 {
6139     ae_int_t i;
6140     if( stride_dst!=1 || stride_src!=1 )
6141     {
6142         /*
6143          * general unoptimized case
6144          */
6145         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6146             *vdst = alpha*(*vsrc);
6147     }
6148     else
6149     {
6150         /*
6151          * optimized case
6152          */
6153         ae_int_t n2 = n/2;
6154         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6155         {
6156             vdst[0] = alpha*vsrc[0];
6157             vdst[1] = alpha*vsrc[1];
6158         }
6159         if( n%2!=0 )
6160             vdst[0] = alpha*vsrc[0];
6161     }
6162 }
6163 
void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6165 {
6166     ae_int_t i;
6167     if( stride_dst!=1 || stride_src!=1 )
6168     {
6169         /*
6170          * general unoptimized case
6171          */
6172         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6173             *vdst += *vsrc;
6174     }
6175     else
6176     {
6177         /*
6178          * optimized case
6179          */
6180         ae_int_t n2 = n/2;
6181         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6182         {
6183             vdst[0] += vsrc[0];
6184             vdst[1] += vsrc[1];
6185         }
6186         if( n%2!=0 )
6187             vdst[0] += vsrc[0];
6188     }
6189 }
6190 
void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6192 {
6193     ae_int_t i;
6194     if( stride_dst!=1 || stride_src!=1 )
6195     {
6196         /*
6197          * general unoptimized case
6198          */
6199         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6200             *vdst += alpha*(*vsrc);
6201     }
6202     else
6203     {
6204         /*
6205          * optimized case
6206          */
6207         ae_int_t n2 = n/2;
6208         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6209         {
6210             vdst[0] += alpha*vsrc[0];
6211             vdst[1] += alpha*vsrc[1];
6212         }
6213         if( n%2!=0 )
6214             vdst[0] += alpha*vsrc[0];
6215     }
6216 }
6217 
void ae_v_sub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6219 {
6220     ae_int_t i;
6221     if( stride_dst!=1 || stride_src!=1 )
6222     {
6223         /*
6224          * general unoptimized case
6225          */
6226         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6227             *vdst -= *vsrc;
6228     }
6229     else
6230     {
6231         /*
6232          * highly optimized case
6233          */
6234         ae_int_t n2 = n/2;
6235         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6236         {
6237             vdst[0] -= vsrc[0];
6238             vdst[1] -= vsrc[1];
6239         }
6240         if( n%2!=0 )
6241             vdst[0] -= vsrc[0];
6242     }
6243 }
6244 
void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6246 {
6247     ae_v_addd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
6248 }
6249 
void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
6251 {
6252     ae_int_t i;
6253     if( stride_dst!=1 )
6254     {
6255         /*
6256          * general unoptimized case
6257          */
6258         for(i=0; i<n; i++, vdst+=stride_dst)
6259             *vdst *= alpha;
6260     }
6261     else
6262     {
6263         /*
6264          * highly optimized case
6265          */
6266         for(i=0; i<n; i++, vdst++)
6267             *vdst *= alpha;
6268     }
6269 }
6270 
6271 /************************************************************************
6272 Other functions
6273 ************************************************************************/
ae_int_t ae_v_len(ae_int_t a, ae_int_t b)
6275 {
6276     return b-a+1;
6277 }
6278 
6279 /************************************************************************
6280 RComm functions
6281 ************************************************************************/
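/*
 * rcommstate bundles the boolean/integer/real/complex workspaces (ba/ia/ra/ca)
 * and a stage counter which ALGLIB's reverse-communication algorithms use to
 * persist state between successive calls.
 */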
void _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic)
6283 {
6284     /* initial zero-filling */
6285     memset(&p->ba, 0, sizeof(p->ba));
6286     memset(&p->ia, 0, sizeof(p->ia));
6287     memset(&p->ra, 0, sizeof(p->ra));
6288     memset(&p->ca, 0, sizeof(p->ca));
6289 
6290     /* initialization */
6291     ae_vector_init(&p->ba, 0, DT_BOOL,    _state, make_automatic);
6292     ae_vector_init(&p->ia, 0, DT_INT,     _state, make_automatic);
6293     ae_vector_init(&p->ra, 0, DT_REAL,    _state, make_automatic);
6294     ae_vector_init(&p->ca, 0, DT_COMPLEX, _state, make_automatic);
6295 }
6296 
void _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic)
6298 {
6299     /* initial zero-filling */
6300     memset(&dst->ba, 0, sizeof(dst->ba));
6301     memset(&dst->ia, 0, sizeof(dst->ia));
6302     memset(&dst->ra, 0, sizeof(dst->ra));
6303     memset(&dst->ca, 0, sizeof(dst->ca));
6304 
6305     /* initialization */
6306     ae_vector_init_copy(&dst->ba, &src->ba, _state, make_automatic);
6307     ae_vector_init_copy(&dst->ia, &src->ia, _state, make_automatic);
6308     ae_vector_init_copy(&dst->ra, &src->ra, _state, make_automatic);
6309     ae_vector_init_copy(&dst->ca, &src->ca, _state, make_automatic);
6310     dst->stage = src->stage;
6311 }
6312 
void _rcommstate_clear(rcommstate* p)
6314 {
6315     ae_vector_clear(&p->ba);
6316     ae_vector_clear(&p->ia);
6317     ae_vector_clear(&p->ra);
6318     ae_vector_clear(&p->ca);
6319 }
6320 
void _rcommstate_destroy(rcommstate* p)
6322 {
6323     _rcommstate_clear(p);
6324 }
6325 
6326 
6327 }
6328 
6329 /////////////////////////////////////////////////////////////////////////
6330 //
6331 // THIS SECTION CONTAINS C++ RELATED FUNCTIONALITY
6332 //
6333 /////////////////////////////////////////////////////////////////////////
6334 /********************************************************************
6335 Internal forwards
6336 ********************************************************************/
6337 namespace alglib
6338 {
6339     double get_aenv_nan();
6340     double get_aenv_posinf();
6341     double get_aenv_neginf();
6342     ae_int_t my_stricmp(const char *s1, const char *s2);
6343     char* filter_spaces(const char *s);
6344     void str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec);
6345     void str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat);
6346 
6347     ae_bool parse_bool_delim(const char *s, const char *delim);
6348     ae_int_t parse_int_delim(const char *s, const char *delim);
6349     bool _parse_real_delim(const char *s, const char *delim, double *result, const char **new_s);
6350     double parse_real_delim(const char *s, const char *delim);
6351     alglib::complex parse_complex_delim(const char *s, const char *delim);
6352 
6353     std::string arraytostring(const bool *ptr, ae_int_t n);
6354     std::string arraytostring(const ae_int_t *ptr, ae_int_t n);
6355     std::string arraytostring(const double *ptr, ae_int_t n, int dps);
6356     std::string arraytostring(const alglib::complex *ptr, ae_int_t n, int dps);
6357 }
6358 
6359 /********************************************************************
6360 Global and local constants/variables
6361 ********************************************************************/
6362 const double alglib::machineepsilon = 5E-16;
6363 const double alglib::maxrealnumber  = 1E300;
6364 const double alglib::minrealnumber  = 1E-300;
6365 const alglib::ae_int_t alglib::endianness =  alglib_impl::ae_get_endianness();
6366 const double alglib::fp_nan         =  alglib::get_aenv_nan();
6367 const double alglib::fp_posinf      =  alglib::get_aenv_posinf();
6368 const double alglib::fp_neginf      =  alglib::get_aenv_neginf();
6369 #if defined(AE_NO_EXCEPTIONS)
6370 static const char *_alglib_last_error = NULL;
6371 #endif
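//
// The _i64_x* constants below are 64-bit threading flags; xdefault/serial/parallel
// reinterpret them as xparams references, so threading settings can be passed to
// ALGLIB calls without constructing a separate settings object.
//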
6372 static const alglib_impl::ae_uint64_t _i64_xdefault  = 0x0;
6373 static const alglib_impl::ae_uint64_t _i64_xserial   = _ALGLIB_FLG_THREADING_SERIAL;
6374 static const alglib_impl::ae_uint64_t _i64_xparallel = _ALGLIB_FLG_THREADING_PARALLEL;
6375 const alglib::xparams &alglib::xdefault = *((const alglib::xparams *)(&_i64_xdefault));
6376 const alglib::xparams &alglib::serial   = *((const alglib::xparams *)(&_i64_xserial));
6377 const alglib::xparams &alglib::parallel = *((const alglib::xparams *)(&_i64_xparallel));
6378 
6379 
6380 
6381 /********************************************************************
6382 Exception handling
6383 ********************************************************************/
6384 #if !defined(AE_NO_EXCEPTIONS)
alglib::ap_error::ap_error()
6386 {
6387 }
6388 
alglib::ap_error::ap_error(const char *s)
6390 {
6391     msg = s;
6392 }
6393 
void alglib::ap_error::make_assertion(bool bClause)
6395 {
6396     if(!bClause)
6397         _ALGLIB_CPP_EXCEPTION("");
6398 }
6399 
void alglib::ap_error::make_assertion(bool bClause, const char *p_msg)
6401 {
6402     if(!bClause)
6403         _ALGLIB_CPP_EXCEPTION(p_msg);
6404 }
6405 #else
void alglib::set_error_flag(const char *s)
6407 {
6408     if( s==NULL )
6409         s = "ALGLIB: unknown error";
6410     _alglib_last_error = s;
6411 }
6412 
bool alglib::get_error_flag(const char **p_msg)
6414 {
6415     if( _alglib_last_error==NULL )
6416         return false;
6417     if( p_msg!=NULL )
6418         *p_msg = _alglib_last_error;
6419     return true;
6420 }
6421 
void alglib::clear_error_flag()
6423 {
6424     _alglib_last_error = NULL;
6425 }
6426 #endif
6427 
6428 /********************************************************************
6429 Complex number with double precision.
6430 ********************************************************************/
alglib::complex::complex():x(0.0),y(0.0)
6432 {
6433 }
6434 
alglib::complex::complex(const double &_x):x(_x),y(0.0)
6436 {
6437 }
6438 
alglib::complex::complex(const double &_x, const double &_y):x(_x),y(_y)
6440 {
6441 }
6442 
alglib::complex::complex(const alglib::complex &z):x(z.x),y(z.y)
6444 {
6445 }
6446 
alglib::complex& alglib::complex::operator= (const double& v)
6448 {
6449     x = v;
6450     y = 0.0;
6451     return *this;
6452 }
6453 
alglib::complex& alglib::complex::operator+=(const double& v)
6455 {
6456     x += v;
6457     return *this;
6458 }
6459 
alglib::complex& alglib::complex::operator-=(const double& v)
6461 {
6462     x -= v;
6463     return *this;
6464 }
6465 
alglib::complex& alglib::complex::operator*=(const double& v)
6467 {
6468     x *= v;
6469     y *= v;
6470     return *this;
6471 }
6472 
alglib::complex& alglib::complex::operator/=(const double& v)
6474 {
6475     x /= v;
6476     y /= v;
6477     return *this;
6478 }
6479 
alglib::complex& alglib::complex::operator= (const alglib::complex& z)
6481 {
6482     x = z.x;
6483     y = z.y;
6484     return *this;
6485 }
6486 
alglib::complex& alglib::complex::operator+=(const alglib::complex& z)
6488 {
6489     x += z.x;
6490     y += z.y;
6491     return *this;
6492 }
6493 
alglib::complex& alglib::complex::operator-=(const alglib::complex& z)
6495 {
6496     x -= z.x;
6497     y -= z.y;
6498     return *this;
6499 }
6500 
alglib::complex& alglib::complex::operator*=(const alglib::complex& z)
6502 {
6503     double t = x*z.x-y*z.y;
6504     y = x*z.y+y*z.x;
6505     x = t;
6506     return *this;
6507 }
6508 
alglib::complex& alglib::complex::operator/=(const alglib::complex& z)
6510 {
6511     alglib::complex result;
6512     double e;
6513     double f;
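    //
    // Scaled complex division (commonly known as Smith's algorithm): divide by
    // the component of z with the larger magnitude first, so that intermediate
    // products do not overflow or underflow unnecessarily.
    //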
6514     if( fabs(z.y)<fabs(z.x) )
6515     {
6516         e = z.y/z.x;
6517         f = z.x+z.y*e;
6518         result.x = (x+y*e)/f;
6519         result.y = (y-x*e)/f;
6520     }
6521     else
6522     {
6523         e = z.x/z.y;
6524         f = z.y+z.x*e;
6525         result.x = (y+x*e)/f;
6526         result.y = (-x+y*e)/f;
6527     }
6528     *this = result;
6529     return *this;
6530 }
6531 
alglib_impl::ae_complex* alglib::complex::c_ptr()
6533 {
6534     return (alglib_impl::ae_complex*)this;
6535 }
6536 
const alglib_impl::ae_complex* alglib::complex::c_ptr() const
6538 {
6539     return (const alglib_impl::ae_complex*)this;
6540 }
6541 
6542 #if !defined(AE_NO_EXCEPTIONS)
std::string alglib::complex::tostring(int _dps) const
6544 {
6545     char mask[32];
6546     char buf_x[32];
6547     char buf_y[32];
6548     char buf_zero[32];
6549     int dps = _dps>=0 ? _dps : -_dps;
6550     if( dps<=0 || dps>=20 )
6551         _ALGLIB_CPP_EXCEPTION("complex::tostring(): incorrect dps");
6552 
6553     // handle IEEE special quantities
6554     if( fp_isnan(x) || fp_isnan(y) )
6555         return "NAN";
6556     if( fp_isinf(x) || fp_isinf(y) )
6557         return "INF";
6558 
6559     // generate mask
6560     if( sprintf(mask, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask) )
6561         _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6562 
6563     // print |x|, |y| and zero with same mask and compare
6564     if( sprintf(buf_x, mask, (double)(fabs(x)))>=(int)sizeof(buf_x) )
6565         _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6566     if( sprintf(buf_y, mask, (double)(fabs(y)))>=(int)sizeof(buf_y) )
6567         _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6568     if( sprintf(buf_zero, mask, (double)0)>=(int)sizeof(buf_zero) )
6569         _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6570 
6571     // different zero/nonzero patterns
6572     if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)!=0 )
6573         return std::string(x>0 ? "" : "-")+buf_x+(y>0 ? "+" : "-")+buf_y+"i";
6574     if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)==0 )
6575         return std::string(x>0 ? "" : "-")+buf_x;
6576     if( strcmp(buf_x,buf_zero)==0 && strcmp(buf_y,buf_zero)!=0 )
6577         return std::string(y>0 ? "" : "-")+buf_y+"i";
6578     return std::string("0");
6579 }
6580 #endif
6581 
bool alglib::operator==(const alglib::complex& lhs, const alglib::complex& rhs)
6583 {
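    //
    // The volatile locals are presumably intended to force both operands out of
    // extended-precision registers, so that equality is tested at double precision.
    //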
6584     volatile double x1 = lhs.x;
6585     volatile double x2 = rhs.x;
6586     volatile double y1 = lhs.y;
6587     volatile double y2 = rhs.y;
6588     return x1==x2 && y1==y2;
6589 }
6590 
bool alglib::operator!=(const alglib::complex& lhs, const alglib::complex& rhs)
6592 { return !(lhs==rhs); }
6593 
const alglib::complex alglib::operator+(const alglib::complex& lhs)
6595 { return lhs; }
6596 
const alglib::complex alglib::operator-(const alglib::complex& lhs)
6598 { return alglib::complex(-lhs.x, -lhs.y); }
6599 
const alglib::complex alglib::operator+(const alglib::complex& lhs, const alglib::complex& rhs)
6601 { alglib::complex r = lhs; r += rhs; return r; }
6602 
const alglib::complex alglib::operator+(const alglib::complex& lhs, const double& rhs)
6604 { alglib::complex r = lhs; r += rhs; return r; }
6605 
const alglib::complex alglib::operator+(const double& lhs, const alglib::complex& rhs)
6607 { alglib::complex r = rhs; r += lhs; return r; }
6608 
const alglib::complex alglib::operator-(const alglib::complex& lhs, const alglib::complex& rhs)
6610 { alglib::complex r = lhs; r -= rhs; return r; }
6611 
const alglib::complex alglib::operator-(const alglib::complex& lhs, const double& rhs)
6613 { alglib::complex r = lhs; r -= rhs; return r; }
6614 
const alglib::complex alglib::operator-(const double& lhs, const alglib::complex& rhs)
6616 { alglib::complex r = lhs; r -= rhs; return r; }
6617 
const alglib::complex alglib::operator*(const alglib::complex& lhs, const alglib::complex& rhs)
6619 { return alglib::complex(lhs.x*rhs.x - lhs.y*rhs.y,  lhs.x*rhs.y + lhs.y*rhs.x); }
6620 
const alglib::complex alglib::operator*(const alglib::complex& lhs, const double& rhs)
6622 { return alglib::complex(lhs.x*rhs,  lhs.y*rhs); }
6623 
const alglib::complex alglib::operator*(const double& lhs, const alglib::complex& rhs)
6625 { return alglib::complex(lhs*rhs.x,  lhs*rhs.y); }
6626 
const alglib::complex alglib::operator/(const alglib::complex& lhs, const alglib::complex& rhs)
6628 {
6629     alglib::complex result;
6630     double e;
6631     double f;
6632     if( fabs(rhs.y)<fabs(rhs.x) )
6633     {
6634         e = rhs.y/rhs.x;
6635         f = rhs.x+rhs.y*e;
6636         result.x = (lhs.x+lhs.y*e)/f;
6637         result.y = (lhs.y-lhs.x*e)/f;
6638     }
6639     else
6640     {
6641         e = rhs.x/rhs.y;
6642         f = rhs.y+rhs.x*e;
6643         result.x = (lhs.y+lhs.x*e)/f;
6644         result.y = (-lhs.x+lhs.y*e)/f;
6645     }
6646     return result;
6647 }
6648 
const alglib::complex alglib::operator/(const double& lhs, const alglib::complex& rhs)
6650 {
6651     alglib::complex result;
6652     double e;
6653     double f;
6654     if( fabs(rhs.y)<fabs(rhs.x) )
6655     {
6656         e = rhs.y/rhs.x;
6657         f = rhs.x+rhs.y*e;
6658         result.x = lhs/f;
6659         result.y = -lhs*e/f;
6660     }
6661     else
6662     {
6663         e = rhs.x/rhs.y;
6664         f = rhs.y+rhs.x*e;
6665         result.x = lhs*e/f;
6666         result.y = -lhs/f;
6667     }
6668     return result;
6669 }
6670 
const alglib::complex alglib::operator/(const alglib::complex& lhs, const double& rhs)
6672 { return alglib::complex(lhs.x/rhs, lhs.y/rhs); }
6673 
double alglib::abscomplex(const alglib::complex &z)
6675 {
6676     double w;
6677     double xabs;
6678     double yabs;
6679     double v;
6680 
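    //
    // |z| is computed as max(|x|,|y|)*sqrt(1+(min/max)^2), avoiding the
    // overflow/underflow of the naive sqrt(x*x+y*y).
    //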
6681     xabs = fabs(z.x);
6682     yabs = fabs(z.y);
6683     w = xabs>yabs ? xabs : yabs;
6684     v = xabs<yabs ? xabs : yabs;
6685     if( v==0 )
6686         return w;
6687     else
6688     {
6689         double t = v/w;
6690         return w*sqrt(1+t*t);
6691     }
6692 }
6693 
alglib::complex alglib::conj(const alglib::complex &z)
6695 { return alglib::complex(z.x, -z.y); }
6696 
alglib::complex alglib::csqr(const alglib::complex &z)
6698 { return alglib::complex(z.x*z.x-z.y*z.y, 2*z.x*z.y); }
6699 
void alglib::setnworkers(alglib::ae_int_t nworkers)
6701 {
6702 #ifdef AE_HPC
6703     alglib_impl::ae_set_cores_to_use(nworkers);
6704 #endif
6705 }
6706 
void alglib::setglobalthreading(const alglib::xparams settings)
6708 {
6709 #ifdef AE_HPC
6710     alglib_impl::ae_set_global_threading(settings.flags);
6711 #endif
6712 }
6713 
alglib::ae_int_t alglib::getnworkers()
6715 {
6716 #ifdef AE_HPC
6717     return alglib_impl::ae_get_cores_to_use();
6718 #else
6719     return 1;
6720 #endif
6721 }
6722 
alglib::ae_int_t alglib::_ae_cores_count()
6724 {
6725 #ifdef AE_HPC
6726     return alglib_impl::ae_cores_count();
6727 #else
6728     return 1;
6729 #endif
6730 }
6731 
void alglib::_ae_set_global_threading(alglib_impl::ae_uint64_t flg_value)
6733 {
6734 #ifdef AE_HPC
6735     alglib_impl::ae_set_global_threading(flg_value);
6736 #endif
6737 }
6738 
alglib_impl::ae_uint64_t alglib::_ae_get_global_threading()
6740 {
6741 #ifdef AE_HPC
6742     return alglib_impl::ae_get_global_threading();
6743 #else
6744     return _ALGLIB_FLG_THREADING_SERIAL;
6745 #endif
6746 }
6747 
6748 
6749 /********************************************************************
6750 Level 1 BLAS functions
6751 ********************************************************************/
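//
// These are the public alglib:: counterparts of the ae_v_* kernels above.
// An illustrative (hypothetical) use with two double buffers a and b of length n:
//
//     double s = alglib::vdotproduct(a, b, n); // s = sum of a[i]*b[i]
//     alglib::vmove(a, b, n);                  // a := b
//     alglib::vadd(a, b, n, 2.0);              // a := a + 2*b
//     alglib::vmul(a, n, 0.5);                 // a := 0.5*a
//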
double alglib::vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
6753 {
6754     double result = 0;
6755     ae_int_t i;
6756     if( stride0!=1 || stride1!=1 )
6757     {
6758         //
6759         // slow general code
6760         //
6761         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6762             result += (*v0)*(*v1);
6763     }
6764     else
6765     {
6766         //
6767         // optimized code for stride=1
6768         //
6769         ae_int_t n4 = n/4;
6770         ae_int_t nleft = n%4;
6771         for(i=0; i<n4; i++, v0+=4, v1+=4)
6772             result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
6773         for(i=0; i<nleft; i++, v0++, v1++)
6774             result += v0[0]*v1[0];
6775     }
6776     return result;
6777 }
6778 
double alglib::vdotproduct(const double *v1, const double *v2, ae_int_t N)
6780 {
6781     return vdotproduct(v1, 1, v2, 1, N);
6782 }
6783 
alglib::complex alglib::vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
6785 {
6786     double rx = 0, ry = 0;
6787     ae_int_t i;
6788     bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
6789     bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
6790     if( bconj0 && bconj1 )
6791     {
6792         double v0x, v0y, v1x, v1y;
6793         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6794         {
6795             v0x = v0->x;
6796             v0y = -v0->y;
6797             v1x = v1->x;
6798             v1y = -v1->y;
6799             rx += v0x*v1x-v0y*v1y;
6800             ry += v0x*v1y+v0y*v1x;
6801         }
6802     }
6803     if( !bconj0 && bconj1 )
6804     {
6805         double v0x, v0y, v1x, v1y;
6806         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6807         {
6808             v0x = v0->x;
6809             v0y = v0->y;
6810             v1x = v1->x;
6811             v1y = -v1->y;
6812             rx += v0x*v1x-v0y*v1y;
6813             ry += v0x*v1y+v0y*v1x;
6814         }
6815     }
6816     if( bconj0 && !bconj1 )
6817     {
6818         double v0x, v0y, v1x, v1y;
6819         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6820         {
6821             v0x = v0->x;
6822             v0y = -v0->y;
6823             v1x = v1->x;
6824             v1y = v1->y;
6825             rx += v0x*v1x-v0y*v1y;
6826             ry += v0x*v1y+v0y*v1x;
6827         }
6828     }
6829     if( !bconj0 && !bconj1 )
6830     {
6831         double v0x, v0y, v1x, v1y;
6832         for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6833         {
6834             v0x = v0->x;
6835             v0y = v0->y;
6836             v1x = v1->x;
6837             v1y = v1->y;
6838             rx += v0x*v1x-v0y*v1y;
6839             ry += v0x*v1y+v0y*v1x;
6840         }
6841     }
6842     return alglib::complex(rx,ry);
6843 }
6844 
alglib::complex alglib::vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N)
6846 {
6847     return vdotproduct(v1, 1, "N", v2, 1, "N", N);
6848 }
6849 
void alglib::vmove(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6851 {
6852     ae_int_t i;
6853     if( stride_dst!=1 || stride_src!=1 )
6854     {
6855         //
6856         // general unoptimized case
6857         //
6858         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6859             *vdst = *vsrc;
6860     }
6861     else
6862     {
6863         //
6864         // optimized case
6865         //
6866         ae_int_t n2 = n/2;
6867         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6868         {
6869             vdst[0] = vsrc[0];
6870             vdst[1] = vsrc[1];
6871         }
6872         if( n%2!=0 )
6873             vdst[0] = vsrc[0];
6874     }
6875 }
6876 
void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N)
6878 {
6879     vmove(vdst, 1, vsrc, 1, N);
6880 }
6881 
void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
6883 {
6884     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6885     ae_int_t i;
6886     if( stride_dst!=1 || stride_src!=1 )
6887     {
6888         //
6889         // general unoptimized case
6890         //
6891         if( bconj )
6892         {
6893             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6894             {
6895                 vdst->x =  vsrc->x;
6896                 vdst->y = -vsrc->y;
6897             }
6898         }
6899         else
6900         {
6901             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6902                 *vdst = *vsrc;
6903         }
6904     }
6905     else
6906     {
6907         //
6908         // optimized case
6909         //
6910         if( bconj )
6911         {
6912             for(i=0; i<n; i++, vdst++, vsrc++)
6913             {
6914                 vdst->x =  vsrc->x;
6915                 vdst->y = -vsrc->y;
6916             }
6917         }
6918         else
6919         {
6920             for(i=0; i<n; i++, vdst++, vsrc++)
6921                 *vdst = *vsrc;
6922         }
6923     }
6924 }
6925 
void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
6927 {
6928     vmove(vdst, 1, vsrc, 1, "N", N);
6929 }
6930 
void alglib::vmoveneg(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6932 {
6933     ae_int_t i;
6934     if( stride_dst!=1 || stride_src!=1 )
6935     {
6936         //
6937         // general unoptimized case
6938         //
6939         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6940             *vdst = -*vsrc;
6941     }
6942     else
6943     {
6944         //
6945         // optimized case
6946         //
6947         ae_int_t n2 = n/2;
6948         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6949         {
6950             vdst[0] = -vsrc[0];
6951             vdst[1] = -vsrc[1];
6952         }
6953         if( n%2!=0 )
6954             vdst[0] = -vsrc[0];
6955     }
6956 }
6957 
void alglib::vmoveneg(double *vdst, const double *vsrc, ae_int_t N)
6959 {
6960     vmoveneg(vdst, 1, vsrc, 1, N);
6961 }
6962 
void alglib::vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
6964 {
6965     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6966     ae_int_t i;
6967     if( stride_dst!=1 || stride_src!=1 )
6968     {
6969         //
6970         // general unoptimized case
6971         //
6972         if( bconj )
6973         {
6974             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6975             {
6976                 vdst->x = -vsrc->x;
6977                 vdst->y =  vsrc->y;
6978             }
6979         }
6980         else
6981         {
6982             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6983             {
6984                 vdst->x = -vsrc->x;
6985                 vdst->y = -vsrc->y;
6986             }
6987         }
6988     }
6989     else
6990     {
6991         //
6992         // optimized case
6993         //
6994         if( bconj )
6995         {
6996             for(i=0; i<n; i++, vdst++, vsrc++)
6997             {
6998                 vdst->x = -vsrc->x;
6999                 vdst->y =  vsrc->y;
7000             }
7001         }
7002         else
7003         {
7004             for(i=0; i<n; i++, vdst++, vsrc++)
7005             {
7006                 vdst->x = -vsrc->x;
7007                 vdst->y = -vsrc->y;
7008             }
7009         }
7010     }
7011 }
7012 
void alglib::vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7014 {
7015     vmoveneg(vdst, 1, vsrc, 1, "N", N);
7016 }
7017 
void alglib::vmove(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7019 {
7020     ae_int_t i;
7021     if( stride_dst!=1 || stride_src!=1 )
7022     {
7023         //
7024         // general unoptimized case
7025         //
7026         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7027             *vdst = alpha*(*vsrc);
7028     }
7029     else
7030     {
7031         //
7032         // optimized case
7033         //
7034         ae_int_t n2 = n/2;
7035         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7036         {
7037             vdst[0] = alpha*vsrc[0];
7038             vdst[1] = alpha*vsrc[1];
7039         }
7040         if( n%2!=0 )
7041             vdst[0] = alpha*vsrc[0];
7042     }
7043 }
7044 
void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7046 {
7047     vmove(vdst, 1, vsrc, 1, N, alpha);
7048 }
7049 
void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7051 {
7052     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7053     ae_int_t i;
7054     if( stride_dst!=1 || stride_src!=1 )
7055     {
7056         //
7057         // general unoptimized case
7058         //
7059         if( bconj )
7060         {
7061             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7062             {
7063                 vdst->x =  alpha*vsrc->x;
7064                 vdst->y = -alpha*vsrc->y;
7065             }
7066         }
7067         else
7068         {
7069             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7070             {
7071                 vdst->x = alpha*vsrc->x;
7072                 vdst->y = alpha*vsrc->y;
7073             }
7074         }
7075     }
7076     else
7077     {
7078         //
7079         // optimized case
7080         //
7081         if( bconj )
7082         {
7083             for(i=0; i<n; i++, vdst++, vsrc++)
7084             {
7085                 vdst->x =  alpha*vsrc->x;
7086                 vdst->y = -alpha*vsrc->y;
7087             }
7088         }
7089         else
7090         {
7091             for(i=0; i<n; i++, vdst++, vsrc++)
7092             {
7093                 vdst->x = alpha*vsrc->x;
7094                 vdst->y = alpha*vsrc->y;
7095             }
7096         }
7097     }
7098 }
7099 
void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
7101 {
7102     vmove(vdst, 1, vsrc, 1, "N", N, alpha);
7103 }
7104 
void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7106 {
7107     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7108     ae_int_t i;
7109     if( stride_dst!=1 || stride_src!=1 )
7110     {
7111         //
7112         // general unoptimized case
7113         //
7114         if( bconj )
7115         {
7116             double ax = alpha.x, ay = alpha.y;
7117             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7118             {
7119                 vdst->x =  ax*vsrc->x+ay*vsrc->y;
7120                 vdst->y = -ax*vsrc->y+ay*vsrc->x;
7121             }
7122         }
7123         else
7124         {
7125             double ax = alpha.x, ay = alpha.y;
7126             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7127             {
7128                 vdst->x = ax*vsrc->x-ay*vsrc->y;
7129                 vdst->y = ax*vsrc->y+ay*vsrc->x;
7130             }
7131         }
7132     }
7133     else
7134     {
7135         //
7136         // optimized case
7137         //
7138         if( bconj )
7139         {
7140             double ax = alpha.x, ay = alpha.y;
7141             for(i=0; i<n; i++, vdst++, vsrc++)
7142             {
7143                 vdst->x =  ax*vsrc->x+ay*vsrc->y;
7144                 vdst->y = -ax*vsrc->y+ay*vsrc->x;
7145             }
7146         }
7147         else
7148         {
7149             double ax = alpha.x, ay = alpha.y;
7150             for(i=0; i<n; i++, vdst++, vsrc++)
7151             {
7152                 vdst->x = ax*vsrc->x-ay*vsrc->y;
7153                 vdst->y = ax*vsrc->y+ay*vsrc->x;
7154             }
7155         }
7156     }
7157 }
7158 
void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
7160 {
7161     vmove(vdst, 1, vsrc, 1, "N", N, alpha);
7162 }
7163 
void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
7165 {
7166     ae_int_t i;
7167     if( stride_dst!=1 || stride_src!=1 )
7168     {
7169         //
7170         // general unoptimized case
7171         //
7172         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7173             *vdst += *vsrc;
7174     }
7175     else
7176     {
7177         //
7178         // optimized case
7179         //
7180         ae_int_t n2 = n/2;
7181         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7182         {
7183             vdst[0] += vsrc[0];
7184             vdst[1] += vsrc[1];
7185         }
7186         if( n%2!=0 )
7187             vdst[0] += vsrc[0];
7188     }
7189 }
7190 
void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N)
7192 {
7193     vadd(vdst, 1, vsrc, 1, N);
7194 }
7195 
void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
7197 {
7198     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7199     ae_int_t i;
7200     if( stride_dst!=1 || stride_src!=1 )
7201     {
7202         //
7203         // general unoptimized case
7204         //
7205         if( bconj )
7206         {
7207             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7208             {
7209                 vdst->x += vsrc->x;
7210                 vdst->y -= vsrc->y;
7211             }
7212         }
7213         else
7214         {
7215             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7216             {
7217                 vdst->x += vsrc->x;
7218                 vdst->y += vsrc->y;
7219             }
7220         }
7221     }
7222     else
7223     {
7224         //
7225         // optimized case
7226         //
7227         if( bconj )
7228         {
7229             for(i=0; i<n; i++, vdst++, vsrc++)
7230             {
7231                 vdst->x += vsrc->x;
7232                 vdst->y -= vsrc->y;
7233             }
7234         }
7235         else
7236         {
7237             for(i=0; i<n; i++, vdst++, vsrc++)
7238             {
7239                 vdst->x += vsrc->x;
7240                 vdst->y += vsrc->y;
7241             }
7242         }
7243     }
7244 }
7245 
void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7247 {
7248     vadd(vdst, 1, vsrc, 1, "N", N);
7249 }
7250 
void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7252 {
7253     ae_int_t i;
7254     if( stride_dst!=1 || stride_src!=1 )
7255     {
7256         //
7257         // general unoptimized case
7258         //
7259         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7260             *vdst += alpha*(*vsrc);
7261     }
7262     else
7263     {
7264         //
7265         // optimized case
7266         //
7267         ae_int_t n2 = n/2;
7268         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7269         {
7270             vdst[0] += alpha*vsrc[0];
7271             vdst[1] += alpha*vsrc[1];
7272         }
7273         if( n%2!=0 )
7274             vdst[0] += alpha*vsrc[0];
7275     }
7276 }
7277 
void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7279 {
7280     vadd(vdst, 1, vsrc, 1, N, alpha);
7281 }
7282 
void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7284 {
7285     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7286     ae_int_t i;
7287     if( stride_dst!=1 || stride_src!=1 )
7288     {
7289         //
7290         // general unoptimized case
7291         //
7292         if( bconj )
7293         {
7294             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7295             {
7296                 vdst->x += alpha*vsrc->x;
7297                 vdst->y -= alpha*vsrc->y;
7298             }
7299         }
7300         else
7301         {
7302             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7303             {
7304                 vdst->x += alpha*vsrc->x;
7305                 vdst->y += alpha*vsrc->y;
7306             }
7307         }
7308     }
7309     else
7310     {
7311         //
7312         // optimized case
7313         //
7314         if( bconj )
7315         {
7316             for(i=0; i<n; i++, vdst++, vsrc++)
7317             {
7318                 vdst->x += alpha*vsrc->x;
7319                 vdst->y -= alpha*vsrc->y;
7320             }
7321         }
7322         else
7323         {
7324             for(i=0; i<n; i++, vdst++, vsrc++)
7325             {
7326                 vdst->x += alpha*vsrc->x;
7327                 vdst->y += alpha*vsrc->y;
7328             }
7329         }
7330     }
7331 }
7332 
void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
7334 {
7335     vadd(vdst, 1, vsrc, 1, "N", N, alpha);
7336 }
7337 
void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7339 {
7340     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7341     ae_int_t i;
7342     if( stride_dst!=1 || stride_src!=1 )
7343     {
7344         //
7345         // general unoptimized case
7346         //
7347         double ax = alpha.x, ay = alpha.y;
7348         if( bconj )
7349         {
7350             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7351             {
7352                 vdst->x += ax*vsrc->x+ay*vsrc->y;
7353                 vdst->y -= ax*vsrc->y-ay*vsrc->x;
7354             }
7355         }
7356         else
7357         {
7358             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7359             {
7360                 vdst->x += ax*vsrc->x-ay*vsrc->y;
7361                 vdst->y += ax*vsrc->y+ay*vsrc->x;
7362             }
7363         }
7364     }
7365     else
7366     {
7367         //
7368         // optimized case
7369         //
7370         double ax = alpha.x, ay = alpha.y;
7371         if( bconj )
7372         {
7373             for(i=0; i<n; i++, vdst++, vsrc++)
7374             {
7375                 vdst->x += ax*vsrc->x+ay*vsrc->y;
7376                 vdst->y -= ax*vsrc->y-ay*vsrc->x;
7377             }
7378         }
7379         else
7380         {
7381             for(i=0; i<n; i++, vdst++, vsrc++)
7382             {
7383                 vdst->x += ax*vsrc->x-ay*vsrc->y;
7384                 vdst->y += ax*vsrc->y+ay*vsrc->x;
7385             }
7386         }
7387     }
7388 }
7389 
void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
7391 {
7392     vadd(vdst, 1, vsrc, 1, "N", N, alpha);
7393 }
7394 
void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
7396 {
7397     ae_int_t i;
7398     if( stride_dst!=1 || stride_src!=1 )
7399     {
7400         //
7401         // general unoptimized case
7402         //
7403         for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7404             *vdst -= *vsrc;
7405     }
7406     else
7407     {
7408         //
7409         // optimized case
7410         //
7411         ae_int_t n2 = n/2;
7412         for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7413         {
7414             vdst[0] -= vsrc[0];
7415             vdst[1] -= vsrc[1];
7416         }
7417         if( n%2!=0 )
7418             vdst[0] -= vsrc[0];
7419     }
7420 }
7421 
void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N)
7423 {
7424     vsub(vdst, 1, vsrc, 1, N);
7425 }
7426 
void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
7428 {
7429     bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7430     ae_int_t i;
7431     if( stride_dst!=1 || stride_src!=1 )
7432     {
7433         //
7434         // general unoptimized case
7435         //
7436         if( bconj )
7437         {
7438             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7439             {
7440                 vdst->x -= vsrc->x;
7441                 vdst->y += vsrc->y;
7442             }
7443         }
7444         else
7445         {
7446             for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7447             {
7448                 vdst->x -= vsrc->x;
7449                 vdst->y -= vsrc->y;
7450             }
7451         }
7452     }
7453     else
7454     {
7455         //
7456         // optimized case
7457         //
7458         if( bconj )
7459         {
7460             for(i=0; i<n; i++, vdst++, vsrc++)
7461             {
7462                 vdst->x -= vsrc->x;
7463                 vdst->y += vsrc->y;
7464             }
7465         }
7466         else
7467         {
7468             for(i=0; i<n; i++, vdst++, vsrc++)
7469             {
7470                 vdst->x -= vsrc->x;
7471                 vdst->y -= vsrc->y;
7472             }
7473         }
7474     }
7475 }
7476 
void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7478 {
7479     vsub(vdst, 1, vsrc, 1, "N", N);
7480 }
7481 
void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7483 {
7484     vadd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
7485 }
7486 
void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7488 {
7489     vadd(vdst, 1, vsrc, 1, N, -alpha);
7490 }
7491 
void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7493 {
7494     vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
7495 }
7496 
void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, double alpha)
7498 {
7499     vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
7500 }
7501 
void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7503 {
7504     vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
7505 }
7506 
void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, alglib::complex alpha)
7508 {
7509     vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
7510 }
void alglib::vmul(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
7512 {
7513     ae_int_t i;
7514     if( stride_dst!=1 )
7515     {
7516         //
7517         // general unoptimized case
7518         //
7519         for(i=0; i<n; i++, vdst+=stride_dst)
7520             *vdst *= alpha;
7521     }
7522     else
7523     {
7524         //
7525         // optimized case
7526         //
7527         for(i=0; i<n; i++, vdst++)
7528             *vdst *= alpha;
7529     }
7530 }
7531 
void alglib::vmul(double *vdst, ae_int_t N, double alpha)
7533 {
7534     vmul(vdst, 1, N, alpha);
7535 }
7536 
7537 void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
7538 {
7539     ae_int_t i;
7540     if( stride_dst!=1 )
7541     {
7542         //
7543         // general unoptimized case
7544         //
7545         for(i=0; i<n; i++, vdst+=stride_dst)
7546         {
7547             vdst->x *= alpha;
7548             vdst->y *= alpha;
7549         }
7550     }
7551     else
7552     {
7553         //
7554         // optimized case
7555         //
7556         for(i=0; i<n; i++, vdst++)
7557         {
7558             vdst->x *= alpha;
7559             vdst->y *= alpha;
7560         }
7561     }
7562 }
7563 
7564 void alglib::vmul(alglib::complex *vdst, ae_int_t N, double alpha)
7565 {
7566     vmul(vdst, 1, N, alpha);
7567 }
7568 
7569 void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha)
7570 {
7571     ae_int_t i;
7572     if( stride_dst!=1 )
7573     {
7574         //
7575         // general unoptimized case
7576         //
7577         double ax = alpha.x, ay = alpha.y;
7578         for(i=0; i<n; i++, vdst+=stride_dst)
7579         {
7580             double  dstx = vdst->x, dsty = vdst->y;
7581             vdst->x = ax*dstx-ay*dsty;
7582             vdst->y = ax*dsty+ay*dstx;
7583         }
7584     }
7585     else
7586     {
7587         //
7588         // optimized case
7589         //
7590         double ax = alpha.x, ay = alpha.y;
7591         for(i=0; i<n; i++, vdst++)
7592         {
7593             double  dstx = vdst->x, dsty = vdst->y;
7594             vdst->x = ax*dstx-ay*dsty;
7595             vdst->y = ax*dsty+ay*dstx;
7596         }
7597     }
7598 }
7599 
7600 void alglib::vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha)
7601 {
7602     vmul(vdst, 1, N, alpha);
7603 }
7604 
7605 alglib::ae_int_t alglib::vlen(ae_int_t n1, ae_int_t n2)
7606 {
7607     return n2-n1+1;
7608 }
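
/*
 * Usage sketch (illustrative only, not part of the library): the level-1
 * helpers above operate on raw pointers with explicit strides, and vlen()
 * returns the length of an inclusive index range.  Assuming ap.h is
 * included, the following would scale one raw vector and subtract it from
 * another:
 *
 *     double a[4] = {1, 2, 3, 4};
 *     double b[4] = {4, 3, 2, 1};
 *     alglib::vmul(a, 4, 2.0);                 // a = {2, 4, 6, 8}
 *     alglib::vsub(b, a, 4, 1.0);              // b -= 1.0*a, giving {2, -1, -4, -7}
 *     alglib::ae_int_t n = alglib::vlen(0, 3); // n==4
 */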
7609 
7610 
7611 /********************************************************************
7612 Matrices and vectors
7613 ********************************************************************/
7614 alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_vector *e_ptr, alglib_impl::ae_datatype datatype)
7615 {
7616     if( e_ptr==NULL || e_ptr->datatype!=datatype )
7617     {
7618         const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
7619 #if !defined(AE_NO_EXCEPTIONS)
7620         _ALGLIB_CPP_EXCEPTION(msg);
7621 #else
7622         ptr = NULL;
7623         is_frozen_proxy = false;
7624         _ALGLIB_SET_ERROR_FLAG(msg);
7625         return;
7626 #endif
7627     }
7628     ptr = e_ptr;
7629     is_frozen_proxy = true;
7630 }
7631 
7632 alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_datatype datatype)
7633 {
7634     jmp_buf _break_jump;
7635     alglib_impl::ae_state _state;
7636 
7637     alglib_impl::ae_state_init(&_state);
7638     if( setjmp(_break_jump) )
7639     {
7640 #if !defined(AE_NO_EXCEPTIONS)
7641         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7642 #else
7643         ptr = NULL;
7644         is_frozen_proxy = false;
7645         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7646         return;
7647 #endif
7648     }
7649     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7650     ptr = &inner_vec;
7651     is_frozen_proxy = false;
7652     memset(ptr, 0, sizeof(*ptr));
7653     ae_vector_init(ptr, 0, datatype, &_state, ae_false);
7654     ae_state_clear(&_state);
7655 }
7656 
7657 alglib::ae_vector_wrapper::ae_vector_wrapper(const ae_vector_wrapper &rhs, alglib_impl::ae_datatype datatype)
7658 {
7659     jmp_buf _break_jump;
7660     alglib_impl::ae_state _state;
7661 
7662     alglib_impl::ae_state_init(&_state);
7663     if( setjmp(_break_jump) )
7664     {
7665 #if !defined(AE_NO_EXCEPTIONS)
7666         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7667 #else
7668         ptr = NULL;
7669         is_frozen_proxy = false;
7670         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7671         return;
7672 #endif
7673     }
7674     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7675     alglib_impl::ae_assert(rhs.ptr!=NULL, "ALGLIB: ae_vector_wrapper source is not initialized", &_state);
7676     alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_vector_wrapper datatype check failed", &_state);
7677     ptr = &inner_vec;
7678     is_frozen_proxy = false;
7679     memset(ptr, 0, sizeof(*ptr));
7680     ae_vector_init_copy(ptr, rhs.ptr, &_state, ae_false);
7681     ae_state_clear(&_state);
7682 }
7683 
7684 alglib::ae_vector_wrapper::~ae_vector_wrapper()
7685 {
7686     if( ptr==&inner_vec )
7687         ae_vector_clear(ptr);
7688 }
7689 
7690 void alglib::ae_vector_wrapper::setlength(ae_int_t iLen)
7691 {
7692     jmp_buf _break_jump;
7693     alglib_impl::ae_state _state;
7694     alglib_impl::ae_state_init(&_state);
7695     if( setjmp(_break_jump) )
7696     {
7697 #if !defined(AE_NO_EXCEPTIONS)
7698         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7699 #else
7700         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7701         return;
7702 #endif
7703     }
7704     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7705     alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, ptr==NULL (array was not correctly initialized)", &_state);
7706     alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, ptr is frozen proxy array", &_state);
7707     alglib_impl::ae_vector_set_length(ptr, iLen, &_state);
7708     alglib_impl::ae_state_clear(&_state);
7709 }
7710 
7711 alglib::ae_int_t alglib::ae_vector_wrapper::length() const
7712 {
7713     if( ptr==NULL )
7714         return 0;
7715     return ptr->cnt;
7716 }
7717 
7718 void alglib::ae_vector_wrapper::attach_to(alglib_impl::x_vector *new_ptr, alglib_impl::ae_state *_state)
7719 {
7720     if( ptr==&inner_vec )
7721         ae_vector_clear(ptr);
7722     ptr = &inner_vec;
7723     memset(ptr, 0, sizeof(*ptr));
7724     ae_vector_init_attach_to_x(ptr, new_ptr, _state, ae_false);
7725     is_frozen_proxy = true;
7726 }
7727 
7728 const alglib::ae_vector_wrapper& alglib::ae_vector_wrapper::assign(const alglib::ae_vector_wrapper &rhs)
7729 {
7730     jmp_buf _break_jump;
7731     alglib_impl::ae_state _state;
7732     if( this==&rhs )
7733         return *this;
7734     alglib_impl::ae_state_init(&_state);
7735     if( setjmp(_break_jump) )
7736     {
7737 #if !defined(AE_NO_EXCEPTIONS)
7738         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7739 #else
7740         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
7741         return *this;
7742 #endif
7743     }
7744     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7745     ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized destination)", &_state);
7746     ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized source)", &_state);
7747     ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types do not match)", &_state);
7748     if( is_frozen_proxy )
7749         ae_assert(rhs.ptr->cnt==ptr->cnt, "ALGLIB: incorrect assignment to proxy array (sizes do not match)", &_state);
7750     if( rhs.ptr->cnt!=ptr->cnt )
7751         ae_vector_set_length(ptr, rhs.ptr->cnt, &_state);
7752     memcpy(ptr->ptr.p_ptr, rhs.ptr->ptr.p_ptr, ptr->cnt*alglib_impl::ae_sizeof(ptr->datatype));
7753     alglib_impl::ae_state_clear(&_state);
7754     return *this;
7755 }
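
/*
 * Note (informal): assign() above is shared by all 1-dimensional array
 * classes.  Assignment into a frozen proxy (an array attached to external
 * memory) requires matching sizes, while a regular destination is resized
 * automatically before the raw memcpy of the payload.
 */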
7756 
7757 const alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() const
7758 {
7759     return ptr;
7760 }
7761 
7762 alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr()
7763 {
7764     return ptr;
7765 }
7766 
7767 #if !defined(AE_NO_EXCEPTIONS)
7768 alglib::ae_vector_wrapper::ae_vector_wrapper(const char *s, alglib_impl::ae_datatype datatype)
7769 {
7770     std::vector<const char*> svec;
7771     size_t i;
7772     char *p = filter_spaces(s);
7773     if( p==NULL )
7774         _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
7775     try
7776     {
7777         str_vector_create(p, true, &svec);
7778         {
7779             jmp_buf _break_jump;
7780             alglib_impl::ae_state _state;
7781             alglib_impl::ae_state_init(&_state);
7782             if( setjmp(_break_jump) )
7783                 _ALGLIB_CPP_EXCEPTION(_state.error_msg);
7784             alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
7785             ptr = &inner_vec;
7786             is_frozen_proxy = false;
7787             memset(ptr, 0, sizeof(*ptr));
7788             ae_vector_init(ptr, (ae_int_t)(svec.size()), datatype, &_state, ae_false);
7789             ae_state_clear(&_state);
7790         }
7791         for(i=0; i<svec.size(); i++)
7792         {
7793             if( datatype==alglib_impl::DT_BOOL )
7794                 ptr->ptr.p_bool[i]    = parse_bool_delim(svec[i],",]");
7795             if( datatype==alglib_impl::DT_INT )
7796                 ptr->ptr.p_int[i]     = parse_int_delim(svec[i],",]");
7797             if( datatype==alglib_impl::DT_REAL )
7798                 ptr->ptr.p_double[i]  = parse_real_delim(svec[i],",]");
7799             if( datatype==alglib_impl::DT_COMPLEX )
7800             {
7801                 alglib::complex t = parse_complex_delim(svec[i],",]");
7802                 ptr->ptr.p_complex[i].x = t.x;
7803                 ptr->ptr.p_complex[i].y = t.y;
7804             }
7805         }
7806         alglib_impl::ae_free(p);
7807     }
7808     catch(...)
7809     {
7810         alglib_impl::ae_free(p);
7811         throw;
7812     }
7813 }
7814 #endif
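
/*
 * Note (informal): the string constructor above expects a bracketed,
 * comma-separated initializer without a trailing delimiter; spaces are
 * stripped by filter_spaces() before parsing.  For example:
 *
 *     alglib::real_1d_array    r("[1.5, 2.5, 3.5]");
 *     alglib::integer_1d_array z("[-1,0,1]");
 *     alglib::boolean_1d_array b("[true,false]");
 */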
7815 
7816 alglib::boolean_1d_array::boolean_1d_array():ae_vector_wrapper(alglib_impl::DT_BOOL)
7817 {
7818 }
7819 
7820 alglib::boolean_1d_array::boolean_1d_array(const alglib::boolean_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_BOOL)
7821 {
7822 }
7823 
7824 alglib::boolean_1d_array::boolean_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_BOOL)
7825 {
7826 }
7827 
7828 const alglib::boolean_1d_array& alglib::boolean_1d_array::operator=(const alglib::boolean_1d_array &rhs)
7829 {
7830     return static_cast<const alglib::boolean_1d_array&>(assign(rhs));
7831 }
7832 
7833 alglib::boolean_1d_array::~boolean_1d_array()
7834 {
7835 }
7836 
7837 const ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) const
7838 {
7839     return ptr->ptr.p_bool[i];
7840 }
7841 
7842 ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i)
7843 {
7844     return ptr->ptr.p_bool[i];
7845 }
7846 
7847 const ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) const
7848 {
7849     return ptr->ptr.p_bool[i];
7850 }
7851 
7852 ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i)
7853 {
7854     return ptr->ptr.p_bool[i];
7855 }
7856 
7857 void alglib::boolean_1d_array::setcontent(ae_int_t iLen, const bool *pContent )
7858 {
7859     ae_int_t i;
7860 
7861     // setlength(), handle possible exception-free errors
7862     setlength(iLen);
7863     if( ptr==NULL || ptr->cnt!=iLen )
7864         return;
7865 
7866     // copy
7867     for(i=0; i<iLen; i++)
7868         ptr->ptr.p_bool[i] = pContent[i];
7869 }
7870 
7871 ae_bool* alglib::boolean_1d_array::getcontent()
7872 {
7873     return ptr->ptr.p_bool;
7874 }
7875 
7876 const ae_bool* alglib::boolean_1d_array::getcontent() const
7877 {
7878     return ptr->ptr.p_bool;
7879 }
7880 
7881 #if !defined(AE_NO_EXCEPTIONS)
7882 alglib::boolean_1d_array::boolean_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_BOOL)
7883 {
7884 }
7885 
7886 std::string alglib::boolean_1d_array::tostring() const
7887 {
7888     if( length()==0 )
7889         return "[]";
7890     return arraytostring(&(operator()(0)), length());
7891 }
7892 #endif
7893 
7894 alglib::integer_1d_array::integer_1d_array():ae_vector_wrapper(alglib_impl::DT_INT)
7895 {
7896 }
7897 
7898 alglib::integer_1d_array::integer_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_INT)
7899 {
7900 }
7901 
7902 alglib::integer_1d_array::integer_1d_array(const alglib::integer_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_INT)
7903 {
7904 }
7905 
7906 const alglib::integer_1d_array& alglib::integer_1d_array::operator=(const alglib::integer_1d_array &rhs)
7907 {
7908     return static_cast<const alglib::integer_1d_array&>(assign(rhs));
7909 }
7910 
7911 alglib::integer_1d_array::~integer_1d_array()
7912 {
7913 }
7914 
7915 const alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) const
7916 {
7917     return ptr->ptr.p_int[i];
7918 }
7919 
7920 alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i)
7921 {
7922     return ptr->ptr.p_int[i];
7923 }
7924 
7925 const alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) const
7926 {
7927     return ptr->ptr.p_int[i];
7928 }
7929 
7930 alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i)
7931 {
7932     return ptr->ptr.p_int[i];
7933 }
7934 
7935 void alglib::integer_1d_array::setcontent(ae_int_t iLen, const ae_int_t *pContent )
7936 {
7937     ae_int_t i;
7938 
7939     // setlength(), handle possible exception-free errors
7940     setlength(iLen);
7941     if( ptr==NULL || ptr->cnt!=iLen )
7942         return;
7943 
7944     // copy
7945     for(i=0; i<iLen; i++)
7946         ptr->ptr.p_int[i] = pContent[i];
7947 }
7948 
7949 alglib::ae_int_t* alglib::integer_1d_array::getcontent()
7950 {
7951     return ptr->ptr.p_int;
7952 }
7953 
7954 const alglib::ae_int_t* alglib::integer_1d_array::getcontent() const
7955 {
7956     return ptr->ptr.p_int;
7957 }
7958 
7959 #if !defined(AE_NO_EXCEPTIONS)
7960 alglib::integer_1d_array::integer_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_INT)
7961 {
7962 }
7963 
7964 std::string alglib::integer_1d_array::tostring() const
7965 {
7966     if( length()==0 )
7967         return "[]";
7968     return arraytostring(&operator()(0), length());
7969 }
7970 #endif
7971 
7972 alglib::real_1d_array::real_1d_array():ae_vector_wrapper(alglib_impl::DT_REAL)
7973 {
7974 }
7975 
7976 alglib::real_1d_array::real_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_REAL)
7977 {
7978 }
7979 
7980 alglib::real_1d_array::real_1d_array(const alglib::real_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_REAL)
7981 {
7982 }
7983 
7984 const alglib::real_1d_array& alglib::real_1d_array::operator=(const alglib::real_1d_array &rhs)
7985 {
7986     return static_cast<const alglib::real_1d_array&>(assign(rhs));
7987 }
7988 
7989 alglib::real_1d_array::~real_1d_array()
7990 {
7991 }
7992 
7993 const double& alglib::real_1d_array::operator()(ae_int_t i) const
7994 {
7995     return ptr->ptr.p_double[i];
7996 }
7997 
7998 double& alglib::real_1d_array::operator()(ae_int_t i)
7999 {
8000     return ptr->ptr.p_double[i];
8001 }
8002 
8003 const double& alglib::real_1d_array::operator[](ae_int_t i) const
8004 {
8005     return ptr->ptr.p_double[i];
8006 }
8007 
8008 double& alglib::real_1d_array::operator[](ae_int_t i)
8009 {
8010     return ptr->ptr.p_double[i];
8011 }
8012 
8013 void alglib::real_1d_array::setcontent(ae_int_t iLen, const double *pContent )
8014 {
8015     ae_int_t i;
8016 
8017     // setlength(), handle possible exception-free errors
8018     setlength(iLen);
8019     if( ptr==NULL || ptr->cnt!=iLen )
8020         return;
8021 
8022     // copy
8023     for(i=0; i<iLen; i++)
8024         ptr->ptr.p_double[i] = pContent[i];
8025 }
8026 
8027 void alglib::real_1d_array::attach_to_ptr(ae_int_t iLen, double *pContent ) // TODO: convert to constructor!!!!!!!
8028 {
8029     alglib_impl::x_vector x;
8030     jmp_buf _break_jump;
8031     alglib_impl::ae_state _state;
8032 
8033     alglib_impl::ae_state_init(&_state);
8034     if( setjmp(_break_jump) )
8035     {
8036 #if !defined(AE_NO_EXCEPTIONS)
8037         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8038 #else
8039         ptr = NULL;
8040         is_frozen_proxy = false;
8041         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8042         return;
8043 #endif
8044     }
8045     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8046     alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
8047     alglib_impl::ae_assert(iLen>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
8048     x.cnt = iLen;
8049     x.datatype = alglib_impl::DT_REAL;
8050     x.owner = alglib_impl::OWN_CALLER;
8051     x.last_action = alglib_impl::ACT_UNCHANGED;
8052     x.x_ptr.p_ptr = pContent;
8053     attach_to(&x, &_state);
8054     ae_state_clear(&_state);
8055 }
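
/*
 * Usage sketch (illustrative only): attach_to_ptr() wraps caller-owned
 * memory without copying.  The resulting object is a frozen proxy: it
 * cannot be resized and the buffer must outlive it.
 *
 *     double buf[3] = {1.0, 2.0, 3.0};
 *     alglib::real_1d_array a;
 *     a.attach_to_ptr(3, buf);
 *     a[1] = 5.0;               // writes directly into buf[1]
 */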
8056 
8057 double* alglib::real_1d_array::getcontent()
8058 {
8059     return ptr->ptr.p_double;
8060 }
8061 
8062 const double* alglib::real_1d_array::getcontent() const
8063 {
8064     return ptr->ptr.p_double;
8065 }
8066 
8067 #if !defined(AE_NO_EXCEPTIONS)
8068 alglib::real_1d_array::real_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_REAL)
8069 {
8070 }
8071 
8072 std::string alglib::real_1d_array::tostring(int dps) const
8073 {
8074     if( length()==0 )
8075         return "[]";
8076     return arraytostring(&operator()(0), length(), dps);
8077 }
8078 #endif
8079 
8080 alglib::complex_1d_array::complex_1d_array():ae_vector_wrapper(alglib_impl::DT_COMPLEX)
8081 {
8082 }
8083 
8084 alglib::complex_1d_array::complex_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_COMPLEX)
8085 {
8086 }
8087 
8088 alglib::complex_1d_array::complex_1d_array(const alglib::complex_1d_array &rhs):ae_vector_wrapper(rhs,alglib_impl::DT_COMPLEX)
8089 {
8090 }
8091 
8092 const alglib::complex_1d_array& alglib::complex_1d_array::operator=(const alglib::complex_1d_array &rhs)
8093 {
8094     return static_cast<const alglib::complex_1d_array&>(assign(rhs));
8095 }
8096 
8097 alglib::complex_1d_array::~complex_1d_array()
8098 {
8099 }
8100 
8101 const alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) const
8102 {
8103     return *((const alglib::complex*)(ptr->ptr.p_complex+i));
8104 }
8105 
8106 alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i)
8107 {
8108     return *((alglib::complex*)(ptr->ptr.p_complex+i));
8109 }
8110 
8111 const alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) const
8112 {
8113     return *((const alglib::complex*)(ptr->ptr.p_complex+i));
8114 }
8115 
8116 alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i)
8117 {
8118     return *((alglib::complex*)(ptr->ptr.p_complex+i));
8119 }
8120 
8121 void alglib::complex_1d_array::setcontent(ae_int_t iLen, const alglib::complex *pContent )
8122 {
8123     ae_int_t i;
8124 
8125     // setlength(), handle possible exception-free errors
8126     setlength(iLen);
8127     if( ptr==NULL || ptr->cnt!=iLen  )
8128         return;
8129 
8130     // copy
8131     for(i=0; i<iLen; i++)
8132     {
8133         ptr->ptr.p_complex[i].x = pContent[i].x;
8134         ptr->ptr.p_complex[i].y = pContent[i].y;
8135     }
8136 }
8137 
8138 alglib::complex* alglib::complex_1d_array::getcontent()
8139 {
8140     return (alglib::complex*)ptr->ptr.p_complex;
8141 }
8142 
8143 const alglib::complex* alglib::complex_1d_array::getcontent() const
8144 {
8145     return (const alglib::complex*)ptr->ptr.p_complex;
8146 }
8147 
8148 #if !defined(AE_NO_EXCEPTIONS)
8149 alglib::complex_1d_array::complex_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_COMPLEX)
8150 {
8151 }
8152 
8153 std::string alglib::complex_1d_array::tostring(int dps) const
8154 {
8155     if( length()==0 )
8156         return "[]";
8157     return arraytostring(&operator()(0), length(), dps);
8158 }
8159 #endif
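
/*
 * Note (informal): complex entries in string initializers may be written
 * as "a+bi", "a-bi", "bi+a" or as a bare real/imaginary part, as accepted
 * by parse_complex_delim() further below, e.g.
 *
 *     alglib::complex_1d_array c("[1+2i,-3i,4]");
 */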
8160 
8161 alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_matrix *e_ptr, alglib_impl::ae_datatype datatype)
8162 {
8163     if( e_ptr==NULL || e_ptr->datatype!=datatype )
8164     {
8165         const char *msg = "ALGLIB: ae_matrix_wrapper datatype check failed";
8166 #if !defined(AE_NO_EXCEPTIONS)
8167         _ALGLIB_CPP_EXCEPTION(msg);
8168 #else
8169         ptr = NULL;
8170         is_frozen_proxy = false;
8171         _ALGLIB_SET_ERROR_FLAG(msg);
8172         return;
8173 #endif
8174     }
8175     ptr = e_ptr;
8176     is_frozen_proxy = true;
8177 }
8178 
8179 alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_datatype datatype)
8180 {
8181     jmp_buf _break_jump;
8182     alglib_impl::ae_state _state;
8183 
8184     alglib_impl::ae_state_init(&_state);
8185     if( setjmp(_break_jump) )
8186     {
8187 #if !defined(AE_NO_EXCEPTIONS)
8188         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8189 #else
8190         ptr = NULL;
8191         is_frozen_proxy = false;
8192         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8193         return;
8194 #endif
8195     }
8196     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8197     ptr = &inner_mat;
8198     is_frozen_proxy = false;
8199     memset(ptr, 0, sizeof(*ptr));
8200     ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
8201     ae_state_clear(&_state);
8202 
8203 }
8204 
8205 alglib::ae_matrix_wrapper::ae_matrix_wrapper(const ae_matrix_wrapper &rhs, alglib_impl::ae_datatype datatype)
8206 {
8207     jmp_buf _break_jump;
8208     alglib_impl::ae_state _state;
8209 
8210     alglib_impl::ae_state_init(&_state);
8211     if( setjmp(_break_jump) )
8212     {
8213 #if !defined(AE_NO_EXCEPTIONS)
8214         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8215 #else
8216         ptr = NULL;
8217         is_frozen_proxy = false;
8218         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8219         return;
8220 #endif
8221     }
8222     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8223     is_frozen_proxy = false;
8224     ptr = NULL;
8225     if( rhs.ptr!=NULL )
8226     {
8227         alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_matrix_wrapper datatype check failed", &_state);
8228         ptr = &inner_mat;
8229         memset(ptr, 0, sizeof(*ptr));
8230         ae_matrix_init_copy(ptr, rhs.ptr, &_state, ae_false);
8231     }
8232     ae_state_clear(&_state);
8233 }
8234 
8235 alglib::ae_matrix_wrapper::~ae_matrix_wrapper()
8236 {
8237     if( ptr==&inner_mat )
8238         ae_matrix_clear(ptr);
8239 }
8240 
8241 #if !defined(AE_NO_EXCEPTIONS)
8242 alglib::ae_matrix_wrapper::ae_matrix_wrapper(const char *s, alglib_impl::ae_datatype datatype)
8243 {
8244     std::vector< std::vector<const char*> > smat;
8245     size_t i, j;
8246     char *p = filter_spaces(s);
8247     if( p==NULL )
8248         _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
8249     try
8250     {
8251         str_matrix_create(p, &smat);
8252         {
8253             jmp_buf _break_jump;
8254             alglib_impl::ae_state _state;
8255             alglib_impl::ae_state_init(&_state);
8256             if( setjmp(_break_jump) )
8257                 _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8258             alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8259             ptr = &inner_mat;
8260             is_frozen_proxy = false;
8261             memset(ptr, 0, sizeof(*ptr));
8262             if( smat.size()!=0 )
8263                 ae_matrix_init(ptr, (ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), datatype, &_state, ae_false);
8264             else
8265                 ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
8266             ae_state_clear(&_state);
8267         }
8268         for(i=0; i<smat.size(); i++)
8269             for(j=0; j<smat[0].size(); j++)
8270             {
8271                 if( datatype==alglib_impl::DT_BOOL )
8272                     ptr->ptr.pp_bool[i][j]    = parse_bool_delim(smat[i][j],",]");
8273                 if( datatype==alglib_impl::DT_INT )
8274                     ptr->ptr.pp_int[i][j]     = parse_int_delim(smat[i][j],",]");
8275                 if( datatype==alglib_impl::DT_REAL )
8276                     ptr->ptr.pp_double[i][j]  = parse_real_delim(smat[i][j],",]");
8277                 if( datatype==alglib_impl::DT_COMPLEX )
8278                 {
8279                     alglib::complex t = parse_complex_delim(smat[i][j],",]");
8280                     ptr->ptr.pp_complex[i][j].x = t.x;
8281                     ptr->ptr.pp_complex[i][j].y = t.y;
8282                 }
8283             }
8284         alglib_impl::ae_free(p);
8285     }
8286     catch(...)
8287     {
8288         alglib_impl::ae_free(p);
8289         throw;
8290     }
8291 }
8292 #endif
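
/*
 * Note (informal): matrix initializers are nested bracketed lists whose
 * rows must all have the same length; "[[]]" denotes an empty matrix.
 * For example:
 *
 *     alglib::real_2d_array    m("[[1,2],[3,4]]");
 *     alglib::integer_2d_array e("[[]]");
 */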
8293 
8294 void alglib::ae_matrix_wrapper::setlength(ae_int_t rows, ae_int_t cols) // TODO: automatic allocation of NULL ptr!!!!!
8295 {
8296     jmp_buf _break_jump;
8297     alglib_impl::ae_state _state;
8298     alglib_impl::ae_state_init(&_state);
8299     if( setjmp(_break_jump) )
8300     {
8301 #if !defined(AE_NO_EXCEPTIONS)
8302         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8303 #else
8304         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8305         return;
8306 #endif
8307     }
8308     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8309     alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, p_mat==NULL (array was not correctly initialized)", &_state);
8310     alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, attempt to resize proxy array", &_state);
8311     alglib_impl::ae_matrix_set_length(ptr, rows, cols, &_state);
8312     alglib_impl::ae_state_clear(&_state);
8313 }
8314 
8315 alglib::ae_int_t alglib::ae_matrix_wrapper::rows() const
8316 {
8317     if( ptr==NULL )
8318         return 0;
8319     return ptr->rows;
8320 }
8321 
8322 alglib::ae_int_t alglib::ae_matrix_wrapper::cols() const
8323 {
8324     if( ptr==NULL )
8325         return 0;
8326     return ptr->cols;
8327 }
8328 
8329 bool alglib::ae_matrix_wrapper::isempty() const
8330 {
8331     return rows()==0 || cols()==0;
8332 }
8333 
8334 alglib::ae_int_t alglib::ae_matrix_wrapper::getstride() const
8335 {
8336     if( ptr==NULL )
8337         return 0;
8338     return ptr->stride;
8339 }
8340 
8341 void alglib::ae_matrix_wrapper::attach_to(alglib_impl::x_matrix *new_ptr, alglib_impl::ae_state *_state)
8342 {
8343     if( ptr==&inner_mat )
8344         ae_matrix_clear(ptr);
8345     ptr = &inner_mat;
8346     memset(ptr, 0, sizeof(*ptr));
8347     ae_matrix_init_attach_to_x(ptr, new_ptr, _state, ae_false);
8348     is_frozen_proxy = true;
8349 }
8350 
8351 const alglib::ae_matrix_wrapper& alglib::ae_matrix_wrapper::assign(const alglib::ae_matrix_wrapper &rhs)
8352 {
8353     ae_int_t i;
8354     jmp_buf _break_jump;
8355     alglib_impl::ae_state _state;
8356     if( this==&rhs )
8357         return *this;
8358     alglib_impl::ae_state_init(&_state);
8359     if( setjmp(_break_jump) )
8360     {
8361 #if !defined(AE_NO_EXCEPTIONS)
8362         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8363 #else
8364         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8365         return *this;
8366 #endif
8367     }
8368     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8369     ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment to matrix (uninitialized destination)", &_state);
8370     ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment to array (uninitialized source)", &_state);
8371     ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types do not match)", &_state);
8372     if( is_frozen_proxy )
8373     {
8374         ae_assert(rhs.ptr->rows==ptr->rows, "ALGLIB: incorrect assignment to proxy array (sizes do not match)", &_state);
8375         ae_assert(rhs.ptr->cols==ptr->cols, "ALGLIB: incorrect assignment to proxy array (sizes do not match)", &_state);
8376     }
8377     if( (rhs.ptr->rows!=ptr->rows) || (rhs.ptr->cols!=ptr->cols) )
8378         ae_matrix_set_length(ptr, rhs.ptr->rows, rhs.ptr->cols, &_state);
8379     for(i=0; i<ptr->rows; i++)
8380         memcpy(ptr->ptr.pp_void[i], rhs.ptr->ptr.pp_void[i], ptr->cols*alglib_impl::ae_sizeof(ptr->datatype));
8381     alglib_impl::ae_state_clear(&_state);
8382     return *this;
8383 }
8384 
8385 const alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() const
8386 {
8387     return ptr;
8388 }
8389 
8390 alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr()
8391 {
8392     return ptr;
8393 }
8394 
8395 alglib::boolean_2d_array::boolean_2d_array():ae_matrix_wrapper(alglib_impl::DT_BOOL)
8396 {
8397 }
8398 
8399 alglib::boolean_2d_array::boolean_2d_array(const alglib::boolean_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_BOOL)
8400 {
8401 }
8402 
8403 alglib::boolean_2d_array::boolean_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_BOOL)
8404 {
8405 }
8406 
8407 alglib::boolean_2d_array::~boolean_2d_array()
8408 {
8409 }
8410 
8411 const alglib::boolean_2d_array& alglib::boolean_2d_array::operator=(const alglib::boolean_2d_array &rhs)
8412 {
8413     return static_cast<const boolean_2d_array&>(assign(rhs));
8414 }
8415 
8416 const ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) const
8417 {
8418     return ptr->ptr.pp_bool[i][j];
8419 }
8420 
8421 ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j)
8422 {
8423     return ptr->ptr.pp_bool[i][j];
8424 }
8425 
8426 const ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) const
8427 {
8428     return ptr->ptr.pp_bool[i];
8429 }
8430 
8431 ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i)
8432 {
8433     return ptr->ptr.pp_bool[i];
8434 }
8435 
8436 void alglib::boolean_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent )
8437 {
8438     ae_int_t i, j;
8439 
8440     // setlength(), handle possible exception-free errors
8441     setlength(irows, icols);
8442     if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8443         return;
8444 
8445     // copy
8446     for(i=0; i<irows; i++)
8447         for(j=0; j<icols; j++)
8448             ptr->ptr.pp_bool[i][j] = pContent[i*icols+j];
8449 }
8450 
8451 #if !defined(AE_NO_EXCEPTIONS)
8452 alglib::boolean_2d_array::boolean_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_BOOL)
8453 {
8454 }
8455 
8456 std::string alglib::boolean_2d_array::tostring() const
8457 {
8458     std::string result;
8459     ae_int_t i;
8460     if( isempty() )
8461         return "[[]]";
8462     result = "[";
8463     for(i=0; i<rows(); i++)
8464     {
8465         if( i!=0 )
8466             result += ",";
8467         result += arraytostring(&operator()(i,0), cols());
8468     }
8469     result += "]";
8470     return result;
8471 }
8472 #endif
8473 
8474 alglib::integer_2d_array::integer_2d_array():ae_matrix_wrapper(alglib_impl::DT_INT)
8475 {
8476 }
8477 
8478 alglib::integer_2d_array::integer_2d_array(const alglib::integer_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_INT)
8479 {
8480 }
8481 
8482 alglib::integer_2d_array::integer_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_INT)
8483 {
8484 }
8485 
8486 alglib::integer_2d_array::~integer_2d_array()
8487 {
8488 }
8489 
8490 const alglib::integer_2d_array& alglib::integer_2d_array::operator=(const alglib::integer_2d_array &rhs)
8491 {
8492     return static_cast<const integer_2d_array&>(assign(rhs));
8493 }
8494 
8495 const alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) const
8496 {
8497     return ptr->ptr.pp_int[i][j];
8498 }
8499 
8500 alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j)
8501 {
8502     return ptr->ptr.pp_int[i][j];
8503 }
8504 
8505 const alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) const
8506 {
8507     return ptr->ptr.pp_int[i];
8508 }
8509 
8510 alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i)
8511 {
8512     return ptr->ptr.pp_int[i];
8513 }
8514 
8515 void alglib::integer_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent )
8516 {
8517     ae_int_t i, j;
8518 
8519     // setlength(), handle possible exception-free errors
8520     setlength(irows, icols);
8521     if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8522         return;
8523 
8524     // copy
8525     for(i=0; i<irows; i++)
8526         for(j=0; j<icols; j++)
8527             ptr->ptr.pp_int[i][j] = pContent[i*icols+j];
8528 }
8529 
8530 #if !defined(AE_NO_EXCEPTIONS)
8531 alglib::integer_2d_array::integer_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_INT)
8532 {
8533 }
8534 
8535 std::string alglib::integer_2d_array::tostring() const
8536 {
8537     std::string result;
8538     ae_int_t i;
8539     if( isempty() )
8540         return "[[]]";
8541     result = "[";
8542     for(i=0; i<rows(); i++)
8543     {
8544         if( i!=0 )
8545             result += ",";
8546         result += arraytostring(&operator()(i,0), cols());
8547     }
8548     result += "]";
8549     return result;
8550 }
8551 #endif
8552 
8553 alglib::real_2d_array::real_2d_array():ae_matrix_wrapper(alglib_impl::DT_REAL)
8554 {
8555 }
8556 
8557 alglib::real_2d_array::real_2d_array(const alglib::real_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_REAL)
8558 {
8559 }
8560 
8561 alglib::real_2d_array::real_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_REAL)
8562 {
8563 }
8564 
8565 alglib::real_2d_array::~real_2d_array()
8566 {
8567 }
8568 
8569 const alglib::real_2d_array& alglib::real_2d_array::operator=(const alglib::real_2d_array &rhs)
8570 {
8571     return static_cast<const real_2d_array&>(assign(rhs));
8572 }
8573 
8574 const double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) const
8575 {
8576     return ptr->ptr.pp_double[i][j];
8577 }
8578 
8579 double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j)
8580 {
8581     return ptr->ptr.pp_double[i][j];
8582 }
8583 
8584 const double* alglib::real_2d_array::operator[](ae_int_t i) const
8585 {
8586     return ptr->ptr.pp_double[i];
8587 }
8588 
8589 double* alglib::real_2d_array::operator[](ae_int_t i)
8590 {
8591     return ptr->ptr.pp_double[i];
8592 }
8593 
8594 void alglib::real_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const double *pContent )
8595 {
8596     ae_int_t i, j;
8597 
8598     // setlength(), handle possible exception-free errors
8599     setlength(irows, icols);
8600     if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8601         return;
8602 
8603     // copy
8604     for(i=0; i<irows; i++)
8605         for(j=0; j<icols; j++)
8606             ptr->ptr.pp_double[i][j] = pContent[i*icols+j];
8607 }
8608 
8609 void alglib::real_2d_array::attach_to_ptr(ae_int_t irows, ae_int_t icols, double *pContent )
8610 {
8611     jmp_buf _break_jump;
8612     alglib_impl::ae_state _state;
8613     alglib_impl::x_matrix x;
8614     alglib_impl::ae_state_init(&_state);
8615     if( setjmp(_break_jump) )
8616     {
8617 #if !defined(AE_NO_EXCEPTIONS)
8618         _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8619 #else
8620         ptr = NULL;
8621         is_frozen_proxy = false;
8622         _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8623         return;
8624 #endif
8625     }
8626     alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8627     alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
8628     alglib_impl::ae_assert(irows>0&&icols>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
8629     x.rows = irows;
8630     x.cols = icols;
8631     x.stride = icols;
8632     x.datatype = alglib_impl::DT_REAL;
8633     x.owner = alglib_impl::OWN_CALLER;
8634     x.last_action = alglib_impl::ACT_UNCHANGED;
8635     x.x_ptr.p_ptr = pContent;
8636     attach_to(&x, &_state);
8637     ae_state_clear(&_state);
8638 }
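
/*
 * Usage sketch (illustrative only): a row-major caller buffer can be
 * viewed as a matrix without copying; the stride is set to icols above.
 *
 *     double buf[6] = {1, 2, 3,
 *                      4, 5, 6};
 *     alglib::real_2d_array m;
 *     m.attach_to_ptr(2, 3, buf);
 *     double v = m(1, 2);       // reads buf[5], i.e. 6.0
 */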
8639 
8640 #if !defined(AE_NO_EXCEPTIONS)
8641 alglib::real_2d_array::real_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_REAL)
8642 {
8643 }
8644 
8645 std::string alglib::real_2d_array::tostring(int dps) const
8646 {
8647     std::string result;
8648     ae_int_t i;
8649     if( isempty() )
8650         return "[[]]";
8651     result = "[";
8652     for(i=0; i<rows(); i++)
8653     {
8654         if( i!=0 )
8655             result += ",";
8656         result += arraytostring(&operator()(i,0), cols(), dps);
8657     }
8658     result += "]";
8659     return result;
8660 }
8661 #endif
8662 
8663 alglib::complex_2d_array::complex_2d_array():ae_matrix_wrapper(alglib_impl::DT_COMPLEX)
8664 {
8665 }
8666 
8667 alglib::complex_2d_array::complex_2d_array(const alglib::complex_2d_array &rhs):ae_matrix_wrapper(rhs,alglib_impl::DT_COMPLEX)
8668 {
8669 }
8670 
8671 alglib::complex_2d_array::complex_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_COMPLEX)
8672 {
8673 }
8674 
8675 alglib::complex_2d_array::~complex_2d_array()
8676 {
8677 }
8678 
8679 const alglib::complex_2d_array& alglib::complex_2d_array::operator=(const alglib::complex_2d_array &rhs)
8680 {
8681     return static_cast<const complex_2d_array&>(assign(rhs));
8682 }
8683 
8684 const alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) const
8685 {
8686     return *((const alglib::complex*)(ptr->ptr.pp_complex[i]+j));
8687 }
8688 
8689 alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j)
8690 {
8691     return *((alglib::complex*)(ptr->ptr.pp_complex[i]+j));
8692 }
8693 
8694 const alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) const
8695 {
8696     return (const alglib::complex*)(ptr->ptr.pp_complex[i]);
8697 }
8698 
8699 alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i)
8700 {
8701     return (alglib::complex*)(ptr->ptr.pp_complex[i]);
8702 }
8703 
8704 void alglib::complex_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent )
8705 {
8706     ae_int_t i, j;
8707 
8708     // setlength(), handle possible exception-free errors
8709     setlength(irows, icols);
8710     if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8711         return;
8712 
8713     // copy
8714     for(i=0; i<irows; i++)
8715         for(j=0; j<icols; j++)
8716         {
8717             ptr->ptr.pp_complex[i][j].x = pContent[i*icols+j].x;
8718             ptr->ptr.pp_complex[i][j].y = pContent[i*icols+j].y;
8719         }
8720 }
8721 
8722 #if !defined(AE_NO_EXCEPTIONS)
8723 alglib::complex_2d_array::complex_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_COMPLEX)
8724 {
8725 }
8726 
8727 std::string alglib::complex_2d_array::tostring(int dps) const
8728 {
8729     std::string result;
8730     ae_int_t i;
8731     if( isempty() )
8732         return "[[]]";
8733     result = "[";
8734     for(i=0; i<rows(); i++)
8735     {
8736         if( i!=0 )
8737             result += ",";
8738         result += arraytostring(&operator()(i,0), cols(), dps);
8739     }
8740     result += "]";
8741     return result;
8742 }
8743 #endif
8744 
8745 /********************************************************************
8746 Internal functions
8747 ********************************************************************/
8748 double alglib::get_aenv_nan()
8749 {
8750     double r;
8751     alglib_impl::ae_state _alglib_env_state;
8752     alglib_impl::ae_state_init(&_alglib_env_state);
8753     r = _alglib_env_state.v_nan;
8754     alglib_impl::ae_state_clear(&_alglib_env_state);
8755     return r;
8756 }
8757 
8758 double alglib::get_aenv_posinf()
8759 {
8760     double r;
8761     alglib_impl::ae_state _alglib_env_state;
8762     alglib_impl::ae_state_init(&_alglib_env_state);
8763     r = _alglib_env_state.v_posinf;
8764     alglib_impl::ae_state_clear(&_alglib_env_state);
8765     return r;
8766 }
8767 
8768 double alglib::get_aenv_neginf()
8769 {
8770     double r;
8771     alglib_impl::ae_state _alglib_env_state;
8772     alglib_impl::ae_state_init(&_alglib_env_state);
8773     r = _alglib_env_state.v_neginf;
8774     alglib_impl::ae_state_clear(&_alglib_env_state);
8775     return r;
8776 }
8777 
8778 alglib::ae_int_t alglib::my_stricmp(const char *s1, const char *s2)
8779 {
8780     int c1, c2;
8781 
8782     //
8783     // handle special cases
8784     //
8785     if(s1==NULL && s2!=NULL)
8786         return -1;
8787     if(s1!=NULL && s2==NULL)
8788         return +1;
8789     if(s1==NULL && s2==NULL)
8790         return 0;
8791 
8792     //
8793     // compare
8794     //
8795     for (;;)
8796     {
8797         c1 = *s1;
8798         c2 = *s2;
8799         s1++;
8800         s2++;
8801         if( c1==0 )
8802             return c2==0 ? 0 : -1;
8803         if( c2==0 )
8804             return c1==0 ? 0 : +1;
8805         c1 = tolower(c1);
8806         c2 = tolower(c2);
8807         if( c1<c2 )
8808             return -1;
8809         if( c1>c2 )
8810             return +1;
8811     }
8812 }
8813 
8814 #if !defined(AE_NO_EXCEPTIONS)
8815 //
8816 // This function filters out all spaces from the string.
8817 // It returns string allocated with ae_malloc().
8818 // On allocation failure returns NULL.
8819 //
8820 char* alglib::filter_spaces(const char *s)
8821 {
8822     size_t i, n;
8823     char *r;
8824     char *r0;
8825     n = strlen(s);
8826     r = (char*)alglib_impl::ae_malloc(n+1,NULL);
8827     if( r==NULL )
8828         return r;
8829     for(i=0,r0=r; i<=n; i++,s++)
8830         if( !isspace(*s) )
8831         {
8832             *r0 = *s;
8833             r0++;
8834         }
8835     return r;
8836 }
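
/*
 * Example (informal): filter_spaces(" [1, 2,\t3] ") yields "[1,2,3]";
 * the caller is responsible for releasing the result with
 * alglib_impl::ae_free().
 */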
8837 
8838 void alglib::str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec)
8839 {
8840     //
8841     // parse beginning of the string.
8842     // try to handle "[]" string
8843     //
8844     p_vec->clear();
8845     if( *src!='[' )
8846         _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
8847     src++;
8848     if( *src==']' )
8849         return;
8850     p_vec->push_back(src);
8851     for(;;)
8852     {
8853         if( *src==0 )
8854             _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
8855         if( *src==']' )
8856         {
8857             if( src[1]==0 || !match_head_only)
8858                 return;
8859             _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
8860         }
8861         if( *src==',' )
8862         {
8863             p_vec->push_back(src+1);
8864             src++;
8865             continue;
8866         }
8867         src++;
8868     }
8869 }
8870 
8871 void alglib::str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat)
8872 {
8873     p_mat->clear();
8874 
8875     //
8876     // Try to handle "[[]]" string
8877     //
8878     if( strcmp(src, "[[]]")==0 )
8879         return;
8880 
8881     //
8882     // Parse non-empty string
8883     //
8884     if( *src!='[' )
8885         _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8886     src++;
8887     for(;;)
8888     {
8889         p_mat->push_back(std::vector<const char*>());
8890         str_vector_create(src, false, &p_mat->back());
8891         if( p_mat->back().size()==0 || p_mat->back().size()!=(*p_mat)[0].size() )
8892             _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8893         src = strchr(src, ']');
8894         if( src==NULL )
8895             _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8896         src++;
8897         if( *src==',' )
8898         {
8899             src++;
8900             continue;
8901         }
8902         if( *src==']' )
8903             break;
8904         _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8905     }
8906     src++;
8907     if( *src!=0 )
8908         _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
8909 }
8910 
8911 ae_bool alglib::parse_bool_delim(const char *s, const char *delim)
8912 {
8913     const char *p;
8914     char buf[8];
8915 
8916     // try to parse false
8917     p = "false";
8918     memset(buf, 0, sizeof(buf));
8919     strncpy(buf, s, strlen(p));
8920     if( my_stricmp(buf, p)==0 )
8921     {
8922         if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
8923             _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8924         return ae_false;
8925     }
8926 
8927     // try to parse true
8928     p = "true";
8929     memset(buf, 0, sizeof(buf));
8930     strncpy(buf, s, strlen(p));
8931     if( my_stricmp(buf, p)==0 )
8932     {
8933         if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
8934             _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8935         return ae_true;
8936     }
8937 
8938     // error
8939     _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8940 }
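
/*
 * Example (informal): parse_bool_delim("True]", "]") returns ae_true;
 * the token is matched case-insensitively and must be followed by one of
 * the delimiter characters.
 */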
8941 
8942 alglib::ae_int_t alglib::parse_int_delim(const char *s, const char *delim)
8943 {
8944     const char *p;
8945     long long_val;
8946     volatile ae_int_t ae_val;
8947 
8948     p = s;
8949 
8950     //
8951     // check string structure:
8952     // * leading sign
8953     // * at least one digit
8954     // * delimiter
8955     //
8956     if( *s=='-' || *s=='+' )
8957         s++;
8958     if( *s==0 || strchr("1234567890",*s)==NULL)
8959         _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8960     while( *s!=0 && strchr("1234567890",*s)!=NULL )
8961         s++;
8962     if( *s==0 || strchr(delim,*s)==NULL )
8963         _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8964 
8965     // convert and ensure that value fits into ae_int_t
8966     s = p;
8967     long_val = atol(s);
8968     ae_val = long_val;
8969     if( ae_val!=long_val )
8970         _ALGLIB_CPP_EXCEPTION("Cannot parse value");
8971     return ae_val;
8972 }
8973 
8974 bool alglib::_parse_real_delim(const char *s, const char *delim, double *result, const char **new_s)
8975 {
8976     const char *p;
8977     char *t;
8978     bool has_digits;
8979     char buf[64];
8980     int  isign;
8981     lconv *loc;
8982 
8983     p = s;
8984 
8985     //
8986     // check string structure and decide what to do
8987     //
8988     isign = 1;
8989     if( *s=='-' || *s=='+' )
8990     {
8991         isign = *s=='-' ? -1 : +1;
8992         s++;
8993     }
8994     memset(buf, 0, sizeof(buf));
8995     strncpy(buf, s, 3);
8996     if( my_stricmp(buf,"nan")!=0 && my_stricmp(buf,"inf")!=0 )
8997     {
8998         //
8999         // [sign] [ddd] [.] [ddd] [e|E[sign]ddd]
9000         //
9001         has_digits = false;
9002         if( *s!=0 && strchr("1234567890",*s)!=NULL )
9003         {
9004             has_digits = true;
9005             while( *s!=0 && strchr("1234567890",*s)!=NULL )
9006                 s++;
9007         }
9008         if( *s=='.' )
9009             s++;
9010         if( *s!=0 && strchr("1234567890",*s)!=NULL )
9011         {
9012             has_digits = true;
9013             while( *s!=0 && strchr("1234567890",*s)!=NULL )
9014                 s++;
9015         }
9016         if (!has_digits )
9017             return false;
9018         if( *s=='e' || *s=='E' )
9019         {
9020             s++;
9021             if( *s=='-' || *s=='+' )
9022                 s++;
9023             if( *s==0 || strchr("1234567890",*s)==NULL )
9024                 return false;
9025             while( *s!=0 && strchr("1234567890",*s)!=NULL )
9026                 s++;
9027         }
9028         if( *s==0 || strchr(delim,*s)==NULL )
9029             return false;
9030         *new_s = s;
9031 
9032         //
9033         // finite value conversion
9034         //
9035         if( *new_s-p>=(int)sizeof(buf) )
9036             return false;
9037         strncpy(buf, p, (size_t)(*new_s-p));
9038         buf[*new_s-p] = 0;
9039         loc = localeconv();
9040         t = strchr(buf,'.');
9041         if( t!=NULL )
9042             *t = *loc->decimal_point;
9043         *result = atof(buf);
9044         return true;
9045     }
9046     else
9047     {
9048         //
9049         // check delimiter and update *new_s
9050         //
9051         s += 3;
9052         if( *s==0 || strchr(delim,*s)==NULL )
9053             return false;
9054         *new_s = s;
9055 
9056         //
9057         // NAN, INF conversion
9058         //
9059         if( my_stricmp(buf,"nan")==0 )
9060             *result = fp_nan;
9061         if( my_stricmp(buf,"inf")==0 )
9062             *result = isign>0 ? fp_posinf : fp_neginf;
9063         return true;
9064     }
9065 }
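
/*
 * Note (informal): _parse_real_delim() accepts the usual fixed and
 * exponent forms plus case-insensitive "nan" and "[+|-]inf", and converts
 * finite values with the current locale's decimal point substituted for
 * '.', e.g. "-1.5e-3," with delim "," yields -0.0015.
 */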
9066 
9067 double alglib::parse_real_delim(const char *s, const char *delim)
9068 {
9069     double result;
9070     const char *new_s;
9071     if( !_parse_real_delim(s, delim, &result, &new_s) )
9072         _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9073     return result;
9074 }
9075 
9076 alglib::complex alglib::parse_complex_delim(const char *s, const char *delim)
9077 {
9078     double d_result;
9079     const char *new_s;
9080     alglib::complex c_result;
9081 
9082     // parse as real value
9083     if( _parse_real_delim(s, delim, &d_result, &new_s) )
9084         return d_result;
9085 
9086     // parse as "a+bi" or "a-bi"
9087     if( _parse_real_delim(s, "+-", &c_result.x, &new_s) )
9088     {
9089         s = new_s;
9090         if( !_parse_real_delim(s, "i", &c_result.y, &new_s) )
9091             _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9092         s = new_s+1;
9093         if( *s==0 || strchr(delim,*s)==NULL )
9094             _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9095         return c_result;
9096     }
9097 
9098     // parse as complex value "bi+a" or "bi-a"
9099     if( _parse_real_delim(s, "i", &c_result.y, &new_s) )
9100     {
9101         s = new_s+1;
9102         if( *s==0 )
9103             _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9104         if( strchr(delim,*s)!=NULL )
9105         {
9106             c_result.x = 0;
9107             return c_result;
9108         }
9109         if( strchr("+-",*s)!=NULL )
9110         {
9111             if( !_parse_real_delim(s, delim, &c_result.x, &new_s) )
9112                 _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9113             return c_result;
9114         }
9115         _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9116     }
9117 
9118     // error
9119     _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9120 }
9121 
9122 std::string alglib::arraytostring(const bool *ptr, ae_int_t n)
9123 {
9124     std::string result;
9125     ae_int_t i;
9126     result = "[";
9127     for(i=0; i<n; i++)
9128     {
9129         if( i!=0 )
9130             result += ",";
9131         result += ptr[i] ? "true" : "false";
9132     }
9133     result += "]";
9134     return result;
9135 }
9136 
9137 std::string alglib::arraytostring(const ae_int_t *ptr, ae_int_t n)
9138 {
9139     std::string result;
9140     ae_int_t i;
9141     char buf[64];
9142     result = "[";
9143     for(i=0; i<n; i++)
9144     {
9145         if( sprintf(buf, i==0 ? "%ld" : ",%ld", long(ptr[i]))>=(int)sizeof(buf) )
9146             _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9147         result += buf;
9148     }
9149     result += "]";
9150     return result;
9151 }
9152 
9153 std::string alglib::arraytostring(const double *ptr, ae_int_t n, int _dps)
9154 {
9155     std::string result;
9156     ae_int_t i;
9157     char buf[64];
9158     char mask1[64];
9159     char mask2[80];
9160     int dps = _dps>=0 ? _dps : -_dps;
9161     dps = dps<=50 ? dps : 50;
9162     result = "[";
9163     if( sprintf(mask1, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask1) )
9164         _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9165     if( sprintf(mask2, ",%s", mask1)>=(int)sizeof(mask2) )
9166         _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9167     for(i=0; i<n; i++)
9168     {
9169         buf[0] = 0;
9170         if( fp_isfinite(ptr[i]) )
9171         {
9172             if( sprintf(buf, i==0 ? mask1 : mask2, double(ptr[i]))>=(int)sizeof(buf) )
9173                 _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9174         }
9175         else if( fp_isnan(ptr[i]) )
9176             strcpy(buf, i==0 ?  "NAN" :  ",NAN");
9177         else if( fp_isposinf(ptr[i]) )
9178             strcpy(buf, i==0 ? "+INF" : ",+INF");
9179         else if( fp_isneginf(ptr[i]) )
9180             strcpy(buf, i==0 ? "-INF" : ",-INF");
9181         result += buf;
9182     }
9183     result += "]";
9184     return result;
9185 }
9186 
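/*
 * Formatting sketch (illustrative only): a non-negative dps selects fixed
 * notation with dps digits, a negative dps selects exponential notation with
 * |dps| digits (capped at 50); exact exponent width is platform-dependent.
 *
 *     double v[3] = {1.5, -2.0, 0.25};
 *     std::string s1 = alglib::arraytostring(v, 3,  2);  // "[1.50,-2.00,0.25]"
 *     std::string s2 = alglib::arraytostring(v, 3, -2);  // "[1.50e+00,-2.00e+00,2.50e-01]" (typical)
 */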
9187 std::string alglib::arraytostring(const alglib::complex *ptr, ae_int_t n, int dps)
9188 {
9189     std::string result;
9190     ae_int_t i;
9191     result = "[";
9192     for(i=0; i<n; i++)
9193     {
9194         if( i!=0 )
9195             result += ",";
9196         result += ptr[i].tostring(dps);
9197     }
9198     result += "]";
9199     return result;
9200 }
9201 #endif
9202 
9203 
9204 /********************************************************************
9205 standard functions
9206 ********************************************************************/
9207 int alglib::sign(double x)
9208 {
9209     if( x>0 ) return  1;
9210     if( x<0 ) return -1;
9211     return 0;
9212 }
9213 
9214 double alglib::randomreal()
9215 {
9216     int i1 = rand();
9217     int i2 = rand();
9218     double mx = (double)(RAND_MAX)+1.0;
9219     volatile double tmp0 = i2/mx;
9220     volatile double tmp1 = i1+tmp0;
9221     return tmp1/mx;
9222 }
9223 
9224 alglib::ae_int_t alglib::randominteger(alglib::ae_int_t maxv)
9225 {
9226     return ((alglib::ae_int_t)rand())%maxv;
9227 }
9228 
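/*
 * Sketch (illustrative only): randomreal() returns a value in [0,1),
 * randominteger(maxv) returns an integer in [0,maxv).
 *
 *     double u = alglib::randomreal();               // 0 <= u < 1
 *     alglib::ae_int_t k = alglib::randominteger(6); // k in 0..5
 */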
9229 int alglib::round(double x)
9230 { return int(floor(x+0.5)); }
9231 
9232 int alglib::trunc(double x)
9233 { return int(x>0 ? floor(x) : ceil(x)); }
9234 
9235 int alglib::ifloor(double x)
9236 { return int(floor(x)); }
9237 
9238 int alglib::iceil(double x)
9239 { return int(ceil(x)); }
9240 
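/*
 * Rounding sketch (illustrative only): round() uses floor(x+0.5), trunc()
 * rounds toward zero, ifloor()/iceil() round down/up.
 *
 *     alglib::round(2.5)    // ==  3
 *     alglib::round(-2.5)   // == -2  (floor(-2.0))
 *     alglib::trunc(-2.7)   // == -2
 *     alglib::ifloor(-2.7)  // == -3
 *     alglib::iceil(2.1)    // ==  3
 */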
9241 double alglib::pi()
9242 { return 3.14159265358979323846; }
9243 
9244 double alglib::sqr(double x)
9245 { return x*x; }
9246 
9247 int alglib::maxint(int m1, int m2)
9248 {
9249     return m1>m2 ? m1 : m2;
9250 }
9251 
9252 int alglib::minint(int m1, int m2)
9253 {
9254     return m1>m2 ? m2 : m1;
9255 }
9256 
9257 double alglib::maxreal(double m1, double m2)
9258 {
9259     return m1>m2 ? m1 : m2;
9260 }
9261 
9262 double alglib::minreal(double m1, double m2)
9263 {
9264     return m1>m2 ? m2 : m1;
9265 }
9266 
9267 bool alglib::fp_eq(double v1, double v2)
9268 {
9269     // IEEE-strict floating point comparison
9270     volatile double x = v1;
9271     volatile double y = v2;
9272     return x==y;
9273 }
9274 
9275 bool alglib::fp_neq(double v1, double v2)
9276 {
9277     // IEEE-strict floating point comparison
9278     return !fp_eq(v1,v2);
9279 }
9280 
9281 bool alglib::fp_less(double v1, double v2)
9282 {
9283     // IEEE-strict floating point comparison
9284     volatile double x = v1;
9285     volatile double y = v2;
9286     return x<y;
9287 }
9288 
9289 bool alglib::fp_less_eq(double v1, double v2)
9290 {
9291     // IEEE-strict floating point comparison
9292     volatile double x = v1;
9293     volatile double y = v2;
9294     return x<=y;
9295 }
9296 
9297 bool alglib::fp_greater(double v1, double v2)
9298 {
9299     // IEEE-strict floating point comparison
9300     volatile double x = v1;
9301     volatile double y = v2;
9302     return x>y;
9303 }
9304 
9305 bool alglib::fp_greater_eq(double v1, double v2)
9306 {
9307     // IEEE-strict floating point comparison
9308     volatile double x = v1;
9309     volatile double y = v2;
9310     return x>=y;
9311 }
9312 
9313 bool alglib::fp_isnan(double x)
9314 {
9315     return alglib_impl::ae_isnan_stateless(x,endianness);
9316 }
9317 
9318 bool alglib::fp_isposinf(double x)
9319 {
9320     return alglib_impl::ae_isposinf_stateless(x,endianness);
9321 }
9322 
9323 bool alglib::fp_isneginf(double x)
9324 {
9325     return alglib_impl::ae_isneginf_stateless(x,endianness);
9326 }
9327 
9328 bool alglib::fp_isinf(double x)
9329 {
9330     return alglib_impl::ae_isinf_stateless(x,endianness);
9331 }
9332 
9333 bool alglib::fp_isfinite(double x)
9334 {
9335     return alglib_impl::ae_isfinite_stateless(x,endianness);
9336 }
9337 
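/*
 * Comparison sketch (illustrative only): the volatile locals force the
 * comparison to be performed on values rounded to double precision, which is
 * what "IEEE-strict" means here. NANs never compare equal, so use fp_isnan().
 *
 *     double nanv = std::numeric_limits<double>::quiet_NaN();
 *     alglib::fp_eq(nanv, nanv);     // false
 *     alglib::fp_isnan(nanv);        // true
 *     alglib::fp_isfinite(1.0/3.0);  // true
 */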
9338 /********************************************************************
9339 CSV functions
9340 ********************************************************************/
9341 #if !defined(AE_NO_EXCEPTIONS)
9342 void alglib::read_csv(const char *filename, char separator, int flags, alglib::real_2d_array &out)
9343 {
9344     int flag;
9345 
9346     //
9347     // Parameters
9348     //
9349     bool skip_first_row = (flags&CSV_SKIP_HEADERS)!=0;
9350 
9351     //
9352     // Prepare empty output array
9353     //
9354     out.setlength(0,0);
9355 
9356     //
9357     // Open file, determine size, read contents
9358     //
9359     FILE *f_in = fopen(filename, "rb");
9360     if( f_in==NULL )
9361         _ALGLIB_CPP_EXCEPTION("read_csv: unable to open input file");
9362     flag = fseek(f_in, 0, SEEK_END);
9363     AE_CRITICAL_ASSERT(flag==0);
9364     long int _filesize = ftell(f_in);
9365     AE_CRITICAL_ASSERT(_filesize>=0);
9366     if( _filesize==0 )
9367     {
9368         // empty file, return empty array, success
9369         fclose(f_in);
9370         return;
9371     }
9372     size_t filesize = _filesize;
9373     std::vector<char> v_buf;
9374     v_buf.resize(filesize+2, 0);
9375     char *p_buf = &v_buf[0];
9376     flag = fseek(f_in, 0, SEEK_SET);
9377     AE_CRITICAL_ASSERT(flag==0);
9378     size_t bytes_read = fread ((void*)p_buf, 1, filesize, f_in);
9379     AE_CRITICAL_ASSERT(bytes_read==filesize);
9380     fclose(f_in);
9381 
9382     //
9383     // Normalize file contents:
9384     // * replace 0x0 by spaces
9385     // * remove trailing spaces and newlines
9386     // * append trailing '\n' and '\0' characters
9387     // Return if file contains only spaces/newlines.
9388     //
9389     for(size_t i=0; i<filesize; i++)
9390         if( p_buf[i]==0 )
9391             p_buf[i] = ' ';
9392     for(; filesize>0; )
9393     {
9394         char c = p_buf[filesize-1];
9395         if( c==' ' || c=='\t' || c=='\n' || c=='\r' )
9396         {
9397             filesize--;
9398             continue;
9399         }
9400         break;
9401     }
9402     if( filesize==0 )
9403         return;
9404     p_buf[filesize+0] = '\n';
9405     p_buf[filesize+1] = '\0';
9406     filesize+=2;
9407 
9408     //
9409     // Scan dataset.
9410     //
9411     size_t rows_count = 0, cols_count = 0, max_length = 0;
9412     std::vector<size_t> offsets, lengths;
9413     for(size_t row_start=0; p_buf[row_start]!=0x0; )
9414     {
9415         // determine row length
9416         size_t row_length;
9417         for(row_length=0; p_buf[row_start+row_length]!='\n'; row_length++);
9418 
9419         // determine cols count, perform integrity check
9420         size_t cur_cols_cnt=1;
9421         for(size_t idx=0; idx<row_length; idx++)
9422             if( p_buf[row_start+idx]==separator )
9423                 cur_cols_cnt++;
9424         if( cols_count>0 && cols_count!=cur_cols_cnt )
9425             _ALGLIB_CPP_EXCEPTION("read_csv: non-rectangular contents, rows have different sizes");
9426         cols_count = cur_cols_cnt;
9427 
9428         // store offsets and lengths of the fields
9429         size_t cur_offs = 0;
9430         for(size_t idx=0; idx<row_length+1; idx++)
9431             if( p_buf[row_start+idx]==separator || p_buf[row_start+idx]=='\n' )
9432             {
9433                 offsets.push_back(row_start+cur_offs);
9434                 lengths.push_back(idx-cur_offs);
9435                 max_length = idx-cur_offs>max_length ? idx-cur_offs : max_length;
9436                 cur_offs = idx+1;
9437             }
9438 
9439         // advance row start
9440         rows_count++;
9441         row_start = row_start+row_length+1;
9442     }
9443     AE_CRITICAL_ASSERT(rows_count>=1);
9444     AE_CRITICAL_ASSERT(cols_count>=1);
9445     AE_CRITICAL_ASSERT(cols_count*rows_count==offsets.size());
9446     AE_CRITICAL_ASSERT(cols_count*rows_count==lengths.size());
9447     if( rows_count==1 && skip_first_row ) // empty output, return
9448         return;
9449 
9450     //
9451     // Convert
9452     //
9453     size_t row0 = skip_first_row ? 1 : 0;
9454     size_t row1 = rows_count;
9455     lconv *loc  = localeconv();
9456     out.setlength(row1-row0, cols_count);
9457     for(size_t ridx=row0; ridx<row1; ridx++)
9458         for(size_t cidx=0; cidx<cols_count; cidx++)
9459         {
9460             char *p_field = p_buf+offsets[ridx*cols_count+cidx];
9461             size_t       field_len = lengths[ridx*cols_count+cidx];
9462             for(size_t idx=0; idx<field_len; idx++)
9463                 if( p_field[idx]=='.' || p_field[idx]==',' )
9464                     p_field[idx] = *loc->decimal_point;
9465             out[ridx-row0][cidx] = atof(p_field);
9466         }
9467 }
9468 #endif
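/*
 * Usage sketch (illustrative only; "data.csv" is a placeholder path):
 *
 *     alglib::real_2d_array a;
 *     alglib::read_csv("data.csv", ',', alglib::CSV_SKIP_HEADERS, a);
 *     // a.rows() x a.cols() now holds the parsed values;
 *     // pass 0 as flags to keep the header row.
 */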
9469 
9470 
9471 
9472 /********************************************************************
9473 Trace functions
9474 ********************************************************************/
9475 void alglib::trace_file(std::string tags, std::string filename)
9476 {
9477     alglib_impl::ae_trace_file(tags.c_str(), filename.c_str());
9478 }
9479 
9480 void alglib::trace_disable()
9481 {
9482     alglib_impl::ae_trace_disable();
9483 }
9484 
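/*
 * Usage sketch (illustrative only; the tag string and file name below are
 * placeholders, see the ALGLIB documentation for the tags actually recognized):
 *
 *     alglib::trace_file("SOME.TAGS", "alglib_trace.log");  // start tracing to a file
 *     // ... run solvers ...
 *     alglib::trace_disable();                              // stop tracing
 */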
9485 
9486 
9487 /////////////////////////////////////////////////////////////////////////
9488 //
9489 // THIS SECTION CONTAINS OPTIMIZED LINEAR ALGEBRA CODE
9490 // IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES
9491 //
9492 /////////////////////////////////////////////////////////////////////////
9493 #if defined(_ALGLIB_HAS_SSE2_INTRINSICS)
9494 #include "kernels_sse2.h"
9495 #endif
9496 #if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
9497 #include "kernels_avx2.h"
9498 #endif
9499 #if defined(_ALGLIB_HAS_FMA_INTRINSICS)
9500 #include "kernels_fma.h"
9501 #endif
9502 namespace alglib_impl
9503 {
9504 #define alglib_simd_alignment 16
9505 
9506 #define alglib_r_block        32
9507 #define alglib_half_r_block   16
9508 #define alglib_twice_r_block  64
9509 
9510 #define alglib_c_block        16
9511 #define alglib_half_c_block    8
9512 #define alglib_twice_c_block  32
9513 
9514 
9515 
9516 
9517 /********************************************************************
9518 This subroutine calculates fast 32x32 real matrix-vector product:
9519 
9520     y := beta*y + alpha*A*x
9521 
9522 using either generic C code or native optimizations (if available)
9523 
9524 IMPORTANT:
9525 * A must be stored in row-major order,
9526   stride is alglib_r_block,
9527   aligned on alglib_simd_alignment boundary
9528 * X must be aligned on alglib_simd_alignment boundary
9529 * Y may be non-aligned
9530 ********************************************************************/
9531 void _ialglib_mv_32(const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9532 {
9533     ae_int_t i, k;
9534     const double *pa0, *pa1, *pb;
9535 
9536     pa0 = a;
9537     pa1 = a+alglib_r_block;
9538     pb = x;
9539     for(i=0; i<16; i++)
9540     {
9541         double v0 = 0, v1 = 0;
9542         for(k=0; k<4; k++)
9543         {
9544             v0 += pa0[0]*pb[0];
9545             v1 += pa1[0]*pb[0];
9546             v0 += pa0[1]*pb[1];
9547             v1 += pa1[1]*pb[1];
9548             v0 += pa0[2]*pb[2];
9549             v1 += pa1[2]*pb[2];
9550             v0 += pa0[3]*pb[3];
9551             v1 += pa1[3]*pb[3];
9552             v0 += pa0[4]*pb[4];
9553             v1 += pa1[4]*pb[4];
9554             v0 += pa0[5]*pb[5];
9555             v1 += pa1[5]*pb[5];
9556             v0 += pa0[6]*pb[6];
9557             v1 += pa1[6]*pb[6];
9558             v0 += pa0[7]*pb[7];
9559             v1 += pa1[7]*pb[7];
9560             pa0 += 8;
9561             pa1 += 8;
9562             pb  += 8;
9563         }
9564         y[0] = beta*y[0]+alpha*v0;
9565         y[stride] = beta*y[stride]+alpha*v1;
9566 
9567         /*
9568          * now we've processed rows I and I+1,
9569          * pa0 and pa1 are pointing to rows I+1 and I+2.
9570          * move to I+2 and I+3.
9571          */
9572         pa0 += alglib_r_block;
9573         pa1 += alglib_r_block;
9574         pb = x;
9575         y+=2*stride;
9576     }
9577 }
9578 
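/*
 * Layout sketch (illustrative only): A is a 32x32 block stored row-major with
 * row stride alglib_r_block, so element (i,j) of the block is read as
 *
 *     aij = a[i*alglib_r_block + j];   // i,j in 0..31
 *
 * while output element i is y[i*stride].
 */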
9579 
9580 /*************************************************************************
9581 This function calculates MxN real matrix-vector product:
9582 
9583     y := beta*y + alpha*A*x
9584 
9585 using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
9586 
9587 If beta is zero, we do not use previous values of y (they are  overwritten
9588 by alpha*A*x without ever being read).  If alpha is zero, no matrix-vector
9589 product is calculated (only beta is updated); however, this update  is not
9590 efficient  and  this  function  should  NOT  be used for multiplication of
9591 vector and scalar.
9592 
9593 IMPORTANT:
9594 * 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
9595 * A must be stored in row-major order with stride equal to alglib_r_block
9596 *************************************************************************/
9597 void _ialglib_rmv(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9598 {
9599     /*
9600      * Handle special cases:
9601      * - alpha is zero or n is zero
9602      * - m is zero
9603      */
9604     if( m==0 )
9605         return;
9606     if( alpha==0.0 || n==0 )
9607     {
9608         ae_int_t i;
9609         if( beta==0.0 )
9610         {
9611             for(i=0; i<m; i++)
9612             {
9613                 *y = 0.0;
9614                 y += stride;
9615             }
9616         }
9617         else
9618         {
9619             for(i=0; i<m; i++)
9620             {
9621                 *y *= beta;
9622                 y += stride;
9623             }
9624         }
9625         return;
9626     }
9627 
9628     /*
9629      * Handle general case: nonzero alpha, n and m
9630      *
9631      */
9632     if( m==32 && n==32 )
9633     {
9634         /*
9635          * 32x32: maybe we have something better than the general implementation
9636          */
9637         _ialglib_mv_32(a, x, y, stride, alpha, beta);
9638     }
9639     else
9640     {
9641         ae_int_t i, k, m2, n8, n2, ntrail2;
9642         const double *pa0, *pa1, *pb;
9643 
9644         /*
9645          * The first 2*(M/2) rows of A are processed in pairs
9646          * using optimized code.
9647          */
9648         m2 = m/2;
9649         n8 = n/8;
9650         ntrail2 = (n-8*n8)/2;
9651         for(i=0; i<m2; i++)
9652         {
9653             double v0 = 0, v1 = 0;
9654 
9655             /*
9656              * 'a' points to the part of the matrix which
9657              * is not processed yet
9658              */
9659             pb = x;
9660             pa0 = a;
9661             pa1 = a+alglib_r_block;
9662             a += alglib_twice_r_block;
9663 
9664             /*
9665              * 8 elements per iteration
9666              */
9667             for(k=0; k<n8; k++)
9668             {
9669                 v0 += pa0[0]*pb[0];
9670                 v1 += pa1[0]*pb[0];
9671                 v0 += pa0[1]*pb[1];
9672                 v1 += pa1[1]*pb[1];
9673                 v0 += pa0[2]*pb[2];
9674                 v1 += pa1[2]*pb[2];
9675                 v0 += pa0[3]*pb[3];
9676                 v1 += pa1[3]*pb[3];
9677                 v0 += pa0[4]*pb[4];
9678                 v1 += pa1[4]*pb[4];
9679                 v0 += pa0[5]*pb[5];
9680                 v1 += pa1[5]*pb[5];
9681                 v0 += pa0[6]*pb[6];
9682                 v1 += pa1[6]*pb[6];
9683                 v0 += pa0[7]*pb[7];
9684                 v1 += pa1[7]*pb[7];
9685                 pa0 += 8;
9686                 pa1 += 8;
9687                 pb  += 8;
9688             }
9689 
9690             /*
9691              * 2 elements per iteration
9692              */
9693             for(k=0; k<ntrail2; k++)
9694             {
9695                 v0 += pa0[0]*pb[0];
9696                 v1 += pa1[0]*pb[0];
9697                 v0 += pa0[1]*pb[1];
9698                 v1 += pa1[1]*pb[1];
9699                 pa0 += 2;
9700                 pa1 += 2;
9701                 pb  += 2;
9702             }
9703 
9704             /*
9705              * last element, if needed
9706              */
9707             if( n%2!=0 )
9708             {
9709                 v0 += pa0[0]*pb[0];
9710                 v1 += pa1[0]*pb[0];
9711             }
9712 
9713             /*
9714              * final update
9715              */
9716             if( beta!=0 )
9717             {
9718                 y[0] = beta*y[0]+alpha*v0;
9719                 y[stride] = beta*y[stride]+alpha*v1;
9720             }
9721             else
9722             {
9723                 y[0] = alpha*v0;
9724                 y[stride] = alpha*v1;
9725             }
9726 
9727             /*
9728              * move to the next pair of elements
9729              */
9730             y+=2*stride;
9731         }
9732 
9733 
9734         /*
9735          * Last (odd) row is processed with less optimized code.
9736          */
9737         if( m%2!=0 )
9738         {
9739             double v0 = 0;
9740 
9741             /*
9742              * 'a' points to the part of the matrix which
9743              * is not processed yet
9744              */
9745             pb = x;
9746             pa0 = a;
9747 
9748             /*
9749              * 2 elements per iteration
9750              */
9751             n2 = n/2;
9752             for(k=0; k<n2; k++)
9753             {
9754                 v0 += pa0[0]*pb[0]+pa0[1]*pb[1];
9755                 pa0 += 2;
9756                 pb  += 2;
9757             }
9758 
9759             /*
9760              * last element, if needed
9761              */
9762             if( n%2!=0 )
9763                 v0 += pa0[0]*pb[0];
9764 
9765             /*
9766              * final update
9767              */
9768             if( beta!=0 )
9769                 y[0] = beta*y[0]+alpha*v0;
9770             else
9771                 y[0] = alpha*v0;
9772         }
9773     }
9774 }
9775 
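/*
 * Equivalent scalar sketch (illustrative only), up to the special handling of
 * alpha==0/n==0 (the product is then skipped and y is only scaled by beta,
 * with beta==0 writing exact zeros instead of reading y):
 *
 *     for(i=0; i<m; i++)
 *     {
 *         double v = 0;
 *         for(j=0; j<n; j++)
 *             v += a[i*alglib_r_block+j]*x[j];
 *         y[i*stride] = (beta==0.0 ? 0.0 : beta*y[i*stride]) + alpha*v;
 *     }
 */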
9776 
9777 /*************************************************************************
9778 This function calculates MxN real matrix-vector product:
9779 
9780     y := beta*y + alpha*A*x
9781 
9782 using SSE2 intrinsics.
9783 
9784 If beta is zero, we do not use previous values of y (they are  overwritten
9785 by alpha*A*x without ever being read).  If alpha is zero, no matrix-vector
9786 product is calculated (only beta is updated); however, this update  is not
9787 efficient  and  this  function  should  NOT  be used for multiplication of
9788 vector and scalar.
9789 
9790 IMPORTANT:
9791 * 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
9792 * A must be stored in row-major order with stride equal to alglib_r_block
9793 * y may be non-aligned
9794 * both A and x must have same offset with respect to 16-byte boundary:
9795   either both are aligned, or both are aligned with offset 8. Function
9796   will crash your system if you try to call it with misaligned or
9797   incorrectly aligned data.
9798 
9799 This function supports SSE2; it can be used when:
9800 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
9801 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
9802 
9803 If (1) fails, this function is left undefined. If (2) fails, a call
9804 to this function will probably crash your system.
9805 
9806 If  you  want  to  know  whether  it  is safe to call it, you should check
9807 results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
9808 and will do its work.
9809 *************************************************************************/
9810 #if defined(AE_HAS_SSE2_INTRINSICS)
9811 void _ialglib_rmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9812 {
9813     ae_int_t i, k, n2;
9814     ae_int_t mb3, mtail, nhead, nb8, nb2, ntail;
9815     const double *pa0, *pa1, *pa2, *pb;
9816     __m128d v0, v1, v2, va0, va1, va2, vx, vtmp;
9817 
9818     /*
9819      * Handle special cases:
9820      * - alpha is zero or n is zero
9821      * - m is zero
9822      */
9823     if( m==0 )
9824         return;
9825     if( alpha==0.0 || n==0 )
9826     {
9827         if( beta==0.0 )
9828         {
9829             for(i=0; i<m; i++)
9830             {
9831                 *y = 0.0;
9832                 y += stride;
9833             }
9834         }
9835         else
9836         {
9837             for(i=0; i<m; i++)
9838             {
9839                 *y *= beta;
9840                 y += stride;
9841             }
9842         }
9843         return;
9844     }
9845 
9846     /*
9847      * Handle general case: nonzero alpha, n and m
9848      *
9849      * We divide the problem as follows...
9850      *
9851      * Rows M are divided into:
9852      * - mb3 blocks, each 3xN
9853      * - mtail blocks, each 1xN
9854      *
9855      * Within a row, elements are divided into:
9856      * - nhead 1x1 blocks (used to align the rest, either 0 or 1)
9857      * - nb8 1x8 blocks, aligned to 16-byte boundary
9858      * - nb2 1x2 blocks, aligned to 16-byte boundary
9859      * - ntail 1x1 blocks, aligned too (although we don't rely on it)
9860      *
9861      */
9862     n2 = n/2;
9863     mb3 = m/3;
9864     mtail = m%3;
9865     nhead = ae_misalignment(a,alglib_simd_alignment)==0 ? 0 : 1;
9866     nb8 = (n-nhead)/8;
9867     nb2 = (n-nhead-8*nb8)/2;
9868     ntail = n-nhead-8*nb8-2*nb2;
9869     for(i=0; i<mb3; i++)
9870     {
9871         double row0, row1, row2;
9872         row0 = 0;
9873         row1 = 0;
9874         row2 = 0;
9875         pb = x;
9876         pa0 = a;
9877         pa1 = a+alglib_r_block;
9878         pa2 = a+alglib_twice_r_block;
9879         a += 3*alglib_r_block;
9880         if( nhead==1 )
9881         {
9882             vx  = _mm_load_sd(pb);
9883             v0 = _mm_load_sd(pa0);
9884             v1 = _mm_load_sd(pa1);
9885             v2 = _mm_load_sd(pa2);
9886 
9887             v0 = _mm_mul_sd(v0,vx);
9888             v1 = _mm_mul_sd(v1,vx);
9889             v2 = _mm_mul_sd(v2,vx);
9890 
9891             pa0++;
9892             pa1++;
9893             pa2++;
9894             pb++;
9895         }
9896         else
9897         {
9898             v0 = _mm_setzero_pd();
9899             v1 = _mm_setzero_pd();
9900             v2 = _mm_setzero_pd();
9901         }
9902         for(k=0; k<nb8; k++)
9903         {
9904             /*
9905              * this code is a shuffle of simultaneous dot product.
9906              * see below for commented unshuffled original version.
9907              */
9908             vx  = _mm_load_pd(pb);
9909             va0 = _mm_load_pd(pa0);
9910             va1 = _mm_load_pd(pa1);
9911             va0 = _mm_mul_pd(va0,vx);
9912             va2 = _mm_load_pd(pa2);
9913             v0 = _mm_add_pd(va0,v0);
9914             va1 = _mm_mul_pd(va1,vx);
9915             va0 = _mm_load_pd(pa0+2);
9916             v1 = _mm_add_pd(va1,v1);
9917             va2 = _mm_mul_pd(va2,vx);
9918             va1 = _mm_load_pd(pa1+2);
9919             v2 = _mm_add_pd(va2,v2);
9920             vx  = _mm_load_pd(pb+2);
9921             va0 = _mm_mul_pd(va0,vx);
9922             va2 = _mm_load_pd(pa2+2);
9923             v0 = _mm_add_pd(va0,v0);
9924             va1 = _mm_mul_pd(va1,vx);
9925             va0 = _mm_load_pd(pa0+4);
9926             v1 = _mm_add_pd(va1,v1);
9927             va2 = _mm_mul_pd(va2,vx);
9928             va1 = _mm_load_pd(pa1+4);
9929             v2 = _mm_add_pd(va2,v2);
9930             vx  = _mm_load_pd(pb+4);
9931             va0 = _mm_mul_pd(va0,vx);
9932             va2 = _mm_load_pd(pa2+4);
9933             v0 = _mm_add_pd(va0,v0);
9934             va1 = _mm_mul_pd(va1,vx);
9935             va0 = _mm_load_pd(pa0+6);
9936             v1 = _mm_add_pd(va1,v1);
9937             va2 = _mm_mul_pd(va2,vx);
9938             va1 = _mm_load_pd(pa1+6);
9939             v2 = _mm_add_pd(va2,v2);
9940             vx  = _mm_load_pd(pb+6);
9941             va0 = _mm_mul_pd(va0,vx);
9942             v0 = _mm_add_pd(va0,v0);
9943             va2 = _mm_load_pd(pa2+6);
9944             va1 = _mm_mul_pd(va1,vx);
9945             v1 = _mm_add_pd(va1,v1);
9946             va2 = _mm_mul_pd(va2,vx);
9947             v2 = _mm_add_pd(va2,v2);
9948 
9949             pa0 += 8;
9950             pa1 += 8;
9951             pa2 += 8;
9952             pb += 8;
9953 
9954             /*
9955             this is unshuffled version of code above
9956 
9957             vx  = _mm_load_pd(pb);
9958             va0 = _mm_load_pd(pa0);
9959             va1 = _mm_load_pd(pa1);
9960             va2 = _mm_load_pd(pa2);
9961 
9962             va0 = _mm_mul_pd(va0,vx);
9963             va1 = _mm_mul_pd(va1,vx);
9964             va2 = _mm_mul_pd(va2,vx);
9965 
9966             v0 = _mm_add_pd(va0,v0);
9967             v1 = _mm_add_pd(va1,v1);
9968             v2 = _mm_add_pd(va2,v2);
9969 
9970             vx  = _mm_load_pd(pb+2);
9971             va0 = _mm_load_pd(pa0+2);
9972             va1 = _mm_load_pd(pa1+2);
9973             va2 = _mm_load_pd(pa2+2);
9974 
9975             va0 = _mm_mul_pd(va0,vx);
9976             va1 = _mm_mul_pd(va1,vx);
9977             va2 = _mm_mul_pd(va2,vx);
9978 
9979             v0 = _mm_add_pd(va0,v0);
9980             v1 = _mm_add_pd(va1,v1);
9981             v2 = _mm_add_pd(va2,v2);
9982 
9983             vx  = _mm_load_pd(pb+4);
9984             va0 = _mm_load_pd(pa0+4);
9985             va1 = _mm_load_pd(pa1+4);
9986             va2 = _mm_load_pd(pa2+4);
9987 
9988             va0 = _mm_mul_pd(va0,vx);
9989             va1 = _mm_mul_pd(va1,vx);
9990             va2 = _mm_mul_pd(va2,vx);
9991 
9992             v0 = _mm_add_pd(va0,v0);
9993             v1 = _mm_add_pd(va1,v1);
9994             v2 = _mm_add_pd(va2,v2);
9995 
9996             vx  = _mm_load_pd(pb+6);
9997             va0 = _mm_load_pd(pa0+6);
9998             va1 = _mm_load_pd(pa1+6);
9999             va2 = _mm_load_pd(pa2+6);
10000 
10001             va0 = _mm_mul_pd(va0,vx);
10002             va1 = _mm_mul_pd(va1,vx);
10003             va2 = _mm_mul_pd(va2,vx);
10004 
10005             v0 = _mm_add_pd(va0,v0);
10006             v1 = _mm_add_pd(va1,v1);
10007             v2 = _mm_add_pd(va2,v2);
10008             */
10009         }
10010         for(k=0; k<nb2; k++)
10011         {
10012             vx  = _mm_load_pd(pb);
10013             va0 = _mm_load_pd(pa0);
10014             va1 = _mm_load_pd(pa1);
10015             va2 = _mm_load_pd(pa2);
10016 
10017             va0 = _mm_mul_pd(va0,vx);
10018             v0 = _mm_add_pd(va0,v0);
10019             va1 = _mm_mul_pd(va1,vx);
10020             v1 = _mm_add_pd(va1,v1);
10021             va2 = _mm_mul_pd(va2,vx);
10022             v2 = _mm_add_pd(va2,v2);
10023 
10024             pa0 += 2;
10025             pa1 += 2;
10026             pa2 += 2;
10027             pb += 2;
10028         }
10029         for(k=0; k<ntail; k++)
10030         {
10031             vx  = _mm_load1_pd(pb);
10032             va0 = _mm_load1_pd(pa0);
10033             va1 = _mm_load1_pd(pa1);
10034             va2 = _mm_load1_pd(pa2);
10035 
10036             va0 = _mm_mul_sd(va0,vx);
10037             v0 = _mm_add_sd(v0,va0);
10038             va1 = _mm_mul_sd(va1,vx);
10039             v1 = _mm_add_sd(v1,va1);
10040             va2 = _mm_mul_sd(va2,vx);
10041             v2 = _mm_add_sd(v2,va2);
10042         }
10043         vtmp = _mm_add_pd(_mm_unpacklo_pd(v0,v1),_mm_unpackhi_pd(v0,v1));
10044         _mm_storel_pd(&row0, vtmp);
10045         _mm_storeh_pd(&row1, vtmp);
10046         v2 = _mm_add_sd(_mm_shuffle_pd(v2,v2,1),v2);
10047         _mm_storel_pd(&row2, v2);
10048         if( beta!=0 )
10049         {
10050             y[0] = beta*y[0]+alpha*row0;
10051             y[stride] = beta*y[stride]+alpha*row1;
10052             y[2*stride] = beta*y[2*stride]+alpha*row2;
10053         }
10054         else
10055         {
10056             y[0] = alpha*row0;
10057             y[stride] = alpha*row1;
10058             y[2*stride] = alpha*row2;
10059         }
10060         y+=3*stride;
10061     }
10062     for(i=0; i<mtail; i++)
10063     {
10064         double row0;
10065         row0 = 0;
10066         pb = x;
10067         pa0 = a;
10068         a += alglib_r_block;
10069         for(k=0; k<n2; k++)
10070         {
10071             row0 += pb[0]*pa0[0]+pb[1]*pa0[1];
10072             pa0 += 2;
10073             pb += 2;
10074         }
10075         if( n%2 )
10076             row0 += pb[0]*pa0[0];
10077         if( beta!=0 )
10078             y[0] = beta*y[0]+alpha*row0;
10079         else
10080             y[0] = alpha*row0;
10081         y+=stride;
10082     }
10083 }
10084 #endif
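/*
 * Dispatch sketch (illustrative only), matching the compile-time/run-time
 * checks described above (a similar pattern is used by _ialglib_rmatrixgemm
 * below):
 *
 * #ifdef AE_HAS_SSE2_INTRINSICS
 *     if( ae_cpuid()&CPU_SSE2 )
 *         _ialglib_rmv_sse2(m, n, a, x, y, stride, alpha, beta);
 *     else
 *         _ialglib_rmv(m, n, a, x, y, stride, alpha, beta);
 * #else
 *     _ialglib_rmv(m, n, a, x, y, stride, alpha, beta);
 * #endif
 */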
10085 
10086 
10087 /*************************************************************************
10088 This subroutine calculates fast MxN complex matrix-vector product:
10089 
10090     y := beta*y + alpha*A*x
10091 
10092 using generic C code, where A, x, y, alpha and beta are complex.
10093 
10094 If beta is zero, we do not use previous values of y (they are  overwritten
10095 by alpha*A*x without ever being read). However, when  alpha  is  zero,  we
10096 still calculate A*x and  multiply  it  by  alpha  (this distinction can be
10097 important when A or x contain infinities/NANs).
10098 
10099 IMPORTANT:
10100 * 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
10101 * A must be stored in row-major order, as sequence of double precision
10102   pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
10103   in doubles).
10104 * Y may be referenced by cy (pointer to ae_complex) or
10105   dy (pointer to an array of double precision pairs), depending on which type of
10106   output you want. Pass the pointer to Y via one of these parameters,
10107   AND SET THE OTHER PARAMETER TO NULL.
10108 * both A and x must be aligned; y may be non-aligned.
10109 *************************************************************************/
10110 void _ialglib_cmv(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
10111 {
10112     ae_int_t i, j;
10113     const double *pa, *parow, *pb;
10114 
10115     parow = a;
10116     for(i=0; i<m; i++)
10117     {
10118         double v0 = 0, v1 = 0;
10119         pa = parow;
10120         pb = x;
10121         for(j=0; j<n; j++)
10122         {
10123             v0 += pa[0]*pb[0];
10124             v1 += pa[0]*pb[1];
10125             v0 -= pa[1]*pb[1];
10126             v1 += pa[1]*pb[0];
10127 
10128             pa  += 2;
10129             pb  += 2;
10130         }
10131         if( cy!=NULL )
10132         {
10133             double tx = (beta.x*cy->x-beta.y*cy->y)+(alpha.x*v0-alpha.y*v1);
10134             double ty = (beta.x*cy->y+beta.y*cy->x)+(alpha.x*v1+alpha.y*v0);
10135             cy->x = tx;
10136             cy->y = ty;
10137             cy+=stride;
10138         }
10139         else
10140         {
10141             double tx = (beta.x*dy[0]-beta.y*dy[1])+(alpha.x*v0-alpha.y*v1);
10142             double ty = (beta.x*dy[1]+beta.y*dy[0])+(alpha.x*v1+alpha.y*v0);
10143             dy[0] = tx;
10144             dy[1] = ty;
10145             dy += 2*stride;
10146         }
10147         parow += 2*alglib_c_block;
10148     }
10149 }
10150 
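/*
 * The four multiply-adds in the inner loop above accumulate the complex dot
 * product using
 *
 *     (ar + i*ai)*(xr + i*xi) = (ar*xr - ai*xi) + i*(ar*xi + ai*xr)
 *
 * with pa[0]=ar, pa[1]=ai, pb[0]=xr, pb[1]=xi, so v0 holds the real part and
 * v1 the imaginary part of sum_j A[i][j]*x[j]; the final update then forms
 * beta*y[i] + alpha*(v0 + i*v1) with the same expansion.
 */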
10151 
10152 /*************************************************************************
10153 This subroutine calculates fast MxN complex matrix-vector product:
10154 
10155     y := beta*y + alpha*A*x
10156 
10157 using generic C code, where A, x, y, alpha and beta are complex.
10158 
10159 If beta is zero, we do not use previous values of y (they are  overwritten
10160 by alpha*A*x without ever being read). However, when  alpha  is  zero,  we
10161 still calculate A*x and  multiply  it  by  alpha  (this distinction can be
10162 important when A or x contain infinities/NANs).
10163 
10164 IMPORTANT:
10165 * 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
10166 * A must be stored in row-major order, as sequence of double precision
10167   pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
10168   in doubles).
10169 * Y may be referenced by cy (pointer to ae_complex) or
10170   dy (pointer to an array of double precision pairs), depending on which type of
10171   output you want. Pass the pointer to Y via one of these parameters,
10172   AND SET THE OTHER PARAMETER TO NULL.
10173 * both A and x must be aligned; y may be non-aligned.
10174 
10175 This function supports SSE2; it can be used when:
10176 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
10177 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
10178 
10179 If (1) fails, this function is left undefined. If (2) fails, a call
10180 to this function will probably crash your system.
10181 
10182 If  you  want  to  know  whether  it  is safe to call it, you should check
10183 results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
10184 and will do its work.
10185 *************************************************************************/
10186 #if defined(AE_HAS_SSE2_INTRINSICS)
10187 void _ialglib_cmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
10188 {
10189     ae_int_t i, j, m2;
10190     const double *pa0, *pa1, *parow, *pb;
10191     __m128d vbeta, vbetax, vbetay;
10192     __m128d valpha, valphax, valphay;
10193 
10194     m2 = m/2;
10195     parow = a;
10196     if( cy!=NULL )
10197     {
10198         dy = (double*)cy;
10199         cy = NULL;
10200     }
10201     vbeta = _mm_loadh_pd(_mm_load_sd(&beta.x),&beta.y);
10202     vbetax = _mm_unpacklo_pd(vbeta,vbeta);
10203     vbetay = _mm_unpackhi_pd(vbeta,vbeta);
10204     valpha = _mm_loadh_pd(_mm_load_sd(&alpha.x),&alpha.y);
10205     valphax = _mm_unpacklo_pd(valpha,valpha);
10206     valphay = _mm_unpackhi_pd(valpha,valpha);
10207     for(i=0; i<m2; i++)
10208     {
10209         __m128d vx, vy, vt0, vt1, vt2, vt3, vt4, vt5, vrx, vry, vtx, vty;
10210         pa0 = parow;
10211         pa1 = parow+2*alglib_c_block;
10212         pb = x;
10213         vx = _mm_setzero_pd();
10214         vy = _mm_setzero_pd();
10215         for(j=0; j<n; j++)
10216         {
10217             vt0 = _mm_load1_pd(pb);
10218             vt1 = _mm_load1_pd(pb+1);
10219             vt2 = _mm_load_pd(pa0);
10220             vt3 = _mm_load_pd(pa1);
10221             vt5 = _mm_unpacklo_pd(vt2,vt3);
10222             vt4 = _mm_unpackhi_pd(vt2,vt3);
10223             vt2 = vt5;
10224             vt3 = vt4;
10225 
10226             vt2 = _mm_mul_pd(vt2,vt0);
10227             vx = _mm_add_pd(vx,vt2);
10228             vt3 = _mm_mul_pd(vt3,vt1);
10229             vx = _mm_sub_pd(vx,vt3);
10230             vt4 = _mm_mul_pd(vt4,vt0);
10231             vy = _mm_add_pd(vy,vt4);
10232             vt5 = _mm_mul_pd(vt5,vt1);
10233             vy = _mm_add_pd(vy,vt5);
10234 
10235             pa0 += 2;
10236             pa1 += 2;
10237             pb  += 2;
10238         }
10239         if( beta.x==0.0 && beta.y==0.0 )
10240         {
10241             vrx = _mm_setzero_pd();
10242             vry = _mm_setzero_pd();
10243         }
10244         else
10245         {
10246             vtx = _mm_loadh_pd(_mm_load_sd(dy+0),dy+2*stride+0);
10247             vty = _mm_loadh_pd(_mm_load_sd(dy+1),dy+2*stride+1);
10248             vrx = _mm_sub_pd(_mm_mul_pd(vbetax,vtx),_mm_mul_pd(vbetay,vty));
10249             vry = _mm_add_pd(_mm_mul_pd(vbetax,vty),_mm_mul_pd(vbetay,vtx));
10250         }
10251         vtx = _mm_sub_pd(_mm_mul_pd(valphax,vx),_mm_mul_pd(valphay,vy));
10252         vty = _mm_add_pd(_mm_mul_pd(valphax,vy),_mm_mul_pd(valphay,vx));
10253         vrx = _mm_add_pd(vrx,vtx);
10254         vry = _mm_add_pd(vry,vty);
10255         _mm_storel_pd(dy+0,          vrx);
10256         _mm_storeh_pd(dy+2*stride+0, vrx);
10257         _mm_storel_pd(dy+1,          vry);
10258         _mm_storeh_pd(dy+2*stride+1, vry);
10259         dy += 4*stride;
10260         parow += 4*alglib_c_block;
10261     }
10262     if( m%2 )
10263     {
10264         double v0 = 0, v1 = 0;
10265         double tx, ty;
10266         pa0 = parow;
10267         pb = x;
10268         for(j=0; j<n; j++)
10269         {
10270             v0 += pa0[0]*pb[0];
10271             v1 += pa0[0]*pb[1];
10272             v0 -= pa0[1]*pb[1];
10273             v1 += pa0[1]*pb[0];
10274 
10275             pa0 += 2;
10276             pb  += 2;
10277         }
10278         if( beta.x==0.0 && beta.y==0.0 )
10279         {
10280             tx = 0.0;
10281             ty = 0.0;
10282         }
10283         else
10284         {
10285             tx = beta.x*dy[0]-beta.y*dy[1];
10286             ty = beta.x*dy[1]+beta.y*dy[0];
10287         }
10288         tx += alpha.x*v0-alpha.y*v1;
10289         ty += alpha.x*v1+alpha.y*v0;
10290         dy[0] = tx;
10291         dy[1] = ty;
10292         dy += 2*stride;
10293         parow += 2*alglib_c_block;
10294     }
10295 }
10296 #endif
10297 
10298 /********************************************************************
10299 This subroutine sets vector to zero
10300 ********************************************************************/
10301 void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride)
10302 {
10303     ae_int_t i;
10304     if( stride==1 )
10305     {
10306         for(i=0; i<n; i++,p++)
10307             *p = 0.0;
10308     }
10309     else
10310     {
10311         for(i=0; i<n; i++,p+=stride)
10312             *p = 0.0;
10313     }
10314 }
10315 
10316 /********************************************************************
10317 This subroutine sets vector to zero
10318 ********************************************************************/
10319 void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride)
10320 {
10321     ae_int_t i;
10322     if( stride==1 )
10323     {
10324         for(i=0; i<n; i++,p++)
10325         {
10326             p->x = 0.0;
10327             p->y = 0.0;
10328         }
10329     }
10330     else
10331     {
10332         for(i=0; i<n; i++,p+=stride)
10333         {
10334             p->x = 0.0;
10335             p->y = 0.0;
10336         }
10337     }
10338 }
10339 
10340 
10341 /********************************************************************
10342 This subroutine copies unaligned real vector
10343 ********************************************************************/
10344 void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb)
10345 {
10346     ae_int_t i, n2;
10347     if( stridea==1 && strideb==1 )
10348     {
10349         n2 = n/2;
10350         for(i=n2; i!=0; i--, a+=2, b+=2)
10351         {
10352             b[0] = a[0];
10353             b[1] = a[1];
10354         }
10355         if( n%2!=0 )
10356             b[0] = a[0];
10357     }
10358     else
10359     {
10360         for(i=0; i<n; i++,a+=stridea,b+=strideb)
10361             *b = *a;
10362     }
10363 }
10364 
10365 
10366 /********************************************************************
10367 This subroutine copies unaligned complex vector
10368 (passed as ae_complex*)
10369 
10370 1. strideb is stride measured in complex numbers, not doubles
10371 2. conj may be "N" (no conj.) or "C" (conj.)
10372 ********************************************************************/
10373 void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
10374 {
10375     ae_int_t i;
10376 
10377     /*
10378      * more general case
10379      */
10380     if( conj[0]=='N' || conj[0]=='n' )
10381     {
10382         for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
10383         {
10384             b[0] = a->x;
10385             b[1] = a->y;
10386         }
10387     }
10388     else
10389     {
10390         for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
10391         {
10392             b[0] = a->x;
10393             b[1] = -a->y;
10394         }
10395     }
10396 }
10397 
10398 
10399 /********************************************************************
10400 This subroutine copies unaligned complex vector (passed as double*)
10401 
10402 1. strideb is stride measured in complex numbers, not doubles
10403 2. conj may be "N" (no conj.) or "C" (conj.)
10404 ********************************************************************/
10405 void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
10406 {
10407     ae_int_t i;
10408 
10409     /*
10410      * more general case
10411      */
10412     if( conj[0]=='N' || conj[0]=='n' )
10413     {
10414         for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
10415         {
10416             b[0] = a[0];
10417             b[1] = a[1];
10418         }
10419     }
10420     else
10421     {
10422         for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
10423         {
10424             b[0] = a[0];
10425             b[1] = -a[1];
10426         }
10427     }
10428 }
10429 
10430 
10431 /********************************************************************
10432 This subroutine copies a matrix from non-aligned non-contiguous storage
10433 to aligned contiguous storage
10434 
10435 A:
10436 * MxN
10437 * non-aligned
10438 * non-contiguous
10439 * may be transformed during copying (as prescribed by op)
10440 
10441 B:
10442 * alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
10443 * aligned
10444 * stride is alglib_r_block
10445 
10446 Transformation types:
10447 * 0 - no transform
10448 * 1 - transposition
10449 ********************************************************************/
10450 void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
10451 {
10452     ae_int_t i, j, n2;
10453     const double *psrc;
10454     double *pdst;
10455     if( op==0 )
10456     {
10457         n2 = n/2;
10458         for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc=a)
10459         {
10460             for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
10461             {
10462                 pdst[0] = psrc[0];
10463                 pdst[1] = psrc[1];
10464             }
10465             if( n%2!=0 )
10466                 pdst[0] = psrc[0];
10467         }
10468     }
10469     else
10470     {
10471         n2 = n/2;
10472         for(i=0,psrc=a; i<m; i++,a+=stride,b+=1,psrc=a)
10473         {
10474             for(j=0,pdst=b; j<n2; j++,pdst+=alglib_twice_r_block,psrc+=2)
10475             {
10476                 pdst[0] = psrc[0];
10477                 pdst[alglib_r_block] = psrc[1];
10478             }
10479             if( n%2!=0 )
10480                 pdst[0] = psrc[0];
10481         }
10482     }
10483 }
10484 
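/*
 * Index sketch (illustrative only): in terms of element indices the copy
 * above performs, for i in 0..m-1 and j in 0..n-1,
 *
 *     op==0:  b[i*alglib_r_block + j] = a[i*stride + j];   // plain copy
 *     op==1:  b[j*alglib_r_block + i] = a[i*stride + j];   // transposition
 */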
10485 
10486 /********************************************************************
10487 This subroutine copies a matrix from non-aligned non-contiguous storage
10488 to aligned contiguous storage
10489 
10490 A:
10491 * MxN
10492 * non-aligned
10493 * non-contiguous
10494 * may be transformed during copying (as prescribed by op)
10495 
10496 B:
10497 * alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
10498 * aligned
10499 * stride is alglib_r_block
10500 
10501 Transformation types:
10502 * 0 - no transform
10503 * 1 - transposition
10504 
10505 This function supports SSE2; it can be used when:
10506 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
10507 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
10508 
10509 If (1) fails, this function is left undefined. If (2) fails, a call
10510 to this function will probably crash your system.
10511 
10512 If  you  want  to  know  whether  it  is safe to call it, you should check
10513 results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
10514 and will do its work.
10515 ********************************************************************/
10516 #if defined(AE_HAS_SSE2_INTRINSICS)
10517 void _ialglib_mcopyblock_sse2(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
10518 {
10519     ae_int_t i, j, mb2;
10520     const double *psrc0, *psrc1;
10521     double *pdst;
10522     if( op==0 )
10523     {
10524         ae_int_t nb8, ntail;
10525         nb8 = n/8;
10526         ntail = n-8*nb8;
10527         for(i=0,psrc0=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc0=a)
10528         {
10529             pdst=b;
10530             for(j=0; j<nb8; j++)
10531             {
10532                 __m128d v0, v1;
10533                 v0 = _mm_loadu_pd(psrc0);
10534                 _mm_store_pd(pdst, v0);
10535                 v1 = _mm_loadu_pd(psrc0+2);
10536                 _mm_store_pd(pdst+2, v1);
10537                 v1 = _mm_loadu_pd(psrc0+4);
10538                 _mm_store_pd(pdst+4, v1);
10539                 v1 = _mm_loadu_pd(psrc0+6);
10540                 _mm_store_pd(pdst+6, v1);
10541                 pdst+=8;
10542                 psrc0+=8;
10543             }
10544             for(j=0; j<ntail; j++)
10545                 pdst[j] = psrc0[j];
10546         }
10547     }
10548     else
10549     {
10550         const double *arow0, *arow1;
10551         double *bcol0, *bcol1, *pdst0, *pdst1;
10552         ae_int_t nb4, ntail, n2;
10553 
10554         n2 = n/2;
10555         mb2 = m/2;
10556         nb4 = n/4;
10557         ntail = n-4*nb4;
10558 
10559         arow0 = a;
10560         arow1 = a+stride;
10561         bcol0 = b;
10562         bcol1 = b+1;
10563         for(i=0; i<mb2; i++)
10564         {
10565             psrc0 = arow0;
10566             psrc1 = arow1;
10567             pdst0 = bcol0;
10568             pdst1 = bcol1;
10569             for(j=0; j<nb4; j++)
10570             {
10571                 __m128d v0, v1, v2, v3;
10572                 v0 = _mm_loadu_pd(psrc0);
10573                 v1 = _mm_loadu_pd(psrc1);
10574                 v2 = _mm_loadu_pd(psrc0+2);
10575                 v3 = _mm_loadu_pd(psrc1+2);
10576                 _mm_store_pd(pdst0, _mm_unpacklo_pd(v0,v1));
10577                 _mm_store_pd(pdst0+alglib_r_block, _mm_unpackhi_pd(v0,v1));
10578                 _mm_store_pd(pdst0+2*alglib_r_block, _mm_unpacklo_pd(v2,v3));
10579                 _mm_store_pd(pdst0+3*alglib_r_block, _mm_unpackhi_pd(v2,v3));
10580 
10581                 pdst0 += 4*alglib_r_block;
10582                 pdst1 += 4*alglib_r_block;
10583                 psrc0 += 4;
10584                 psrc1 += 4;
10585             }
10586             for(j=0; j<ntail; j++)
10587             {
10588                 pdst0[0] = psrc0[0];
10589                 pdst1[0] = psrc1[0];
10590                 pdst0 += alglib_r_block;
10591                 pdst1 += alglib_r_block;
10592                 psrc0 += 1;
10593                 psrc1 += 1;
10594             }
10595             arow0 += 2*stride;
10596             arow1 += 2*stride;
10597             bcol0 += 2;
10598             bcol1 += 2;
10599         }
10600         if( m%2 )
10601         {
10602             psrc0 = arow0;
10603             pdst0 = bcol0;
10604             for(j=0; j<n2; j++)
10605             {
10606                 pdst0[0] = psrc0[0];
10607                 pdst0[alglib_r_block] = psrc0[1];
10608                 pdst0 += alglib_twice_r_block;
10609                 psrc0 += 2;
10610             }
10611             if( n%2!=0 )
10612                 pdst0[0] = psrc0[0];
10613         }
10614     }
10615 }
10616 #endif
10617 
10618 
10619 /********************************************************************
10620 This subroutine copies a matrix from aligned contiguous storage to non-
10621 aligned non-contiguous storage
10622 
10623 A:
10624 * MxN
10625 * aligned
10626 * contiguous
10627 * stride is alglib_r_block
10628 * may be transformed during copying (as prescribed by op)
10629 
10630 B:
10631 * alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
10632 * non-aligned, non-contiguous
10633 
10634 Transformation types:
10635 * 0 - no transform
10636 * 1 - transposition
10637 ********************************************************************/
10638 void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride)
10639 {
10640     ae_int_t i, j, n2;
10641     const double *psrc;
10642     double *pdst;
10643     if( op==0 )
10644     {
10645         n2 = n/2;
10646         for(i=0,psrc=a; i<m; i++,a+=alglib_r_block,b+=stride,psrc=a)
10647         {
10648             for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
10649             {
10650                 pdst[0] = psrc[0];
10651                 pdst[1] = psrc[1];
10652             }
10653             if( n%2!=0 )
10654                 pdst[0] = psrc[0];
10655         }
10656     }
10657     else
10658     {
10659         n2 = n/2;
10660         for(i=0,psrc=a; i<m; i++,a++,b+=stride,psrc=a)
10661         {
10662             for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=alglib_twice_r_block)
10663             {
10664                 pdst[0] = psrc[0];
10665                 pdst[1] = psrc[alglib_r_block];
10666             }
10667             if( n%2!=0 )
10668                 pdst[0] = psrc[0];
10669         }
10670     }
10671 }
10672 
10673 
10674 /********************************************************************
10675 This subroutine copies a matrix from non-aligned non-contiguous storage
10676 to aligned contiguous storage
10677 
10678 A:
10679 * MxN
10680 * non-aligned
10681 * non-contiguous
10682 * may be transformed during copying (as prescribed by op)
10683 * pointer to ae_complex is passed
10684 
10685 B:
10686 * 2*alglib_c_block*alglib_c_block doubles (only MxN/NxM submatrix is used)
10687 * aligned
10688 * stride is alglib_c_block
10689 * pointer to double is passed
10690 
10691 Transformation types:
10692 * 0 - no transform
10693 * 1 - transposition
10694 * 2 - conjugate transposition
10695 * 3 - conjugate, but no  transposition
10696 ********************************************************************/
10697 void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b)
10698 {
10699     ae_int_t i, j;
10700     const ae_complex *psrc;
10701     double *pdst;
10702     if( op==0 )
10703     {
10704         for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
10705             for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
10706             {
10707                 pdst[0] = psrc->x;
10708                 pdst[1] = psrc->y;
10709             }
10710     }
10711     if( op==1 )
10712     {
10713         for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
10714             for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
10715             {
10716                 pdst[0] = psrc->x;
10717                 pdst[1] = psrc->y;
10718             }
10719     }
10720     if( op==2 )
10721     {
10722         for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
10723             for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
10724             {
10725                 pdst[0] = psrc->x;
10726                 pdst[1] = -psrc->y;
10727             }
10728     }
10729     if( op==3 )
10730     {
10731         for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
10732             for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
10733             {
10734                 pdst[0] = psrc->x;
10735                 pdst[1] = -psrc->y;
10736             }
10737     }
10738 }
10739 
10740 
10741 /********************************************************************
10742 This subroutine copies a matrix from aligned contiguous storage to
10743 non-aligned non-contiguous storage
10744 
10745 A:
10746 * 2*alglib_c_block*alglib_c_block doubles (only MxN submatrix is used)
10747 * aligned
10748 * stride is alglib_c_block
10749 * pointer to double is passed
10750 * may be transformed during copying (as prescribed by op)
10751 
10752 B:
10753 * MxN
10754 * non-aligned
10755 * non-contiguous
10756 * pointer to ae_complex is passed
10757 
10758 Transformation types:
10759 * 0 - no transform
10760 * 1 - transposition
10761 * 2 - conjugate transposition
10762 * 3 - conjugate, but no  transposition
10763 ********************************************************************/
10764 void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride)
10765 {
10766     ae_int_t i, j;
10767     const double *psrc;
10768     ae_complex *pdst;
10769     if( op==0 )
10770     {
10771         for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
10772             for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
10773             {
10774                 pdst->x = psrc[0];
10775                 pdst->y = psrc[1];
10776             }
10777     }
10778     if( op==1 )
10779     {
10780         for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
10781             for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
10782             {
10783                 pdst->x = psrc[0];
10784                 pdst->y = psrc[1];
10785             }
10786     }
10787     if( op==2 )
10788     {
10789         for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
10790             for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
10791             {
10792                 pdst->x = psrc[0];
10793                 pdst->y = -psrc[1];
10794             }
10795     }
10796     if( op==3 )
10797     {
10798         for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
10799             for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
10800             {
10801                 pdst->x = psrc[0];
10802                 pdst->y = -psrc[1];
10803             }
10804     }
10805 }
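
/********************************************************************
A minimal sketch (kept disabled, not part of the library build) of
what the op codes above mean for _ialglib_mcopyunblock_complex(): the
source block stores interleaved re/im doubles with row stride
alglib_c_block, and op==2 produces B[i][j] = conj(A[j][i]). The helper
name below is illustrative only.
********************************************************************/
#if 0
static void naive_unblock_conjtrans(ae_int_t m, ae_int_t n, const double *a, ae_complex *b, ae_int_t stride)
{
    ae_int_t i, j;
    for(i=0; i<m; i++)
        for(j=0; j<n; j++)
        {
            /* element (j,i) of the aligned block, conjugated */
            b[i*stride+j].x =  a[2*(j*alglib_c_block+i)+0];
            b[i*stride+j].y = -a[2*(j*alglib_c_block+i)+1];
        }
}
#endif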
10806 
10807 
10808 /********************************************************************
10809 Real GEMM kernel
10810 ********************************************************************/
10811 ae_bool _ialglib_rmatrixgemm(ae_int_t m,
10812      ae_int_t n,
10813      ae_int_t k,
10814      double alpha,
10815      double *_a,
10816      ae_int_t _a_stride,
10817      ae_int_t optypea,
10818      double *_b,
10819      ae_int_t _b_stride,
10820      ae_int_t optypeb,
10821      double beta,
10822      double *_c,
10823      ae_int_t _c_stride)
10824 {
10825     int i;
10826     double *crow;
10827     double _abuf[alglib_r_block+alglib_simd_alignment];
10828     double _bbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
10829     double * const abuf = (double * ) ae_align(_abuf,alglib_simd_alignment);
10830     double * const b    = (double * ) ae_align(_bbuf,alglib_simd_alignment);
10831     void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
10832     void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
10833 
10834     if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block || m<=0 || n<=0 || k<=0 || alpha==0.0 )
10835         return ae_false;
10836 
10837     /*
10838      * Check for SSE2 support
10839      */
10840 #ifdef AE_HAS_SSE2_INTRINSICS
10841     if( ae_cpuid() & CPU_SSE2 )
10842     {
10843         rmv = &_ialglib_rmv_sse2;
10844         mcopyblock = &_ialglib_mcopyblock_sse2;
10845     }
10846 #endif
10847 
10848     /*
10849      * copy b
10850      */
10851     if( optypeb==0 )
10852         mcopyblock(k, n, _b, 1, _b_stride, b);
10853     else
10854         mcopyblock(n, k, _b, 0, _b_stride, b);
10855 
10856     /*
10857      * multiply B by A (from the right, by rows)
10858      * and store result in C
10859      */
10860     crow  = _c;
10861     if( optypea==0 )
10862     {
10863         const double *arow = _a;
10864         for(i=0; i<m; i++)
10865         {
10866             _ialglib_vcopy(k, arow, 1, abuf, 1);
10867             if( beta==0 )
10868                 _ialglib_vzero(n, crow, 1);
10869             rmv(n, k, b, abuf, crow, 1, alpha, beta);
10870             crow += _c_stride;
10871             arow += _a_stride;
10872         }
10873     }
10874     else
10875     {
10876         const double *acol = _a;
10877         for(i=0; i<m; i++)
10878         {
10879             _ialglib_vcopy(k, acol, _a_stride, abuf, 1);
10880             if( beta==0 )
10881                 _ialglib_vzero(n, crow, 1);
10882             rmv(n, k, b, abuf, crow, 1, alpha, beta);
10883             crow += _c_stride;
10884             acol++;
10885         }
10886     }
10887     return ae_true;
10888 }
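
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
computing C := alpha*A*B^T + beta*C for small row-major matrices with
the kernel above. The kernel returns ae_false when it cannot handle
the call (a dimension exceeds alglib_r_block, or alpha==0), in which
case the caller must fall back to generic code. The function name
below is illustrative only.
********************************************************************/
#if 0
static void example_rmatrixgemm(void)
{
    double a[2*3] = { 1,2,3,  4,5,6 };                  /* A: 2x3, row stride 3 */
    double b[4*3] = { 1,0,0,  0,1,0,  0,0,1,  1,1,1 };  /* B: 4x3, row stride 3 */
    double c[2*4] = { 0,0,0,0,  0,0,0,0 };              /* C: 2x4, row stride 4 */
    if( !_ialglib_rmatrixgemm(2, 4, 3, 1.0, a, 3, 0, b, 3, 1, 0.0, c, 4) )
    {
        /* generic triple-loop fallback would be used here */
    }
    /* C is now A*B^T = { {1,2,3,6}, {4,5,6,15} } */
}
#endif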
10889 
10890 
10891 /********************************************************************
10892 Complex GEMM kernel
10893 ********************************************************************/
10894 ae_bool _ialglib_cmatrixgemm(ae_int_t m,
10895      ae_int_t n,
10896      ae_int_t k,
10897      ae_complex alpha,
10898      ae_complex *_a,
10899      ae_int_t _a_stride,
10900      ae_int_t optypea,
10901      ae_complex *_b,
10902      ae_int_t _b_stride,
10903      ae_int_t optypeb,
10904      ae_complex beta,
10905      ae_complex *_c,
10906      ae_int_t _c_stride)
10907  {
10908     const ae_complex *arow;
10909     ae_complex *crow;
10910     ae_int_t i;
10911     double _loc_abuf[2*alglib_c_block+alglib_simd_alignment];
10912     double _loc_b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10913     double * const abuf = (double *)ae_align(_loc_abuf,alglib_simd_alignment);
10914     double * const b    = (double *)ae_align(_loc_b,   alglib_simd_alignment);
10915     ae_int_t brows;
10916     ae_int_t bcols;
10917     void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
10918 
10919     if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block )
10920         return ae_false;
10921 
10922     /*
10923      * Check for SSE2 support
10924      */
10925 #ifdef AE_HAS_SSE2_INTRINSICS
10926     if( ae_cpuid() & CPU_SSE2 )
10927     {
10928         cmv = &_ialglib_cmv_sse2;
10929     }
10930 #endif
10931 
10932     /*
10933      * copy b
10934      */
10935     brows = optypeb==0 ? k : n;
10936     bcols = optypeb==0 ? n : k;
10937     if( optypeb==0 )
10938         _ialglib_mcopyblock_complex(brows, bcols, _b, 1, _b_stride, b);
10939     if( optypeb==1 )
10940         _ialglib_mcopyblock_complex(brows, bcols, _b, 0, _b_stride, b);
10941     if( optypeb==2 )
10942         _ialglib_mcopyblock_complex(brows, bcols, _b, 3, _b_stride, b);
10943 
10944     /*
10945      * multiply B by A (from the right, by rows)
10946      * and store result in C
10947      */
10948     arow  = _a;
10949     crow  = _c;
10950     for(i=0; i<m; i++)
10951     {
10952         if( optypea==0 )
10953         {
10954             _ialglib_vcopy_complex(k, arow, 1, abuf, 1, "No conj");
10955             arow += _a_stride;
10956         }
10957         else if( optypea==1 )
10958         {
10959             _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "No conj");
10960             arow++;
10961         }
10962         else
10963         {
10964             _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "Conj");
10965             arow++;
10966         }
10967         if( beta.x==0 && beta.y==0 )
10968             _ialglib_vzero_complex(n, crow, 1);
10969         cmv(n, k, b, abuf, crow, NULL, 1, alpha, beta);
10970         crow += _c_stride;
10971     }
10972     return ae_true;
10973 }
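
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
for the complex kernel above, optypea/optypeb select 0 (no transform),
1 (transposition) or 2 (conjugate transposition). The example below
computes C := A^H*B for 1x1 matrices. The function name is
illustrative only.
********************************************************************/
#if 0
static void example_cmatrixgemm(void)
{
    ae_complex a[1], b[1], c[1], one, zero;
    a[0].x = 0.0; a[0].y = 1.0;   /* A = [ i ] */
    b[0].x = 2.0; b[0].y = 0.0;   /* B = [ 2 ] */
    c[0].x = 0.0; c[0].y = 0.0;
    one.x  = 1.0; one.y  = 0.0;
    zero.x = 0.0; zero.y = 0.0;
    _ialglib_cmatrixgemm(1, 1, 1, one, a, 1, 2, b, 1, 0, zero, c, 1);
    /* C = conj(i)*2 = -2i, i.e. c[0].x==0.0 and c[0].y==-2.0 */
}
#endif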
10974 
10975 
10976 /********************************************************************
10977 complex TRSM kernel
10978 ********************************************************************/
10979 ae_bool _ialglib_cmatrixrighttrsm(ae_int_t m,
10980      ae_int_t n,
10981      ae_complex *_a,
10982      ae_int_t _a_stride,
10983      ae_bool isupper,
10984      ae_bool isunit,
10985      ae_int_t optype,
10986      ae_complex *_x,
10987      ae_int_t _x_stride)
10988 {
10989     /*
10990      * local buffers
10991      */
10992     double *pdiag;
10993     ae_int_t i;
10994     double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10995     double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10996     double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
10997     double * const abuf   = (double*)ae_align(_loc_abuf,  alglib_simd_alignment);
10998     double * const xbuf   = (double*)ae_align(_loc_xbuf,  alglib_simd_alignment);
10999     double * const tmpbuf = (double*)ae_align(_loc_tmpbuf,alglib_simd_alignment);
11000     ae_bool uppera;
11001     void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
11002 
11003     if( m>alglib_c_block || n>alglib_c_block )
11004         return ae_false;
11005 
11006     /*
11007      * Check for SSE2 support
11008      */
11009 #ifdef AE_HAS_SSE2_INTRINSICS
11010     if( ae_cpuid() & CPU_SSE2 )
11011     {
11012         cmv = &_ialglib_cmv_sse2;
11013     }
11014 #endif
11015 
11016     /*
11017      * Prepare
11018      */
11019     _ialglib_mcopyblock_complex(n, n, _a, optype, _a_stride, abuf);
11020     _ialglib_mcopyblock_complex(m, n, _x, 0, _x_stride, xbuf);
11021     if( isunit )
11022         for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
11023         {
11024             pdiag[0] = 1.0;
11025             pdiag[1] = 0.0;
11026         }
11027     if( optype==0 )
11028         uppera = isupper;
11029     else
11030         uppera = !isupper;
11031 
11032     /*
11033      * Solve Y*A^-1=X where A is upper or lower triangular
11034      */
11035     if( uppera )
11036     {
11037         for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
11038         {
11039             ae_complex tmp_c;
11040             ae_complex beta;
11041             ae_complex alpha;
11042             tmp_c.x = pdiag[0];
11043             tmp_c.y = pdiag[1];
11044             beta = ae_c_d_div(1.0, tmp_c);
11045             alpha.x = -beta.x;
11046             alpha.y = -beta.y;
11047             _ialglib_vcopy_dcomplex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj");
11048             cmv(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
11049         }
11050         _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
11051     }
11052     else
11053     {
11054         for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
11055         {
11056             ae_complex tmp_c;
11057             ae_complex beta;
11058             ae_complex alpha;
11059             tmp_c.x = pdiag[0];
11060             tmp_c.y = pdiag[1];
11061             beta = ae_c_d_div(1.0, tmp_c);
11062             alpha.x = -beta.x;
11063             alpha.y = -beta.y;
11064             _ialglib_vcopy_dcomplex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj");
11065             cmv(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
11066         }
11067         _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
11068     }
11069     return ae_true;
11070 }
11071 
11072 
11073 /********************************************************************
11074 real TRSM kernel
11075 ********************************************************************/
11076 ae_bool _ialglib_rmatrixrighttrsm(ae_int_t m,
11077      ae_int_t n,
11078      double *_a,
11079      ae_int_t _a_stride,
11080      ae_bool isupper,
11081      ae_bool isunit,
11082      ae_int_t optype,
11083      double *_x,
11084      ae_int_t _x_stride)
11085 {
11086     /*
11087      * local buffers
11088      */
11089     double *pdiag;
11090     ae_int_t i;
11091     double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11092     double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11093     double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
11094     double * const abuf   = (double *) ae_align(_loc_abuf,  alglib_simd_alignment);
11095     double * const xbuf   = (double *) ae_align(_loc_xbuf,  alglib_simd_alignment);
11096     double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11097     ae_bool uppera;
11098     void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
11099     void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
11100 
11101     if( m>alglib_r_block || n>alglib_r_block )
11102         return ae_false;
11103 
11104     /*
11105      * Check for SSE2 support
11106      */
11107 #ifdef AE_HAS_SSE2_INTRINSICS
11108     if( ae_cpuid() & CPU_SSE2 )
11109     {
11110         rmv = &_ialglib_rmv_sse2;
11111         mcopyblock = &_ialglib_mcopyblock_sse2;
11112     }
11113 #endif
11114 
11115     /*
11116      * Prepare
11117      */
11118     mcopyblock(n, n, _a, optype, _a_stride, abuf);
11119     mcopyblock(m, n, _x, 0, _x_stride, xbuf);
11120     if( isunit )
11121         for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
11122             *pdiag = 1.0;
11123     if( optype==0 )
11124         uppera = isupper;
11125     else
11126         uppera = !isupper;
11127 
11128     /*
11129      * Solve Y*A^-1=X where A is upper or lower triangular
11130      */
11131     if( uppera )
11132     {
11133         for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
11134         {
11135             double beta  = 1.0/(*pdiag);
11136             double alpha = -beta;
11137             _ialglib_vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1);
11138             rmv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
11139         }
11140         _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
11141     }
11142     else
11143     {
11144         for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1)
11145         {
11146             double beta = 1.0/(*pdiag);
11147             double alpha = -beta;
11148             _ialglib_vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf+i+1, 1);
11149             rmv(m, n-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
11150         }
11151         _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
11152     }
11153     return ae_true;
11154 }
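
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
the kernel above overwrites X with X*op(A)^-1 for triangular A. For
the data below the result is X = { 1, 0 }, since {1,0}*A = {2,1}. The
function name is illustrative only.
********************************************************************/
#if 0
static void example_rmatrixrighttrsm(void)
{
    double a[2*2] = { 2.0, 1.0,
                      0.0, 4.0 };   /* A: 2x2 upper triangular, row stride 2 */
    double x[1*2] = { 2.0, 1.0 };   /* X: 1x2, row stride 2 */
    _ialglib_rmatrixrighttrsm(1, 2, a, 2, ae_true, ae_false, 0, x, 2);
    /* x is now { 1.0, 0.0 } (up to rounding) */
}
#endif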
11155 
11156 
11157 /********************************************************************
11158 complex TRSM kernel
11159 ********************************************************************/
11160 ae_bool _ialglib_cmatrixlefttrsm(ae_int_t m,
11161      ae_int_t n,
11162      ae_complex *_a,
11163      ae_int_t _a_stride,
11164      ae_bool isupper,
11165      ae_bool isunit,
11166      ae_int_t optype,
11167      ae_complex *_x,
11168      ae_int_t _x_stride)
11169 {
11170     /*
11171      * local buffers
11172      */
11173     double *pdiag, *arow;
11174     ae_int_t i;
11175     double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11176     double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11177     double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
11178     double * const abuf   = (double *) ae_align(_loc_abuf,  alglib_simd_alignment);
11179     double * const xbuf   = (double *) ae_align(_loc_xbuf,  alglib_simd_alignment);
11180     double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11181     ae_bool uppera;
11182     void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
11183 
11184     if( m>alglib_c_block || n>alglib_c_block )
11185         return ae_false;
11186 
11187     /*
11188      * Check for SSE2 support
11189      */
11190 #ifdef AE_HAS_SSE2_INTRINSICS
11191     if( ae_cpuid() & CPU_SSE2 )
11192     {
11193         cmv = &_ialglib_cmv_sse2;
11194     }
11195 #endif
11196 
11197     /*
11198      * Prepare
11199      * Transpose X (so we may use mv, which calculates A*x, but not x*A)
11200      */
11201     _ialglib_mcopyblock_complex(m, m, _a, optype, _a_stride, abuf);
11202     _ialglib_mcopyblock_complex(m, n, _x, 1, _x_stride, xbuf);
11203     if( isunit )
11204         for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1))
11205         {
11206             pdiag[0] = 1.0;
11207             pdiag[1] = 0.0;
11208         }
11209     if( optype==0 )
11210         uppera = isupper;
11211     else
11212         uppera = !isupper;
11213 
11214     /*
11215      * Solve A^-1*Y^T=X^T where A is upper or lower triangular
11216      */
11217     if( uppera )
11218     {
11219         for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
11220         {
11221             ae_complex tmp_c;
11222             ae_complex beta;
11223             ae_complex alpha;
11224             tmp_c.x = pdiag[0];
11225             tmp_c.y = pdiag[1];
11226             beta = ae_c_d_div(1.0, tmp_c);
11227             alpha.x = -beta.x;
11228             alpha.y = -beta.y;
11229             _ialglib_vcopy_dcomplex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj");
11230             cmv(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
11231         }
11232         _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
11233     }
11234     else
11235     {   for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block)
11236         {
11237             ae_complex tmp_c;
11238             ae_complex beta;
11239             ae_complex alpha;
11240             tmp_c.x = pdiag[0];
11241             tmp_c.y = pdiag[1];
11242             beta = ae_c_d_div(1.0, tmp_c);
11243             alpha.x = -beta.x;
11244             alpha.y = -beta.y;
11245             _ialglib_vcopy_dcomplex(i, arow, 1, tmpbuf, 1, "No conj");
11246             cmv(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
11247         }
11248         _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
11249     }
11250     return ae_true;
11251 }
11252 
11253 
11254 /********************************************************************
11255 real TRSM kernel
11256 ********************************************************************/
11257 ae_bool _ialglib_rmatrixlefttrsm(ae_int_t m,
11258      ae_int_t n,
11259      double *_a,
11260      ae_int_t _a_stride,
11261      ae_bool isupper,
11262      ae_bool isunit,
11263      ae_int_t optype,
11264      double *_x,
11265      ae_int_t _x_stride)
11266 {
11267     /*
11268      * local buffers
11269      */
11270     double *pdiag, *arow;
11271     ae_int_t i;
11272     double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11273     double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11274     double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
11275     double * const abuf   = (double *) ae_align(_loc_abuf,  alglib_simd_alignment);
11276     double * const xbuf   = (double *) ae_align(_loc_xbuf,  alglib_simd_alignment);
11277     double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11278     ae_bool uppera;
11279     void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
11280     void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
11281 
11282     if( m>alglib_r_block || n>alglib_r_block )
11283         return ae_false;
11284 
11285     /*
11286      * Check for SSE2 support
11287      */
11288 #ifdef AE_HAS_SSE2_INTRINSICS
11289     if( ae_cpuid() & CPU_SSE2 )
11290     {
11291         rmv = &_ialglib_rmv_sse2;
11292         mcopyblock = &_ialglib_mcopyblock_sse2;
11293     }
11294 #endif
11295 
11296     /*
11297      * Prepare
11298      * Transpose X (so we may use mv, which calculates A*x, but not x*A)
11299      */
11300     mcopyblock(m, m, _a, optype, _a_stride, abuf);
11301     mcopyblock(m, n, _x, 1, _x_stride, xbuf);
11302     if( isunit )
11303         for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1)
11304             *pdiag = 1.0;
11305     if( optype==0 )
11306         uppera = isupper;
11307     else
11308         uppera = !isupper;
11309 
11310     /*
11311      * Solve A^-1*Y^T=X^T where A is upper or lower triangular
11312      */
11313     if( uppera )
11314     {
11315         for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1)
11316         {
11317             double beta = 1.0/(*pdiag);
11318             double alpha = -beta;
11319             _ialglib_vcopy(m-1-i, pdiag+1, 1, tmpbuf+i+1, 1);
11320             rmv(n, m-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
11321         }
11322         _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
11323     }
11324     else
11325     {   for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block)
11326         {
11327             double beta = 1.0/(*pdiag);
11328             double alpha = -beta;
11329             _ialglib_vcopy(i, arow, 1, tmpbuf, 1);
11330             rmv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
11331         }
11332         _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
11333     }
11334     return ae_true;
11335 }
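
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
the kernel above overwrites X with op(A)^-1*X for triangular A. For
the data below the result is X = { 1, 2 }, since A*{1,2} = {2,9}. The
function name is illustrative only.
********************************************************************/
#if 0
static void example_rmatrixlefttrsm(void)
{
    double a[2*2] = { 2.0, 0.0,
                      1.0, 4.0 };   /* A: 2x2 lower triangular, row stride 2 */
    double x[2*1] = { 2.0, 9.0 };   /* X: 2x1, row stride 1 */
    _ialglib_rmatrixlefttrsm(2, 1, a, 2, ae_false, ae_false, 0, x, 1);
    /* x is now { 1.0, 2.0 } (up to rounding) */
}
#endif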
11336 
11337 
11338 /********************************************************************
11339 complex SYRK kernel
11340 ********************************************************************/
11341 ae_bool _ialglib_cmatrixherk(ae_int_t n,
11342      ae_int_t k,
11343      double alpha,
11344      ae_complex *_a,
11345      ae_int_t _a_stride,
11346      ae_int_t optypea,
11347      double beta,
11348      ae_complex *_c,
11349      ae_int_t _c_stride,
11350      ae_bool isupper)
11351 {
11352     /*
11353      * local buffers
11354      */
11355     double *arow, *crow;
11356     ae_complex c_alpha, c_beta;
11357     ae_int_t i;
11358     double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11359     double _loc_cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
11360     double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
11361     double * const abuf   = (double *) ae_align(_loc_abuf,  alglib_simd_alignment);
11362     double * const cbuf   = (double *) ae_align(_loc_cbuf,  alglib_simd_alignment);
11363     double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
11364 
11365     if( n>alglib_c_block || k>alglib_c_block )
11366         return ae_false;
11367     if( n==0 )
11368         return ae_true;
11369 
11370     /*
11371      * copy A and C, task is transformed to "A*A^H"-form.
11372      * if beta==0, then C is filled by zeros (and not referenced)
11373      *
11374      * alpha==0 or k==0 are correctly processed (A is not referenced)
11375      */
11376     c_alpha.x = alpha;
11377     c_alpha.y = 0;
11378     c_beta.x = beta;
11379     c_beta.y = 0;
11380     if( alpha==0 )
11381         k = 0;
11382     if( k>0 )
11383     {
11384         if( optypea==0 )
11385             _ialglib_mcopyblock_complex(n, k, _a, 3, _a_stride, abuf);
11386         else
11387             _ialglib_mcopyblock_complex(k, n, _a, 1, _a_stride, abuf);
11388     }
11389     _ialglib_mcopyblock_complex(n, n, _c, 0, _c_stride, cbuf);
11390     if( beta==0 )
11391     {
11392         for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block)
11393             if( isupper )
11394                 _ialglib_vzero(2*(n-i), crow+2*i, 1);
11395             else
11396                 _ialglib_vzero(2*(i+1), crow, 1);
11397     }
11398 
11399 
11400     /*
11401      * update C
11402      */
11403     if( isupper )
11404     {
11405         for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
11406         {
11407             _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
11408             _ialglib_cmv(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, c_alpha, c_beta);
11409         }
11410     }
11411     else
11412     {
11413         for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
11414         {
11415             _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
11416             _ialglib_cmv(i+1, k, abuf, tmpbuf, NULL, crow, 1, c_alpha, c_beta);
11417         }
11418     }
11419 
11420     /*
11421      * copy back
11422      */
11423     _ialglib_mcopyunblock_complex(n, n, cbuf, 0, _c, _c_stride);
11424 
11425     return ae_true;
11426 }
11427 
11428 
11429 /********************************************************************
11430 real SYRK kernel
11431 ********************************************************************/
11432 ae_bool _ialglib_rmatrixsyrk(ae_int_t n,
11433      ae_int_t k,
11434      double alpha,
11435      double *_a,
11436      ae_int_t _a_stride,
11437      ae_int_t optypea,
11438      double beta,
11439      double *_c,
11440      ae_int_t _c_stride,
11441      ae_bool isupper)
11442 {
11443     /*
11444      * local buffers
11445      */
11446     double *arow, *crow;
11447     ae_int_t i;
11448     double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11449     double _loc_cbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
11450     double * const abuf   = (double *) ae_align(_loc_abuf,  alglib_simd_alignment);
11451     double * const cbuf   = (double *) ae_align(_loc_cbuf,  alglib_simd_alignment);
11452 
11453     if( n>alglib_r_block || k>alglib_r_block )
11454         return ae_false;
11455     if( n==0 )
11456         return ae_true;
11457 
11458     /*
11459      * copy A and C, task is transformed to "A*A^T"-form.
11460      * if beta==0, then C is filled by zeros (and not referenced)
11461      *
11462      * alpha==0 or k==0 are correctly processed (A is not referenced)
11463      */
11464     if( alpha==0 )
11465         k = 0;
11466     if( k>0 )
11467     {
11468         if( optypea==0 )
11469             _ialglib_mcopyblock(n, k, _a, 0, _a_stride, abuf);
11470         else
11471             _ialglib_mcopyblock(k, n, _a, 1, _a_stride, abuf);
11472     }
11473     _ialglib_mcopyblock(n, n, _c, 0, _c_stride, cbuf);
11474     if( beta==0 )
11475     {
11476         for(i=0,crow=cbuf; i<n; i++,crow+=alglib_r_block)
11477             if( isupper )
11478                 _ialglib_vzero(n-i, crow+i, 1);
11479             else
11480                 _ialglib_vzero(i+1, crow, 1);
11481     }
11482 
11483 
11484     /*
11485      * update C
11486      */
11487     if( isupper )
11488     {
11489         for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
11490         {
11491             _ialglib_rmv(n-i, k, arow, arow, crow+i, 1, alpha, beta);
11492         }
11493     }
11494     else
11495     {
11496         for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
11497         {
11498             _ialglib_rmv(i+1, k, abuf, arow, crow, 1, alpha, beta);
11499         }
11500     }
11501 
11502     /*
11503      * copy back
11504      */
11505     _ialglib_mcopyunblock(n, n, cbuf, 0, _c, _c_stride);
11506 
11507     return ae_true;
11508 }
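
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
C := alpha*A*A^T + beta*C accumulated into the chosen triangle of C
only; the other triangle is written back unchanged. The function name
is illustrative only.
********************************************************************/
#if 0
static void example_rmatrixsyrk(void)
{
    double a[2*2] = { 1, 2,
                      3, 4 };       /* A: 2x2, row stride 2 */
    double c[2*2] = { 0, 0,
                      0, 0 };       /* C: 2x2, row stride 2 */
    _ialglib_rmatrixsyrk(2, 2, 1.0, a, 2, 0, 0.0, c, 2, ae_true);
    /* upper triangle of C is now { {5,11}, {-,25} }; c[2] stays 0 */
}
#endif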
11509 
11510 
11511 /********************************************************************
11512 complex rank-1 kernel
11513 ********************************************************************/
11514 ae_bool _ialglib_cmatrixrank1(ae_int_t m,
11515      ae_int_t n,
11516      ae_complex *_a,
11517      ae_int_t _a_stride,
11518      ae_complex *_u,
11519      ae_complex *_v)
11520 {
11521     /*
11522      * Locals
11523      */
11524     ae_complex *arow, *pu, *pv, *vtmp, *dst;
11525     ae_int_t n2 = n/2;
11526     ae_int_t i, j;
11527 
11528     /*
11529      * Quick exit
11530      */
11531     if( m<=0 || n<=0 )
11532         return ae_false;
11533 
11534 
11535     /*
11536      * update pairs of rows
11537      */
11538     arow  = _a;
11539     pu    = _u;
11540     vtmp  = _v;
11541     for(i=0; i<m; i++, arow+=_a_stride, pu++)
11542     {
11543         /*
11544          * update by two
11545          */
11546         for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2)
11547         {
11548             double ux  = pu[0].x;
11549             double uy  = pu[0].y;
11550             double v0x = pv[0].x;
11551             double v0y = pv[0].y;
11552             double v1x = pv[1].x;
11553             double v1y = pv[1].y;
11554             dst[0].x += ux*v0x-uy*v0y;
11555             dst[0].y += ux*v0y+uy*v0x;
11556             dst[1].x += ux*v1x-uy*v1y;
11557             dst[1].y += ux*v1y+uy*v1x;
11558         }
11559 
11560         /*
11561          * final update
11562          */
11563         if( n%2!=0 )
11564         {
11565             double ux = pu[0].x;
11566             double uy = pu[0].y;
11567             double vx = pv[0].x;
11568             double vy = pv[0].y;
11569             dst[0].x += ux*vx-uy*vy;
11570             dst[0].y += ux*vy+uy*vx;
11571         }
11572     }
11573     return ae_true;
11574 }
11575 
11576 
11577 /********************************************************************
11578 real rank-1 kernel
11579 deprecated version
11580 ********************************************************************/
11581 ae_bool _ialglib_rmatrixrank1(ae_int_t m,
11582      ae_int_t n,
11583      double *_a,
11584      ae_int_t _a_stride,
11585      double *_u,
11586      double *_v)
11587 {
11588     /*
11589      * Locals
11590      */
11591     double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
11592     ae_int_t m2 = m/2;
11593     ae_int_t n2 = n/2;
11594     ae_int_t stride  = _a_stride;
11595     ae_int_t stride2 = 2*_a_stride;
11596     ae_int_t i, j;
11597 
11598     /*
11599      * Quick exit
11600      */
11601     if( m<=0 || n<=0 )
11602         return ae_false;
11603 
11604     /*
11605      * update pairs of rows
11606      */
11607     arow0 = _a;
11608     arow1 = arow0+stride;
11609     pu    = _u;
11610     vtmp  = _v;
11611     for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
11612     {
11613         /*
11614          * update by two
11615          */
11616         for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
11617         {
11618             dst0[0] += pu[0]*pv[0];
11619             dst0[1] += pu[0]*pv[1];
11620             dst1[0] += pu[1]*pv[0];
11621             dst1[1] += pu[1]*pv[1];
11622         }
11623 
11624         /*
11625          * final update
11626          */
11627         if( n%2!=0 )
11628         {
11629             dst0[0] += pu[0]*pv[0];
11630             dst1[0] += pu[1]*pv[0];
11631         }
11632     }
11633 
11634     /*
11635      * update last row
11636      */
11637     if( m%2!=0 )
11638     {
11639         /*
11640          * update by two
11641          */
11642         for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
11643         {
11644             dst0[0] += pu[0]*pv[0];
11645             dst0[1] += pu[0]*pv[1];
11646         }
11647 
11648         /*
11649          * final update
11650          */
11651         if( n%2!=0 )
11652             dst0[0] += pu[0]*pv[0];
11653     }
11654     return ae_true;
11655 }
11656 
11657 
11658 
11659 /********************************************************************
11660 real rank-1 kernel with alpha scaling (GER)
11662 ********************************************************************/
11663 ae_bool _ialglib_rmatrixger(ae_int_t m,
11664      ae_int_t n,
11665      double *_a,
11666      ae_int_t _a_stride,
11667      double alpha,
11668      double *_u,
11669      double *_v)
11670 {
11671     /*
11672      * Locals
11673      */
11674     double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
11675     ae_int_t m2 = m/2;
11676     ae_int_t n2 = n/2;
11677     ae_int_t stride  = _a_stride;
11678     ae_int_t stride2 = 2*_a_stride;
11679     ae_int_t i, j;
11680 
11681     /*
11682      * Quick exit
11683      */
11684     if( m<=0 || n<=0 || alpha==0.0 )
11685         return ae_false;
11686 
11687     /*
11688      * update pairs of rows
11689      */
11690     arow0 = _a;
11691     arow1 = arow0+stride;
11692     pu    = _u;
11693     vtmp  = _v;
11694     for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
11695     {
11696         double au0 = alpha*pu[0];
11697         double au1 = alpha*pu[1];
11698 
11699         /*
11700          * update by two
11701          */
11702         for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
11703         {
11704             dst0[0] += au0*pv[0];
11705             dst0[1] += au0*pv[1];
11706             dst1[0] += au1*pv[0];
11707             dst1[1] += au1*pv[1];
11708         }
11709 
11710         /*
11711          * final update
11712          */
11713         if( n%2!=0 )
11714         {
11715             dst0[0] += au0*pv[0];
11716             dst1[0] += au1*pv[0];
11717         }
11718     }
11719 
11720     /*
11721      * update last row
11722      */
11723     if( m%2!=0 )
11724     {
11725         double au0 = alpha*pu[0];
11726 
11727         /*
11728          * update by two
11729          */
11730         for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
11731         {
11732             dst0[0] += au0*pv[0];
11733             dst0[1] += au0*pv[1];
11734         }
11735 
11736         /*
11737          * final update
11738          */
11739         if( n%2!=0 )
11740             dst0[0] += au0*pv[0];
11741     }
11742     return ae_true;
11743 }
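
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
rank-1 update A := A + alpha*u*v^T performed by the kernel above. The
function name is illustrative only.
********************************************************************/
#if 0
static void example_rmatrixger(void)
{
    double a[2*3] = { 0,0,0,  0,0,0 };  /* A: 2x3, row stride 3 */
    double u[2]   = { 1, 2 };
    double v[3]   = { 3, 4, 5 };
    _ialglib_rmatrixger(2, 3, a, 3, 1.0, u, v);
    /* A is now { {3,4,5}, {6,8,10} } */
}
#endif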
11744 
11745 /********************************************************************
11746 Interface functions for efficient kernels
11747 ********************************************************************/
11748 ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m,
11749      ae_int_t n,
11750      ae_int_t k,
11751      double alpha,
11752      ae_matrix *_a,
11753      ae_int_t ia,
11754      ae_int_t ja,
11755      ae_int_t optypea,
11756      ae_matrix *_b,
11757      ae_int_t ib,
11758      ae_int_t jb,
11759      ae_int_t optypeb,
11760      double beta,
11761      ae_matrix *_c,
11762      ae_int_t ic,
11763      ae_int_t jc)
11764 {
11765     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11766     if( alpha==0.0 || k==0 || n==0 || m==0)
11767         return ae_false;
11768 
11769     /* handle with optimized ALGLIB kernel */
11770     return _ialglib_rmatrixgemm(m, n, k, alpha, _a->ptr.pp_double[ia]+ja, _a->stride, optypea, _b->ptr.pp_double[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_double[ic]+jc, _c->stride);
11771 }
11772 
11773 ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m,
11774      ae_int_t n,
11775      ae_int_t k,
11776      ae_complex alpha,
11777      ae_matrix *_a,
11778      ae_int_t ia,
11779      ae_int_t ja,
11780      ae_int_t optypea,
11781      ae_matrix *_b,
11782      ae_int_t ib,
11783      ae_int_t jb,
11784      ae_int_t optypeb,
11785      ae_complex beta,
11786      ae_matrix *_c,
11787      ae_int_t ic,
11788      ae_int_t jc)
11789 {
11790     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11791     if( (alpha.x==0.0 && alpha.y==0) || k==0 || n==0 || m==0 )
11792         return ae_false;
11793 
11794     /* handle with optimized ALGLIB kernel */
11795     return _ialglib_cmatrixgemm(m, n, k, alpha, _a->ptr.pp_complex[ia]+ja, _a->stride, optypea, _b->ptr.pp_complex[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_complex[ic]+jc, _c->stride);
11796 }
11797 
11798 ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m,
11799      ae_int_t n,
11800      ae_matrix *a,
11801      ae_int_t i1,
11802      ae_int_t j1,
11803      ae_bool isupper,
11804      ae_bool isunit,
11805      ae_int_t optype,
11806      ae_matrix *x,
11807      ae_int_t i2,
11808      ae_int_t j2)
11809 {
11810     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11811     if( m==0 || n==0)
11812         return ae_false;
11813 
11814     /* handle with optimized ALGLIB kernel */
11815     return _ialglib_cmatrixrighttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
11816 }
11817 
11818 ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m,
11819      ae_int_t n,
11820      ae_matrix *a,
11821      ae_int_t i1,
11822      ae_int_t j1,
11823      ae_bool isupper,
11824      ae_bool isunit,
11825      ae_int_t optype,
11826      ae_matrix *x,
11827      ae_int_t i2,
11828      ae_int_t j2)
11829 {
11830     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11831     if( m==0 || n==0)
11832         return ae_false;
11833 
11834     /* handle with optimized ALGLIB kernel */
11835     return _ialglib_rmatrixrighttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
11836 }
11837 
11838 ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m,
11839      ae_int_t n,
11840      ae_matrix *a,
11841      ae_int_t i1,
11842      ae_int_t j1,
11843      ae_bool isupper,
11844      ae_bool isunit,
11845      ae_int_t optype,
11846      ae_matrix *x,
11847      ae_int_t i2,
11848      ae_int_t j2)
11849 {
11850     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11851     if( m==0 || n==0)
11852         return ae_false;
11853 
11854     /* handle with optimized ALGLIB kernel */
11855     return _ialglib_cmatrixlefttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
11856 }
11857 
11858 ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m,
11859      ae_int_t n,
11860      ae_matrix *a,
11861      ae_int_t i1,
11862      ae_int_t j1,
11863      ae_bool isupper,
11864      ae_bool isunit,
11865      ae_int_t optype,
11866      ae_matrix *x,
11867      ae_int_t i2,
11868      ae_int_t j2)
11869 {
11870     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11871     if( m==0 || n==0)
11872         return ae_false;
11873 
11874     /* handle with optimized ALGLIB kernel */
11875     return _ialglib_rmatrixlefttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
11876 }
11877 
11878 ae_bool _ialglib_i_cmatrixherkf(ae_int_t n,
11879      ae_int_t k,
11880      double alpha,
11881      ae_matrix *a,
11882      ae_int_t ia,
11883      ae_int_t ja,
11884      ae_int_t optypea,
11885      double beta,
11886      ae_matrix *c,
11887      ae_int_t ic,
11888      ae_int_t jc,
11889      ae_bool isupper)
11890 {
11891     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11892     if( alpha==0.0 || k==0 || n==0)
11893         return ae_false;
11894 
11895     /* ALGLIB kernel */
11896     return _ialglib_cmatrixherk(n, k, alpha, &a->ptr.pp_complex[ia][ja], a->stride, optypea, beta, &c->ptr.pp_complex[ic][jc], c->stride, isupper);
11897 }
11898 
11899 ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n,
11900      ae_int_t k,
11901      double alpha,
11902      ae_matrix *a,
11903      ae_int_t ia,
11904      ae_int_t ja,
11905      ae_int_t optypea,
11906      double beta,
11907      ae_matrix *c,
11908      ae_int_t ic,
11909      ae_int_t jc,
11910      ae_bool isupper)
11911 {
11912     /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11913     if( alpha==0.0 || k==0 || n==0)
11914         return ae_false;
11915 
11916     /* ALGLIB kernel */
11917     return _ialglib_rmatrixsyrk(n, k, alpha, &a->ptr.pp_double[ia][ja], a->stride, optypea, beta, &c->ptr.pp_double[ic][jc], c->stride, isupper);
11918 }
11919 
11920 ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m,
11921      ae_int_t n,
11922      ae_matrix *a,
11923      ae_int_t ia,
11924      ae_int_t ja,
11925      ae_vector *u,
11926      ae_int_t uoffs,
11927      ae_vector *v,
11928      ae_int_t voffs)
11929 {
11930     return _ialglib_cmatrixrank1(m, n, &a->ptr.pp_complex[ia][ja], a->stride, &u->ptr.p_complex[uoffs], &v->ptr.p_complex[voffs]);
11931 }
11932 
11933 ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m,
11934      ae_int_t n,
11935      ae_matrix *a,
11936      ae_int_t ia,
11937      ae_int_t ja,
11938      ae_vector *u,
11939      ae_int_t uoffs,
11940      ae_vector *v,
11941      ae_int_t voffs)
11942 {
11943     return _ialglib_rmatrixrank1(m, n, &a->ptr.pp_double[ia][ja], a->stride, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
11944 }
11945 
11946 ae_bool _ialglib_i_rmatrixgerf(ae_int_t m,
11947      ae_int_t n,
11948      ae_matrix *a,
11949      ae_int_t ia,
11950      ae_int_t ja,
11951      double alpha,
11952      ae_vector *u,
11953      ae_int_t uoffs,
11954      ae_vector *v,
11955      ae_int_t voffs)
11956 {
11957     return _ialglib_rmatrixger(m, n, &a->ptr.pp_double[ia][ja], a->stride, alpha, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
11958 }
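
/********************************************************************
A minimal sketch (kept disabled, not part of the library build) of the
calling pattern used with the _ialglib_i_* wrappers above: generated
ALGLIB code tries the wrapper first and falls back to generic C code
when it returns ae_false (degenerate sizes, blocks larger than the
kernel limit, alpha==0 and so on). The function name below is
illustrative only.
********************************************************************/
#if 0
static void example_gemm_dispatch(ae_int_t m, ae_int_t n, ae_int_t k, double alpha,
    ae_matrix *a, ae_matrix *b, double beta, ae_matrix *c)
{
    if( !_ialglib_i_rmatrixgemmf(m, n, k, alpha, a, 0, 0, 0, b, 0, 0, 0, beta, c, 0, 0) )
    {
        /* generic triple-loop GEMM over a, b, c would be used here */
    }
}
#endif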
11959 
11960 
11961 
11962 
11963 /********************************************************************
11964 This function reads rectangular matrix A given by two column pointers
11965 col0 and col1 and stride src_stride and moves it into contiguous row-
11966 by-row storage given by dst.
11967 
11968 It can handle the following special cases:
11969 * col1==NULL    in this case the second column of A is filled with zeros
11970 ********************************************************************/
11971 void _ialglib_pack_n2(
11972     double *col0,
11973     double *col1,
11974     ae_int_t n,
11975     ae_int_t src_stride,
11976     double *dst)
11977 {
11978     ae_int_t n2, j, stride2;
11979 
11980     /*
11981      * handle special case
11982      */
11983     if( col1==NULL )
11984     {
11985         for(j=0; j<n; j++)
11986         {
11987             dst[0] = *col0;
11988             dst[1] = 0.0;
11989             col0 += src_stride;
11990             dst  += 2;
11991         }
11992         return;
11993     }
11994 
11995     /*
11996      * handle general case
11997      */
11998     n2 = n/2;
11999     stride2 = src_stride*2;
12000     for(j=0; j<n2; j++)
12001     {
12002         dst[0] = *col0;
12003         dst[1] = *col1;
12004         dst[2] = col0[src_stride];
12005         dst[3] = col1[src_stride];
12006         col0 += stride2;
12007         col1 += stride2;
12008         dst  += 4;
12009     }
12010     if( n%2 )
12011     {
12012         dst[0] = *col0;
12013         dst[1] = *col1;
12014     }
12015 }
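
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
packing two columns of a 3x2 row-major matrix (row stride 2) into
contiguous row-by-row 3x2 storage. The function name is illustrative
only.
********************************************************************/
#if 0
static void example_pack_n2(void)
{
    double m[3*2] = { 1,2,  3,4,  5,6 }; /* rows (1,2), (3,4), (5,6) */
    double dst[6];
    _ialglib_pack_n2(&m[0], &m[1], 3, 2, dst);
    /* dst is now { 1,2, 3,4, 5,6 }, i.e. rows of (col0|col1) */
}
#endif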
12016 
12017 /*************************************************************************
12018 This function reads rectangular matrix A given by two column pointers col0
12019 and  col1  and  stride src_stride and moves it into  contiguous row-by-row
12020 storage given by dst.
12021 
12022 dst must be aligned, col0 and col1 may be non-aligned.
12023 
12024 It can handle the following special cases:
12025 * col1==NULL        in this case the second column of A is filled with zeros
12026 * src_stride==1     efficient SSE-based code is used
12027 * col1-col0==1      efficient SSE-based code is used
12028 
12029 This function supports SSE2; it can be used when:
12030 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
12031 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
12032 
12033 If  you  want  to  know  whether  it  is safe to call it, you should check
12034 results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
12035 and will do its work.
12036 *************************************************************************/
12037 #if defined(AE_HAS_SSE2_INTRINSICS)
12038 void _ialglib_pack_n2_sse2(
12039     double *col0,
12040     double *col1,
12041     ae_int_t n,
12042     ae_int_t src_stride,
12043     double *dst)
12044 {
12045     ae_int_t n2, j, stride2;
12046 
12047     /*
12048      * handle special case: col1==NULL
12049      */
12050     if( col1==NULL )
12051     {
12052         for(j=0; j<n; j++)
12053         {
12054             dst[0] = *col0;
12055             dst[1] = 0.0;
12056             col0 += src_stride;
12057             dst  += 2;
12058         }
12059         return;
12060     }
12061 
12062     /*
12063      * handle unit stride
12064      */
12065     if( src_stride==1 )
12066     {
12067         __m128d v0, v1;
12068         n2 = n/2;
12069         for(j=0; j<n2; j++)
12070         {
12071             v0 = _mm_loadu_pd(col0);
12072             col0 += 2;
12073             v1 = _mm_loadu_pd(col1);
12074             col1 += 2;
12075             _mm_store_pd(dst,  _mm_unpacklo_pd(v0,v1));
12076             _mm_store_pd(dst+2,_mm_unpackhi_pd(v0,v1));
12077             dst  += 4;
12078         }
12079         if( n%2 )
12080         {
12081             dst[0] = *col0;
12082             dst[1] = *col1;
12083         }
12084         return;
12085     }
12086 
12087     /*
12088      * handle col1-col0==1
12089      */
12090     if( col1-col0==1 )
12091     {
12092         __m128d v0, v1;
12093         n2 = n/2;
12094         stride2 = 2*src_stride;
12095         for(j=0; j<n2; j++)
12096         {
12097             v0 = _mm_loadu_pd(col0);
12098             v1 = _mm_loadu_pd(col0+src_stride);
12099             _mm_store_pd(dst,  v0);
12100             _mm_store_pd(dst+2,v1);
12101             col0 += stride2;
12102             dst  += 4;
12103         }
12104         if( n%2 )
12105         {
12106             dst[0] = col0[0];
12107             dst[1] = col0[1];
12108         }
12109         return;
12110     }
12111 
12112     /*
12113      * handle general case
12114      */
12115     n2 = n/2;
12116     stride2 = src_stride*2;
12117     for(j=0; j<n2; j++)
12118     {
12119         dst[0] = *col0;
12120         dst[1] = *col1;
12121         dst[2] = col0[src_stride];
12122         dst[3] = col1[src_stride];
12123         col0 += stride2;
12124         col1 += stride2;
12125         dst  += 4;
12126     }
12127     if( n%2 )
12128     {
12129         dst[0] = *col0;
12130         dst[1] = *col1;
12131     }
12132 }
12133 #endif
12134 
12135 
12136 /********************************************************************
12137 This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
12138 matrices stored in contiguous row-by-row storage,  R  is  2x2  matrix
12139 stored in non-contiguous row-by-row storage.
12140 
12141 A and B must be aligned; R may be non-aligned.
12142 
12143 If beta is zero, the contents of R are ignored (not multiplied by zero,
12144 just ignored).
12145 
12146 However, when alpha is zero, we still calculate A'*B, which is
12147 multiplied by zero afterwards.
12148 
12149 Function accepts additional parameter store_mode:
12150 * if 0, full R is stored
12151 * if 1, only first row of R is stored
12152 * if 2, only first column of R is stored
12153 * if 3, only top left element of R is stored
12154 ********************************************************************/
12155 void _ialglib_mm22(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
12156 {
12157     double v00, v01, v10, v11;
12158     ae_int_t t;
12159     v00 = 0.0;
12160     v01 = 0.0;
12161     v10 = 0.0;
12162     v11 = 0.0;
12163     for(t=0; t<k; t++)
12164     {
12165         v00 += a[0]*b[0];
12166         v01 += a[0]*b[1];
12167         v10 += a[1]*b[0];
12168         v11 += a[1]*b[1];
12169         a+=2;
12170         b+=2;
12171     }
12172     if( store_mode==0 )
12173     {
12174         if( beta==0 )
12175         {
12176             r[0] = alpha*v00;
12177             r[1] = alpha*v01;
12178             r[stride+0] = alpha*v10;
12179             r[stride+1] = alpha*v11;
12180         }
12181         else
12182         {
12183             r[0] = beta*r[0] + alpha*v00;
12184             r[1] = beta*r[1] + alpha*v01;
12185             r[stride+0] = beta*r[stride+0] + alpha*v10;
12186             r[stride+1] = beta*r[stride+1] + alpha*v11;
12187         }
12188         return;
12189     }
12190     if( store_mode==1 )
12191     {
12192         if( beta==0 )
12193         {
12194             r[0] = alpha*v00;
12195             r[1] = alpha*v01;
12196         }
12197         else
12198         {
12199             r[0] = beta*r[0] + alpha*v00;
12200             r[1] = beta*r[1] + alpha*v01;
12201         }
12202         return;
12203     }
12204     if( store_mode==2 )
12205     {
12206         if( beta==0 )
12207         {
12208             r[0] =alpha*v00;
12209             r[stride+0] = alpha*v10;
12210         }
12211         else
12212         {
12213             r[0] = beta*r[0] + alpha*v00;
12214             r[stride+0] = beta*r[stride+0] + alpha*v10;
12215         }
12216         return;
12217     }
12218     if( store_mode==3 )
12219     {
12220         if( beta==0 )
12221         {
12222             r[0] = alpha*v00;
12223         }
12224         else
12225         {
12226             r[0] = beta*r[0] + alpha*v00;
12227         }
12228         return;
12229     }
12230 }
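
/********************************************************************
A minimal usage sketch (kept disabled, not part of the library build):
R := alpha*A'*B + beta*R for K=2 with B equal to the identity, so R
ends up equal to A'. store_mode==0 stores the full 2x2 result. The
function name is illustrative only.
********************************************************************/
#if 0
static void example_mm22(void)
{
    double a[4] = { 1,2,  3,4 };  /* A: Kx2, rows (1,2) and (3,4) */
    double b[4] = { 1,0,  0,1 };  /* B: Kx2 identity */
    double r[4];
    _ialglib_mm22(1.0, a, b, 2, 0.0, r, 2, 0);
    /* r is now { 1,3, 2,4 }, i.e. R = A' */
}
#endif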
12231 
12232 
12233 /********************************************************************
12234 This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
12235 matrices stored in contiguous row-by-row storage,  R  is  2x2  matrix
12236 stored in non-contiguous row-by-row storage.
12237 
12238 A and B must be aligned; R may be non-aligned.
12239 
12240 If beta is zero, the contents of R are ignored (not multiplied by zero,
12241 just ignored).
12242 
12243 However, when alpha is zero, we still calculate A'*B, which is
12244 multiplied by zero afterwards.
12245 
12246 Function accepts additional parameter store_mode:
12247 * if 0, full R is stored
12248 * if 1, only first row of R is stored
12249 * if 2, only first column of R is stored
12250 * if 3, only top left element of R is stored
12251 
12252 This function supports SSE2; it can be used when:
12253 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
12254 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
12255 
12256 If (1) fails, this function will still be defined and callable, but it
12257 will do nothing. If (2) fails, a call to this function will probably
12258 crash your system.
12259 
12260 If  you  want  to  know  whether  it  is safe to call it, you should check
12261 results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
12262 and will do its work.
12263 ********************************************************************/
12264 #if defined(AE_HAS_SSE2_INTRINSICS)
12265 void _ialglib_mm22_sse2(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
12266 {
12267     /*
12268      * We calculate product of two Kx2 matrices (result is 2x2).
12269      * VA and VB store result as follows:
12270      *
12271      *        [ VD[0]  VE[0] ]
12272      * A'*B = [              ]
12273      *        [ VE[1]  VD[1] ]
12274      *
12275      */
12276     __m128d va, vb, vd, ve, vt, r0, r1, valpha, vbeta;
12277     ae_int_t t, k2;
12278 
12279     /*
12280      * calculate product
12281      */
12282     k2 = k/2;
12283     vd = _mm_setzero_pd();
12284     ve = _mm_setzero_pd();
12285     for(t=0; t<k2; t++)
12286     {
12287         vb = _mm_load_pd(b);
12288         va = _mm_load_pd(a);
12289         vt = vb;
12290         vb = _mm_mul_pd(va,vb);
12291         vt = _mm_shuffle_pd(vt, vt, 1);
12292         vd = _mm_add_pd(vb,vd);
12293         vt = _mm_mul_pd(va,vt);
12294         vb = _mm_load_pd(b+2);
12295         ve = _mm_add_pd(vt,ve);
12296         va = _mm_load_pd(a+2);
12297         vt = vb;
12298         vb = _mm_mul_pd(va,vb);
12299         vt = _mm_shuffle_pd(vt, vt, 1);
12300         vd = _mm_add_pd(vb,vd);
12301         vt = _mm_mul_pd(va,vt);
12302         ve = _mm_add_pd(vt,ve);
12303         a+=4;
12304         b+=4;
12305     }
12306     if( k%2 )
12307     {
12308         va = _mm_load_pd(a);
12309         vb = _mm_load_pd(b);
12310         vt = _mm_shuffle_pd(vb, vb, 1);
12311         vd = _mm_add_pd(_mm_mul_pd(va,vb),vd);
12312         ve = _mm_add_pd(_mm_mul_pd(va,vt),ve);
12313     }
12314 
12315     /*
12316      * r0 is first row of alpha*A'*B, r1 is second row
12317      */
12318     valpha = _mm_load1_pd(&alpha);
12319     r0 = _mm_mul_pd(_mm_unpacklo_pd(vd,ve),valpha);
12320     r1 = _mm_mul_pd(_mm_unpackhi_pd(ve,vd),valpha);
12321 
12322     /*
12323      * store
12324      */
12325     if( store_mode==0 )
12326     {
12327         if( beta==0 )
12328         {
12329             _mm_storeu_pd(r,r0);
12330             _mm_storeu_pd(r+stride,r1);
12331         }
12332         else
12333         {
12334             vbeta = _mm_load1_pd(&beta);
12335             _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r0));
12336             _mm_storeu_pd(r+stride,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r1));
12337         }
12338         return;
12339     }
12340     if( store_mode==1 )
12341     {
12342         if( beta==0 )
12343             _mm_storeu_pd(r,r0);
12344         else
12345             _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),_mm_load1_pd(&beta)),r0));
12346         return;
12347     }
12348     if( store_mode==2 )
12349     {
12350         double buf[4];
12351         _mm_storeu_pd(buf,r0);
12352         _mm_storeu_pd(buf+2,r1);
12353         if( beta==0 )
12354         {
12355             r[0] =buf[0];
12356             r[stride+0] = buf[2];
12357         }
12358         else
12359         {
12360             r[0] = beta*r[0] + buf[0];
12361             r[stride+0] = beta*r[stride+0] + buf[2];
12362         }
12363         return;
12364     }
12365     if( store_mode==3 )
12366     {
12367         double buf[2];
12368         _mm_storeu_pd(buf,r0);
12369         if( beta==0 )
12370             r[0] = buf[0];
12371         else
12372             r[0] = beta*r[0] + buf[0];
12373         return;
12374     }
12375 }
12376 #endif
12377 
12378 
12379 /*************************************************************************
12380 This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0  and  B1
12381 are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4  matrix
12382 stored in non-contiguous row-by-row storage.
12383 
12384 A, B0 and B1 must be aligned; R may be non-aligned.
12385 
12386 Note  that  B0  and  B1  are  two  separate  matrices  stored in different
12387 locations.
12388 
12389 If beta is zero, contents of R is ignored (not  multiplied  by zero - just
12390 ignored).
12391 
12392 However,  when  alpha  is  zero,  we still calculate MM product,  which is
12393 multiplied by zero afterwards.
12394 
12395 Unlike mm22 functions, this function does NOT support partial  output of R
12396 - we always store full 2x4 matrix.
12397 *************************************************************************/
12398 void _ialglib_mm22x2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
12399 {
12400     _ialglib_mm22(alpha, a, b0, k, beta, r, stride, 0);
12401     _ialglib_mm22(alpha, a, b1, k, beta, r+2, stride, 0);
12402 }
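
/*
 * For reference, the operation performed by _ialglib_mm22x2 (and by its SSE2
 * counterpart below) is equivalent to the following scalar sketch. This is an
 * illustration only - the helper name rref_mm22x2 is hypothetical and the
 * sketch ignores the alignment requirements of the real kernels:
 *
 *     static void rref_mm22x2(double alpha, const double *a, const double *b0, const double *b1,
 *                             ae_int_t k, double beta, double *r, ae_int_t stride)
 *     {
 *         ae_int_t i, j, t;
 *         for(i=0; i<2; i++)
 *             for(j=0; j<4; j++)
 *             {
 *                 const double *b = j<2 ? b0 : b1;    // left 2x2 block comes from B0, right one from B1
 *                 double v = 0.0;
 *                 for(t=0; t<k; t++)
 *                     v += a[2*t+i]*b[2*t+j%2];       // A, B0, B1 are Kx2, stored row-by-row
 *                 r[i*stride+j] = (beta==0 ? 0.0 : beta*r[i*stride+j])+alpha*v;
 *             }
 *     }
 */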
12403 
12404 /*************************************************************************
12405 This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0  and  B1
12406 are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4  matrix
12407 stored in non-contiguous row-by-row storage.
12408 
12409 A, B0 and B1 must be aligned; R may be non-aligned.
12410 
12411 Note  that  B0  and  B1  are  two  separate  matrices  stored in different
12412 locations.
12413 
12414 If beta is zero, contents of R is ignored (not  multiplied  by zero - just
12415 ignored).
12416 
12417 However,  when  alpha  is  zero,  we still calculate MM product,  which is
12418 multiplied by zero afterwards.
12419 
12420 Unlike mm22 functions, this function does NOT support partial  output of R
12421 - we always store full 2x4 matrix.
12422 
12423 This function supports SSE2; it can be used when:
12424 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
12425 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
12426 
12427 If (1) is failed, this function will still be defined and callable, but it
12428 will do nothing.  If (2)  is  failed , call to this function will probably
12429 crash your system.
12430 
12431 If  you  want  to  know  whether  it  is safe to call it, you should check
12432 results  of  ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
12433 and will do its work.
12434 *************************************************************************/
12435 #if defined(AE_HAS_SSE2_INTRINSICS)
12436 void _ialglib_mm22x2_sse2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
12437 {
12438     /*
12439      * We calculate product of a Kx2 matrix by two Kx2 matrices (result is 2x4).
12440      * V0, V1, V2, V3 store result as follows:
12441      *
12442      *     [ V0[0]  V1[1] V2[0]  V3[1] ]
12443      * R = [                           ]
12444      *     [ V1[0]  V0[1] V3[0]  V2[1] ]
12445      *
12446      * VA0 stores current 1x2 block of A, VA1 stores shuffle of VA0,
12447      * VB0 and VB1 are used to store two copies of 1x2 block of B0 or B1
12448      * (both vars store same data - either B0 or B1). Results from multiplication
12449      * by VA0/VA1 are stored in VB0/VB1 too.
12450      *
12451      */
12452     __m128d v0, v1, v2, v3, va0, va1, vb0, vb1;
12453     __m128d r00, r01, r10, r11, valpha, vbeta;
12454     ae_int_t t;
12455 
12456     v0 = _mm_setzero_pd();
12457     v1 = _mm_setzero_pd();
12458     v2 = _mm_setzero_pd();
12459     v3 = _mm_setzero_pd();
12460     for(t=0; t<k; t++)
12461     {
12462         va0 = _mm_load_pd(a);
12463         vb0 = _mm_load_pd(b0);
12464         va1 = _mm_shuffle_pd(va0, va0, 1); /* VA1 = swapped VA0, as required by the layout described above */
12465 
12466         vb0 = _mm_mul_pd(va0,vb0);
12467         vb1 = _mm_load_pd(b0);
12468         v0 = _mm_add_pd(v0,vb0);
12469         vb1 = _mm_mul_pd(va1,vb1);
12470         vb0 = _mm_load_pd(b1);
12471         v1 = _mm_add_pd(v1,vb1);
12472 
12473         vb0 = _mm_mul_pd(va0,vb0);
12474         vb1 = _mm_load_pd(b1);
12475         v2 = _mm_add_pd(v2,vb0);
12476         vb1 = _mm_mul_pd(va1,vb1);
12477         v3 = _mm_add_pd(v3,vb1);
12478 
12479         a+=2;
12480         b0+=2;
12481         b1+=2;
12482     }
12483 
12484     /*
12485      * shuffle V1 and V3 (conversion to more convenient storage format):
12486      *
12487      *     [ V0[0]  V1[0] V2[0]  V3[0] ]
12488      * R = [                           ]
12489      *     [ V1[1]  V0[1] V3[1]  V2[1] ]
12490      *
12491      * unpack results to
12492      *
12493      * [ r00 r01 ]
12494      * [ r10 r11 ]
12495      *
12496      */
12497     valpha = _mm_load1_pd(&alpha);
12498     v1 = _mm_shuffle_pd(v1, v1, 1);
12499     v3 = _mm_shuffle_pd(v3, v3, 1);
12500     r00 = _mm_mul_pd(_mm_unpacklo_pd(v0,v1),valpha);
12501     r10 = _mm_mul_pd(_mm_unpackhi_pd(v1,v0),valpha);
12502     r01 = _mm_mul_pd(_mm_unpacklo_pd(v2,v3),valpha);
12503     r11 = _mm_mul_pd(_mm_unpackhi_pd(v3,v2),valpha);
12504 
12505     /*
12506      * store
12507      */
12508     if( beta==0 )
12509     {
12510         _mm_storeu_pd(r,r00);
12511         _mm_storeu_pd(r+2,r01);
12512         _mm_storeu_pd(r+stride,r10);
12513         _mm_storeu_pd(r+stride+2,r11);
12514     }
12515     else
12516     {
12517         vbeta = _mm_load1_pd(&beta);
12518         _mm_storeu_pd(r,          _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r00));
12519         _mm_storeu_pd(r+2,        _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+2),vbeta),r01));
12520         _mm_storeu_pd(r+stride,   _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r10));
12521         _mm_storeu_pd(r+stride+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride+2),vbeta),r11));
12522     }
12523 }
12524 #endif
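
/*
 * A minimal caller-side sketch of the safety rule described above: the SSE2
 * kernel may only be called when ae_cpuid() reports CPU_SSE2, otherwise the
 * generic kernel must be used. Dispatch in the library itself is done
 * elsewhere; this is an illustration only.
 *
 *     if( ae_cpuid()&CPU_SSE2 )
 *         _ialglib_mm22x2_sse2(alpha, a, b0, b1, k, beta, r, stride);
 *     else
 *         _ialglib_mm22x2(alpha, a, b0, b1, k, beta, r, stride);
 */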
12525 
12526 #if !defined(ALGLIB_NO_FAST_KERNELS)
12527 
12528 /*************************************************************************
12529 Computes dot product (X,Y) for elements [0,N) of X[] and Y[]
12530 
12531 INPUT PARAMETERS:
12532     N       -   vector length
12533     X       -   array[N], vector to process
12534     Y       -   array[N], vector to process
12535 
12536 RESULT:
12537     (X,Y)
12538 
12539   -- ALGLIB --
12540      Copyright 20.01.2020 by Bochkanov Sergey
12541 *************************************************************************/
12542 double rdotv(ae_int_t n,
12543      /* Real    */ ae_vector* x,
12544      /* Real    */ ae_vector* y,
12545      ae_state *_state)
12546 {
12547     ae_int_t i;
12548     double result;
12549 
12550     /*
12551      * Try fast kernels.
12552      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12553      */
12554     if( n>=_ABLASF_KERNEL_SIZE1 )
12555         _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv,(n,x->ptr.p_double,y->ptr.p_double,_state)) /* use _ALGLIB_KERNEL_VOID_ for a kernel that does not return result */
12556 
12557     /*
12558      * Original generic C implementation
12559      */
12560     result = (double)(0);
12561     for(i=0; i<=n-1; i++)
12562     {
12563         result = result+x->ptr.p_double[i]*y->ptr.p_double[i];
12564     }
12565     return result;
12566 }
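
/*
 * The _ALGLIB_KERNEL_* dispatch macros used throughout this section behave,
 * conceptually, like the sketch below. This is NOT the literal macro
 * expansion and the kernel names are assumptions; it only illustrates the
 * pattern: probe the CPU at run time, call an optimized kernel when one is
 * available, otherwise fall through to the generic C loop.
 *
 *     // hypothetical expansion inside rdotv()
 *     if( ae_cpuid()&CPU_AVX2 )
 *         return rdotv_avx2(n, x->ptr.p_double, y->ptr.p_double, _state);
 *     if( ae_cpuid()&CPU_SSE2 )
 *         return rdotv_sse2(n, x->ptr.p_double, y->ptr.p_double, _state);
 *     // ...otherwise execution continues with the loop below the macro
 */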
12567 
12568 
12569 
12570 /*************************************************************************
12571 Computes dot product (X,A[i]) for elements [0,N) of vector X[] and row A[i,*]
12572 
12573 INPUT PARAMETERS:
12574     N       -   vector length
12575     X       -   array[N], vector to process
12576     A       -   array[?,N], matrix to process
12577     I       -   row index
12578 
12579 RESULT:
12580     (X,Ai)
12581 
12582   -- ALGLIB --
12583      Copyright 20.01.2020 by Bochkanov Sergey
12584 *************************************************************************/
12585 double rdotvr(ae_int_t n,
12586      /* Real    */ ae_vector* x,
12587      /* Real    */ ae_matrix* a,
12588      ae_int_t i,
12589      ae_state *_state)
12590 {
12591     ae_int_t j;
12592     double result;
12593 
12594     /*
12595      * Try fast kernels.
12596      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12597      */
12598     if( n>=_ABLASF_KERNEL_SIZE1 )
12599         _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv,(n,x->ptr.p_double,a->ptr.pp_double[i],_state))
12600 
12601     result = (double)(0);
12602     for(j=0; j<=n-1; j++)
12603     {
12604         result = result+x->ptr.p_double[j]*a->ptr.pp_double[i][j];
12605     }
12606     return result;
12607 }
12608 
12609 
12610 /*************************************************************************
12611 Computes dot product (A[ia],B[ib]) for rows A[ia,*] and B[ib,*]
12612 
12613 INPUT PARAMETERS:
12614     N       -   vector length
12615     A, B    -   array[?,N], matrices to process
12616     IA      -   row index in A
12617     IB      -   row index in B
12618 
12619 RESULT:
12620     (A[ia],B[ib])
12621 
12622   -- ALGLIB --
12623      Copyright 20.01.2020 by Bochkanov Sergey
12624 *************************************************************************/
12625 double rdotrr(ae_int_t n,
12626      /* Real    */ ae_matrix* a,
12627      ae_int_t ia,
12628      /* Real    */ ae_matrix* b,
12629      ae_int_t ib,
12630      ae_state *_state)
12631 {
12632     ae_int_t j;
12633     double result;
12634 
12635     /*
12636      * Try fast kernels.
12637      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12638      */
12639     if( n>=_ABLASF_KERNEL_SIZE1 )
12640         _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv,(n,a->ptr.pp_double[ia],b->ptr.pp_double[ib],_state))
12641 
12642     result = (double)(0);
12643     for(j=0; j<=n-1; j++)
12644     {
12645         result = result+a->ptr.pp_double[ia][j]*b->ptr.pp_double[ib][j];
12646     }
12647     return result;
12648 }
12649 
12650 
12651 /*************************************************************************
12652 Computes dot product (X,X) for elements [0,N) of X[]
12653 
12654 INPUT PARAMETERS:
12655     N       -   vector length
12656     X       -   array[N], vector to process
12657 
12658 RESULT:
12659     (X,X)
12660 
12661   -- ALGLIB --
12662      Copyright 20.01.2020 by Bochkanov Sergey
12663 *************************************************************************/
12664 double rdotv2(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
12665 {
12666     ae_int_t i;
12667     double v;
12668     double result;
12669 
12670     /*
12671      * Try fast kernels.
12672      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12673      */
12674     if( n>=_ABLASF_KERNEL_SIZE1 )
12675         _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv2,(n,x->ptr.p_double,_state))
12676 
12677     result = (double)(0);
12678     for(i=0; i<=n-1; i++)
12679     {
12680         v = x->ptr.p_double[i];
12681         result = result+v*v;
12682     }
12683     return result;
12684 }
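
/*
 * A typical use of rdotv2 is computing a squared Euclidean norm. A sketch,
 * assuming x is an already-initialized real ae_vector with at least n
 * elements:
 *
 *     double norm = ae_sqrt(rdotv2(n, &x, _state), _state);
 */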
12685 
12686 
12687 /*************************************************************************
12688 Copies vector X[] to Y[]
12689 
12690 INPUT PARAMETERS:
12691     N       -   vector length
12692     X       -   array[N], source
12693     Y       -   preallocated array[N]
12694 
12695 OUTPUT PARAMETERS:
12696     Y       -   leading N elements are replaced by X
12697 
12698 
12699 NOTE: destination and source should NOT overlap
12700 
12701   -- ALGLIB --
12702      Copyright 20.01.2020 by Bochkanov Sergey
12703 *************************************************************************/
12704 void rcopyv(ae_int_t n,
12705      /* Real    */ ae_vector* x,
12706      /* Real    */ ae_vector* y,
12707      ae_state *_state)
12708 {
12709     ae_int_t j;
12710 
12711     /*
12712      * Try fast kernels.
12713      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12714      */
12715     if( n>=_ABLASF_KERNEL_SIZE1 )
12716         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12717             (n,x->ptr.p_double,y->ptr.p_double,_state))
12718 
12719 
12720     for(j=0; j<=n-1; j++)
12721     {
12722         y->ptr.p_double[j] = x->ptr.p_double[j];
12723     }
12724 }
12725 
12726 /*************************************************************************
12727 Copies vector X[] to row I of A[,]
12728 
12729 INPUT PARAMETERS:
12730     N       -   vector length
12731     X       -   array[N], source
12732     A       -   preallocated 2D array large enough to store result
12733     I       -   destination row index
12734 
12735 OUTPUT PARAMETERS:
12736     A       -   leading N elements of I-th row are replaced by X
12737 
12738   -- ALGLIB --
12739      Copyright 20.01.2020 by Bochkanov Sergey
12740 *************************************************************************/
12741 void rcopyvr(ae_int_t n,
12742      /* Real    */ ae_vector* x,
12743      /* Real    */ ae_matrix* a,
12744      ae_int_t i,
12745      ae_state *_state)
12746 {
12747     ae_int_t j;
12748 
12749     /*
12750      * Try fast kernels.
12751      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12752      */
12753     if( n>=_ABLASF_KERNEL_SIZE1 )
12754         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12755             (n, x->ptr.p_double, a->ptr.pp_double[i], _state))
12756 
12757     for(j=0; j<=n-1; j++)
12758     {
12759         a->ptr.pp_double[i][j] = x->ptr.p_double[j];
12760     }
12761 }
12762 
12763 
12764 /*************************************************************************
12765 Copies row I of A[,] to vector X[]
12766 
12767 INPUT PARAMETERS:
12768     N       -   vector length
12769     A       -   2D array, source
12770     I       -   source row index
12771     X       -   preallocated destination
12772 
12773 OUTPUT PARAMETERS:
12774     X       -   array[N], destination
12775 
12776   -- ALGLIB --
12777      Copyright 20.01.2020 by Bochkanov Sergey
12778 *************************************************************************/
12779 void rcopyrv(ae_int_t n,
12780      /* Real    */ ae_matrix* a,
12781      ae_int_t i,
12782      /* Real    */ ae_vector* x,
12783      ae_state *_state)
12784 {
12785     ae_int_t j;
12786 
12787     /*
12788      * Try fast kernels.
12789      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12790      */
12791     if( n>=_ABLASF_KERNEL_SIZE1 )
12792         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12793             (n, a->ptr.pp_double[i], x->ptr.p_double, _state))
12794 
12795     for(j=0; j<=n-1; j++)
12796     {
12797         x->ptr.p_double[j] = a->ptr.pp_double[i][j];
12798     }
12799 }
12800 
12801 
12802 /*************************************************************************
12803 Copies row I of A[,] to row K of B[,].
12804 
12805 A[i,...] and B[k,...] may overlap.
12806 
12807 INPUT PARAMETERS:
12808     N       -   vector length
12809     A       -   2D array, source
12810     I       -   source row index
12811     B       -   preallocated destination
12812     K       -   destination row index
12813 
12814 OUTPUT PARAMETERS:
12815     B       -   row K overwritten
12816 
12817   -- ALGLIB --
12818      Copyright 20.01.2020 by Bochkanov Sergey
12819 *************************************************************************/
12820 void rcopyrr(ae_int_t n,
12821      /* Real    */ ae_matrix* a,
12822      ae_int_t i,
12823      /* Real    */ ae_matrix* b,
12824      ae_int_t k,
12825      ae_state *_state)
12826 {
12827     ae_int_t j;
12828 
12829     /*
12830      * Try fast kernels.
12831      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12832      */
12833     if( n>=_ABLASF_KERNEL_SIZE1 )
12834         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12835             (n, a->ptr.pp_double[i], b->ptr.pp_double[k], _state))
12836 
12837     for(j=0; j<=n-1; j++)
12838     {
12839         b->ptr.pp_double[k][j] = a->ptr.pp_double[i][j];
12840     }
12841 }
12842 
12843 /*************************************************************************
12844 Performs copying with multiplication of V*X[] to Y[]
12845 
12846 INPUT PARAMETERS:
12847     N       -   vector length
12848     V       -   multiplier
12849     X       -   array[N], source
12850     Y       -   preallocated array[N]
12851 
12852 OUTPUT PARAMETERS:
12853     Y       -   array[N], Y = V*X
12854 
12855   -- ALGLIB --
12856      Copyright 20.01.2020 by Bochkanov Sergey
12857 *************************************************************************/
12858 void rcopymulv(ae_int_t n,
12859      double v,
12860      /* Real    */ ae_vector* x,
12861      /* Real    */ ae_vector* y,
12862      ae_state *_state)
12863 {
12864     ae_int_t i;
12865 
12866     /*
12867      * Try fast kernels.
12868      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12869      */
12870     if( n>=_ABLASF_KERNEL_SIZE1 )
12871         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopymulv,
12872             (n,v,x->ptr.p_double,y->ptr.p_double,_state))
12873 
12874     for(i=0; i<=n-1; i++)
12875     {
12876         y->ptr.p_double[i] = v*x->ptr.p_double[i];
12877     }
12878 }
12879 
12880 
12881 /*************************************************************************
12882 Performs copying with multiplication of V*X[] to Y[I,*]
12883 
12884 INPUT PARAMETERS:
12885     N       -   vector length
12886     V       -   multiplier
12887     X       -   array[N], source
12888     Y       -   preallocated array[?,N]
12889     RIdx    -   destination row index
12890 
12891 OUTPUT PARAMETERS:
12892     Y       -   Y[RIdx,...] = V*X
12893 
12894   -- ALGLIB --
12895      Copyright 20.01.2020 by Bochkanov Sergey
12896 *************************************************************************/
12897 void rcopymulvr(ae_int_t n,
12898      double v,
12899      /* Real    */ ae_vector* x,
12900      /* Real    */ ae_matrix* y,
12901      ae_int_t ridx,
12902      ae_state *_state)
12903 {
12904     ae_int_t i;
12905 
12906     /*
12907      * Try fast kernels.
12908      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12909      */
12910     if( n>=_ABLASF_KERNEL_SIZE1 )
12911         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopymulv,
12912             (n,v,x->ptr.p_double,y->ptr.pp_double[ridx],_state))
12913 
12914     for(i=0; i<=n-1; i++)
12915     {
12916         y->ptr.pp_double[ridx][i] = v*x->ptr.p_double[i];
12917     }
12918 }
12919 
12920 /*************************************************************************
12921 Copies vector X[] to Y[]
12922 
12923 INPUT PARAMETERS:
12924     N       -   vector length
12925     X       -   source array
12926     Y       -   preallocated array[N]
12927 
12928 OUTPUT PARAMETERS:
12929     Y       -   X copied to Y
12930 
12931   -- ALGLIB --
12932      Copyright 20.01.2020 by Bochkanov Sergey
12933 *************************************************************************/
12934 void icopyv(ae_int_t n,
12935      /* Integer */ ae_vector* x,
12936      /* Integer */ ae_vector* y,
12937      ae_state *_state)
12938 {
12939     ae_int_t j;
12940 
12941     /*
12942      * Try fast kernels.
12943      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12944      */
12945     if( n>=_ABLASF_KERNEL_SIZE1 )
12946         _ALGLIB_KERNEL_VOID_SSE2_AVX2(icopyv,
12947             (n, x->ptr.p_int, y->ptr.p_int, _state))
12948 
12949     for(j=0; j<=n-1; j++)
12950     {
12951         y->ptr.p_int[j] = x->ptr.p_int[j];
12952     }
12953 }
12954 
12955 /*************************************************************************
12956 Copies vector X[] to Y[]
12957 
12958 INPUT PARAMETERS:
12959     N       -   vector length
12960     X       -   array[N], source
12961     Y       -   preallocated array[N]
12962 
12963 OUTPUT PARAMETERS:
12964     Y       -   leading N elements are replaced by X
12965 
12966 
12967 NOTE: destination and source should NOT overlap
12968 
12969   -- ALGLIB --
12970      Copyright 20.01.2020 by Bochkanov Sergey
12971 *************************************************************************/
12972 void bcopyv(ae_int_t n,
12973      /* Boolean */ ae_vector* x,
12974      /* Boolean */ ae_vector* y,
12975      ae_state *_state)
12976 {
12977     ae_int_t j;
12978 
12979     /*
12980      * Try fast kernels.
12981      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12982      */
12983     if( n>=_ABLASF_KERNEL_SIZE1*8 )
12984         _ALGLIB_KERNEL_VOID_SSE2_AVX2(bcopyv,
12985             (n, x->ptr.p_bool, y->ptr.p_bool, _state))
12986 
12987     for(j=0; j<=n-1; j++)
12988     {
12989         y->ptr.p_bool[j] = x->ptr.p_bool[j];
12990     }
12991 }
12992 
12993 
12994 /*************************************************************************
12995 Sets vector X[] to V
12996 
12997 INPUT PARAMETERS:
12998     N       -   vector length
12999     V       -   value to set
13000     X       -   array[N]
13001 
13002 OUTPUT PARAMETERS:
13003     X       -   leading N elements are replaced by V
13004 
13005   -- ALGLIB --
13006      Copyright 20.01.2020 by Bochkanov Sergey
13007 *************************************************************************/
13008 void rsetv(ae_int_t n,
13009      double v,
13010      /* Real    */ ae_vector* x,
13011      ae_state *_state)
13012 {
13013     ae_int_t j;
13014 
13015     /*
13016      * Try fast kernels.
13017      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13018      */
13019     if( n>=_ABLASF_KERNEL_SIZE1 )
13020         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetv,
13021             (n, v, x->ptr.p_double, _state))
13022 
13023     for(j=0; j<=n-1; j++)
13024     {
13025         x->ptr.p_double[j] = v;
13026     }
13027 }
13028 
13029 /*************************************************************************
13030 Sets row I of A[,] to V
13031 
13032 INPUT PARAMETERS:
13033     N       -   vector length
13034     V       -   value to set
13035     A       -   array[N,N] or larger
13036     I       -   row index
13037 
13038 OUTPUT PARAMETERS:
13039     A       -   leading N elements of I-th row are replaced by V
13040 
13041   -- ALGLIB --
13042      Copyright 20.01.2020 by Bochkanov Sergey
13043 *************************************************************************/
13044 void rsetr(ae_int_t n,
13045      double v,
13046      /* Real    */ ae_matrix* a,
13047      ae_int_t i,
13048      ae_state *_state)
13049 {
13050     ae_int_t j;
13051 
13052     /*
13053      * Try fast kernels.
13054      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13055      */
13056     if( n>=_ABLASF_KERNEL_SIZE1 )
13057         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetv,
13058             (n, v, a->ptr.pp_double[i], _state))
13059 
13060     for(j=0; j<=n-1; j++)
13061     {
13062         a->ptr.pp_double[i][j] = v;
13063     }
13064 }
13065 
13066 
13067 /*************************************************************************
13068 Sets X[OffsX:OffsX+N-1] to V
13069 
13070 INPUT PARAMETERS:
13071     N       -   subvector length
13072     V       -   value to set
13073     X       -   array[N]
13074 
13075 OUTPUT PARAMETERS:
13076     X       -   X[OffsX:OffsX+N-1] is replaced by V
13077 
13078   -- ALGLIB --
13079      Copyright 20.01.2020 by Bochkanov Sergey
13080 *************************************************************************/
13081 void rsetvx(ae_int_t n,
13082      double v,
13083      /* Real    */ ae_vector* x,
13084      ae_int_t offsx,
13085      ae_state *_state)
13086 {
13087     ae_int_t j;
13088 
13089     /*
13090      * Try fast kernels.
13091      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13092      */
13093     if( n>=_ABLASF_KERNEL_SIZE1 )
13094         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetvx,
13095             (n, v, x->ptr.p_double+offsx, _state))
13096 
13097     for(j=0; j<=n-1; j++)
13098     {
13099         x->ptr.p_double[offsx+j] = v;
13100     }
13101 }
13102 
13103 
13104 /*************************************************************************
13105 Sets matrix A[] to V
13106 
13107 INPUT PARAMETERS:
13108     M, N    -   rows/cols count
13109     V       -   value to set
13110     A       -   array[M,N]
13111 
13112 OUTPUT PARAMETERS:
13113     A       -   leading M rows, N cols are replaced by V
13114 
13115   -- ALGLIB --
13116      Copyright 20.01.2020 by Bochkanov Sergey
13117 *************************************************************************/
13118 static void rsetm_simd(const ae_int_t n, const double v, double *pDest, ae_state *_state)
13119 {
13120     _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetv, (n, v, pDest, _state));
13121 
13122     ae_int_t j;
13123     for(j=0; j<=n-1; j++) {
13124         pDest[j] = v;
13125     }
13126 }
13127 
13128 void rsetm(ae_int_t m,
13129      ae_int_t n,
13130      double v,
13131      /* Real    */ ae_matrix* a,
13132      ae_state *_state)
13133 {
13134     ae_int_t i;
13135     ae_int_t j;
13136 
13137     /*
13138      * Try fast kernels.
13139      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13140      */
13141     if( n >=_ABLASF_KERNEL_SIZE1 ) {
13142         for(i=0; i<m; i++) {
13143             rsetm_simd(n, v, a->ptr.pp_double[i], _state);
13144         }
13145         return;
13146     }
13147 
13148     for(i=0; i<=m-1; i++)
13149     {
13150         for(j=0; j<=n-1; j++)
13151         {
13152             a->ptr.pp_double[i][j] = v;
13153         }
13154     }
13155 }
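
/*
 * Design note: the dispatch macros return from the enclosing function as soon
 * as an optimized kernel has been invoked (see "On success this macro will
 * return" above). Because rsetm fills the matrix row by row, the per-row
 * kernel call is wrapped into the helper rsetm_simd - returning from the
 * helper finishes only the current row, not the whole row loop. A usage
 * sketch, assuming a is an already-initialized m x n real ae_matrix:
 *
 *     rsetm(m, n, 0.0, &a, _state);    // a[i][j] = 0 for all i<m, j<n
 */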
13156 
13157 
13158 /*************************************************************************
13159 Sets vector X[] to V
13160 
13161 INPUT PARAMETERS:
13162     N       -   vector length
13163     V       -   value to set
13164     X       -   array[N]
13165 
13166 OUTPUT PARAMETERS:
13167     X       -   leading N elements are replaced by V
13168 
13169   -- ALGLIB --
13170      Copyright 20.01.2020 by Bochkanov Sergey
13171 *************************************************************************/
13172 void isetv(ae_int_t n,
13173      ae_int_t v,
13174      /* Integer */ ae_vector* x,
13175      ae_state *_state)
13176 {
13177     ae_int_t j;
13178 
13179     /*
13180      * Try fast kernels.
13181      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13182      */
13183     if( n>=_ABLASF_KERNEL_SIZE1 )
13184         _ALGLIB_KERNEL_VOID_SSE2_AVX2(isetv,
13185             (n, v, x->ptr.p_int, _state))
13186 
13187     for(j=0; j<=n-1; j++)
13188     {
13189         x->ptr.p_int[j] = v;
13190     }
13191 }
13192 
13193 /*************************************************************************
13194 Sets vector X[] to V
13195 
13196 INPUT PARAMETERS:
13197     N       -   vector length
13198     V       -   value to set
13199     X       -   array[N]
13200 
13201 OUTPUT PARAMETERS:
13202     X       -   leading N elements are replaced by V
13203 
13204   -- ALGLIB --
13205      Copyright 20.01.2020 by Bochkanov Sergey
13206 *************************************************************************/
13207 void bsetv(ae_int_t n,
13208      ae_bool v,
13209      /* Boolean */ ae_vector* x,
13210      ae_state *_state)
13211 {
13212     ae_int_t j;
13213 
13214     /*
13215      * Try fast kernels.
13216      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13217      */
13218     if( n>=_ABLASF_KERNEL_SIZE1*8 )
13219         _ALGLIB_KERNEL_VOID_SSE2_AVX2(bsetv,
13220             (n, v, x->ptr.p_bool, _state))
13221 
13222     for(j=0; j<=n-1; j++)
13223     {
13224         x->ptr.p_bool[j] = v;
13225     }
13226 }
13227 
13228 
13229 /*************************************************************************
13230 Performs inplace multiplication of X[] by V
13231 
13232 INPUT PARAMETERS:
13233     N       -   vector length
13234     X       -   array[N], vector to process
13235     V       -   multiplier
13236 
13237 OUTPUT PARAMETERS:
13238     X       -   elements 0...N-1 multiplied by V
13239 
13240   -- ALGLIB --
13241      Copyright 20.01.2020 by Bochkanov Sergey
13242 *************************************************************************/
13243 void rmulv(ae_int_t n,
13244      double v,
13245      /* Real    */ ae_vector* x,
13246      ae_state *_state)
13247 {
13248     ae_int_t i;
13249 
13250     /*
13251      * Try fast kernels.
13252      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13253      */
13254     if( n>=_ABLASF_KERNEL_SIZE1 )
13255         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmulv,
13256             (n,v,x->ptr.p_double,_state))
13257 
13258     for(i=0; i<=n-1; i++)
13259     {
13260         x->ptr.p_double[i] = x->ptr.p_double[i]*v;
13261     }
13262 }
13263 
13264 
13265 /*************************************************************************
13266 Performs inplace multiplication of X[] by V
13267 
13268 INPUT PARAMETERS:
13269     N       -   row length
13270     X       -   array[?,N], row to process
13271     V       -   multiplier
13272 
13273 OUTPUT PARAMETERS:
13274     X       -   elements 0...N-1 of row RowIdx are multiplied by V
13275 
13276   -- ALGLIB --
13277      Copyright 20.01.2020 by Bochkanov Sergey
13278 *************************************************************************/
13279 void rmulr(ae_int_t n,
13280      double v,
13281      /* Real    */ ae_matrix* x,
13282      ae_int_t rowidx,
13283      ae_state *_state)
13284 {
13285     ae_int_t i;
13286 
13287     /*
13288      * Try fast kernels.
13289      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13290      */
13291     if( n>=_ABLASF_KERNEL_SIZE1 )
13292         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmulv,
13293             (n, v, x->ptr.pp_double[rowidx], _state))
13294 
13295     for(i=0; i<=n-1; i++)
13296     {
13297         x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*v;
13298     }
13299 }
13300 
13301 
13302 /*************************************************************************
13303 Performs inplace multiplication of X[OffsX:OffsX+N-1] by V
13304 
13305 INPUT PARAMETERS:
13306     N       -   subvector length
13307     X       -   vector to process
13308     V       -   multiplier
13309 
13310 OUTPUT PARAMETERS:
13311     X       -   elements OffsX:OffsX+N-1 multiplied by V
13312 
13313   -- ALGLIB --
13314      Copyright 20.01.2020 by Bochkanov Sergey
13315 *************************************************************************/
13316 void rmulvx(ae_int_t n,
13317      double v,
13318      /* Real    */ ae_vector* x,
13319      ae_int_t offsx,
13320      ae_state *_state)
13321 {
13322     ae_int_t i;
13323 
13324     /*
13325      * Try fast kernels.
13326      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13327      */
13328     if( n>=_ABLASF_KERNEL_SIZE1 )
13329         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmulvx,
13330             (n, v, x->ptr.p_double+offsx, _state))
13331 
13332     for(i=0; i<=n-1; i++)
13333     {
13334         x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]*v;
13335     }
13336 }
13337 
13338 
13339 /*************************************************************************
13340 Performs inplace addition of Y[] to X[]
13341 
13342 INPUT PARAMETERS:
13343     N       -   vector length
13344     Alpha   -   multiplier
13345     Y       -   array[N], vector to process
13346     X       -   array[N], vector to process
13347 
13348 RESULT:
13349     X := X + alpha*Y
13350 
13351   -- ALGLIB --
13352      Copyright 20.01.2020 by Bochkanov Sergey
13353 *************************************************************************/
13354 void raddv(ae_int_t n,
13355      double alpha,
13356      /* Real    */ ae_vector* y,
13357      /* Real    */ ae_vector* x,
13358      ae_state *_state)
13359 {
13360     ae_int_t i;
13361 
13362     /*
13363      * Try fast kernels.
13364      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13365      */
13366     if( n>=_ABLASF_KERNEL_SIZE1 )
13367         _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13368             (n,alpha,y->ptr.p_double,x->ptr.p_double,_state))
13369 
13370 
13371     for(i=0; i<=n-1; i++)
13372     {
13373         x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.p_double[i];
13374     }
13375 }
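
/*
 * raddv is the classic axpy-style update X := X + Alpha*Y restricted to the
 * leading N elements. A usage sketch, assuming x and y are already-initialized
 * real ae_vectors with at least n elements:
 *
 *     raddv(n, -1.0, &y, &x, _state);    // x := x - y
 */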
13376 
13377 
13378 /*************************************************************************
13379 Performs inplace addition of vector Y[] to row X[]
13380 
13381 INPUT PARAMETERS:
13382     N       -   vector length
13383     Alpha   -   multiplier
13384     Y       -   vector to add
13385     X       -   target row RowIdx
13386 
13387 RESULT:
13388     X := X + alpha*Y
13389 
13390   -- ALGLIB --
13391      Copyright 20.01.2020 by Bochkanov Sergey
13392 *************************************************************************/
13393 void raddvr(ae_int_t n,
13394      double alpha,
13395      /* Real    */ ae_vector* y,
13396      /* Real    */ ae_matrix* x,
13397      ae_int_t rowidx,
13398      ae_state *_state)
13399 {
13400     ae_int_t i;
13401 
13402     /*
13403      * Try fast kernels.
13404      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13405      */
13406     if( n>=_ABLASF_KERNEL_SIZE1 )
13407         _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13408             (n,alpha,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13409 
13410 
13411     for(i=0; i<=n-1; i++)
13412     {
13413         x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]+alpha*y->ptr.p_double[i];
13414     }
13415 }
13416 
13417 
13418 /*************************************************************************
13419 Performs inplace addition of Y[RIdx,...] to X[]
13420 
13421 INPUT PARAMETERS:
13422     N       -   vector length
13423     Alpha   -   multiplier
13424     Y       -   array[?,N], matrix whose RIdx-th row is added
13425     RIdx    -   row index
13426     X       -   array[N], vector to process
13427 
13428 RESULT:
13429     X := X + alpha*Y
13430 
13431   -- ALGLIB --
13432      Copyright 20.01.2020 by Bochkanov Sergey
13433 *************************************************************************/
13434 void raddrv(ae_int_t n,
13435      double alpha,
13436      /* Real    */ ae_matrix* y,
13437      ae_int_t ridx,
13438      /* Real    */ ae_vector* x,
13439      ae_state *_state)
13440 {
13441     ae_int_t i;
13442 
13443     /*
13444      * Try fast kernels.
13445      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13446      */
13447     if( n>=_ABLASF_KERNEL_SIZE1 )
13448         _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13449             (n,alpha,y->ptr.pp_double[ridx],x->ptr.p_double,_state))
13450 
13451     for(i=0; i<=n-1; i++)
13452     {
13453         x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.pp_double[ridx][i];
13454     }
13455 }
13456 
13457 
13458 /*************************************************************************
13459 Performs inplace addition of Y[RIdx,...] to X[RIdxDst]
13460 
13461 INPUT PARAMETERS:
13462     N       -   vector length
13463     Alpha   -   multiplier
13464     Y       -   array[?,N], matrix whose RIdxSrc-th row is added
13465     RIdxSrc -   source row index
13466     X       -   array[?,N], matrix whose RIdxDst-th row is target
13467     RIdxDst -   destination row index
13468 
13469 RESULT:
13470     X := X + alpha*Y
13471 
13472   -- ALGLIB --
13473      Copyright 20.01.2020 by Bochkanov Sergey
13474 *************************************************************************/
13475 void raddrr(ae_int_t n,
13476      double alpha,
13477      /* Real    */ ae_matrix* y,
13478      ae_int_t ridxsrc,
13479      /* Real    */ ae_matrix* x,
13480      ae_int_t ridxdst,
13481      ae_state *_state)
13482 {
13483     ae_int_t i;
13484 
13485     /*
13486      * Try fast kernels.
13487      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13488      */
13489     if( n>=_ABLASF_KERNEL_SIZE1 )
13490         _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13491             (n,alpha,y->ptr.pp_double[ridxsrc],x->ptr.pp_double[ridxdst],_state))
13492 
13493     for(i=0; i<=n-1; i++)
13494     {
13495         x->ptr.pp_double[ridxdst][i] = x->ptr.pp_double[ridxdst][i]+alpha*y->ptr.pp_double[ridxsrc][i];
13496     }
13497 }
13498 
13499 
13500 /*************************************************************************
13501 Performs inplace addition of Y[] to X[]
13502 
13503 INPUT PARAMETERS:
13504     N       -   vector length
13505     Alpha   -   multiplier
13506     Y       -   source vector
13507     OffsY   -   source offset
13508     X       -   destination vector
13509     OffsX   -   destination offset
13510 
13511 RESULT:
13512     X := X + alpha*Y
13513 
13514   -- ALGLIB --
13515      Copyright 20.01.2020 by Bochkanov Sergey
13516 *************************************************************************/
13517 void raddvx(ae_int_t n,
13518      double alpha,
13519      /* Real    */ ae_vector* y,
13520      ae_int_t offsy,
13521      /* Real    */ ae_vector* x,
13522      ae_int_t offsx,
13523      ae_state *_state)
13524 {
13525     ae_int_t i;
13526 
13527     /*
13528      * Try fast kernels.
13529      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13530      */
13531     if( n>=_ABLASF_KERNEL_SIZE1 )
13532         _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddvx,
13533             (n, alpha, y->ptr.p_double+offsy, x->ptr.p_double+offsx, _state))
13534 
13535     for(i=0; i<=n-1; i++)
13536     {
13537         x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]+alpha*y->ptr.p_double[offsy+i];
13538     }
13539 }
13540 
13541 
13542 /*************************************************************************
13543 Performs componentwise multiplication of vector X[] by vector Y[]
13544 
13545 INPUT PARAMETERS:
13546     N       -   vector length
13547     Y       -   vector to multiply by
13548     X       -   target vector
13549 
13550 RESULT:
13551     X := componentwise(X*Y)
13552 
13553   -- ALGLIB --
13554      Copyright 20.01.2020 by Bochkanov Sergey
13555 *************************************************************************/
13556 void rmergemulv(ae_int_t n,
13557      /* Real    */ ae_vector* y,
13558      /* Real    */ ae_vector* x,
13559      ae_state *_state)
13560 {
13561     ae_int_t i;
13562 
13563     /*
13564      * Try fast kernels.
13565      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13566      */
13567     if( n>=_ABLASF_KERNEL_SIZE1 )
13568         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemulv,
13569             (n,y->ptr.p_double,x->ptr.p_double,_state))
13570 
13571 
13572     for(i=0; i<=n-1; i++)
13573     {
13574         x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.p_double[i];
13575     }
13576 }
13577 
13578 
13579 /*************************************************************************
13580 Performs componentwise multiplication of row X[] by vector Y[]
13581 
13582 INPUT PARAMETERS:
13583     N       -   vector length
13584     Y       -   vector to multiply by
13585     X       -   target row RowIdx
13586 
13587 RESULT:
13588     X := componentwise(X*Y)
13589 
13590   -- ALGLIB --
13591      Copyright 20.01.2020 by Bochkanov Sergey
13592 *************************************************************************/
13593 void rmergemulvr(ae_int_t n,
13594      /* Real    */ ae_vector* y,
13595      /* Real    */ ae_matrix* x,
13596      ae_int_t rowidx,
13597      ae_state *_state)
13598 {
13599     ae_int_t i;
13600 
13601     /*
13602      * Try fast kernels.
13603      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13604      */
13605     if( n>=_ABLASF_KERNEL_SIZE1 )
13606         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemulv,
13607             (n,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13608 
13609 
13610     for(i=0; i<=n-1; i++)
13611     {
13612         x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*y->ptr.p_double[i];
13613     }
13614 }
13615 
13616 
13617 /*************************************************************************
13618 Performs componentwise multiplication of row X[] by vector Y[]
13619 
13620 INPUT PARAMETERS:
13621     N       -   vector length
13622     Y       -   vector to multiply by
13623     X       -   target row RowIdx
13624 
13625 RESULT:
13626     X := componentwise(X*Y)
13627 
13628   -- ALGLIB --
13629      Copyright 20.01.2020 by Bochkanov Sergey
13630 *************************************************************************/
13631 void rmergemulrv(ae_int_t n,
13632      /* Real    */ ae_matrix* y,
13633      ae_int_t rowidx,
13634      /* Real    */ ae_vector* x,
13635      ae_state *_state)
13636 {
13637     ae_int_t i;
13638 
13639     /*
13640      * Try fast kernels.
13641      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13642      */
13643     if( n>=_ABLASF_KERNEL_SIZE1 )
13644         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemulv,
13645             (n,y->ptr.pp_double[rowidx],x->ptr.p_double,_state))
13646 
13647     for(i=0; i<=n-1; i++)
13648     {
13649         x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.pp_double[rowidx][i];
13650     }
13651 }
13652 
13653 /*************************************************************************
13654 Performs componentwise max of vector X[] and vector Y[]
13655 
13656 INPUT PARAMETERS:
13657     N       -   vector length
13658     Y       -   source vector
13659     X       -   target vector
13660 
13661 RESULT:
13662     X := componentwise_max(X,Y)
13663 
13664   -- ALGLIB --
13665      Copyright 20.01.2020 by Bochkanov Sergey
13666 *************************************************************************/
13667 void rmergemaxv(ae_int_t n,
13668      /* Real    */ ae_vector* y,
13669      /* Real    */ ae_vector* x,
13670      ae_state *_state)
13671 {
13672     ae_int_t i;
13673 
13674     /*
13675      * Try fast kernels.
13676      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13677      */
13678     if( n>=_ABLASF_KERNEL_SIZE1 )
13679         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemaxv,
13680             (n,y->ptr.p_double,x->ptr.p_double,_state))
13681 
13682     for(i=0; i<=n-1; i++)
13683     {
13684         x->ptr.p_double[i] = ae_maxreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
13685     }
13686 }
13687 
13688 
13689 /*************************************************************************
13690 Performs componentwise max of row X[] and vector Y[]
13691 
13692 INPUT PARAMETERS:
13693     N       -   vector length
13694     Y       -   source vector
13695     X       -   target row RowIdx
13696 
13697 RESULT:
13698     X := componentwise_max(X,Y)
13699 
13700   -- ALGLIB --
13701      Copyright 20.01.2020 by Bochkanov Sergey
13702 *************************************************************************/
13703 void rmergemaxvr(ae_int_t n,
13704      /* Real    */ ae_vector* y,
13705      /* Real    */ ae_matrix* x,
13706      ae_int_t rowidx,
13707      ae_state *_state)
13708 {
13709     ae_int_t i;
13710 
13711     /*
13712      * Try fast kernels.
13713      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13714      */
13715     if( n>=_ABLASF_KERNEL_SIZE1 )
13716         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemaxv,
13717             (n,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13718 
13719     for(i=0; i<=n-1; i++)
13720     {
13721         x->ptr.pp_double[rowidx][i] = ae_maxreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
13722     }
13723 }
13724 
13725 
13726 /*************************************************************************
13727 Performs componentwise max of row X[RowIdx] and vector Y[]
13728 
13729 INPUT PARAMETERS:
13730     N       -   vector length
13731     X       -   matrix, RowIdx-th row is source
13732     Y       -   preallocated target vector
13733 
13734 RESULT:
13735     Y := componentwise_max(X[RowIdx],Y)
13736 
13737   -- ALGLIB --
13738      Copyright 20.01.2020 by Bochkanov Sergey
13739 *************************************************************************/
13740 void rmergemaxrv(ae_int_t n,
13741      /* Real    */ ae_matrix* x,
13742      ae_int_t rowidx,
13743      /* Real    */ ae_vector* y,
13744      ae_state *_state)
13745 {
13746     ae_int_t i;
13747 
13748     /*
13749      * Try fast kernels.
13750      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13751      */
13752     if( n>=_ABLASF_KERNEL_SIZE1 )
13753         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemaxv,
13754             (n,x->ptr.pp_double[rowidx],y->ptr.p_double,_state))
13755 
13756     for(i=0; i<=n-1; i++)
13757     {
13758         y->ptr.p_double[i] = ae_maxreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
13759     }
13760 }
13761 
13762 /*************************************************************************
13763 Performs componentwise min of vector X[] and vector Y[]
13764 
13765 INPUT PARAMETERS:
13766     N       -   vector length
13767     Y       -   source vector
13768     X       -   target vector
13769 
13770 RESULT:
13771     X := componentwise_min(X,Y)
13772 
13773   -- ALGLIB --
13774      Copyright 20.01.2020 by Bochkanov Sergey
13775 *************************************************************************/
13776 void rmergeminv(ae_int_t n,
13777      /* Real    */ ae_vector* y,
13778      /* Real    */ ae_vector* x,
13779      ae_state *_state)
13780 {
13781     ae_int_t i;
13782 
13783     /*
13784      * Try fast kernels.
13785      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13786      */
13787     if( n>=_ABLASF_KERNEL_SIZE1 )
13788         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergeminv,
13789             (n,y->ptr.p_double,x->ptr.p_double,_state))
13790 
13791     for(i=0; i<=n-1; i++)
13792     {
13793         x->ptr.p_double[i] = ae_minreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
13794     }
13795 }
13796 
13797 
13798 /*************************************************************************
13799 Performs componentwise min of row X[] and vector Y[]
13800 
13801 INPUT PARAMETERS:
13802     N       -   vector length
13803     Y       -   source vector
13804     X       -   target row RowIdx
13805 
13806 RESULT:
13807     X := componentwise_min(X,Y)
13808 
13809   -- ALGLIB --
13810      Copyright 20.01.2020 by Bochkanov Sergey
13811 *************************************************************************/
13812 void rmergeminvr(ae_int_t n,
13813      /* Real    */ ae_vector* y,
13814      /* Real    */ ae_matrix* x,
13815      ae_int_t rowidx,
13816      ae_state *_state)
13817 {
13818     ae_int_t i;
13819 
13820     /*
13821      * Try fast kernels.
13822      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13823      */
13824     if( n>=_ABLASF_KERNEL_SIZE1 )
13825         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergeminv,
13826             (n,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13827 
13828     for(i=0; i<=n-1; i++)
13829     {
13830         x->ptr.pp_double[rowidx][i] = ae_minreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
13831     }
13832 }
13833 
13834 
13835 /*************************************************************************
13836 Performs componentwise min of row X[RowIdx] and vector Y[]
13837 
13838 INPUT PARAMETERS:
13839     N       -   vector length
13840     X       -   matrix, RowIdx-th row is source
13841     Y       -   preallocated target vector
13842 
13843 RESULT:
13844     Y := componentwise_min(X[RowIdx],Y)
13845 
13846   -- ALGLIB --
13847      Copyright 20.01.2020 by Bochkanov Sergey
13848 *************************************************************************/
13849 void rmergeminrv(ae_int_t n,
13850      /* Real    */ ae_matrix* x,
13851      ae_int_t rowidx,
13852      /* Real    */ ae_vector* y,
13853      ae_state *_state)
13854 {
13855     ae_int_t i;
13856 
13857     /*
13858      * Try fast kernels.
13859      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13860      */
13861     if( n>=_ABLASF_KERNEL_SIZE1 )
13862         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergeminv,
13863             (n,x->ptr.pp_double[rowidx],y->ptr.p_double,_state))
13864 
13865     for(i=0; i<=n-1; i++)
13866     {
13867         y->ptr.p_double[i] = ae_minreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
13868     }
13869 }
13870 /*************************************************************************
13871 Returns maximum X
13872 
13873 INPUT PARAMETERS:
13874     N       -   vector length
13875     X       -   array[N], vector to process
13876 
13877 RESULT:
13878     max(X[i])
13879     zero for N=0
13880 
13881   -- ALGLIB --
13882      Copyright 20.01.2020 by Bochkanov Sergey
13883 *************************************************************************/
13884 double rmaxv(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
13885 {
13886     ae_int_t i;
13887     double v;
13888     double result;
13889 
13890     /*
13891      * Try fast kernels.
13892      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13893      */
13894     if( n>=_ABLASF_KERNEL_SIZE1 )
13895         _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxv, (n, x->ptr.p_double, _state));
13896 
13897     if(n == 0)
13898         return 0.0;
13899     result = x->ptr.p_double[0];
13900     for(i=1; i<=n-1; i++)
13901     {
13902         v = x->ptr.p_double[i];
13903         if( v>result )
13904         {
13905             result = v;
13906         }
13907     }
13908     return result;
13909 }
13910 
13911 /*************************************************************************
13912 Returns maximum X
13913 
13914 INPUT PARAMETERS:
13915     N       -   vector length
13916     X       -   matrix to process, RowIdx-th row is processed
13917 
13918 RESULT:
13919     max(X[RowIdx,i])
13920     zero for N=0
13921 
13922   -- ALGLIB --
13923      Copyright 20.01.2020 by Bochkanov Sergey
13924 *************************************************************************/
13925 double rmaxr(ae_int_t n,
13926      /* Real    */ ae_matrix* x,
13927      ae_int_t rowidx,
13928      ae_state *_state)
13929 {
13930     ae_int_t i;
13931     double v;
13932     double result;
13933 
13934     /*
13935      * Try fast kernels.
13936      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13937      */
13938     if( n>=_ABLASF_KERNEL_SIZE1 )
13939         _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxv,(n, x->ptr.pp_double[rowidx], _state))
13940 
13941     if(n == 0)
13942         return 0.0;
13943     result = x->ptr.pp_double[rowidx][0];
13944     for(i=1; i<=n-1; i++)
13945     {
13946         v = x->ptr.pp_double[rowidx][i];
13947         if( v>result )
13948         {
13949             result = v;
13950         }
13951     }
13952     return result;
13953 }
13954 
13955 /*************************************************************************
13956 Returns maximum |X|
13957 
13958 INPUT PARAMETERS:
13959     N       -   vector length
13960     X       -   array[N], vector to process
13961 
13962 RESULT:
13963     max(|X[i]|)
13964     zero for N=0
13965 
13966   -- ALGLIB --
13967      Copyright 20.01.2020 by Bochkanov Sergey
13968 *************************************************************************/
13969 double rmaxabsv(ae_int_t n, /* Real    */ ae_vector* x, ae_state *_state)
13970 {
13971     ae_int_t i;
13972     double v;
13973     double result;
13974 
13975     /*
13976      * Try fast kernels.
13977      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13978      */
13979     if( n>=_ABLASF_KERNEL_SIZE1 )
13980         _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxabsv, (n, x->ptr.p_double, _state))
13981 
13982     result = (double)(0);
13983     for(i=0; i<=n-1; i++)
13984     {
13985         v = ae_fabs(x->ptr.p_double[i], _state);
13986         if( v>result )
13987         {
13988             result = v;
13989         }
13990     }
13991     return result;
13992 }
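
/*
 * rmaxabsv returns the infinity norm of the leading N elements (with the
 * convention that an empty vector has norm 0). A usage sketch, assuming resid
 * is an already-initialized real ae_vector with at least n elements:
 *
 *     double err = rmaxabsv(n, &resid, _state);    // max_i |resid[i]|
 */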
13993 
13994 
13995 /*************************************************************************
13996 Returns maximum |X|
13997 
13998 INPUT PARAMETERS:
13999     N       -   vector length
14000     X       -   matrix to process, RowIdx-th row is processed
14001 
14002 OUTPUT PARAMETERS:
14003     max(|X[RowIdx,i]|)
14004     zero for N=0
14005 
14006   -- ALGLIB --
14007      Copyright 20.01.2020 by Bochkanov Sergey
14008 *************************************************************************/
double rmaxabsr(ae_int_t n,
14010      /* Real    */ ae_matrix* x,
14011      ae_int_t rowidx,
14012      ae_state *_state)
14013 {
14014     ae_int_t i;
14015     double v;
14016     double result;
14017 
14018     /*
14019      * Try fast kernels.
14020      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14021      */
14022     if( n>=_ABLASF_KERNEL_SIZE1 )
14023         _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxabsv,(n, x->ptr.pp_double[rowidx], _state))
14024 
14025     result = (double)(0);
14026     for(i=0; i<=n-1; i++)
14027     {
14028         v = ae_fabs(x->ptr.pp_double[rowidx][i], _state);
14029         if( v>result )
14030         {
14031             result = v;
14032         }
14033     }
14034     return result;
14035 }
14036 
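/*
 * Worked example (assumed values, for illustration only) of the max
 * reductions implemented above. For a vector holding
 *
 *     { -5.0, 2.0, -1.0, 3.0 }
 *
 * rmaxv returns 3.0 (largest element) and rmaxabsv returns 5.0 (largest
 * absolute value); by the documented convention both return 0.0 for N=0.
 */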
14037 /*************************************************************************
14038 Copies vector X[] to Y[], extended version
14039 
14040 INPUT PARAMETERS:
14041     N       -   vector length
14042     X       -   source array
14043     OffsX   -   source offset
14044     Y       -   preallocated array[N]
14045     OffsY   -   destination offset
14046 
14047 OUTPUT PARAMETERS:
14048     Y       -   N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
14049 
14050 NOTE: destination and source should NOT overlap
14051 
14052   -- ALGLIB --
14053      Copyright 20.01.2020 by Bochkanov Sergey
14054 *************************************************************************/
void rcopyvx(ae_int_t n,
14056      /* Real    */ ae_vector* x,
14057      ae_int_t offsx,
14058      /* Real    */ ae_vector* y,
14059      ae_int_t offsy,
14060      ae_state *_state)
14061 {
14062     ae_int_t j;
14063 
14064     /*
14065      * Try fast kernels.
14066      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14067      */
14068     if( n>=_ABLASF_KERNEL_SIZE1 )
14069         _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyvx,(n, x->ptr.p_double+offsx, y->ptr.p_double+offsy, _state))
14070 
14071     for(j=0; j<=n-1; j++)
14072     {
14073         y->ptr.p_double[offsy+j] = x->ptr.p_double[offsx+j];
14074     }
14075 }
14076 
14077 /*************************************************************************
14078 Copies vector X[] to Y[], extended version
14079 
14080 INPUT PARAMETERS:
14081     N       -   vector length
14082     X       -   source array
14083     OffsX   -   source offset
14084     Y       -   preallocated array[N]
14085     OffsY   -   destination offset
14086 
14087 OUTPUT PARAMETERS:
14088     Y       -   N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
14089 
14090 NOTE: destination and source should NOT overlap
14091 
14092   -- ALGLIB --
14093      Copyright 20.01.2020 by Bochkanov Sergey
14094 *************************************************************************/
void icopyvx(ae_int_t n,
14096      /* Integer */ ae_vector* x,
14097      ae_int_t offsx,
14098      /* Integer */ ae_vector* y,
14099      ae_int_t offsy,
14100      ae_state *_state)
14101 {
14102     ae_int_t j;
14103 
14104     /*
14105      * Try fast kernels.
14106      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14107      */
14108     if( n>=_ABLASF_KERNEL_SIZE1 )
14109         _ALGLIB_KERNEL_VOID_SSE2_AVX2(icopyvx,(n, x->ptr.p_int+offsx, y->ptr.p_int+offsy, _state))
14110 
14111     for(j=0; j<=n-1; j++)
14112     {
14113         y->ptr.p_int[offsy+j] = x->ptr.p_int[offsx+j];
14114     }
14115 }
14116 
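/*
 * Illustration (assumed plain-C equivalent, for exposition only): both
 * extended copies above reduce to an offset-shifted element copy,
 *
 *     for(j=0; j<n; j++)
 *         y[offsy+j] = x[offsx+j];    // regions must not overlap
 *
 * with rcopyvx operating on double data and icopyvx on ae_int_t data.
 */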
/*************************************************************************
Matrix-vector product: y := alpha*op(A)*x + beta*y

NOTE: this function expects Y to be large enough to store the result. No
      automatic preallocation happens for smaller arrays. No integrity
      checks are performed for the sizes of A, x, y.

INPUT PARAMETERS:
    M   -   number of rows of op(A)
    N   -   number of columns of op(A)
    Alpha-  coefficient
    A   -   source matrix
    OpA -   operation type:
            * OpA=0     =>  op(A) = A
            * OpA=1     =>  op(A) = A^T
    X   -   input vector, has at least N elements
    Beta-   coefficient
    Y   -   preallocated output array, has at least M elements

OUTPUT PARAMETERS:
    Y   -   vector which stores the result

HANDLING OF SPECIAL CASES:
    * if M=0, then the subroutine does nothing; it does not even touch the
      arrays.
    * if N=0 or Alpha=0.0, then:
      * if Beta=0, then Y is filled with zeros. A and X are not referenced
        at all. Initial values of Y are ignored (we do not multiply Y by
        zero, we simply overwrite it with zeros)
      * if Beta<>0, then Y is replaced by Beta*Y
    * if M>0, N>0, Alpha<>0, but Beta=0, then Y is replaced by Alpha*op(A)*x;
      the initial state of Y is ignored (it is overwritten without a
      preliminary multiplication by zero).


  -- ALGLIB routine --

     01.09.2021
     Bochkanov Sergey
*************************************************************************/
void rgemv(ae_int_t m,
14157      ae_int_t n,
14158      double alpha,
14159      /* Real    */ ae_matrix* a,
14160      ae_int_t opa,
14161      /* Real    */ ae_vector* x,
14162      double beta,
14163      /* Real    */ ae_vector* y,
14164      ae_state *_state)
14165 {
14166     ae_int_t i;
14167     ae_int_t j;
14168     double v;
14169 
14170 
14171 
14172     /*
14173      * Properly premultiply Y by Beta.
14174      *
14175      * Quick exit for M=0, N=0 or Alpha=0.
14176      * After this block we have M>0, N>0, Alpha<>0.
14177      */
14178     if( m<=0 )
14179     {
14180         return;
14181     }
14182     if( ae_fp_neq(beta,(double)(0)) )
14183     {
14184         rmulv(m, beta, y, _state);
14185     }
14186     else
14187     {
14188         rsetv(m, 0.0, y, _state);
14189     }
14190     if( n<=0||ae_fp_eq(alpha,0.0) )
14191     {
14192         return;
14193     }
14194 
14195     /*
14196      * Straight or transposed?
14197      */
14198     if( opa==0 )
14199     {
14200         /*
14201          * Try SIMD code
14202          */
14203         if( n>=_ABLASF_KERNEL_SIZE2 )
14204             _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemv_straight, (m, n, alpha, a,
14205                 x->ptr.p_double, y->ptr.p_double, _state))
14206 
14207         /*
14208          * Generic C version: y += A*x
14209          */
14210         for(i=0; i<=m-1; i++)
14211         {
14212             v = (double)(0);
14213             for(j=0; j<=n-1; j++)
14214             {
14215                 v = v+a->ptr.pp_double[i][j]*x->ptr.p_double[j];
14216             }
14217             y->ptr.p_double[i] = alpha*v+y->ptr.p_double[i];
14218         }
14219         return;
14220     }
14221     if( opa==1 )
14222     {
14223         /*
14224          * Try SIMD code
14225          */
14226         if( m>=_ABLASF_KERNEL_SIZE2 )
14227             _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemv_transposed, (m, n, alpha, a,
14228                 x->ptr.p_double, y->ptr.p_double, _state))
14229 
14230 
14231         /*
14232          * Generic C version: y += A^T*x
14233          */
14234         for(i=0; i<=n-1; i++)
14235         {
14236             v = alpha*x->ptr.p_double[i];
14237             for(j=0; j<=m-1; j++)
14238             {
14239                 y->ptr.p_double[j] = y->ptr.p_double[j]+v*a->ptr.pp_double[i][j];
14240             }
14241         }
14242         return;
14243     }
14244 }
14245 
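/*
 * Illustration (hypothetical reference code, not part of ALGLIB): a plain-C
 * restatement of the rgemv() semantics above, including the documented
 * handling of the special cases. It assumes row-major storage with leading
 * dimension lda (lda>=N for OpA=0, lda>=M for OpA=1); the function name and
 * parameters are illustrative only.
 *
 *     static void gemv_reference(int m, int n, double alpha, const double *a,
 *                                int lda, int opa, const double *x,
 *                                double beta, double *y)
 *     {
 *         int i, j;
 *         if( m<=0 )
 *             return;                              // nothing is touched
 *         for(i=0; i<m; i++)
 *             y[i] = beta==0.0 ? 0.0 : beta*y[i];  // overwrite, never multiply by 0
 *         if( n<=0 || alpha==0.0 )
 *             return;                              // A and X are not referenced
 *         for(i=0; i<m; i++)
 *             for(j=0; j<n; j++)
 *                 y[i] += alpha*(opa==0 ? a[i*lda+j] : a[j*lda+i])*x[j];
 *     }
 */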
14246 
/*************************************************************************
Matrix-vector product: y := alpha*op(A)*x + beta*y

Here x, y, A are subvectors/submatrices of larger vectors/matrices.

NOTE: this function expects Y to be large enough to store the result. No
      automatic preallocation happens for smaller arrays. No integrity
      checks are performed for the sizes of A, x, y.

INPUT PARAMETERS:
    M   -   number of rows of op(A)
    N   -   number of columns of op(A)
    Alpha-  coefficient
    A   -   source matrix
    IA  -   submatrix offset (row index)
    JA  -   submatrix offset (column index)
    OpA -   operation type:
            * OpA=0     =>  op(A) = A
            * OpA=1     =>  op(A) = A^T
    X   -   input vector, has at least N+IX elements
    IX  -   subvector offset
    Beta-   coefficient
    Y   -   preallocated output array, has at least M+IY elements
    IY  -   subvector offset

OUTPUT PARAMETERS:
    Y   -   vector which stores the result

HANDLING OF SPECIAL CASES:
    * if M=0, then the subroutine does nothing; it does not even touch the
      arrays.
    * if N=0 or Alpha=0.0, then:
      * if Beta=0, then Y is filled with zeros. A and X are not referenced
        at all. Initial values of Y are ignored (we do not multiply Y by
        zero, we simply overwrite it with zeros)
      * if Beta<>0, then Y is replaced by Beta*Y
    * if M>0, N>0, Alpha<>0, but Beta=0, then Y is replaced by Alpha*op(A)*x;
      the initial state of Y is ignored (it is overwritten without a
      preliminary multiplication by zero).


  -- ALGLIB routine --

     01.09.2021
     Bochkanov Sergey
*************************************************************************/
void rgemvx(ae_int_t m,
14293      ae_int_t n,
14294      double alpha,
14295      /* Real    */ ae_matrix* a,
14296      ae_int_t ia,
14297      ae_int_t ja,
14298      ae_int_t opa,
14299      /* Real    */ ae_vector* x,
14300      ae_int_t ix,
14301      double beta,
14302      /* Real    */ ae_vector* y,
14303      ae_int_t iy,
14304      ae_state *_state)
14305 {
14306     ae_int_t i;
14307     ae_int_t j;
14308     double v;
14309 
14310 
14311 
14312     /*
14313      * Properly premultiply Y by Beta.
14314      *
14315      * Quick exit for M=0, N=0 or Alpha=0.
14316      * After this block we have M>0, N>0, Alpha<>0.
14317      */
14318     if( m<=0 )
14319     {
14320         return;
14321     }
14322     if( ae_fp_neq(beta,(double)(0)) )
14323     {
14324         rmulvx(m, beta, y, iy, _state);
14325     }
14326     else
14327     {
14328         rsetvx(m, 0.0, y, iy, _state);
14329     }
14330     if( n<=0||ae_fp_eq(alpha,0.0) )
14331     {
14332         return;
14333     }
14334 
14335     /*
14336      * Straight or transposed?
14337      */
14338     if( opa==0 )
14339     {
14340         /*
14341          * Try SIMD code
14342          */
14343         if( n>=_ABLASF_KERNEL_SIZE2 )
14344             _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemvx_straight, (m, n, alpha, a, ia, ja,
14345                 x->ptr.p_double + ix, y->ptr.p_double + iy, _state))
14346 
14347 
14348         /*
14349          * Generic C code: y += A*x
14350          */
14351         for(i=0; i<=m-1; i++)
14352         {
14353             v = (double)(0);
14354             for(j=0; j<=n-1; j++)
14355             {
14356                 v = v+a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
14357             }
14358             y->ptr.p_double[iy+i] = alpha*v+y->ptr.p_double[iy+i];
14359         }
14360         return;
14361     }
14362     if( opa==1 )
14363     {
14364         /*
14365          * Try SIMD code
14366          */
14367         if( m>=_ABLASF_KERNEL_SIZE2 )
14368             _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemvx_transposed, (m, n, alpha, a, ia, ja,
14369                 x->ptr.p_double+ix, y->ptr.p_double+iy, _state))
14370 
14371         /*
14372          * Generic C code: y += A^T*x
14373          */
14374         for(i=0; i<=n-1; i++)
14375         {
14376             v = alpha*x->ptr.p_double[ix+i];
14377             for(j=0; j<=m-1; j++)
14378             {
14379                 y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*a->ptr.pp_double[ia+i][ja+j];
14380             }
14381         }
14382         return;
14383     }
14384 }
14385 
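/*
 * Illustration (for exposition only): rgemvx() is rgemv() applied to the
 * submatrix of A anchored at (IA,JA) and to the subvectors X[IX:...] and
 * Y[IY:...]. In particular, with all offsets set to zero it is expected to
 * produce the same result as rgemv():
 *
 *     rgemvx(m, n, alpha, a, 0, 0, opa, x, 0, beta, y, 0, _state);
 *     rgemv (m, n, alpha, a,       opa, x,    beta, y,    _state);
 */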
14386 
/*************************************************************************
Rank-1 correction: A := A + alpha*u*v'

NOTE: this function expects A to be large enough to store the result. No
      automatic preallocation happens for smaller arrays. No integrity
      checks are performed for the sizes of A, u, v.

INPUT PARAMETERS:
    M   -   number of rows
    N   -   number of columns
    A   -   target MxN matrix
    Alpha-  coefficient
    U   -   vector #1
    V   -   vector #2


  -- ALGLIB routine --
     07.09.2021
     Bochkanov Sergey
*************************************************************************/
void rger(ae_int_t m,
14408      ae_int_t n,
14409      double alpha,
14410      /* Real    */ ae_vector* u,
14411      /* Real    */ ae_vector* v,
14412      /* Real    */ ae_matrix* a,
14413      ae_state *_state)
14414 {
14415     ae_int_t i;
14416     ae_int_t j;
14417     double s;
14418 
14419 
14420     if( (m<=0||n<=0)||ae_fp_eq(alpha,(double)(0)) )
14421     {
14422         return;
14423     }
14424     for(i=0; i<=m-1; i++)
14425     {
14426         s = alpha*u->ptr.p_double[i];
14427         for(j=0; j<=n-1; j++)
14428         {
14429             a->ptr.pp_double[i][j] = a->ptr.pp_double[i][j]+s*v->ptr.p_double[j];
14430         }
14431     }
14432 }
14433 
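/*
 * Worked example (assumed values, for illustration only) of the rank-1
 * update A := A + alpha*u*v' performed by rger() above:
 *
 *     alpha = 2,  u = [1, 2],  v = [3, 4],  A = [[0, 0],
 *                                                [0, 0]]
 *
 * Each entry receives alpha*u[i]*v[j], so on exit
 *
 *     A = [[ 6,  8],
 *          [12, 16]]
 */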
14434 
/*************************************************************************
This subroutine solves the linear system op(A)*x=b where:
* A is an NxN upper/lower triangular/unitriangular matrix
* X and B are Nx1 vectors
* "op" may be the identity transformation or transposition

The solution replaces X.

IMPORTANT: * no overflow/underflow/degeneracy tests are performed.
           * no integrity checks for operand sizes, out-of-bounds accesses
             and so on are performed

INPUT PARAMETERS
    N       -   matrix size, N>=0
    A       -   matrix, the actual matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
    IA      -   submatrix offset
    JA      -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X       -   right-hand side, the actual vector is stored in X[IX:IX+N-1]
    IX      -   offset

OUTPUT PARAMETERS
    X       -   solution replaces elements X[IX:IX+N-1]

  -- ALGLIB routine --
     (c) 07.09.2021 Bochkanov Sergey
*************************************************************************/
void rtrsvx(ae_int_t n,
14467      /* Real    */ ae_matrix* a,
14468      ae_int_t ia,
14469      ae_int_t ja,
14470      ae_bool isupper,
14471      ae_bool isunit,
14472      ae_int_t optype,
14473      /* Real    */ ae_vector* x,
14474      ae_int_t ix,
14475      ae_state *_state)
14476 {
14477     ae_int_t i;
14478     ae_int_t j;
14479     double v;
14480 
14481 
14482     if( n<=0 )
14483     {
14484         return;
14485     }
14486     if( optype==0&&isupper )
14487     {
14488         for(i=n-1; i>=0; i--)
14489         {
14490             v = x->ptr.p_double[ix+i];
14491             for(j=i+1; j<=n-1; j++)
14492             {
14493                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
14494             }
14495             if( !isunit )
14496             {
14497                 v = v/a->ptr.pp_double[ia+i][ja+i];
14498             }
14499             x->ptr.p_double[ix+i] = v;
14500         }
14501         return;
14502     }
14503     if( optype==0&&!isupper )
14504     {
14505         for(i=0; i<=n-1; i++)
14506         {
14507             v = x->ptr.p_double[ix+i];
14508             for(j=0; j<=i-1; j++)
14509             {
14510                 v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
14511             }
14512             if( !isunit )
14513             {
14514                 v = v/a->ptr.pp_double[ia+i][ja+i];
14515             }
14516             x->ptr.p_double[ix+i] = v;
14517         }
14518         return;
14519     }
14520     if( optype==1&&isupper )
14521     {
14522         for(i=0; i<=n-1; i++)
14523         {
14524             v = x->ptr.p_double[ix+i];
14525             if( !isunit )
14526             {
14527                 v = v/a->ptr.pp_double[ia+i][ja+i];
14528             }
14529             x->ptr.p_double[ix+i] = v;
14530             if( v==0 )
14531             {
14532                 continue;
14533             }
14534             for(j=i+1; j<=n-1; j++)
14535             {
14536                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
14537             }
14538         }
14539         return;
14540     }
14541     if( optype==1&&!isupper )
14542     {
14543         for(i=n-1; i>=0; i--)
14544         {
14545             v = x->ptr.p_double[ix+i];
14546             if( !isunit )
14547             {
14548                 v = v/a->ptr.pp_double[ia+i][ja+i];
14549             }
14550             x->ptr.p_double[ix+i] = v;
14551             if( v==0 )
14552             {
14553                 continue;
14554             }
14555             for(j=0; j<=i-1; j++)
14556             {
14557                 x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
14558             }
14559         }
14560         return;
14561     }
14562     ae_assert(ae_false, "rTRSVX: unexpected operation type", _state);
14563 }
14564 
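/*
 * Worked example (assumed values, for illustration only) of the upper
 * triangular solve performed by rtrsvx() with OpType=0, IsUpper=True,
 * IsUnit=False. Back-substitution on the 2x2 system
 *
 *     [ 2  1 ] [x0]   [5]
 *     [ 0  3 ] [x1] = [6]
 *
 * proceeds from the last row upwards:
 *
 *     x1 = 6/3           = 2
 *     x0 = (5 - 1*2)/2   = 1.5
 *
 * and the solution overwrites the right-hand side stored in X[IX:IX+N-1].
 */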
14565 /*************************************************************************
14566 Fast rGEMM kernel with AVX2/FMA support
14567 
14568   -- ALGLIB routine --
14569      19.09.2021
14570      Bochkanov Sergey
14571 *************************************************************************/
ae_bool ablasf_rgemm32basecase(
14573      ae_int_t m,
14574      ae_int_t n,
14575      ae_int_t k,
14576      double alpha,
14577      /* Real    */ ae_matrix* _a,
14578      ae_int_t ia,
14579      ae_int_t ja,
14580      ae_int_t optypea,
14581      /* Real    */ ae_matrix* _b,
14582      ae_int_t ib,
14583      ae_int_t jb,
14584      ae_int_t optypeb,
14585      double beta,
14586      /* Real    */ ae_matrix* _c,
14587      ae_int_t ic,
14588      ae_int_t jc,
14589      ae_state *_state)
14590 {
14591 #if !defined(_ALGLIB_HAS_AVX2_INTRINSICS)
14592     return ae_false;
14593 #else
14594     const ae_int_t block_size = _ABLASF_BLOCK_SIZE;
14595     const ae_int_t micro_size = _ABLASF_MICRO_SIZE;
14596     ae_int_t out0, out1;
14597     double *c;
14598     ae_int_t stride_c;
14599     ae_int_t cpu_id = ae_cpuid();
14600     ae_int_t (*ablasf_packblk)(const double*, ae_int_t, ae_int_t, ae_int_t, ae_int_t, double*, ae_int_t, ae_int_t) = (k==32 && block_size==32) ? ablasf_packblkh32_avx2 : ablasf_packblkh_avx2;
14601     void     (*ablasf_dotblk)(const double *, const double *, ae_int_t, ae_int_t, ae_int_t, double *, ae_int_t)    = ablasf_dotblkh_avx2;
14602     void     (*ablasf_daxpby)(ae_int_t, double, const double *, double, double*) = ablasf_daxpby_avx2;
14603 
14604     /*
14605      * Determine CPU and kernel support
14606      */
14607     if( m>block_size || n>block_size || k>block_size || m==0 || n==0 || !(cpu_id&CPU_AVX2) )
14608         return ae_false;
14609 #if defined(_ALGLIB_HAS_FMA_INTRINSICS)
14610     if( cpu_id&CPU_FMA )
14611         ablasf_dotblk  = ablasf_dotblkh_fma;
14612 #endif
14613 
14614     /*
14615      * Prepare C
14616      */
14617     c = _c->ptr.pp_double[ic]+jc;
14618     stride_c = _c->stride;
14619 
14620     /*
14621      * Do we have alpha*A*B ?
14622      */
14623     if( alpha!=0 && k>0 )
14624     {
14625         /*
14626          * Prepare structures
14627          */
14628         ae_int_t base0, base1, offs0;
14629         double *a = _a->ptr.pp_double[ia]+ja;
14630         double *b = _b->ptr.pp_double[ib]+jb;
14631         ae_int_t stride_a = _a->stride;
14632         ae_int_t stride_b = _b->stride;
14633         double      _blka[_ABLASF_BLOCK_SIZE*_ABLASF_MICRO_SIZE+_ALGLIB_SIMD_ALIGNMENT_DOUBLES];
14634         double _blkb_long[_ABLASF_BLOCK_SIZE*_ABLASF_BLOCK_SIZE+_ALGLIB_SIMD_ALIGNMENT_DOUBLES];
14635         double      _blkc[_ABLASF_MICRO_SIZE*_ABLASF_BLOCK_SIZE+_ALGLIB_SIMD_ALIGNMENT_DOUBLES];
14636         double *blka          = (double*)ae_align(_blka,     _ALGLIB_SIMD_ALIGNMENT_BYTES);
14637         double *storageb_long = (double*)ae_align(_blkb_long,_ALGLIB_SIMD_ALIGNMENT_BYTES);
14638         double *blkc          = (double*)ae_align(_blkc,     _ALGLIB_SIMD_ALIGNMENT_BYTES);
14639 
14640         /*
14641          * Pack transform(B) into precomputed block form
14642          */
14643         for(base1=0; base1<n; base1+=micro_size)
14644         {
14645             const ae_int_t lim1 = n-base1<micro_size ? n-base1 : micro_size;
14646             double *curb = storageb_long+base1*block_size;
14647             ablasf_packblk(
14648                 b + (optypeb==0 ? base1 : base1*stride_b), stride_b, optypeb==0 ? 1 : 0, k, lim1,
14649                 curb, block_size, micro_size);
14650         }
14651 
14652         /*
14653          * Output
14654          */
14655         for(base0=0; base0<m; base0+=micro_size)
14656         {
14657             /*
14658              * Load block row of transform(A)
14659              */
14660             const ae_int_t lim0    = m-base0<micro_size ? m-base0 : micro_size;
14661             const ae_int_t round_k = ablasf_packblk(
14662                 a + (optypea==0 ? base0*stride_a : base0), stride_a, optypea, k, lim0,
14663                 blka, block_size, micro_size);
14664 
14665             /*
14666              * Compute block(A)'*entire(B)
14667              */
14668             for(base1=0; base1<n; base1+=micro_size)
14669                 ablasf_dotblk(blka, storageb_long+base1*block_size, round_k, block_size, micro_size, blkc+base1, block_size);
14670 
14671             /*
14672              * Output block row of block(A)'*entire(B)
14673              */
14674             for(offs0=0; offs0<lim0; offs0++)
14675                 ablasf_daxpby(n, alpha, blkc+offs0*block_size, beta, c+(base0+offs0)*stride_c);
14676         }
14677     }
14678     else
14679     {
14680         /*
14681          * No A*B, just beta*C (degenerate case, not optimized)
14682          */
14683         if( beta==0 )
14684         {
14685             for(out0=0; out0<m; out0++)
14686                 for(out1=0; out1<n; out1++)
14687                     c[out0*stride_c+out1] = 0.0;
14688         }
14689         else if( beta!=1 )
14690         {
14691             for(out0=0; out0<m; out0++)
14692                 for(out1=0; out1<n; out1++)
14693                     c[out0*stride_c+out1] *= beta;
14694         }
14695     }
14696     return ae_true;
14697 #endif
14698 }
14699 
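/*
 * Illustration (hypothetical reference code, not part of ALGLIB): the AVX2
 * base case above computes C := alpha*op(A)*op(B) + beta*C for blocks of at
 * most _ABLASF_BLOCK_SIZE in each dimension by packing op(B), walking op(A)
 * in micro-panels and merging the products into C. The scalar sketch below
 * restates only the computed result (opA=opB=0, row-major storage with
 * leading dimensions lda/ldb/ldc), without the packing, alignment or SIMD
 * machinery.
 *
 *     static void gemm_reference(int m, int n, int k, double alpha,
 *                                const double *a, int lda,
 *                                const double *b, int ldb,
 *                                double beta, double *c, int ldc)
 *     {
 *         int i, j, p;
 *         for(i=0; i<m; i++)
 *             for(j=0; j<n; j++)
 *             {
 *                 double v = 0.0;
 *                 for(p=0; p<k; p++)
 *                     v += a[i*lda+p]*b[p*ldb+j];
 *                 c[i*ldc+j] = alpha*v + (beta==0.0 ? 0.0 : beta*c[i*ldc+j]);
 *             }
 *     }
 */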
14700 
14701 /*************************************************************************
14702 Returns recommended width of the SIMD-friendly buffer
14703 *************************************************************************/
ae_int_t spchol_spsymmgetmaxsimd(ae_state *_state)
14705 {
14706 #if AE_CPU==AE_INTEL
14707     return 4;
14708 #else
14709     return 1;
14710 #endif
14711 }
14712 
/*************************************************************************
Solving a linear system: propagating a computed supernode.

Propagates a computed supernode to the rest of the RHS using the
SIMD-friendly RHS storage format.

INPUT PARAMETERS:

OUTPUT PARAMETERS:

  -- ALGLIB routine --
     08.09.2021
     Bochkanov Sergey
*************************************************************************/
void spchol_propagatefwd(/* Real    */ ae_vector* x,
14728      ae_int_t cols0,
14729      ae_int_t blocksize,
14730      /* Integer */ ae_vector* superrowidx,
14731      ae_int_t rbase,
14732      ae_int_t offdiagsize,
14733      /* Real    */ ae_vector* rowstorage,
14734      ae_int_t offss,
14735      ae_int_t sstride,
14736      /* Real    */ ae_vector* simdbuf,
14737      ae_int_t simdwidth,
14738      ae_state *_state)
14739 {
14740     ae_int_t i;
14741     ae_int_t j;
14742     ae_int_t k;
14743     ae_int_t baseoffs;
14744     double v;
14745 
14746     /*
14747      * Try SIMD kernels
14748      */
14749 #if defined(_ALGLIB_HAS_FMA_INTRINSICS)
14750     if( sstride==4 || (blocksize==2 && sstride==2) )
14751         if( ae_cpuid()&CPU_FMA )
14752         {
14753             spchol_propagatefwd_fma(x, cols0, blocksize, superrowidx, rbase, offdiagsize, rowstorage, offss, sstride, simdbuf, simdwidth, _state);
14754             return;
14755         }
14756 #endif
14757 
14758     /*
     * Propagate a rank-1 node (cannot be accelerated with SIMD)
14760      */
14761     if( blocksize==1 && sstride==1 )
14762     {
14763         /*
14764          * blocksize is 1, stride is 1
14765          */
14766         double vx = x->ptr.p_double[cols0];
14767         double *p_mat_row  = rowstorage->ptr.p_double+offss+1*1;
14768         double *p_simd_buf = simdbuf->ptr.p_double;
14769         ae_int_t *p_rowidx = superrowidx->ptr.p_int+rbase;
14770         if( simdwidth==4 )
14771         {
14772             for(k=0; k<offdiagsize; k++)
14773                 p_simd_buf[p_rowidx[k]*4] -= p_mat_row[k]*vx;
14774         }
14775         else
14776         {
14777             for(k=0; k<offdiagsize; k++)
14778                 p_simd_buf[p_rowidx[k]*simdwidth] -= p_mat_row[k]*vx;
14779         }
14780         return;
14781     }
14782 
14783     /*
14784      * Generic C code for generic propagate
14785      */
14786     for(k=0; k<=offdiagsize-1; k++)
14787     {
14788         i = superrowidx->ptr.p_int[rbase+k];
14789         baseoffs = offss+(k+blocksize)*sstride;
14790         v = simdbuf->ptr.p_double[i*simdwidth];
14791         for(j=0; j<=blocksize-1; j++)
14792         {
14793             v = v-rowstorage->ptr.p_double[baseoffs+j]*x->ptr.p_double[cols0+j];
14794         }
14795         simdbuf->ptr.p_double[i*simdwidth] = v;
14796     }
14797 }
14798 
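/*
 * Illustration (for exposition only): in the SIMD-friendly RHS format used
 * above, the value associated with logical RHS entry I is stored at
 * SimdBuf[I*SimdWidth]; only that leading slot of each group is touched by
 * this routine. For every off-diagonal row K of the supernode the generic
 * loop therefore performs
 *
 *     row = SuperRowIdx[RBase+K]
 *     SimdBuf[row*SimdWidth] -= sum_{J<BlockSize} RowStorage[Offss+(K+BlockSize)*SStride+J] * X[Cols0+J]
 *
 * which is the "propagate the computed supernode to the rest of the RHS"
 * step of the forward substitution.
 */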
14799 
/*************************************************************************
Fast kernels for small supernodal updates: special AxBxCx4 function.

! See comments on UpdateSupernode() for information on generic supernodal
! updates, including notation used below.

The generic update has the following form:

    S := S - scatter(U*D*Uc')

This specialized function performs an AxBxCx4 update, i.e.:
* S is a tHeight*A matrix with row stride equal to 4 (which usually means
  that it has 3 or 4 columns)
* U is a uHeight*B matrix
* Uc' is a B*C matrix, with C<=A
* scatter() scatters rows and columns of U*Uc'

Return value:
* True if the update was applied
* False if the kernel refused to perform the update (quick exit for
  unsupported combinations of input sizes)

  -- ALGLIB routine --
     20.09.2020
     Bochkanov Sergey
*************************************************************************/
ae_bool spchol_updatekernelabc4(/* Real    */ ae_vector* rowstorage,
14827      ae_int_t offss,
14828      ae_int_t twidth,
14829      ae_int_t offsu,
14830      ae_int_t uheight,
14831      ae_int_t urank,
14832      ae_int_t urowstride,
14833      ae_int_t uwidth,
14834      /* Real    */ ae_vector* diagd,
14835      ae_int_t offsd,
14836      /* Integer */ ae_vector* raw2smap,
14837      /* Integer */ ae_vector* superrowidx,
14838      ae_int_t urbase,
14839      ae_state *_state)
14840 {
14841     /*
14842      * Try fast kernels.
14843      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14844      */
14845     _ALGLIB_KERNEL_RETURN_AVX2_FMA(spchol_updatekernelabc4,(rowstorage->ptr.p_double, offss, twidth, offsu, uheight, urank, urowstride, uwidth, diagd->ptr.p_double, offsd, raw2smap->ptr.p_int, superrowidx->ptr.p_int, urbase, _state))
14846 
14847     /*
14848      * Generic code
14849      */
14850     ae_int_t k;
14851     ae_int_t targetrow;
14852     ae_int_t targetcol;
14853     ae_int_t offsk;
14854     double d0;
14855     double d1;
14856     double d2;
14857     double d3;
14858     double u00;
14859     double u01;
14860     double u02;
14861     double u03;
14862     double u10;
14863     double u11;
14864     double u12;
14865     double u13;
14866     double u20;
14867     double u21;
14868     double u22;
14869     double u23;
14870     double u30;
14871     double u31;
14872     double u32;
14873     double u33;
14874     double uk0;
14875     double uk1;
14876     double uk2;
14877     double uk3;
14878     ae_int_t srccol0;
14879     ae_int_t srccol1;
14880     ae_int_t srccol2;
14881     ae_int_t srccol3;
14882     ae_bool result;
14883 
14884 
14885 
14886     /*
14887      * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
14888      */
14889     result = ae_false;
14890     if( twidth<3||twidth>4 )
14891     {
14892         return result;
14893     }
14894     if( uwidth<1||uwidth>4 )
14895     {
14896         return result;
14897     }
14898     if( urank>4 )
14899     {
14900         return result;
14901     }
14902 
14903     /*
14904      * Determine source columns for target columns, -1 if target column
14905      * is not updated.
14906      */
14907     srccol0 = -1;
14908     srccol1 = -1;
14909     srccol2 = -1;
14910     srccol3 = -1;
14911     for(k=0; k<=uwidth-1; k++)
14912     {
14913         targetcol = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]];
14914         if( targetcol==0 )
14915         {
14916             srccol0 = k;
14917         }
14918         if( targetcol==1 )
14919         {
14920             srccol1 = k;
14921         }
14922         if( targetcol==2 )
14923         {
14924             srccol2 = k;
14925         }
14926         if( targetcol==3 )
14927         {
14928             srccol3 = k;
14929         }
14930     }
14931 
14932     /*
14933      * Load update matrix into aligned/rearranged 4x4 storage
14934      */
14935     d0 = (double)(0);
14936     d1 = (double)(0);
14937     d2 = (double)(0);
14938     d3 = (double)(0);
14939     u00 = (double)(0);
14940     u01 = (double)(0);
14941     u02 = (double)(0);
14942     u03 = (double)(0);
14943     u10 = (double)(0);
14944     u11 = (double)(0);
14945     u12 = (double)(0);
14946     u13 = (double)(0);
14947     u20 = (double)(0);
14948     u21 = (double)(0);
14949     u22 = (double)(0);
14950     u23 = (double)(0);
14951     u30 = (double)(0);
14952     u31 = (double)(0);
14953     u32 = (double)(0);
14954     u33 = (double)(0);
14955     if( urank>=1 )
14956     {
14957         d0 = diagd->ptr.p_double[offsd+0];
14958     }
14959     if( urank>=2 )
14960     {
14961         d1 = diagd->ptr.p_double[offsd+1];
14962     }
14963     if( urank>=3 )
14964     {
14965         d2 = diagd->ptr.p_double[offsd+2];
14966     }
14967     if( urank>=4 )
14968     {
14969         d3 = diagd->ptr.p_double[offsd+3];
14970     }
14971     if( srccol0>=0 )
14972     {
14973         if( urank>=1 )
14974         {
14975             u00 = d0*rowstorage->ptr.p_double[offsu+srccol0*urowstride+0];
14976         }
14977         if( urank>=2 )
14978         {
14979             u01 = d1*rowstorage->ptr.p_double[offsu+srccol0*urowstride+1];
14980         }
14981         if( urank>=3 )
14982         {
14983             u02 = d2*rowstorage->ptr.p_double[offsu+srccol0*urowstride+2];
14984         }
14985         if( urank>=4 )
14986         {
14987             u03 = d3*rowstorage->ptr.p_double[offsu+srccol0*urowstride+3];
14988         }
14989     }
14990     if( srccol1>=0 )
14991     {
14992         if( urank>=1 )
14993         {
14994             u10 = d0*rowstorage->ptr.p_double[offsu+srccol1*urowstride+0];
14995         }
14996         if( urank>=2 )
14997         {
14998             u11 = d1*rowstorage->ptr.p_double[offsu+srccol1*urowstride+1];
14999         }
15000         if( urank>=3 )
15001         {
15002             u12 = d2*rowstorage->ptr.p_double[offsu+srccol1*urowstride+2];
15003         }
15004         if( urank>=4 )
15005         {
15006             u13 = d3*rowstorage->ptr.p_double[offsu+srccol1*urowstride+3];
15007         }
15008     }
15009     if( srccol2>=0 )
15010     {
15011         if( urank>=1 )
15012         {
15013             u20 = d0*rowstorage->ptr.p_double[offsu+srccol2*urowstride+0];
15014         }
15015         if( urank>=2 )
15016         {
15017             u21 = d1*rowstorage->ptr.p_double[offsu+srccol2*urowstride+1];
15018         }
15019         if( urank>=3 )
15020         {
15021             u22 = d2*rowstorage->ptr.p_double[offsu+srccol2*urowstride+2];
15022         }
15023         if( urank>=4 )
15024         {
15025             u23 = d3*rowstorage->ptr.p_double[offsu+srccol2*urowstride+3];
15026         }
15027     }
15028     if( srccol3>=0 )
15029     {
15030         if( urank>=1 )
15031         {
15032             u30 = d0*rowstorage->ptr.p_double[offsu+srccol3*urowstride+0];
15033         }
15034         if( urank>=2 )
15035         {
15036             u31 = d1*rowstorage->ptr.p_double[offsu+srccol3*urowstride+1];
15037         }
15038         if( urank>=3 )
15039         {
15040             u32 = d2*rowstorage->ptr.p_double[offsu+srccol3*urowstride+2];
15041         }
15042         if( urank>=4 )
15043         {
15044             u33 = d3*rowstorage->ptr.p_double[offsu+srccol3*urowstride+3];
15045         }
15046     }
15047 
15048     /*
15049      * Run update
15050      */
15051     if( urank==1 )
15052     {
15053         for(k=0; k<=uheight-1; k++)
15054         {
15055             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15056             offsk = offsu+k*urowstride;
15057             uk0 = rowstorage->ptr.p_double[offsk+0];
15058             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0;
15059             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0;
15060             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0;
15061             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0;
15062         }
15063     }
15064     if( urank==2 )
15065     {
15066         for(k=0; k<=uheight-1; k++)
15067         {
15068             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15069             offsk = offsu+k*urowstride;
15070             uk0 = rowstorage->ptr.p_double[offsk+0];
15071             uk1 = rowstorage->ptr.p_double[offsk+1];
15072             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1;
15073             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1;
15074             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1;
15075             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1;
15076         }
15077     }
15078     if( urank==3 )
15079     {
15080         for(k=0; k<=uheight-1; k++)
15081         {
15082             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15083             offsk = offsu+k*urowstride;
15084             uk0 = rowstorage->ptr.p_double[offsk+0];
15085             uk1 = rowstorage->ptr.p_double[offsk+1];
15086             uk2 = rowstorage->ptr.p_double[offsk+2];
15087             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2;
15088             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2;
15089             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2;
15090             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2;
15091         }
15092     }
15093     if( urank==4 )
15094     {
15095         for(k=0; k<=uheight-1; k++)
15096         {
15097             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15098             offsk = offsu+k*urowstride;
15099             uk0 = rowstorage->ptr.p_double[offsk+0];
15100             uk1 = rowstorage->ptr.p_double[offsk+1];
15101             uk2 = rowstorage->ptr.p_double[offsk+2];
15102             uk3 = rowstorage->ptr.p_double[offsk+3];
15103             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
15104             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
15105             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
15106             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
15107         }
15108     }
15109     result = ae_true;
15110     return result;
15111 }
15112 
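/*
 * Illustration (for exposition only): ignoring the scatter, the update
 * S := S - scatter(U*D*Uc') computed by spchol_updatekernelabc4() above
 * amounts to, for every update row K and every updated target column T,
 *
 *     S[K][T] -= sum_{R<URank} U[K][R] * D[R] * U[SrcColT][R]
 *
 * where SrcColT is the source row mapped to target column T (the SrcCol0..
 * SrcCol3 values computed at the top of the function). The u00..u33
 * temporaries hold the precomputed products D[R]*U[SrcColT][R], so the main
 * loop only multiplies them by U[K][R] and subtracts.
 */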
15113 
/*************************************************************************
Fast kernels for small supernodal updates: special 4x4x4x4 function.

! See comments on UpdateSupernode() for information on generic supernodal
! updates, including notation used below.

The generic update has the following form:

    S := S - scatter(U*D*Uc')

This specialized function performs a 4x4x4x4 update, i.e.:
* S is a tHeight*4 matrix
* U is a uHeight*4 matrix
* Uc' is a 4*4 matrix
* scatter() scatters rows of U*Uc', but does not scatter columns (they are
  densely packed).

Return value:
* True if the update was applied
* False if the kernel refused to perform the update.

  -- ALGLIB routine --
     20.09.2020
     Bochkanov Sergey
*************************************************************************/
ae_bool spchol_updatekernel4444(/* Real    */ ae_vector* rowstorage,
15140      ae_int_t offss,
15141      ae_int_t sheight,
15142      ae_int_t offsu,
15143      ae_int_t uheight,
15144      /* Real    */ ae_vector* diagd,
15145      ae_int_t offsd,
15146      /* Integer */ ae_vector* raw2smap,
15147      /* Integer */ ae_vector* superrowidx,
15148      ae_int_t urbase,
15149      ae_state *_state)
15150 {
15151     ae_int_t k;
15152     ae_int_t targetrow;
15153     ae_int_t offsk;
15154     double d0;
15155     double d1;
15156     double d2;
15157     double d3;
15158     double u00;
15159     double u01;
15160     double u02;
15161     double u03;
15162     double u10;
15163     double u11;
15164     double u12;
15165     double u13;
15166     double u20;
15167     double u21;
15168     double u22;
15169     double u23;
15170     double u30;
15171     double u31;
15172     double u32;
15173     double u33;
15174     double uk0;
15175     double uk1;
15176     double uk2;
15177     double uk3;
15178     ae_bool result;
15179 
15180 
15181     /*
15182      * Try fast kernels.
15183      * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
15184      */
15185     _ALGLIB_KERNEL_RETURN_AVX2_FMA(spchol_updatekernel4444,(rowstorage->ptr.p_double, offss, sheight, offsu, uheight, diagd->ptr.p_double, offsd, raw2smap->ptr.p_int, superrowidx->ptr.p_int, urbase, _state))
15186 
15187     /*
15188      * Generic C fallback code
15189      */
15190     d0 = diagd->ptr.p_double[offsd+0];
15191     d1 = diagd->ptr.p_double[offsd+1];
15192     d2 = diagd->ptr.p_double[offsd+2];
15193     d3 = diagd->ptr.p_double[offsd+3];
15194     u00 = d0*rowstorage->ptr.p_double[offsu+0*4+0];
15195     u01 = d1*rowstorage->ptr.p_double[offsu+0*4+1];
15196     u02 = d2*rowstorage->ptr.p_double[offsu+0*4+2];
15197     u03 = d3*rowstorage->ptr.p_double[offsu+0*4+3];
15198     u10 = d0*rowstorage->ptr.p_double[offsu+1*4+0];
15199     u11 = d1*rowstorage->ptr.p_double[offsu+1*4+1];
15200     u12 = d2*rowstorage->ptr.p_double[offsu+1*4+2];
15201     u13 = d3*rowstorage->ptr.p_double[offsu+1*4+3];
15202     u20 = d0*rowstorage->ptr.p_double[offsu+2*4+0];
15203     u21 = d1*rowstorage->ptr.p_double[offsu+2*4+1];
15204     u22 = d2*rowstorage->ptr.p_double[offsu+2*4+2];
15205     u23 = d3*rowstorage->ptr.p_double[offsu+2*4+3];
15206     u30 = d0*rowstorage->ptr.p_double[offsu+3*4+0];
15207     u31 = d1*rowstorage->ptr.p_double[offsu+3*4+1];
15208     u32 = d2*rowstorage->ptr.p_double[offsu+3*4+2];
15209     u33 = d3*rowstorage->ptr.p_double[offsu+3*4+3];
15210     if( sheight==uheight )
15211     {
15212         /*
15213          * No row scatter, the most efficient code
15214          */
15215         for(k=0; k<=uheight-1; k++)
15216         {
15217             targetrow = offss+k*4;
15218             offsk = offsu+k*4;
15219             uk0 = rowstorage->ptr.p_double[offsk+0];
15220             uk1 = rowstorage->ptr.p_double[offsk+1];
15221             uk2 = rowstorage->ptr.p_double[offsk+2];
15222             uk3 = rowstorage->ptr.p_double[offsk+3];
15223             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
15224             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
15225             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
15226             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
15227         }
15228     }
15229     else
15230     {
15231         /*
         * Row scatter is performed; a less efficient code path that uses a double mapping to determine the target row index
15233          */
15234         for(k=0; k<=uheight-1; k++)
15235         {
15236             targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15237             offsk = offsu+k*4;
15238             uk0 = rowstorage->ptr.p_double[offsk+0];
15239             uk1 = rowstorage->ptr.p_double[offsk+1];
15240             uk2 = rowstorage->ptr.p_double[offsk+2];
15241             uk3 = rowstorage->ptr.p_double[offsk+3];
15242             rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
15243             rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
15244             rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
15245             rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
15246         }
15247     }
15248     result = ae_true;
15249     return result;
15250 }
15251 
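/*
 * Illustration (for exposition only): in the scattered branch of
 * spchol_updatekernel4444() above the target row is obtained by a double
 * mapping,
 *
 *     targetrow = offss + raw2smap[superrowidx[urbase+k]]*4;
 *
 * i.e. SuperRowIdx[] translates the K-th row of the update into a raw matrix
 * row, and Raw2SMap[] translates that raw row into a row of the target
 * supernode (multiplied by the row stride, which is 4 for this kernel).
 */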
15252 /* ALGLIB_NO_FAST_KERNELS */
15253 #endif
15254 
15255 
15256 
15257 }
15258 
15259 
15260 /////////////////////////////////////////////////////////////////////////
15261 //
15262 // THIS SECTION CONTAINS PARALLEL SUBROUTINES
15263 //
15264 /////////////////////////////////////////////////////////////////////////
15265 namespace alglib_impl
15266 {
15267 
15268 
15269 }
15270 
15271