1 /*************************************************************************
2 ALGLIB 3.18.0 (source code generated 2021-10-25)
3 Copyright (c) Sergey Bochkanov (ALGLIB project).
4
5 >>> SOURCE LICENSE >>>
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation (www.fsf.org); either version 2 of the
9 License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 A copy of the GNU General Public License is available at
17 http://www.fsf.org/licensing/licenses
18 >>> END OF LICENSE >>>
19 *************************************************************************/
20 #ifdef _MSC_VER
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 #include "stdafx.h"
24
25 //
26 // if AE_OS==AE_LINUX (will be redefined to AE_POSIX in ap.h),
27 // set _GNU_SOURCE flag BEFORE any #includes to get affinity
28 // management functions
29 //
30 #if (AE_OS==AE_LINUX) && !defined(_GNU_SOURCE)
31 #define _GNU_SOURCE
32 #endif
33
34 //
35 // Must be defined before we include ap.h
36 //
37 #define _ALGLIB_IMPL_DEFINES
38 #define _ALGLIB_INTEGRITY_CHECKS_ONCE
39
40 #include "ap.h"
41 #include <limits>
42 #include <locale.h>
43 #include <ctype.h>
44
45 #if defined(AE_CPU)
46 #if (AE_CPU==AE_INTEL)
47
48 #if AE_COMPILER==AE_MSVC
49 #include <intrin.h>
50 #endif
51
52 #endif
53 #endif
54
55 // disable some irrelevant warnings
56 #if (AE_COMPILER==AE_MSVC) && !defined(AE_ALL_WARNINGS)
57 #pragma warning(disable:4100)
58 #pragma warning(disable:4127)
59 #pragma warning(disable:4611)
60 #pragma warning(disable:4702)
61 #pragma warning(disable:4996)
62 #endif
63
64 /////////////////////////////////////////////////////////////////////////
65 //
66 // THIS SECTION IMPLEMENTS BASIC FUNCTIONALITY LIKE
67 // MEMORY MANAGEMENT FOR VECTORS/MATRICES WHICH IS
68 // SHARED BETWEEN C++ AND PURE C LIBRARIES
69 //
70 /////////////////////////////////////////////////////////////////////////
71 namespace alglib_impl
72 {
73 /*
74 * OS-specific includes
75 */
76 #ifdef AE_USE_CPP
77 }
78 #endif
79 #if AE_OS==AE_WINDOWS || defined(AE_DEBUG4WINDOWS)
80 #ifndef _WIN32_WINNT
81 #define _WIN32_WINNT 0x0501
82 #endif
83 #include <windows.h>
84 #include <process.h>
85 #elif AE_OS==AE_POSIX || defined(AE_DEBUG4POSIX)
86 #include <time.h>
87 #include <unistd.h>
88 #include <pthread.h>
89 #include <sched.h>
90 #include <sys/time.h>
91 #endif
92 /* Debugging helpers for Windows */
93 #ifdef AE_DEBUG4WINDOWS
94 #include <windows.h>
95 #include <stdio.h>
96 #endif
97 #ifdef AE_USE_CPP
98 namespace alglib_impl
99 {
100 #endif
101
/*
 * local definitions
 */
#define x_nb 16
#define AE_DATA_ALIGN 64
#define AE_PTR_ALIGN sizeof(void*)
#define DYN_BOTTOM ((void*)1)
#define DYN_FRAME ((void*)2)
#define AE_LITTLE_ENDIAN 1
#define AE_BIG_ENDIAN 2
#define AE_MIXED_ENDIAN 3
#define AE_SER_ENTRY_LENGTH 11
#define AE_SER_ENTRIES_PER_ROW 5

/* serializer modes: lifecycle states (DEFAULT/ALLOC/READY2S) and
   output/input targets (string, C++ string, stream) */
#define AE_SM_DEFAULT 0
#define AE_SM_ALLOC 1
#define AE_SM_READY2S 2
#define AE_SM_TO_STRING 10
#define AE_SM_TO_CPPSTRING 11
#define AE_SM_TO_STREAM 12
#define AE_SM_FROM_STRING 20
#define AE_SM_FROM_STREAM 22

/* spin-lock tuning: spin length per test, and tests before yielding the CPU */
#define AE_LOCK_CYCLES 512
#define AE_LOCK_TESTS_BEFORE_YIELD 16
/* assertion which aborts the process instead of raising an ALGLIB exception;
   used for invariants which must hold even when error handling itself is broken */
#define AE_CRITICAL_ASSERT(x) if( !(x) ) abort()

/* IDs for set_dbg_value */
#define _ALGLIB_USE_ALLOC_COUNTER 0
#define _ALGLIB_USE_DBG_COUNTERS 1
#define _ALGLIB_USE_VENDOR_KERNELS 100
#define _ALGLIB_VENDOR_MEMSTAT 101

#define _ALGLIB_DEBUG_WORKSTEALING 200
#define _ALGLIB_WSDBG_NCORES 201
#define _ALGLIB_WSDBG_PUSHROOT_OK 202
#define _ALGLIB_WSDBG_PUSHROOT_FAILED 203

#define _ALGLIB_SET_GLOBAL_THREADING 1001
#define _ALGLIB_SET_NWORKERS 1002

/* IDs for get_dbg_value */
#define _ALGLIB_GET_ALLOC_COUNTER 0
#define _ALGLIB_GET_CUMULATIVE_ALLOC_SIZE 1
#define _ALGLIB_GET_CUMULATIVE_ALLOC_COUNT 2

#define _ALGLIB_GET_CORES_COUNT 1000
#define _ALGLIB_GET_GLOBAL_THREADING 1001
#define _ALGLIB_GET_NWORKERS 1002
151
/*************************************************************************
Lock.

This is internal structure which implements lock functionality.
*************************************************************************/
typedef struct
{
#if AE_OS==AE_WINDOWS
    /* Windows: spin-lock over an ae_int_t; p_lock points into buf, which
       reserves extra bytes so the flag can be placed on an aligned address */
    volatile ae_int_t * volatile p_lock;
    char buf[sizeof(ae_int_t)+AE_LOCK_ALIGNMENT];
#elif AE_OS==AE_POSIX
    /* POSIX: delegate locking to a pthread mutex */
    pthread_mutex_t mutex;
#else
    /* no OS support: single-threaded fallback, a plain flag */
    ae_bool is_locked;
#endif
} _lock;
168
169
170
171
/*
 * Error tracking facilities; these fields are modified every time ae_set_error_flag()
 * is called with non-zero cond. Thread unsafe access, but it does not matter actually.
 */
static const char * sef_file = "";
static int sef_line = 0;
static const char * sef_xdesc = "";

/*
 * Global flags, split into several char-sized variables in order
 * to avoid problem with non-atomic reads/writes (single-byte ops
 * are atomic on all modern architectures);
 *
 * Following variables are included:
 * * threading-related settings
 */
unsigned char _alglib_global_threading_flags = _ALGLIB_FLG_THREADING_SERIAL>>_ALGLIB_FLG_THREADING_SHIFT;

/*
 * DESCRIPTION: recommended number of active workers:
 *              * positive value >=1 is used to specify exact number of active workers
 *              * 0 means that ALL available cores are used
 *              * negative value means that all cores EXCEPT for cores_to_use will be used
 *                (say, -1 means that all cores except for one will be used). At least one
 *                core will be used in this case, even if you assign -9999999 to this field.
 *
 *              Default value =  0 (fully parallel execution) when AE_NWORKERS is not defined
 *                            =  0 for manually defined number of cores (AE_NWORKERS is defined)
 * PROTECTION:  not needed; runtime modification is possible, but we do not need exact
 *              synchronization.
 */
#if defined(AE_NWORKERS) && (AE_NWORKERS<=0)
#error AE_NWORKERS must be positive number or not defined at all.
#endif
#if defined(AE_NWORKERS)
/* NOTE: both branches are currently identical; the #if is kept so the
   manually-configured build could be given a different default if needed */
ae_int_t _alglib_cores_to_use = 0;
#else
ae_int_t _alglib_cores_to_use = 0;
#endif

/*
 * Debug counters:
 * * _alloc_counter       - live allocation count (incremented on alloc,
 *                          decremented on free); active only when
 *                          _use_alloc_counter is set
 * * _alloc_counter_total - cumulative allocation count, never decremented
 * * _dbg_alloc_total     - cumulative allocation size in bytes; active only
 *                          when _use_dbg_counters is set
 */
ae_int_t _alloc_counter = 0;
ae_int_t _alloc_counter_total = 0;
ae_bool _use_alloc_counter = ae_false;

ae_int_t _dbg_alloc_total = 0;
ae_bool _use_dbg_counters = ae_false;

/* whether vendor kernels (e.g. MKL) are preferred when compiled in */
ae_bool _use_vendor_kernels = ae_true;

ae_bool debug_workstealing = ae_false; /* debug workstealing environment? False by default */
ae_int_t dbgws_pushroot_ok = 0;
ae_int_t dbgws_pushroot_failed = 0;

#ifdef AE_SMP_DEBUGCOUNTERS
__declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_acquisitions = 0;
__declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_spinwaits = 0;
__declspec(align(AE_LOCK_ALIGNMENT)) volatile ae_int64_t _ae_dbg_lock_yields = 0;
#endif

/*
 * Allocation debugging:
 * * _force_malloc_failure - when set, every allocation fails (for testing
 *                           out-of-memory handling)
 * * _malloc_failure_after - when >0, allocations fail once the cumulative
 *                           allocation count reaches this threshold
 */
ae_bool _force_malloc_failure = ae_false;
ae_int_t _malloc_failure_after = 0;


/*
 * Trace-related declarations:
 * alglib_trace_type    -   trace output type
 * alglib_trace_file    -   file descriptor (to be used by ALGLIB code which
 *                          sends messages to trace log
 * alglib_fclose_trace  -   whether we have to call fclose() when disabling or
 *                          changing trace output
 * alglib_trace_tags    -   string buffer used to store tags + two additional
 *                          characters (leading and trailing commas) + null
 *                          terminator
 */
#define ALGLIB_TRACE_NONE 0
#define ALGLIB_TRACE_FILE 1
#define ALGLIB_TRACE_TAGS_LEN 2048
#define ALGLIB_TRACE_BUFFER_LEN (ALGLIB_TRACE_TAGS_LEN+2+1)
static ae_int_t alglib_trace_type = ALGLIB_TRACE_NONE;
FILE *alglib_trace_file = NULL;
static ae_bool alglib_fclose_trace = ae_false;
static char alglib_trace_tags[ALGLIB_TRACE_BUFFER_LEN];

/*
 * Fields for memory allocation over static array
 */
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
#if AE_THREADING!=AE_SERIAL_UNSAFE
#error Basis static malloc is thread-unsafe; define AE_THREADING=AE_SERIAL_UNSAFE to prove that you know it
#endif
static ae_int_t sm_page_size = 0;
static ae_int_t sm_page_cnt = 0;
static ae_int_t *sm_page_tbl = NULL;
static unsigned char *sm_mem = NULL;
#endif

/*
 * These declarations are used to ensure that
 * sizeof(ae_bool)=1, sizeof(ae_int32_t)==4, sizeof(ae_int64_t)==8, sizeof(ae_int_t)==sizeof(void*).
 * they will lead to syntax error otherwise (array size will be negative).
 *
 * you can remove them, if you want - they are not used anywhere.
 *
 */
static char _ae_bool_must_be_8_bits_wide [1-2*((int)(sizeof(ae_bool))-1)*((int)(sizeof(ae_bool))-1)];
static char _ae_int32_t_must_be_32_bits_wide[1-2*((int)(sizeof(ae_int32_t))-4)*((int)(sizeof(ae_int32_t))-4)];
static char _ae_int64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_int64_t))-8)*((int)(sizeof(ae_int64_t))-8)];
static char _ae_uint64_t_must_be_64_bits_wide[1-2*((int)(sizeof(ae_uint64_t))-8)*((int)(sizeof(ae_uint64_t))-8)];
static char _ae_int_t_must_be_pointer_sized [1-2*((int)(sizeof(ae_int_t))-(int)sizeof(void*))*((int)(sizeof(ae_int_t))-(int)(sizeof(void*)))];

/*
 * This variable is used to prevent some tricky optimizations which may degrade multithreaded performance.
 * It is touched once in the ae_init_pool() function from smp.c in order to prevent optimizations.
 *
 */
static volatile ae_int_t ae_never_change_it = 1;
294
295 /*************************************************************************
296 This function should never be called. It is here to prevent spurious
297 compiler warnings about unused variables (in fact: used).
298 *************************************************************************/
ae_never_call_it()299 void ae_never_call_it()
300 {
301 ae_touch_ptr((void*)_ae_bool_must_be_8_bits_wide);
302 ae_touch_ptr((void*)_ae_int32_t_must_be_32_bits_wide);
303 ae_touch_ptr((void*)_ae_int64_t_must_be_64_bits_wide);
304 ae_touch_ptr((void*)_ae_uint64_t_must_be_64_bits_wide);
305 ae_touch_ptr((void*)_ae_int_t_must_be_pointer_sized);
306 }
307
/*************************************************************************
Standard function wrappers for better GLIBC portability

The .symver directives pin exp/log/pow to their GLIBC 2.2.5 symbol
versions, so binaries built on newer systems keep running against old
glibc installations. NOTE(review): the __wrap_* names follow the linker
"--wrap" convention and simply forward to the pinned implementations -
confirm against the X_FOR_LINUX build scripts.
*************************************************************************/
#if defined(X_FOR_LINUX)
__asm__(".symver exp,exp@GLIBC_2.2.5");
__asm__(".symver log,log@GLIBC_2.2.5");
__asm__(".symver pow,pow@GLIBC_2.2.5");

double __wrap_exp(double x)
{
    return exp(x);
}

double __wrap_log(double x)
{
    return log(x);
}

double __wrap_pow(double x, double y)
{
    return pow(x, y);
}
#endif
331
ae_set_dbg_flag(ae_int64_t flag_id,ae_int64_t flag_val)332 void ae_set_dbg_flag(ae_int64_t flag_id, ae_int64_t flag_val)
333 {
334 if( flag_id==_ALGLIB_USE_ALLOC_COUNTER )
335 {
336 _use_alloc_counter = flag_val!=0;
337 return;
338 }
339 if( flag_id==_ALGLIB_USE_DBG_COUNTERS )
340 {
341 _use_dbg_counters = flag_val!=0;
342 return;
343 }
344 if( flag_id==_ALGLIB_USE_VENDOR_KERNELS )
345 {
346 _use_vendor_kernels = flag_val!=0;
347 return;
348 }
349 if( flag_id==_ALGLIB_DEBUG_WORKSTEALING )
350 {
351 debug_workstealing = flag_val!=0;
352 return;
353 }
354 if( flag_id==_ALGLIB_SET_GLOBAL_THREADING )
355 {
356 ae_set_global_threading((ae_uint64_t)flag_val);
357 return;
358 }
359 if( flag_id==_ALGLIB_SET_NWORKERS )
360 {
361 _alglib_cores_to_use = (ae_int_t)flag_val;
362 return;
363 }
364 }
365
/*************************************************************************
Returns the value of the internal debug/configuration counter selected
by id; returns 0 for unknown IDs.
*************************************************************************/
ae_int64_t ae_get_dbg_value(ae_int64_t id)
{
    /* allocation counters (activated via ae_set_dbg_flag()) */
    if( id==_ALGLIB_GET_ALLOC_COUNTER )
        return _alloc_counter;
    if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_SIZE )
        return _dbg_alloc_total;
    if( id==_ALGLIB_GET_CUMULATIVE_ALLOC_COUNT )
        return _alloc_counter_total;

    /* vendor-kernel memory statistics: MKL only, 0 otherwise */
    if( id==_ALGLIB_VENDOR_MEMSTAT )
    {
#if defined(AE_MKL)
        return ae_mkl_memstat();
#else
        return 0;
#endif
    }

    /* workstealing counters */
    /* NOTE: the preprocessor block below forms the body of the if() above it */
    if( id==_ALGLIB_WSDBG_NCORES )
#if defined(AE_SMP)
        return ae_cores_count();
#else
        return 0;
#endif
    if( id==_ALGLIB_WSDBG_PUSHROOT_OK )
        return dbgws_pushroot_ok;
    if( id==_ALGLIB_WSDBG_PUSHROOT_FAILED )
        return dbgws_pushroot_failed;

    /* threading configuration */
    if( id==_ALGLIB_GET_CORES_COUNT )
#if defined(AE_SMP)
        return ae_cores_count();
#else
        return 0;
#endif
    if( id==_ALGLIB_GET_GLOBAL_THREADING )
        return (ae_int64_t)ae_get_global_threading();
    if( id==_ALGLIB_GET_NWORKERS )
        return (ae_int64_t)_alglib_cores_to_use;

    /* unknown value */
    return 0;
}
410
411 /************************************************************************
412 This function sets default (global) threading model:
413 * serial execution
414 * multithreading, if cores_to_use allows it
415
416 ************************************************************************/
ae_set_global_threading(ae_uint64_t flg_value)417 void ae_set_global_threading(ae_uint64_t flg_value)
418 {
419 flg_value = flg_value&_ALGLIB_FLG_THREADING_MASK;
420 AE_CRITICAL_ASSERT(flg_value==_ALGLIB_FLG_THREADING_SERIAL || flg_value==_ALGLIB_FLG_THREADING_PARALLEL);
421 _alglib_global_threading_flags = (unsigned char)(flg_value>>_ALGLIB_FLG_THREADING_SHIFT);
422 }
423
424 /************************************************************************
425 This function gets default (global) threading model:
426 * serial execution
427 * multithreading, if cores_to_use allows it
428
429 ************************************************************************/
ae_get_global_threading()430 ae_uint64_t ae_get_global_threading()
431 {
432 return ((ae_uint64_t)_alglib_global_threading_flags)<<_ALGLIB_FLG_THREADING_SHIFT;
433 }
434
/************************************************************************
Conditionally raises an error flag and records the failure location.

PARAMETERS:
    p_flag      -   flag to raise; set to ae_true when cond holds,
                    left untouched otherwise
    cond        -   condition being tested
    filename    -   source file of the call site
    lineno      -   source line of the call site
    xdesc       -   extra human-readable description

The most recent failing location is stored into the sef_* statics (see
ae_get_last_error_file/line/xdesc); access is deliberately unsynchronized.
When ALGLIB_ABORT_ON_ERROR_FLAG is defined, diagnostics are printed and
the process is aborted immediately - useful for debugging.
************************************************************************/
void ae_set_error_flag(ae_bool *p_flag, ae_bool cond, const char *filename, int lineno, const char *xdesc)
{
    if( cond )
    {
        *p_flag = ae_true;
        sef_file = filename;
        sef_line = lineno;
        sef_xdesc= xdesc;
#ifdef ALGLIB_ABORT_ON_ERROR_FLAG
        printf("[ALGLIB] aborting on ae_set_error_flag(cond=true)\n");
        printf("[ALGLIB] %s:%d\n", filename, lineno);
        printf("[ALGLIB] %s\n", xdesc);
        fflush(stdout);
        /* flush the trace log too, so no diagnostics are lost on abort */
        if( alglib_trace_file!=NULL ) fflush(alglib_trace_file);
        abort();
#endif
    }
}
453
/************************************************************************
This function returns file name for the last call of ae_set_error_flag()
with non-zero cond parameter.

NOTE: reads an unsynchronized static; the value reflects the most recent
      error-flag set from any thread.
************************************************************************/
const char * ae_get_last_error_file()
{
    return sef_file;
}
462
/************************************************************************
This function returns line number for the last call of ae_set_error_flag()
with non-zero cond parameter.

NOTE: reads an unsynchronized static; the value reflects the most recent
      error-flag set from any thread.
************************************************************************/
int ae_get_last_error_line()
{
    return sef_line;
}
471
/************************************************************************
This function returns extra description for the last call of ae_set_error_flag()
with non-zero cond parameter.

NOTE: reads an unsynchronized static; the value reflects the most recent
      error-flag set from any thread.
************************************************************************/
const char * ae_get_last_error_xdesc()
{
    return sef_xdesc;
}
480
ae_misalignment(const void * ptr,size_t alignment)481 ae_int_t ae_misalignment(const void *ptr, size_t alignment)
482 {
483 union _u
484 {
485 const void *ptr;
486 ae_int_t iptr;
487 } u;
488 u.ptr = ptr;
489 return (ae_int_t)(u.iptr%alignment);
490 }
491
/************************************************************************
Rounds ptr UP to the next multiple of alignment; returns ptr unchanged
when it is already aligned. The caller must guarantee that the buffer
has enough slack for the adjustment.
************************************************************************/
void* ae_align(void *ptr, size_t alignment)
{
    char *aligned = (char*)ptr;
    size_t tail = (size_t)((aligned-(char*)0)%alignment);
    if( tail!=0 )
        aligned += alignment-tail;
    return aligned;
}
499
/************************************************************************
This function maps nworkers number (which can be positive, zero or
negative with 0 meaning "all cores", -1 meaning "all cores -1" and so on)
to "effective", strictly positive workers count.

This function is intended to be used by debugging/testing code which
tests different number of worker threads. It is NOT aligned in any way
with ALGLIB multithreading framework (i.e. it can return non-zero worker
count even for single-threaded GPLed ALGLIB).
************************************************************************/
ae_int_t ae_get_effective_workers(ae_int_t nworkers)
{
    ae_int_t ncores;

    /* determine cores count: compile-time override, then OS query,
       then single-core fallback */
#if defined(AE_NWORKERS)
    ncores = AE_NWORKERS;
#elif AE_OS==AE_WINDOWS
    SYSTEM_INFO sysInfo;
    GetSystemInfo(&sysInfo);
    ncores = (ae_int_t)(sysInfo.dwNumberOfProcessors);
#elif AE_OS==AE_POSIX
    {
        /* sysconf() may return -1 or 0 on failure; fall back to one core */
        long r = sysconf(_SC_NPROCESSORS_ONLN);
        ncores = r<=0 ? 1 : r;
    }
#else
    ncores = 1;
#endif
    AE_CRITICAL_ASSERT(ncores>=1);

    /* map nworkers to its effective value:
       positive values are clamped to ncores; zero/negative values mean
       ncores+nworkers, but never less than one worker */
    if( nworkers>=1 )
        return nworkers>ncores ? ncores : nworkers;
    return ncores+nworkers>=1 ? ncores+nworkers : 1;
}
536
537 /*************************************************************************
538 This function belongs to the family of "optional atomics", i.e. atomic
539 functions which either perform atomic changes - or do nothing at all, if
540 current compiler settings do not allow us to generate atomic code.
541
542 All "optional atomics" are synchronized, i.e. either all of them work - or
543 no one of the works.
544
545 This particular function performs atomic addition on pointer-sized value,
546 which must be pointer-size aligned.
547
548 NOTE: this function is not intended to be extremely high performance one,
549 so use it only when necessary.
550 *************************************************************************/
ae_optional_atomic_add_i(ae_int_t * p,ae_int_t v)551 void ae_optional_atomic_add_i(ae_int_t *p, ae_int_t v)
552 {
553 AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
554 #if AE_OS==AE_WINDOWS
555 for(;;)
556 {
557 /* perform conversion between ae_int_t* and void**
558 without compiler warnings about indirection levels */
559 union _u
560 {
561 PVOID volatile * volatile ptr;
562 volatile ae_int_t * volatile iptr;
563 } u;
564 u.iptr = p;
565
566 /* atomic read for initial value */
567 PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
568
569 /* increment cached value and store */
570 if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)+v), v0)==v0 )
571 break;
572 }
573 #elif defined(__clang__) && (AE_CPU==AE_INTEL)
574 __atomic_fetch_add(p, v, __ATOMIC_RELAXED);
575 #elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC__>=470)
576 __atomic_add_fetch(p, v, __ATOMIC_RELAXED);
577 #else
578 #endif
579 }
580
581 /*************************************************************************
582 This function belongs to the family of "optional atomics", i.e. atomic
583 functions which either perform atomic changes - or do nothing at all, if
584 current compiler settings do not allow us to generate atomic code.
585
586 All "optional atomics" are synchronized, i.e. either all of them work - or
587 no one of the works.
588
589 This particular function performs atomic subtraction on pointer-sized
590 value, which must be pointer-size aligned.
591
592 NOTE: this function is not intended to be extremely high performance one,
593 so use it only when necessary.
594 *************************************************************************/
ae_optional_atomic_sub_i(ae_int_t * p,ae_int_t v)595 void ae_optional_atomic_sub_i(ae_int_t *p, ae_int_t v)
596 {
597 AE_CRITICAL_ASSERT(ae_misalignment(p,sizeof(void*))==0);
598 #if AE_OS==AE_WINDOWS
599 for(;;)
600 {
601 /* perform conversion between ae_int_t* and void**
602 without compiler warnings about indirection levels */
603 union _u
604 {
605 PVOID volatile * volatile ptr;
606 volatile ae_int_t * volatile iptr;
607 } u;
608 u.iptr = p;
609
610 /* atomic read for initial value, convert it to 1-byte pointer */
611 PVOID v0 = InterlockedCompareExchangePointer(u.ptr, NULL, NULL);
612
613 /* increment cached value and store */
614 if( InterlockedCompareExchangePointer(u.ptr, (PVOID)(((char*)v0)-v), v0)==v0 )
615 break;
616 }
617 #elif defined(__clang__) && (AE_CPU==AE_INTEL)
618 __atomic_fetch_sub(p, v, __ATOMIC_RELAXED);
619 #elif (AE_COMPILER==AE_GNUC) && (AE_CPU==AE_INTEL) && (__GNUC__*100+__GNUC__>=470)
620 __atomic_sub_fetch(p, v, __ATOMIC_RELAXED);
621 #else
622 #endif
623 }
624
625
626 /*************************************************************************
627 This function cleans up automatically managed memory before caller terminates
628 ALGLIB executing by ae_break() or by simply stopping calling callback.
629
630 For state!=NULL it calls thread_exception_handler() and the ae_state_clear().
631 For state==NULL it does nothing.
632 *************************************************************************/
ae_clean_up_before_breaking(ae_state * state)633 void ae_clean_up_before_breaking(ae_state *state)
634 {
635 if( state!=NULL )
636 {
637 if( state->thread_exception_handler!=NULL )
638 state->thread_exception_handler(state);
639 ae_state_clear(state);
640 }
641 }
642
643 /*************************************************************************
644 This function abnormally aborts program, using one of several ways:
645
646 * for state!=NULL and state->break_jump being initialized with call to
647 ae_state_set_break_jump() - it performs longjmp() to return site.
648 * otherwise, abort() is called
649
650 In all cases, for state!=NULL function sets state->last_error and
651 state->error_msg fields. It also clears state with ae_state_clear().
652
653 If state is not NULL and state->thread_exception_handler is set, it is
654 called prior to handling error and clearing state.
655 *************************************************************************/
ae_break(ae_state * state,ae_error_type error_type,const char * msg)656 void ae_break(ae_state *state, ae_error_type error_type, const char *msg)
657 {
658 if( state!=NULL )
659 {
660 if( alglib_trace_type!=ALGLIB_TRACE_NONE )
661 ae_trace("---!!! CRITICAL ERROR !!!--- exception with message '%s' was generated\n", msg!=NULL ? msg : "");
662 ae_clean_up_before_breaking(state);
663 state->last_error = error_type;
664 state->error_msg = msg;
665 if( state->break_jump!=NULL )
666 longjmp(*(state->break_jump), 1);
667 else
668 abort();
669 }
670 else
671 abort();
672 }
673
674 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
/*************************************************************************
Configures the basic static memory pool.

PARAMETERS:
    ptr     -   start of the caller-provided memory region
    size    -   region size in bytes; must be large enough for at least
                one page plus its page-table entry plus alignment slack

Must be called exactly once, before any allocation is performed (the
integrity checks abort on repeated configuration). The region is split
into a page table (one ae_int_t per page) followed by page-aligned
storage.
*************************************************************************/
void set_memory_pool(void *ptr, size_t size)
{
    /*
     * Integrity checks
     */
    AE_CRITICAL_ASSERT(sm_page_size==0);
    AE_CRITICAL_ASSERT(sm_page_cnt==0);
    AE_CRITICAL_ASSERT(sm_page_tbl==NULL);
    AE_CRITICAL_ASSERT(sm_mem==NULL);
    AE_CRITICAL_ASSERT(size>0);

    /*
     * Align pointer; the bytes skipped by alignment are subtracted from
     * the usable size
     */
    size -= ae_misalignment(ptr, sizeof(ae_int_t));
    ptr = ae_align(ptr, sizeof(ae_int_t));

    /*
     * Calculate page size and page count, prepare pointers to page table and memory
     */
    sm_page_size = 256;
    AE_CRITICAL_ASSERT(size>=(sm_page_size+sizeof(ae_int_t))+sm_page_size); /* we expect to have memory for at least one page + table entry + alignment */
    sm_page_cnt = (size-sm_page_size)/(sm_page_size+sizeof(ae_int_t));
    AE_CRITICAL_ASSERT(sm_page_cnt>0);
    sm_page_tbl = (ae_int_t*)ptr;
    sm_mem = (unsigned char*)ae_align(sm_page_tbl+sm_page_cnt, sm_page_size);

    /*
     * Mark all pages as free (table entry 0 means "free page")
     */
    memset(sm_page_tbl, 0, sm_page_cnt*sizeof(ae_int_t));
}
707
/*************************************************************************
Allocates memory from the static pool using a first-fit scan.

Page table encoding: 0 marks a free page; for an allocated block the
FIRST page stores the block length in pages, the remaining pages of the
block store -1.

Returns NULL for zero size, when failure is forced by the debug flag,
or when no sufficiently long run of free pages exists.
*************************************************************************/
void* ae_static_malloc(size_t size, size_t alignment)
{
    int rq_pages, i, j, cur_len;

    AE_CRITICAL_ASSERT(size>=0);
    AE_CRITICAL_ASSERT(sm_page_size>0);
    AE_CRITICAL_ASSERT(sm_page_cnt>0);
    AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
    AE_CRITICAL_ASSERT(sm_mem!=NULL);

    if( size==0 )
        return NULL;
    if( _force_malloc_failure )
        return NULL;

    /* check that page alignment and requested alignment match each other;
       pages are page-size aligned, so any divisor of the page size works */
    AE_CRITICAL_ASSERT(alignment<=sm_page_size);
    AE_CRITICAL_ASSERT((sm_page_size%alignment)==0);

    /* search long enough sequence of pages; rq_pages = ceil(size/page_size) */
    rq_pages = size/sm_page_size;
    if( size%sm_page_size )
        rq_pages++;
    cur_len = 0;
    for(i=0; i<sm_page_cnt;)
    {
        /* determine length of the sequence of free pages */
        if( sm_page_tbl[i]==0 )
            cur_len++;
        else
        {
            /* allocated block encountered: skip over it entirely */
            AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
            cur_len=0;
            i += sm_page_tbl[i];
            continue;
        }

        /* found it? */
        if( cur_len>=rq_pages )
        {
            /* update counters (if flag is set) */
            if( _use_alloc_counter )
            {
                ae_optional_atomic_add_i(&_alloc_counter, 1);
                ae_optional_atomic_add_i(&_alloc_counter_total, 1);
            }
            if( _use_dbg_counters )
                ae_optional_atomic_add_i(&_dbg_alloc_total, size);

            /* mark pages and return; i is the LAST page of the found run,
               so we walk backwards to reach its first page */
            for(j=0; j<rq_pages; j++)
                sm_page_tbl[i-j] = -1;
            sm_page_tbl[i-(rq_pages-1)] = rq_pages;
            return sm_mem+(i-(rq_pages-1))*sm_page_size;
        }

        /* next element */
        i++;
    }
    return NULL;
}
769
/*************************************************************************
Returns a block allocated by ae_static_malloc() to the static pool.

NULL is accepted and ignored. The block length is read from the page
table entry of the first page, then every page of the block is marked
as free.
*************************************************************************/
void ae_static_free(void *block)
{
    ae_int_t page_idx, page_cnt, i;
    if( block==NULL )
        return;
    /* convert byte offset into a page index, validating alignment/bounds */
    page_idx = (unsigned char*)block-sm_mem;
    AE_CRITICAL_ASSERT(page_idx>=0);
    AE_CRITICAL_ASSERT((page_idx%sm_page_size)==0);
    page_idx = page_idx/sm_page_size;
    AE_CRITICAL_ASSERT(page_idx<sm_page_cnt);
    page_cnt = sm_page_tbl[page_idx];
    AE_CRITICAL_ASSERT(page_cnt>=1);
    for(i=0; i<page_cnt; i++)
        sm_page_tbl[page_idx+i] = 0;

    /* update counters (if flag is set) */
    if( _use_alloc_counter )
        ae_optional_atomic_sub_i(&_alloc_counter, 1);
}
789
/*************************************************************************
Reports static pool usage.

PARAMETERS:
    bytes_used  -   receives number of bytes in allocated pages
    bytes_free  -   receives number of bytes in free pages

Both outputs are multiples of the page size; slack inside the last page
of a block counts as used.
*************************************************************************/
void memory_pool_stats(ae_int_t *bytes_used, ae_int_t *bytes_free)
{
    int i;

    AE_CRITICAL_ASSERT(sm_page_size>0);
    AE_CRITICAL_ASSERT(sm_page_cnt>0);
    AE_CRITICAL_ASSERT(sm_page_tbl!=NULL);
    AE_CRITICAL_ASSERT(sm_mem!=NULL);

    /* scan page table, counting PAGES first; allocated blocks are skipped
       in one step using the length stored in their first page */
    *bytes_used = 0;
    *bytes_free = 0;
    for(i=0; i<sm_page_cnt;)
    {
        if( sm_page_tbl[i]==0 )
        {
            (*bytes_free)++;
            i++;
        }
        else
        {
            AE_CRITICAL_ASSERT(sm_page_tbl[i]>0);
            *bytes_used += sm_page_tbl[i];
            i += sm_page_tbl[i];
        }
    }
    /* convert page counts into byte counts */
    *bytes_used *= sm_page_size;
    *bytes_free *= sm_page_size;
}
819 #endif
820
aligned_malloc(size_t size,size_t alignment)821 void* aligned_malloc(size_t size, size_t alignment)
822 {
823 #if AE_MALLOC==AE_BASIC_STATIC_MALLOC
824 return ae_static_malloc(size, alignment);
825 #else
826 char *result = NULL;
827
828 if( size==0 )
829 return NULL;
830 if( _force_malloc_failure )
831 return NULL;
832 if( _malloc_failure_after>0 && _alloc_counter_total>=_malloc_failure_after )
833 return NULL;
834
835 /* allocate */
836 if( alignment<=1 )
837 {
838 /* no alignment, just call alloc */
839 void *block;
840 void **p; ;
841 block = malloc(sizeof(void*)+size);
842 if( block==NULL )
843 return NULL;
844 p = (void**)block;
845 *p = block;
846 result = (char*)((char*)block+sizeof(void*));
847 }
848 else
849 {
850 /* align */
851 void *block;
852 block = malloc(alignment-1+sizeof(void*)+size);
853 if( block==NULL )
854 return NULL;
855 result = (char*)block+sizeof(void*);
856 /*if( (result-(char*)0)%alignment!=0 )
857 result += alignment - (result-(char*)0)%alignment;*/
858 result = (char*)ae_align(result, alignment);
859 *((void**)(result-sizeof(void*))) = block;
860 }
861
862 /* update counters (if flag is set) */
863 if( _use_alloc_counter )
864 {
865 ae_optional_atomic_add_i(&_alloc_counter, 1);
866 ae_optional_atomic_add_i(&_alloc_counter_total, 1);
867 }
868 if( _use_dbg_counters )
869 ae_optional_atomic_add_i(&_dbg_alloc_total, (ae_int64_t)size);
870
871 /* return */
872 return (void*)result;
873 #endif
874 }
875
/************************************************************************
Recovers the original (unaligned) pointer stored by aligned_malloc().

Returns NULL for NULL input, and always NULL under the basic static
allocator, which keeps no hidden header.
************************************************************************/
void* aligned_extract_ptr(void *block)
{
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
    return NULL;
#else
    void **header;
    if( block==NULL )
        return NULL;
    header = (void**)((char*)block-sizeof(void*));
    return *header;
#endif
}
886
/************************************************************************
Frees a block previously returned by aligned_malloc().

NULL is accepted and ignored. For the default allocator the original
malloc()-ed pointer is recovered from the hidden header and free()-d,
and the live-allocation counter is decremented when enabled.
************************************************************************/
void aligned_free(void *block)
{
#if AE_MALLOC==AE_BASIC_STATIC_MALLOC
    ae_static_free(block);
#else
    void *p;
    if( block==NULL )
        return;
    p = aligned_extract_ptr(block);
    free(p);
    if( _use_alloc_counter )
        ae_optional_atomic_sub_i(&_alloc_counter, 1);
#endif
}
901
eternal_malloc(size_t size)902 void* eternal_malloc(size_t size)
903 {
904 if( size==0 )
905 return NULL;
906 if( _force_malloc_failure )
907 return NULL;
908 return malloc(size);
909 }
910
911 /************************************************************************
912 Allocate memory with automatic alignment.
913
914 Returns NULL when zero size is specified.
915
916 Error handling:
917 * if state is NULL, returns NULL on allocation error
918 * if state is not NULL, calls ae_break() on allocation error
919 ************************************************************************/
ae_malloc(size_t size,ae_state * state)920 void* ae_malloc(size_t size, ae_state *state)
921 {
922 void *result;
923 if( size==0 )
924 return NULL;
925 result = aligned_malloc(size,AE_DATA_ALIGN);
926 if( result==NULL && state!=NULL)
927 ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
928 return result;
929 }
930
ae_free(void * p)931 void ae_free(void *p)
932 {
933 if( p!=NULL )
934 aligned_free(p);
935 }
936
/************************************************************************
Sets pointers to the matrix rows.

* dst must be correctly initialized matrix
* dst->data.ptr points to the beginning of memory block allocated for
  row pointers.
* dst->ptr - undefined (initialized during algorithm processing)
* storage parameter points to the beginning of actual storage
************************************************************************/
void ae_matrix_update_row_pointers(ae_matrix *dst, void *storage)
{
    char *p_base;
    void **pp_ptr;
    ae_int_t i;
    if( dst->rows>0 && dst->cols>0 )
    {
        /* row i points into storage at byte offset i*stride*element_size */
        p_base = (char*)storage;
        pp_ptr = (void**)dst->data.ptr;
        dst->ptr.pp_void = pp_ptr;
        for(i=0; i<dst->rows; i++, p_base+=dst->stride*ae_sizeof(dst->datatype))
            pp_ptr[i] = p_base;
    }
    else
        dst->ptr.pp_void = NULL; /* empty matrix: no rows to point at */
}
962
963 /************************************************************************
964 Returns size of datatype.
965 Zero for dynamic types like strings or multiple precision types.
966 ************************************************************************/
ae_sizeof(ae_datatype datatype)967 ae_int_t ae_sizeof(ae_datatype datatype)
968 {
969 switch(datatype)
970 {
971 case DT_BOOL: return (ae_int_t)sizeof(ae_bool);
972 case DT_INT: return (ae_int_t)sizeof(ae_int_t);
973 case DT_REAL: return (ae_int_t)sizeof(double);
974 case DT_COMPLEX: return 2*(ae_int_t)sizeof(double);
975 default: return 0;
976 }
977 }
978
979 /************************************************************************
980 Checks that n bytes pointed by ptr are zero.
981
982 This function is used in the constructors to check that instance fields
983 on entry are correctly initialized by zeros.
984 ************************************************************************/
ae_check_zeros(const void * ptr,ae_int_t n)985 ae_bool ae_check_zeros(const void *ptr, ae_int_t n)
986 {
987 ae_int_t nu, nr, i;
988 unsigned long long c = 0x0;
989
990 /*
991 * determine leading and trailing lengths
992 */
993 nu = n/sizeof(unsigned long long);
994 nr = n%sizeof(unsigned long long);
995
996 /*
997 * handle leading nu long long elements
998 */
999 if( nu>0 )
1000 {
1001 const unsigned long long *p_ull;
1002 p_ull = (const unsigned long long *)ptr;
1003 for(i=0; i<nu; i++)
1004 c |= p_ull[i];
1005 }
1006
1007 /*
1008 * handle trailing nr char elements
1009 */
1010 if( nr>0 )
1011 {
1012 const unsigned char *p_uc;
1013 p_uc = ((const unsigned char *)ptr)+nu*sizeof(unsigned long long);
1014 for(i=0; i<nr; i++)
1015 c |= p_uc[i];
1016 }
1017
1018 /*
1019 * done
1020 */
1021 return c==0x0;
1022 }
1023
1024
/************************************************************************
This dummy function is used to prevent compiler messages about unused
locals in automatically generated code.

It makes nothing - just accepts pointer, "touches" it - and that is all.
The volatile round-trip below has no side effects but is opaque enough
that the optimizer can not complain about (or remove) the "use".
************************************************************************/
void ae_touch_ptr(void *p)
{
    void * volatile sink0 = p;
    void * volatile sink1 = sink0;
    sink0 = sink1;
}
1039
/************************************************************************
This function initializes ALGLIB environment state.

state   pointer to ae_state structure; every field is overwritten.

NOTES:
* stacks contain no frames, so ae_make_frame() must be called before
  attaching dynamic blocks. Without it ae_leave_frame() will cycle
  forever (which is intended behavior).
************************************************************************/
void ae_state_init(ae_state *state)
{
    ae_int32_t *vp;

    /*
     * Set flags
     */
    state->flags = 0x0;

    /*
     * p_next points to itself because:
     * * correct program should be able to detect end of the list
     *   by looking at the ptr field.
     * * NULL p_next may be used to distinguish automatic blocks
     *   (in the list) from non-automatic (not in the list)
     *
     * last_block acts as the bottom sentinel of the dynamic block stack
     * (its ptr field is DYN_BOTTOM).
     */
    state->last_block.p_next = &(state->last_block);
    state->last_block.deallocator = NULL;
    state->last_block.ptr = DYN_BOTTOM;
    state->p_top_block = &(state->last_block);
    state->break_jump = NULL;
    state->error_msg = "";

    /*
     * determine endianness and initialize precomputed IEEE special quantities.
     *
     * Each double is built from two 32-bit halves: the half holding the
     * sign/exponent gets 0x7FF80000 (quiet NaN), 0x7FF00000 (+inf) or
     * 0xFFF00000 (-inf), the other half is zero. Which half is which
     * depends on byte order; mixed-endian doubles are not supported and
     * cause abort().
     */
    state->endianness = ae_get_endianness();
    if( state->endianness==AE_LITTLE_ENDIAN )
    {
        vp = (ae_int32_t*)(&state->v_nan);
        vp[0] = 0;
        vp[1] = (ae_int32_t)0x7FF80000;
        vp = (ae_int32_t*)(&state->v_posinf);
        vp[0] = 0;
        vp[1] = (ae_int32_t)0x7FF00000;
        vp = (ae_int32_t*)(&state->v_neginf);
        vp[0] = 0;
        vp[1] = (ae_int32_t)0xFFF00000;
    }
    else if( state->endianness==AE_BIG_ENDIAN )
    {
        vp = (ae_int32_t*)(&state->v_nan);
        vp[1] = 0;
        vp[0] = (ae_int32_t)0x7FF80000;
        vp = (ae_int32_t*)(&state->v_posinf);
        vp[1] = 0;
        vp[0] = (ae_int32_t)0x7FF00000;
        vp = (ae_int32_t*)(&state->v_neginf);
        vp[1] = 0;
        vp[0] = (ae_int32_t)0xFFF00000;
    }
    else
        abort();

    /*
     * set threading information
     */
    state->worker_thread = NULL;
    state->parent_task = NULL;
    state->thread_exception_handler = NULL;
}
1109
1110
1111 /************************************************************************
1112 This function clears ALGLIB environment state.
1113 All dynamic data controlled by state are freed.
1114 ************************************************************************/
ae_state_clear(ae_state * state)1115 void ae_state_clear(ae_state *state)
1116 {
1117 while( state->p_top_block->ptr!=DYN_BOTTOM )
1118 ae_frame_leave(state);
1119 }
1120
1121
/************************************************************************
This function sets jump buffer for error handling.

state   ALGLIB environment state
buf     longjmp() target used by ae_break(); may be NULL, in which case
        ae_break() falls back to its default (abort) behavior.
************************************************************************/
void ae_state_set_break_jump(ae_state *state, jmp_buf *buf)
{
    state->break_jump = buf;
}
1131
1132
/************************************************************************
This function sets flags member of the ae_state structure

state   ALGLIB environment state
flags   new value of the flags bitfield
************************************************************************/
void ae_state_set_flags(ae_state *state, ae_uint64_t flags)
{
    state->flags = flags;
}
1142
1143
1144 /************************************************************************
1145 This function makes new stack frame.
1146
1147 This function takes two parameters: environment state and pointer to the
1148 dynamic block which will be used as indicator of the frame beginning.
1149 This dynamic block must be initialized by caller and mustn't be changed/
1150 deallocated/reused till ae_leave_frame called. It may be global or local
1151 variable (local is even better).
1152 ************************************************************************/
ae_frame_make(ae_state * state,ae_frame * tmp)1153 void ae_frame_make(ae_state *state, ae_frame *tmp)
1154 {
1155 tmp->db_marker.p_next = state->p_top_block;
1156 tmp->db_marker.deallocator = NULL;
1157 tmp->db_marker.ptr = DYN_FRAME;
1158 state->p_top_block = &tmp->db_marker;
1159 }
1160
1161
1162 /************************************************************************
1163 This function leaves current stack frame and deallocates all automatic
1164 dynamic blocks which were attached to this frame.
1165 ************************************************************************/
ae_frame_leave(ae_state * state)1166 void ae_frame_leave(ae_state *state)
1167 {
1168 while( state->p_top_block->ptr!=DYN_FRAME && state->p_top_block->ptr!=DYN_BOTTOM)
1169 {
1170 if( state->p_top_block->ptr!=NULL && state->p_top_block->deallocator!=NULL)
1171 ((ae_deallocator)(state->p_top_block->deallocator))(state->p_top_block->ptr);
1172 state->p_top_block = state->p_top_block->p_next;
1173 }
1174 state->p_top_block = state->p_top_block->p_next;
1175 }
1176
1177
1178 /************************************************************************
1179 This function attaches block to the dynamic block list
1180
1181 block block
1182 state ALGLIB environment state
1183
1184 This function does NOT generate exceptions.
1185
1186 NOTES:
1187 * never call it for special blocks which marks frame boundaries!
1188 ************************************************************************/
ae_db_attach(ae_dyn_block * block,ae_state * state)1189 void ae_db_attach(ae_dyn_block *block, ae_state *state)
1190 {
1191 block->p_next = state->p_top_block;
1192 state->p_top_block = block;
1193 }
1194
1195
/************************************************************************
This function initializes dynamic block:

block               destination block, MUST be zero-filled on entry
size                size (in bytes), >=0.
state               ALGLIB environment state, non-NULL
make_automatic      if true, vector is added to the dynamic block list

block is assumed to be uninitialized, its fields are ignored. You may
call this function with zero size in order to register block in the
dynamic list.

Error handling: calls ae_break() on allocation error. Block is left in
valid state (empty, but valid).

NOTES:
* never call it for blocks which are already in the list; use ae_db_realloc
  for already allocated blocks.

NOTE: no memory allocation is performed for initialization with size=0
************************************************************************/
void ae_db_init(ae_dyn_block *block, ae_int_t size, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(block,sizeof(*block)));

    /*
     * NOTE: these strange dances around block->ptr are necessary
     * in order to correctly handle possible exceptions during
     * memory allocation.
     *
     * The block is put into a well-defined empty state (and, if requested,
     * registered in the frame list) BEFORE ae_malloc() is called, so that
     * a longjmp from ae_malloc() leaves it cleanly deallocatable.
     */
    ae_assert(size>=0, "ae_db_init(): negative size", state);
    block->ptr = NULL;
    block->valgrind_hint = NULL;
    /* "touch" fields to suppress unused-value warnings in generated code */
    ae_touch_ptr(block->ptr);
    ae_touch_ptr(block->valgrind_hint);
    if( make_automatic )
        ae_db_attach(block, state);
    else
        block->p_next = NULL;
    if( size!=0 )
    {
        block->ptr = ae_malloc((size_t)size, state);
        /* valgrind_hint points at the raw (unaligned) allocation */
        block->valgrind_hint = aligned_extract_ptr(block->ptr);
    }
    block->deallocator = ae_free;
}
1243
1244
/************************************************************************
This function realloc's dynamic block:

block               destination block (initialized)
size                new size (in bytes)
state               ALGLIB environment state

block is assumed to be initialized.

This function:
* deletes old contents
* preserves automatic state

Error handling: calls ae_break() on allocation error. Block is left in
valid state - empty, but valid.

NOTES:
* never call it for special blocks which mark frame boundaries!
************************************************************************/
void ae_db_realloc(ae_dyn_block *block, ae_int_t size, ae_state *state)
{
    AE_CRITICAL_ASSERT(state!=NULL);

    /*
     * NOTE: these strange dances around block->ptr are necessary
     * in order to correctly handle possible exceptions during
     * memory allocation.
     *
     * Old storage is released and block->ptr zeroed BEFORE the new
     * allocation, so a longjmp from ae_malloc() leaves the block empty
     * but valid (no dangling pointer, no double free).
     */
    ae_assert(size>=0, "ae_db_realloc(): negative size", state);
    if( block->ptr!=NULL )
    {
        ((ae_deallocator)block->deallocator)(block->ptr);
        block->ptr = NULL;
        block->valgrind_hint = NULL;
    }
    block->ptr = ae_malloc((size_t)size, state);
    /* valgrind_hint points at the raw (unaligned) allocation */
    block->valgrind_hint = aligned_extract_ptr(block->ptr);
    block->deallocator = ae_free;
}
1284
1285
1286 /************************************************************************
1287 This function clears dynamic block (releases all dynamically allocated
1288 memory). Dynamic block may be in automatic management list - in this case
1289 it will NOT be removed from list.
1290
1291 block destination block (initialized)
1292
1293 NOTES:
1294 * never call it for special blocks which marks frame boundaries!
1295 ************************************************************************/
ae_db_free(ae_dyn_block * block)1296 void ae_db_free(ae_dyn_block *block)
1297 {
1298 if( block->ptr!=NULL )
1299 ((ae_deallocator)block->deallocator)(block->ptr);
1300 block->ptr = NULL;
1301 block->valgrind_hint = NULL;
1302 block->deallocator = ae_free;
1303 }
1304
1305 /************************************************************************
1306 This function swaps contents of two dynamic blocks (pointers and
1307 deallocators) leaving other parameters (automatic management settings,
1308 etc.) unchanged.
1309
1310 NOTES:
1311 * never call it for special blocks which marks frame boundaries!
1312 ************************************************************************/
ae_db_swap(ae_dyn_block * block1,ae_dyn_block * block2)1313 void ae_db_swap(ae_dyn_block *block1, ae_dyn_block *block2)
1314 {
1315 void (*deallocator)(void*) = NULL;
1316 void * volatile ptr;
1317 void * valgrind_hint;
1318
1319 ptr = block1->ptr;
1320 valgrind_hint = block1->valgrind_hint;
1321 deallocator = block1->deallocator;
1322
1323 block1->ptr = block2->ptr;
1324 block1->valgrind_hint = block2->valgrind_hint;
1325 block1->deallocator = block2->deallocator;
1326
1327 block2->ptr = ptr;
1328 block2->valgrind_hint = valgrind_hint;
1329 block2->deallocator = deallocator;
1330 }
1331
/*************************************************************************
This function creates ae_vector.
Vector size may be zero. Vector contents is uninitialized.

dst                 destination vector, MUST be zero-filled (we  check  it
                    and call abort() if *dst is non-zero; the rationale is
                    that we can not correctly handle errors in constructors
                    without zero-filling).
size                vector size, may be zero
datatype            guess what...
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, vector will be registered in the current frame
                    of the state structure;

NOTE: no memory allocation is performed for initialization with size=0
*************************************************************************/
void ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
{
    /*
     * Integrity checks
     */
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
    ae_assert(size>=0, "ae_vector_init(): negative size", state);

    /* prepare for possible errors during allocation:
       cnt/ptr are zeroed first so that a longjmp from ae_db_init()
       leaves dst in a valid empty state */
    dst->cnt = 0;
    dst->ptr.p_ptr = NULL;

    /* init: allocate the backing block, then publish size/pointer */
    ae_db_init(&dst->data, size*ae_sizeof(datatype), state, make_automatic);
    dst->cnt = size;
    dst->datatype = datatype;
    dst->ptr.p_ptr = dst->data.ptr;
    dst->is_attached = ae_false;
}
1370
1371
1372 /************************************************************************
1373 This function creates copy of ae_vector. New copy of the data is created,
1374 which is managed and owned by newly initialized vector.
1375
1376 dst destination vector, MUST be zero-filled (we check it
1377 and call abort() if *dst is non-zero; the rationale is
1378 that we can not correctly handle errors in constructors
1379 without zero-filling).
1380 src well, it is source
1381 state pointer to current state structure. Can not be NULL.
1382 used for exception handling (say, allocation error results
1383 in longjmp call).
1384 make_automatic if true, vector will be registered in the current frame
1385 of the state structure;
1386
1387 dst is assumed to be uninitialized, its fields are ignored.
1388 ************************************************************************/
ae_vector_init_copy(ae_vector * dst,ae_vector * src,ae_state * state,ae_bool make_automatic)1389 void ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic)
1390 {
1391 AE_CRITICAL_ASSERT(state!=NULL);
1392
1393 ae_vector_init(dst, src->cnt, src->datatype, state, make_automatic);
1394 if( src->cnt!=0 )
1395 memmove(dst->ptr.p_ptr, src->ptr.p_ptr, (size_t)(src->cnt*ae_sizeof(src->datatype)));
1396 }
1397
1398 /************************************************************************
1399 This function initializes ae_vector using X-structure as source. New copy
1400 of data is created, which is owned/managed by ae_vector structure. Both
1401 structures (source and destination) remain completely independent after
1402 this call.
1403
1404 dst destination vector, MUST be zero-filled (we check it
1405 and call abort() if *dst is non-zero; the rationale is
1406 that we can not correctly handle errors in constructors
1407 without zero-filling).
1408 src well, it is source
1409 state pointer to current state structure. Can not be NULL.
1410 used for exception handling (say, allocation error results
1411 in longjmp call).
1412 make_automatic if true, vector will be registered in the current frame
1413 of the state structure;
1414
1415 dst is assumed to be uninitialized, its fields are ignored.
1416 ************************************************************************/
ae_vector_init_from_x(ae_vector * dst,x_vector * src,ae_state * state,ae_bool make_automatic)1417 void ae_vector_init_from_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
1418 {
1419 AE_CRITICAL_ASSERT(state!=NULL);
1420
1421 ae_vector_init(dst, (ae_int_t)src->cnt, (ae_datatype)src->datatype, state, make_automatic);
1422 if( src->cnt>0 )
1423 memmove(dst->ptr.p_ptr, src->x_ptr.p_ptr, (size_t)(((ae_int_t)src->cnt)*ae_sizeof((ae_datatype)src->datatype)));
1424 }
1425
/************************************************************************
This function initializes ae_vector using X-structure as source.

New vector is attached to source:
* DST shares memory with SRC
* both DST and SRC are writable - all writes to DST  change  elements  of
  SRC and vice versa.
* DST can be reallocated with ae_vector_set_length(), in this case SRC
  remains untouched
* SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS

NOTE: is_attached field is set to ae_true in order to indicate  that
      vector does not own its memory.

dst                 destination vector
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, vector will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_vector_init_attach_to_x(ae_vector *dst, x_vector *src, ae_state *state, ae_bool make_automatic)
{
    volatile ae_int_t cnt;

    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));

    cnt = (ae_int_t)src->cnt;

    /* ensure that size is correct: round-trip cast detects truncation
       when x_vector uses a wider counter than ae_int_t */
    ae_assert(cnt==src->cnt, "ae_vector_init_attach_to_x(): 32/64 overflow", state);
    ae_assert(cnt>=0, "ae_vector_init_attach_to_x(): negative length", state);

    /* prepare for possible errors during allocation */
    dst->cnt = 0;
    dst->ptr.p_ptr = NULL;
    dst->datatype = (ae_datatype)src->datatype;

    /* zero-size init in order to correctly register in the frame */
    ae_db_init(&dst->data, 0, state, make_automatic);

    /* init: share the source's storage instead of allocating */
    dst->cnt = cnt;
    dst->ptr.p_ptr = src->x_ptr.p_ptr;
    dst->is_attached = ae_true;
}
1476
/************************************************************************
This function changes length of ae_vector.

dst                 destination vector
newsize             vector size, may be zero
state               ALGLIB environment state, can not be NULL

Error handling: calls ae_break() on allocation error

NOTES:
* vector must be initialized
* all contents is destroyed during setlength() call
* new size may be zero.
************************************************************************/
void ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    ae_assert(newsize>=0, "ae_vector_set_length(): negative size", state);
    /* no-op when size is unchanged (contents are preserved in this case) */
    if( dst->cnt==newsize )
        return;

    /* realloc, being ready for exception during reallocation (cnt=ptr=0 on entry) */
    dst->cnt = 0;
    dst->ptr.p_ptr = NULL;
    ae_db_realloc(&dst->data, newsize*ae_sizeof(dst->datatype), state);
    dst->cnt = newsize;
    dst->ptr.p_ptr = dst->data.ptr;
}
1505
1506 /************************************************************************
1507 This function resized ae_vector, preserving previously existing elements.
1508 Values of elements added during vector growth is undefined.
1509
1510 dst destination vector
1511 newsize vector size, may be zero
1512 state ALGLIB environment state, can not be NULL
1513
1514 Error handling: calls ae_break() on allocation error
1515
1516 NOTES:
1517 * vector must be initialized
1518 * new size may be zero.
1519 ************************************************************************/
ae_vector_resize(ae_vector * dst,ae_int_t newsize,ae_state * state)1520 void ae_vector_resize(ae_vector *dst, ae_int_t newsize, ae_state *state)
1521 {
1522 ae_vector tmp;
1523 ae_int_t bytes_total;
1524
1525 memset(&tmp, 0, sizeof(tmp));
1526 ae_vector_init(&tmp, newsize, dst->datatype, state, ae_false);
1527 bytes_total = (dst->cnt<newsize ? dst->cnt : newsize)*ae_sizeof(dst->datatype);
1528 if( bytes_total>0 )
1529 memmove(tmp.ptr.p_ptr, dst->ptr.p_ptr, bytes_total);
1530 ae_swap_vectors(dst, &tmp);
1531 ae_vector_clear(&tmp);
1532 }
1533
1534
1535 /************************************************************************
1536 This function provides "CLEAR" functionality for vector (contents is
1537 cleared, but structure still left in valid state).
1538
1539 The function clears vector contents (releases all dynamically allocated
1540 memory). Vector may be in automatic management list - in this case it
1541 will NOT be removed from list.
1542
1543 IMPORTANT: this function does NOT invalidates dst; it just releases all
1544 dynamically allocated storage, but dst still may be used after call to
1545 ae_vector_set_length().
1546
1547 dst destination vector
1548 ************************************************************************/
ae_vector_clear(ae_vector * dst)1549 void ae_vector_clear(ae_vector *dst)
1550 {
1551 dst->cnt = 0;
1552 ae_db_free(&dst->data);
1553 dst->ptr.p_ptr = 0;
1554 dst->is_attached = ae_false;
1555 }
1556
1557
/************************************************************************
This function provides "DESTROY" functionality for vector (contents is
cleared, all internal structures are destroyed). For vectors it  is  same
as CLEAR.

dst                 destination vector
************************************************************************/
void ae_vector_destroy(ae_vector *dst)
{
    /* vectors have no extra internal structures, so DESTROY == CLEAR */
    ae_vector_clear(dst);
}
1569
1570
1571 /************************************************************************
1572 This function efficiently swaps contents of two vectors, leaving other
1573 pararemeters (automatic management, etc.) unchanged.
1574 ************************************************************************/
ae_swap_vectors(ae_vector * vec1,ae_vector * vec2)1575 void ae_swap_vectors(ae_vector *vec1, ae_vector *vec2)
1576 {
1577 ae_int_t cnt;
1578 ae_datatype datatype;
1579 void *p_ptr;
1580
1581 ae_assert(!vec1->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
1582 ae_assert(!vec2->is_attached, "ALGLIB: internal error, attempt to swap vectors attached to X-object", NULL);
1583
1584 ae_db_swap(&vec1->data, &vec2->data);
1585
1586 cnt = vec1->cnt;
1587 datatype = vec1->datatype;
1588 p_ptr = vec1->ptr.p_ptr;
1589 vec1->cnt = vec2->cnt;
1590 vec1->datatype = vec2->datatype;
1591 vec1->ptr.p_ptr = vec2->ptr.p_ptr;
1592 vec2->cnt = cnt;
1593 vec2->datatype = datatype;
1594 vec2->ptr.p_ptr = p_ptr;
1595 }
1596
/************************************************************************
This function creates ae_matrix.

Matrix size may be zero, in such cases both rows and cols are zero.
Matrix contents is uninitialized.

dst                 destination matrix, must be zero-filled
rows                rows count
cols                cols count
datatype            element type
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, matrix will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.

NOTE: no memory allocation is performed for initialization with rows=cols=0
************************************************************************/
void ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));

    ae_assert(rows>=0 && cols>=0, "ae_matrix_init(): negative length", state);

    /* if one of rows/cols is zero, another MUST be too; perform quick exit */
    if( rows==0 || cols==0 )
    {
        dst->rows = 0;
        dst->cols = 0;
        dst->is_attached = ae_false;
        dst->ptr.pp_void = NULL;
        dst->stride = 0;
        dst->datatype = datatype;
        ae_db_init(&dst->data, 0, state, make_automatic);
        return;
    }

    /* init, being ready for exception during allocation (rows=cols=ptr=NULL on entry) */
    dst->is_attached = ae_false;
    dst->rows = 0;
    dst->cols = 0;
    dst->ptr.pp_void = NULL;
    /* pad stride (in elements) up so that each row starts on an AE_DATA_ALIGN boundary */
    dst->stride = cols;
    while( dst->stride*ae_sizeof(datatype)%AE_DATA_ALIGN!=0 )
        dst->stride++;
    dst->datatype = datatype;
    /* one block holds the row-pointer table, alignment slack, and the element storage */
    ae_db_init(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(datatype))+AE_DATA_ALIGN-1, state, make_automatic);
    dst->rows = rows;
    dst->cols = cols;
    /* element storage begins at the first aligned address past the row-pointer table */
    ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+rows*sizeof(void*),AE_DATA_ALIGN));
}
1651
1652
1653 /************************************************************************
1654 This function creates copy of ae_matrix. A new copy of the data is created.
1655
1656 dst destination matrix, must be zero-filled
1657 src well, it is source
1658 state pointer to current state structure. Can not be NULL.
1659 used for exception handling (say, allocation error results
1660 in longjmp call).
1661 make_automatic if true, matrix will be registered in the current frame
1662 of the state structure;
1663
1664 dst is assumed to be uninitialized, its fields are ignored.
1665 ************************************************************************/
ae_matrix_init_copy(ae_matrix * dst,ae_matrix * src,ae_state * state,ae_bool make_automatic)1666 void ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic)
1667 {
1668 ae_int_t i;
1669 ae_matrix_init(dst, src->rows, src->cols, src->datatype, state, make_automatic);
1670 if( src->rows!=0 && src->cols!=0 )
1671 {
1672 if( dst->stride==src->stride )
1673 memmove(dst->ptr.pp_void[0], src->ptr.pp_void[0], (size_t)(src->rows*src->stride*ae_sizeof(src->datatype)));
1674 else
1675 for(i=0; i<dst->rows; i++)
1676 memmove(dst->ptr.pp_void[i], src->ptr.pp_void[i], (size_t)(dst->cols*ae_sizeof(dst->datatype)));
1677 }
1678 }
1679
1680
1681 /************************************************************************
1682 This function initializes ae_matrix using X-structure as source. New copy
1683 of data is created, which is owned/managed by ae_matrix structure. Both
1684 structures (source and destination) remain completely independent after
1685 this call.
1686
1687 dst destination matrix, must be zero-filled
1688 src well, it is source
1689 state pointer to current state structure. Can not be NULL.
1690 used for exception handling (say, allocation error results
1691 in longjmp call).
1692 make_automatic if true, matrix will be registered in the current frame
1693 of the state structure;
1694
1695 dst is assumed to be uninitialized, its fields are ignored.
1696 ************************************************************************/
ae_matrix_init_from_x(ae_matrix * dst,x_matrix * src,ae_state * state,ae_bool make_automatic)1697 void ae_matrix_init_from_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
1698 {
1699 char *p_src_row;
1700 char *p_dst_row;
1701 ae_int_t row_size;
1702 ae_int_t i;
1703 AE_CRITICAL_ASSERT(state!=NULL);
1704 ae_matrix_init(dst, (ae_int_t)src->rows, (ae_int_t)src->cols, (ae_datatype)src->datatype, state, make_automatic);
1705 if( src->rows!=0 && src->cols!=0 )
1706 {
1707 p_src_row = (char*)src->x_ptr.p_ptr;
1708 p_dst_row = (char*)(dst->ptr.pp_void[0]);
1709 row_size = ae_sizeof((ae_datatype)src->datatype)*(ae_int_t)src->cols;
1710 for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof((ae_datatype)src->datatype), p_dst_row+=dst->stride*ae_sizeof((ae_datatype)src->datatype))
1711 memmove(p_dst_row, p_src_row, (size_t)(row_size));
1712 }
1713 }
1714
1715
/************************************************************************
This function initializes ae_matrix using X-structure as source.

New matrix is attached to source:
* DST shares memory with SRC
* both DST and SRC are writable - all writes to DST  change  elements  of
  SRC and vice versa.
* DST can be reallocated with ae_matrix_set_length(), in this case SRC
  remains untouched
* SRC, however, CAN NOT BE REALLOCATED AS LONG AS DST EXISTS

dst                 destination matrix, must be zero-filled
src                 well, it is source
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, matrix will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.
************************************************************************/
void ae_matrix_init_attach_to_x(ae_matrix *dst, x_matrix *src, ae_state *state, ae_bool make_automatic)
{
    ae_int_t rows, cols;

    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));

    rows = (ae_int_t)src->rows;
    cols = (ae_int_t)src->cols;

    /* check that X-source is densely packed (no per-row padding); attached
       matrices reuse the source stride and can not re-pack rows */
    ae_assert(src->cols==src->stride, "ae_matrix_init_attach_to_x(): unsupported stride", state);

    /* ensure that size is correct: round-trip cast detects truncation
       when x_matrix uses wider counters than ae_int_t */
    ae_assert(rows==src->rows, "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
    ae_assert(cols==src->cols, "ae_matrix_init_attach_to_x(): 32/64 overflow", state);
    ae_assert(rows>=0 && cols>=0, "ae_matrix_init_attach_to_x(): negative length", state);

    /* if one of rows/cols is zero, another MUST be too */
    if( rows==0 || cols==0 )
    {
        rows = 0;
        cols = 0;
    }

    /* init, being ready for allocation error; only the row-pointer table is
       allocated - element storage stays owned by the X-source */
    dst->is_attached = ae_true;
    dst->rows = 0;
    dst->cols = 0;
    dst->stride = cols;
    dst->datatype = (ae_datatype)src->datatype;
    dst->ptr.pp_void = NULL;
    ae_db_init(&dst->data, rows*(ae_int_t)sizeof(void*), state, make_automatic);
    dst->rows = rows;
    dst->cols = cols;
    if( dst->rows>0 && dst->cols>0 )
    {
        ae_int_t i, rowsize;
        char *p_row;
        void **pp_ptr;

        /* point each row entry into the shared (densely packed) source storage */
        p_row = (char*)src->x_ptr.p_ptr;
        rowsize = dst->stride*ae_sizeof(dst->datatype);
        pp_ptr  = (void**)dst->data.ptr;
        dst->ptr.pp_void = pp_ptr;
        for(i=0; i<dst->rows; i++, p_row+=rowsize)
            pp_ptr[i] = p_row;
    }
}
1786
1787
1788 /************************************************************************
1789 This function changes length of ae_matrix.
1790
1791 dst destination matrix
1792 rows size, may be zero
1793 cols size, may be zero
1794 state ALGLIB environment state
1795
1796 Error handling:
1797 * if state is NULL, returns ae_false on allocation error
1798 * if state is not NULL, calls ae_break() on allocation error
1799 * returns ae_true on success
1800
1801 NOTES:
1802 * matrix must be initialized
1803 * all contents is destroyed during setlength() call
1804 * new size may be zero.
1805 ************************************************************************/
void ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    ae_assert(rows>=0 && cols>=0, "ae_matrix_set_length(): negative length", state);
    /* quick exit: geometry unchanged, keep existing storage untouched */
    if( dst->rows==rows && dst->cols==cols )
        return;

    /* prepare stride: pad upward until each row starts on an
       AE_DATA_ALIGN-byte boundary */
    dst->stride = cols;
    while( dst->stride*ae_sizeof(dst->datatype)%AE_DATA_ALIGN!=0 )
        dst->stride++;

    /* realloc, being ready for an exception during reallocation (rows=cols=0 on entry) */
    dst->rows = 0;
    dst->cols = 0;
    dst->ptr.pp_void = NULL;
    /* single buffer holds: rows row-pointers, alignment padding, row data */
    ae_db_realloc(&dst->data, rows*((ae_int_t)sizeof(void*)+dst->stride*ae_sizeof(dst->datatype))+AE_DATA_ALIGN-1, state);
    dst->rows = rows;
    dst->cols = cols;

    /* update pointers to rows: row data begins at first aligned address
       past the pointer table */
    ae_matrix_update_row_pointers(dst, ae_align((char*)dst->data.ptr+dst->rows*sizeof(void*),AE_DATA_ALIGN));
}
1829
1830
1831 /************************************************************************
This function provides "CLEAR" functionality for matrix (contents is
cleared, but structure still left in valid state).
1834
1835 The function clears matrix contents (releases all dynamically allocated
1836 memory). Matrix may be in automatic management list - in this case it
1837 will NOT be removed from list.
1838
IMPORTANT: this function does NOT invalidate dst; it just releases all
dynamically allocated storage, and dst still may be used after a call to
ae_matrix_set_length().
1842
1843 dst destination matrix
1844 ************************************************************************/
void ae_matrix_clear(ae_matrix *dst)
{
    /*
     * Reset the matrix to an empty-but-valid state: zero the geometry,
     * drop attachment status, and release dynamically allocated storage.
     * The structure itself remains usable (e.g. via ae_matrix_set_length).
     */
    dst->rows   = 0;
    dst->cols   = 0;
    dst->stride = 0;
    dst->is_attached = ae_false;
    dst->ptr.p_ptr = 0;
    ae_db_free(&dst->data);
}
1854
1855
1856 /************************************************************************
1857 This function provides "DESTROY" functionality for matrix (contents is
1858 cleared, but structure still left in valid state).
1859
1860 For matrices it is same as CLEAR.
1861
1862 dst destination matrix
1863 ************************************************************************/
void ae_matrix_destroy(ae_matrix *dst)
{
    /* for matrices, destruction is exactly the same operation as clearing */
    ae_matrix_clear(dst);
}
1868
1869
1870 /************************************************************************
This function efficiently swaps contents of two matrices, leaving other
parameters (automatic management, etc.) unchanged.
1873 ************************************************************************/
void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2)
{
    ae_int_t    tmp_int;
    ae_datatype tmp_dt;
    void       *tmp_ptr;

    /* attached matrices share storage with an X-object; swapping them
       would desynchronize the X-side view, so it is forbidden */
    ae_assert(!mat1->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);
    ae_assert(!mat2->is_attached, "ALGLIB: internal error, attempt to swap matrices attached to X-object", NULL);

    /* swap underlying dynamic blocks, then exchange descriptors field by field */
    ae_db_swap(&mat1->data, &mat2->data);

    tmp_int = mat1->rows;     mat1->rows     = mat2->rows;     mat2->rows     = tmp_int;
    tmp_int = mat1->cols;     mat1->cols     = mat2->cols;     mat2->cols     = tmp_int;
    tmp_int = mat1->stride;   mat1->stride   = mat2->stride;   mat2->stride   = tmp_int;
    tmp_dt  = mat1->datatype; mat1->datatype = mat2->datatype; mat2->datatype = tmp_dt;
    tmp_ptr = mat1->ptr.p_ptr; mat1->ptr.p_ptr = mat2->ptr.p_ptr; mat2->ptr.p_ptr = tmp_ptr;
}
1905
1906
1907 /************************************************************************
1908 This function creates smart pointer structure.
1909
1910 dst destination smart pointer, must be zero-filled
1911 subscriber pointer to pointer which receives updates in the
1912 internal object stored in ae_smart_ptr. Any update to
1913 dst->ptr is translated to subscriber. Can be NULL.
1914 state pointer to current state structure. Can not be NULL.
1915 used for exception handling (say, allocation error results
1916 in longjmp call).
1917 make_automatic if true, pointer will be registered in the current frame
1918 of the state structure;
1919
1920 Error handling:
1921 * on failure calls ae_break() with NULL state pointer. Usually it results
1922 in abort() call.
1923
1924 After initialization, smart pointer stores NULL pointer.
1925 ************************************************************************/
void ae_smart_ptr_init(ae_smart_ptr *dst, void **subscriber, ae_state *state, ae_bool make_automatic)
{
    AE_CRITICAL_ASSERT(state!=NULL);
    AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
    dst->subscriber = subscriber;
    dst->ptr = NULL;
    /* subscriber mirrors every change of dst->ptr; publish initial NULL */
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = dst->ptr;
    dst->is_owner = ae_false;
    dst->is_dynamic = ae_false;
    /* frame entry must be fully set up BEFORE optional registration below */
    dst->frame_entry.deallocator = ae_smart_ptr_destroy;
    dst->frame_entry.ptr = dst;
    if( make_automatic )
        ae_db_attach(&dst->frame_entry, state);
}
1941
1942
1943 /************************************************************************
1944 This function clears smart pointer structure.
1945
1946 dst destination smart pointer.
1947
After call to this function smart pointer contains NULL reference, which
is propagated to its subscriber (in case a non-NULL subscriber was
specified during pointer creation).
1951 ************************************************************************/
ae_smart_ptr_clear(void * _dst)1952 void ae_smart_ptr_clear(void *_dst)
1953 {
1954 ae_smart_ptr *dst = (ae_smart_ptr*)_dst;
1955 if( dst->is_owner && dst->ptr!=NULL )
1956 {
1957 dst->destroy(dst->ptr);
1958 if( dst->is_dynamic )
1959 ae_free(dst->ptr);
1960 }
1961 dst->is_owner = ae_false;
1962 dst->is_dynamic = ae_false;
1963 dst->ptr = NULL;
1964 dst->destroy = NULL;
1965 if( dst->subscriber!=NULL )
1966 *(dst->subscriber) = NULL;
1967 }
1968
1969
1970 /************************************************************************
This function destroys smart pointer structure (same as clearing it).
1972
1973 dst destination smart pointer.
1974 ************************************************************************/
void ae_smart_ptr_destroy(void *_dst)
{
    /* destruction of a smart pointer is identical to clearing it */
    ae_smart_ptr_clear(_dst);
}
1979
1980
1981 /************************************************************************
1982 This function assigns pointer to ae_smart_ptr structure.
1983
1984 dst destination smart pointer.
1985 new_ptr new pointer to assign
1986 is_owner whether smart pointer owns new_ptr
1987 is_dynamic whether object is dynamic - clearing such object
1988 requires BOTH calling destructor function AND calling
1989 ae_free() for memory occupied by object.
1990 destroy destructor function
1991
1992 In case smart pointer already contains non-NULL value and owns this value,
1993 it is freed before assigning new pointer.
1994
1995 Changes in pointer are propagated to its subscriber (in case non-NULL
1996 subscriber was specified during pointer creation).
1997
1998 You can specify NULL new_ptr, in which case is_owner/destroy are ignored.
1999 ************************************************************************/
void ae_smart_ptr_assign(ae_smart_ptr *dst, void *new_ptr, ae_bool is_owner, ae_bool is_dynamic, void (*destroy)(void*))
{
    ae_bool nonnull = new_ptr!=NULL;

    /* release the currently owned object first */
    if( dst->ptr!=NULL && dst->is_owner )
    {
        dst->destroy(dst->ptr);
        if( dst->is_dynamic )
            ae_free(dst->ptr);
    }

    /* store new pointer; ownership flags and destructor are meaningful
       only for a non-NULL pointer and are reset otherwise */
    dst->ptr        = nonnull ? new_ptr    : NULL;
    dst->is_owner   = nonnull ? is_owner   : ae_false;
    dst->is_dynamic = nonnull ? is_dynamic : ae_false;
    dst->destroy    = nonnull ? destroy    : NULL;

    /* propagate the change to the subscriber, if present */
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = dst->ptr;
}
2025
2026
2027 /************************************************************************
2028 This function releases pointer owned by ae_smart_ptr structure:
2029 * all internal fields are set to NULL
2030 * destructor function for internal pointer is NOT called even when we own
2031 this pointer. After this call ae_smart_ptr releases ownership of its
2032 pointer and passes it to caller.
2033 * changes in pointer are propagated to its subscriber (in case non-NULL
2034 subscriber was specified during pointer creation).
2035
2036 dst destination smart pointer.
2037 ************************************************************************/
void ae_smart_ptr_release(ae_smart_ptr *dst)
{
    /* drop ownership WITHOUT invoking the destructor: the caller takes
       over the pointee; subscriber is informed that the pointer is gone */
    dst->is_dynamic = ae_false;
    dst->is_owner   = ae_false;
    dst->destroy    = NULL;
    dst->ptr        = NULL;
    if( dst->subscriber!=NULL )
        *(dst->subscriber) = NULL;
}
2047
2048 /************************************************************************
2049 This function copies contents of ae_vector (SRC) to x_vector (DST).
2050
2051 This function should not be called for DST which is attached to SRC
2052 (opposite situation, when SRC is attached to DST, is possible).
2053
2054 Depending on situation, following actions are performed
2055 * for SRC attached to DST, this function performs no actions (no need to
2056 do anything)
2057 * for independent vectors of different sizes it allocates storage in DST
2058 and copy contents of SRC to DST. DST->last_action field is set to
2059 ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
2060 * for independent vectors of same sizes it does not perform memory
2061 (re)allocation. It just copies SRC to already existing place.
2062 DST->last_action is set to ACT_SAME_LOCATION (unless it was
2063 ACT_NEW_LOCATION), DST->owner is unmodified.
2064
2065 dst destination vector
2066 src source, vector in x-format
2067 state ALGLIB environment state
2068
2069 NOTES:
2070 * dst is assumed to be initialized. Its contents is freed before copying
2071 data from src (if size / type are different) or overwritten (if
2072 possible given destination size).
2073 ************************************************************************/
void ae_x_set_vector(x_vector *dst, ae_vector *src, ae_state *state)
{
    size_t bytes;

    /* attached pair: src already shares storage with dst, nothing to copy */
    if( src->ptr.p_ptr == dst->x_ptr.p_ptr )
        return;

    bytes = (size_t)(src->cnt*ae_sizeof(src->datatype));
    if( dst->cnt!=src->cnt || dst->datatype!=src->datatype )
    {
        /* size/type mismatch: release owned storage and reallocate */
        if( dst->owner==OWN_AE )
            ae_free(dst->x_ptr.p_ptr);
        dst->x_ptr.p_ptr = ae_malloc(bytes, state);
        if( src->cnt!=0 && dst->x_ptr.p_ptr==NULL )
            ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
        dst->last_action = ACT_NEW_LOCATION;
        dst->cnt = src->cnt;
        dst->datatype = src->datatype;
        dst->owner = OWN_AE;
    }
    else
    {
        /* same size/type: reuse storage in place; UNCHANGED is promoted to
           SAME_LOCATION, SAME_LOCATION/NEW_LOCATION are left as they are */
        if( dst->last_action==ACT_UNCHANGED )
            dst->last_action = ACT_SAME_LOCATION;
        else
            ae_assert(
                dst->last_action==ACT_SAME_LOCATION || dst->last_action==ACT_NEW_LOCATION,
                "ALGLIB: internal error in ae_x_set_vector()", state);
    }
    if( src->cnt )
        memmove(dst->x_ptr.p_ptr, src->ptr.p_ptr, bytes);
}
2107
2108 /************************************************************************
2109 This function copies contents of ae_matrix to x_matrix.
2110
2111 This function should not be called for DST which is attached to SRC
2112 (opposite situation, when SRC is attached to DST, is possible).
2113
2114 Depending on situation, following actions are performed
2115 * for SRC attached to DST, this function performs no actions (no need to
2116 do anything)
2117 * for independent matrices of different sizes it allocates storage in DST
2118 and copy contents of SRC to DST. DST->last_action field is set to
2119 ACT_NEW_LOCATION, and DST->owner is set to OWN_AE.
2120 * for independent matrices of same sizes it does not perform memory
2121 (re)allocation. It just copies SRC to already existing place.
2122 DST->last_action is set to ACT_SAME_LOCATION (unless it was
2123 ACT_NEW_LOCATION), DST->owner is unmodified.
2124
dst                 destination matrix
2126 src source, matrix in x-format
2127 state ALGLIB environment state
2128
2129 NOTES:
2130 * dst is assumed to be initialized. Its contents is freed before copying
2131 data from src (if size / type are different) or overwritten (if
2132 possible given destination size).
2133 ************************************************************************/
void ae_x_set_matrix(x_matrix *dst, ae_matrix *src, ae_state *state)
{
    char *p_src_row;
    char *p_dst_row;
    ae_int_t i;
    ae_int_t row_size;
    /* attached pair: first row of src aliases dst's storage, nothing to copy */
    if( src->ptr.pp_void!=NULL && src->ptr.pp_void[0] == dst->x_ptr.p_ptr )
    {
        /* src->ptr points to the beginning of dst, attached matrices, no need to copy */
        return;
    }
    if( dst->rows!=src->rows || dst->cols!=src->cols || dst->datatype!=src->datatype )
    {
        /* shape/type mismatch: release owned storage and allocate a new
           densely packed (stride==cols) block owned by ALGLIB */
        if( dst->owner==OWN_AE )
            ae_free(dst->x_ptr.p_ptr);
        dst->rows = src->rows;
        dst->cols = src->cols;
        dst->stride = src->cols;
        dst->datatype = src->datatype;
        dst->x_ptr.p_ptr = ae_malloc((size_t)(dst->rows*((ae_int_t)dst->stride)*ae_sizeof(src->datatype)), state);
        if( dst->rows!=0 && dst->stride!=0 && dst->x_ptr.p_ptr==NULL )
            ae_break(state, ERR_OUT_OF_MEMORY, "ae_malloc(): out of memory");
        dst->last_action = ACT_NEW_LOCATION;
        dst->owner = OWN_AE;
    }
    else
    {
        /* same shape/type: reuse storage in place, only update action flag */
        if( dst->last_action==ACT_UNCHANGED )
            dst->last_action = ACT_SAME_LOCATION;
        else if( dst->last_action==ACT_SAME_LOCATION )
            dst->last_action = ACT_SAME_LOCATION;
        else if( dst->last_action==ACT_NEW_LOCATION )
            dst->last_action = ACT_NEW_LOCATION;
        else
            ae_assert(ae_false, "ALGLIB: internal error in ae_x_set_matrix()", state); /* fixed: message used to say ae_x_set_vector() */
    }
    /* copy row by row: src may have stride>cols, dst is densely packed */
    if( src->rows!=0 && src->cols!=0 )
    {
        p_src_row = (char*)(src->ptr.pp_void[0]);
        p_dst_row = (char*)dst->x_ptr.p_ptr;
        row_size = ae_sizeof(src->datatype)*src->cols;
        for(i=0; i<src->rows; i++, p_src_row+=src->stride*ae_sizeof(src->datatype), p_dst_row+=dst->stride*ae_sizeof(src->datatype))
            memmove(p_dst_row, p_src_row, (size_t)(row_size));
    }
}
2179
2180 /************************************************************************
2181 This function attaches x_vector to ae_vector's contents.
2182 Ownership of memory allocated is not changed (it is still managed by
2183 ae_matrix).
2184
2185 dst destination vector
2186 src source, vector in x-format
2187 state ALGLIB environment state
2188
2189 NOTES:
2190 * dst is assumed to be initialized. Its contents is freed before
2191 attaching to src.
2192 * this function doesn't need ae_state parameter because it can't fail
2193 (assuming correctly initialized src)
2194 ************************************************************************/
void ae_x_attach_to_vector(x_vector *dst, ae_vector *src)
{
    /* release any storage dst currently owns before re-pointing it */
    if( dst->owner==OWN_AE )
        ae_free(dst->x_ptr.p_ptr);

    /* alias src's storage; ownership stays with the ae_vector side */
    dst->cnt         = src->cnt;
    dst->datatype    = src->datatype;
    dst->x_ptr.p_ptr = src->ptr.p_ptr;
    dst->owner       = OWN_CALLER;
    dst->last_action = ACT_NEW_LOCATION;
}
2205
2206 /************************************************************************
2207 This function attaches x_matrix to ae_matrix's contents.
2208 Ownership of memory allocated is not changed (it is still managed by
2209 ae_matrix).
2210
dst                 destination matrix
2212 src source, matrix in x-format
2213 state ALGLIB environment state
2214
2215 NOTES:
2216 * dst is assumed to be initialized. Its contents is freed before
2217 attaching to src.
2218 * this function doesn't need ae_state parameter because it can't fail
2219 (assuming correctly initialized src)
2220 ************************************************************************/
void ae_x_attach_to_matrix(x_matrix *dst, ae_matrix *src)
{
    /* release any storage dst currently owns before re-pointing it */
    if( dst->owner==OWN_AE )
        ae_free(dst->x_ptr.p_ptr);
    dst->rows = src->rows;
    dst->cols = src->cols;
    dst->stride = src->stride;
    dst->datatype = src->datatype;
    /* NOTE(review): dereferences the row-pointer table unconditionally;
       presumably src is guaranteed non-empty here (pp_double!=NULL) -
       confirm callers never pass a 0x0 matrix */
    dst->x_ptr.p_ptr = &(src->ptr.pp_double[0][0]);
    dst->last_action = ACT_NEW_LOCATION;
    /* memory is still owned (and eventually freed) by the ae_matrix side */
    dst->owner = OWN_CALLER;
}
2233
2234 /************************************************************************
2235 This function clears x_vector. It does nothing if vector is not owned by
2236 ALGLIB environment.
2237
2238 dst vector
2239 ************************************************************************/
void x_vector_clear(x_vector *dst)
{
    /* free storage only when it is owned by the ALGLIB environment;
       caller-owned storage is merely detached */
    if( dst->owner==OWN_AE )
        aligned_free(dst->x_ptr.p_ptr);
    /* NOTE(review): frees with aligned_free() while ae_x_set_vector()
       allocates with ae_malloc(); presumably ae_malloc() is backed by the
       aligned allocator - verify the allocator pairing */
    dst->x_ptr.p_ptr = NULL;
    dst->cnt = 0;
}
2247
2248 /************************************************************************
2249 Assertion
2250
2251 For non-NULL state it allows to gracefully leave ALGLIB session,
2252 removing all frames and deallocating registered dynamic data structure.
2253
2254 For NULL state it just abort()'s program.
2255
2256 IMPORTANT: this function ALWAYS evaluates its argument. It can not be
2257 replaced by macro which does nothing. So, you may place actual
2258 function calls at cond, and these will always be performed.
2259 ************************************************************************/
void ae_assert(ae_bool cond, const char *msg, ae_state *state)
{
    /* NOTE: cond is ALWAYS evaluated - this is a real function, not a
       macro that can be compiled out */
    if( cond )
        return;
    ae_break(state, ERR_ASSERTION_FAILED, msg);
}
2265
2266 /************************************************************************
2267 CPUID
2268
2269 Returns information about features CPU and compiler support.
2270
2271 You must tell ALGLIB what CPU family is used by defining AE_CPU symbol
2272 (without this hint zero will be returned).
2273
2274 Note: results of this function depend on both CPU and compiler;
2275 if compiler doesn't support SSE intrinsics, function won't set
2276 corresponding flag.
2277 ************************************************************************/
/* cached feature-detection results; written at most once per process */
static volatile ae_bool _ae_cpuid_initialized = ae_false;
static volatile ae_bool _ae_cpuid_has_sse2 = ae_false;
static volatile ae_bool _ae_cpuid_has_avx2 = ae_false;
static volatile ae_bool _ae_cpuid_has_fma = ae_false;
ae_int_t ae_cpuid()
{
    /*
     * to speed up CPU detection we cache results from previous attempts
     * there is no synchronization, but it is still thread safe.
     *
     * thread safety is guaranteed on all modern architectures which
     * have following property: simultaneous writes by different cores
     * to the same location will be executed in serial manner.
     *
     */
    ae_int_t result;
    
    /*
     * if not initialized, determine system properties
     */
    if( !_ae_cpuid_initialized )
    {
        /*
         * SSE2
         */
#if defined(AE_CPU)
#if (AE_CPU==AE_INTEL)
#if AE_COMPILER==AE_MSVC
        {
            /* SSE2 support */
#if defined(_ALGLIB_HAS_SSE2_INTRINSICS)
            int CPUInfo[4];
            __cpuid(CPUInfo, 1);
            if( (CPUInfo[3]&0x04000000)!=0 )
                _ae_cpuid_has_sse2 = ae_true;
#endif
            
            /* check OS support for XSAVE XGETBV */
#if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
            __cpuid(CPUInfo, 1);
            if( (CPUInfo[2]&(0x1<<27))!=0 )
                if( (_xgetbv(0)&0x6)==0x6 )
                {
                    /* AVX2 support */
#if defined(_ALGLIB_HAS_AVX2_INTRINSICS) && (_MSC_VER>=1600)
                    if( _ae_cpuid_has_sse2 )
                    {
                        __cpuidex(CPUInfo, 7, 0);
                        if( (CPUInfo[1]&(0x1<<5))!=0 )
                            _ae_cpuid_has_avx2 = ae_true;
                    }
#endif
                    
                    /* FMA support */
#if defined(_ALGLIB_HAS_FMA_INTRINSICS) && (_MSC_VER>=1600)
                    if( _ae_cpuid_has_avx2 )
                    {
                        __cpuid(CPUInfo, 1);
                        if( (CPUInfo[2]&(0x1<<12))!=0 )
                            _ae_cpuid_has_fma = ae_true;
                    }
#endif
                }
#endif
        }
#elif AE_COMPILER==AE_GNUC
        {
            ae_int_t a,b,c,d;
            
            /* SSE2 support */
#if defined(_ALGLIB_HAS_SSE2_INTRINSICS)
            __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
            if( (d&0x04000000)!=0 )
                _ae_cpuid_has_sse2 = ae_true;
#endif
            
            /* check OS support for XSAVE XGETBV */
#if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
            __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
            if( (c&(0x1<<27))!=0 )
            {
                __asm__ volatile ("xgetbv" : "=a" (a), "=d" (d) : "c" (0));
                if( (a&0x6)==0x6 )
                {
                    /* AVX2 support */
#if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
                    if( _ae_cpuid_has_sse2 )
                    {
                        __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (7), "c" (0) );
                        if( (b&(0x1<<5))!=0 )
                            _ae_cpuid_has_avx2 = ae_true;
                    }
#endif
                    
                    /* FMA support */
#if defined(_ALGLIB_HAS_FMA_INTRINSICS)
                    if( _ae_cpuid_has_avx2 )
                    {
                        __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1) );
                        if( (c&(0x1<<12))!=0 )
                            _ae_cpuid_has_fma = ae_true;
                    }
#endif
                }
            }
#endif
        }
#elif AE_COMPILER==AE_SUNC
        {
            ae_int_t a,b,c,d;
            __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1));
            if( (d&0x04000000)!=0 )
                _ae_cpuid_has_sse2 = ae_true;
        }
#else
#endif
#endif
#endif
        /*
         * Perform one more CPUID call to generate memory fence
         */
#if AE_CPU==AE_INTEL
#if AE_COMPILER==AE_MSVC
        { int CPUInfo[4]; __cpuid(CPUInfo, 1); }
#elif AE_COMPILER==AE_GNUC
        { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
#elif AE_COMPILER==AE_SUNC
        { ae_int_t a,b,c,d; __asm__ __volatile__ ("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (1)); }
#else
#endif
#endif
        
        /*
         * set initialization flag (published AFTER the feature flags above)
         */
        _ae_cpuid_initialized = ae_true;
    }
    
    /*
     * return: pack cached flags into a bitmask
     */
    result = 0;
    if( _ae_cpuid_has_sse2 )
        result = result|CPU_SSE2;
    if( _ae_cpuid_has_avx2 )
        result = result|CPU_AVX2;
    if( _ae_cpuid_has_fma )
        result = result|CPU_FMA;
    return result;
}
2428
2429 /************************************************************************
2430 Activates tracing to file
2431
2432 IMPORTANT: this function is NOT thread-safe! Calling it from multiple
2433 threads will result in undefined behavior. Calling it when
2434 some thread calls ALGLIB functions may result in undefined
2435 behavior.
2436 ************************************************************************/
ae_trace_file(const char * tags,const char * filename)2437 void ae_trace_file(const char *tags, const char *filename)
2438 {
2439 /*
2440 * clean up previous call
2441 */
2442 if( alglib_fclose_trace )
2443 {
2444 if( alglib_trace_file!=NULL )
2445 fclose(alglib_trace_file);
2446 alglib_trace_file = NULL;
2447 alglib_fclose_trace = ae_false;
2448 }
2449
2450 /*
2451 * store ",tags," to buffer. Leading and trailing commas allow us
2452 * to perform checks for various tags by simply calling strstr().
2453 */
2454 memset(alglib_trace_tags, 0, ALGLIB_TRACE_BUFFER_LEN);
2455 strcat(alglib_trace_tags, ",");
2456 strncat(alglib_trace_tags, tags, ALGLIB_TRACE_TAGS_LEN);
2457 strcat(alglib_trace_tags, ",");
2458 for(int i=0; alglib_trace_tags[i]!=0; i++)
2459 alglib_trace_tags[i] = tolower(alglib_trace_tags[i]);
2460
2461 /*
2462 * set up trace
2463 */
2464 alglib_trace_type = ALGLIB_TRACE_FILE;
2465 alglib_trace_file = fopen(filename, "ab");
2466 alglib_fclose_trace = ae_true;
2467 }
2468
2469 /************************************************************************
2470 Disables tracing
2471 ************************************************************************/
ae_trace_disable()2472 void ae_trace_disable()
2473 {
2474 alglib_trace_type = ALGLIB_TRACE_NONE;
2475 if( alglib_fclose_trace )
2476 fclose(alglib_trace_file);
2477 alglib_trace_file = NULL;
2478 alglib_fclose_trace = ae_false;
2479 }
2480
2481 /************************************************************************
2482 Checks whether specific kind of tracing is enabled
2483 ************************************************************************/
ae_is_trace_enabled(const char * tag)2484 ae_bool ae_is_trace_enabled(const char *tag)
2485 {
2486 char buf[ALGLIB_TRACE_BUFFER_LEN];
2487
2488 /* check global trace status */
2489 if( alglib_trace_type==ALGLIB_TRACE_NONE || alglib_trace_file==NULL )
2490 return ae_false;
2491
2492 /* copy tag to buffer, lowercase it */
2493 memset(buf, 0, ALGLIB_TRACE_BUFFER_LEN);
2494 strcat(buf, ",");
2495 strncat(buf, tag, ALGLIB_TRACE_TAGS_LEN);
2496 strcat(buf, "?");
2497 for(int i=0; buf[i]!=0; i++)
2498 buf[i] = tolower(buf[i]);
2499
2500 /* contains tag (followed by comma, which means exact match) */
2501 buf[strlen(buf)-1] = ',';
2502 if( strstr(alglib_trace_tags,buf)!=NULL )
2503 return ae_true;
2504
2505 /* contains tag (followed by dot, which means match with child) */
2506 buf[strlen(buf)-1] = '.';
2507 if( strstr(alglib_trace_tags,buf)!=NULL )
2508 return ae_true;
2509
2510 /* nothing */
2511 return ae_false;
2512 }
2513
ae_trace(const char * printf_fmt,...)2514 void ae_trace(const char * printf_fmt, ...)
2515 {
2516 /* check global trace status */
2517 if( alglib_trace_type==ALGLIB_TRACE_FILE && alglib_trace_file!=NULL )
2518 {
2519 va_list args;
2520
2521 /* fprintf() */
2522 va_start(args, printf_fmt);
2523 vfprintf(alglib_trace_file, printf_fmt, args);
2524 va_end(args);
2525
2526 /* flush output */
2527 fflush(alglib_trace_file);
2528 }
2529 }
2530
int ae_tickcount()
{
#if AE_OS==AE_WINDOWS || defined(AE_DEBUG4WINDOWS)
    return (int)GetTickCount();
#elif AE_OS==AE_POSIX || defined(AE_DEBUG4POSIX)
    /* millisecond counter built from gettimeofday(); like GetTickCount()
       it wraps, so callers should only rely on differences */
    struct timeval now;
    ae_int64_t r, v;
    gettimeofday(&now, NULL);
    v = now.tv_sec;
    r = v*1000;
    v = now.tv_usec/1000;
    r = r+v;
    return (int)r; /* explicit cast: was an implicit int64->int truncation */
    /*struct timespec now;
    if (clock_gettime(CLOCK_MONOTONIC, &now) )
        return 0;
    return now.tv_sec * 1000.0 + now.tv_nsec / 1000000.0;*/
#else
    /* no supported clock on this platform */
    return 0;
#endif
}
2552
2553
2554 /************************************************************************
2555 Real math functions
2556 ************************************************************************/
ae_fp_eq(double v1,double v2)2557 ae_bool ae_fp_eq(double v1, double v2)
2558 {
2559 /* IEEE-strict floating point comparison */
2560 volatile double x = v1;
2561 volatile double y = v2;
2562 return x==y;
2563 }
2564
ae_fp_neq(double v1,double v2)2565 ae_bool ae_fp_neq(double v1, double v2)
2566 {
2567 /* IEEE-strict floating point comparison */
2568 return !ae_fp_eq(v1,v2);
2569 }
2570
ae_fp_less(double v1,double v2)2571 ae_bool ae_fp_less(double v1, double v2)
2572 {
2573 /* IEEE-strict floating point comparison */
2574 volatile double x = v1;
2575 volatile double y = v2;
2576 return x<y;
2577 }
2578
ae_fp_less_eq(double v1,double v2)2579 ae_bool ae_fp_less_eq(double v1, double v2)
2580 {
2581 /* IEEE-strict floating point comparison */
2582 volatile double x = v1;
2583 volatile double y = v2;
2584 return x<=y;
2585 }
2586
ae_fp_greater(double v1,double v2)2587 ae_bool ae_fp_greater(double v1, double v2)
2588 {
2589 /* IEEE-strict floating point comparison */
2590 volatile double x = v1;
2591 volatile double y = v2;
2592 return x>y;
2593 }
2594
ae_fp_greater_eq(double v1,double v2)2595 ae_bool ae_fp_greater_eq(double v1, double v2)
2596 {
2597 /* IEEE-strict floating point comparison */
2598 volatile double x = v1;
2599 volatile double y = v2;
2600 return x>=y;
2601 }
2602
ae_isfinite_stateless(double x,ae_int_t endianness)2603 ae_bool ae_isfinite_stateless(double x, ae_int_t endianness)
2604 {
2605 union _u
2606 {
2607 double a;
2608 ae_int32_t p[2];
2609 } u;
2610 ae_int32_t high;
2611 u.a = x;
2612 if( endianness==AE_LITTLE_ENDIAN )
2613 high = u.p[1];
2614 else
2615 high = u.p[0];
2616 return (high & (ae_int32_t)0x7FF00000)!=(ae_int32_t)0x7FF00000;
2617 }
2618
ae_isnan_stateless(double x,ae_int_t endianness)2619 ae_bool ae_isnan_stateless(double x, ae_int_t endianness)
2620 {
2621 union _u
2622 {
2623 double a;
2624 ae_int32_t p[2];
2625 } u;
2626 ae_int32_t high, low;
2627 u.a = x;
2628 if( endianness==AE_LITTLE_ENDIAN )
2629 {
2630 high = u.p[1];
2631 low = u.p[0];
2632 }
2633 else
2634 {
2635 high = u.p[0];
2636 low = u.p[1];
2637 }
2638 return ((high &0x7FF00000)==0x7FF00000) && (((high &0x000FFFFF)!=0) || (low!=0));
2639 }
2640
ae_isinf_stateless(double x,ae_int_t endianness)2641 ae_bool ae_isinf_stateless(double x, ae_int_t endianness)
2642 {
2643 union _u
2644 {
2645 double a;
2646 ae_int32_t p[2];
2647 } u;
2648 ae_int32_t high, low;
2649 u.a = x;
2650 if( endianness==AE_LITTLE_ENDIAN )
2651 {
2652 high = u.p[1];
2653 low = u.p[0];
2654 }
2655 else
2656 {
2657 high = u.p[0];
2658 low = u.p[1];
2659 }
2660
2661 /* 31 least significant bits of high are compared */
2662 return ((high&0x7FFFFFFF)==0x7FF00000) && (low==0);
2663 }
2664
ae_isposinf_stateless(double x,ae_int_t endianness)2665 ae_bool ae_isposinf_stateless(double x, ae_int_t endianness)
2666 {
2667 union _u
2668 {
2669 double a;
2670 ae_int32_t p[2];
2671 } u;
2672 ae_int32_t high, low;
2673 u.a = x;
2674 if( endianness==AE_LITTLE_ENDIAN )
2675 {
2676 high = u.p[1];
2677 low = u.p[0];
2678 }
2679 else
2680 {
2681 high = u.p[0];
2682 low = u.p[1];
2683 }
2684
2685 /* all 32 bits of high are compared */
2686 return (high==(ae_int32_t)0x7FF00000) && (low==0);
2687 }
2688
ae_isneginf_stateless(double x,ae_int_t endianness)2689 ae_bool ae_isneginf_stateless(double x, ae_int_t endianness)
2690 {
2691 union _u
2692 {
2693 double a;
2694 ae_int32_t p[2];
2695 } u;
2696 ae_int32_t high, low;
2697 u.a = x;
2698 if( endianness==AE_LITTLE_ENDIAN )
2699 {
2700 high = u.p[1];
2701 low = u.p[0];
2702 }
2703 else
2704 {
2705 high = u.p[0];
2706 low = u.p[1];
2707 }
2708
2709 /* this code is a bit tricky to avoid comparison of high with 0xFFF00000, which may be unsafe with some buggy compilers */
2710 return ((high&0x7FFFFFFF)==0x7FF00000) && (high!=(ae_int32_t)0x7FF00000) && (low==0);
2711 }
2712
ae_get_endianness()2713 ae_int_t ae_get_endianness()
2714 {
2715 union
2716 {
2717 double a;
2718 ae_int32_t p[2];
2719 } u;
2720
2721 /*
2722 * determine endianness
2723 * two types are supported: big-endian and little-endian.
2724 * mixed-endian hardware is NOT supported.
2725 *
2726 * 1983 is used as magic number because its non-periodic double
2727 * representation allow us to easily distinguish between upper
2728 * and lower halfs and to detect mixed endian hardware.
2729 *
2730 */
2731 u.a = 1.0/1983.0;
2732 if( u.p[1]==(ae_int32_t)0x3f408642 )
2733 return AE_LITTLE_ENDIAN;
2734 if( u.p[0]==(ae_int32_t)0x3f408642 )
2735 return AE_BIG_ENDIAN;
2736 return AE_MIXED_ENDIAN;
2737 }
2738
/*
 * Stateful wrappers around the *_stateless IEEE classification functions.
 * Each simply forwards to its stateless counterpart, fetching the cached
 * endianness from the ALGLIB environment state.
 */
ae_bool ae_isfinite(double x,ae_state *state)
{
    return ae_isfinite_stateless(x, state->endianness);
}

ae_bool ae_isnan(double x, ae_state *state)
{
    return ae_isnan_stateless(x, state->endianness);
}

ae_bool ae_isinf(double x, ae_state *state)
{
    return ae_isinf_stateless(x, state->endianness);
}

ae_bool ae_isposinf(double x,ae_state *state)
{
    return ae_isposinf_stateless(x, state->endianness);
}

ae_bool ae_isneginf(double x,ae_state *state)
{
    return ae_isneginf_stateless(x, state->endianness);
}
2763
/* |x| for reals; thin wrapper over fabs(). The state argument is unused. */
double ae_fabs(double x, ae_state *state)
{
    return fabs(x);
}

/* |x| for integers. NOTE: overflows (UB) for the most negative ae_int_t value. */
ae_int_t ae_iabs(ae_int_t x, ae_state *state)
{
    return x>=0 ? x : -x;
}

/* x squared */
double ae_sqr(double x, ae_state *state)
{
    return x*x;
}

/* square root; thin wrapper over sqrt() */
double ae_sqrt(double x, ae_state *state)
{
    return sqrt(x);
}

/* sign of x: +1, -1 or 0 (NaN yields 0 since both comparisons fail) */
ae_int_t ae_sign(double x, ae_state *state)
{
    if( x>0 ) return 1;
    if( x<0 ) return -1;
    return 0;
}

/* rounds via floor(x+0.5), i.e. halfway cases go toward +infinity */
ae_int_t ae_round(double x, ae_state *state)
{
    return (ae_int_t)(ae_ifloor(x+0.5,state));
}

/* truncation toward zero: floor for positive x, ceil otherwise */
ae_int_t ae_trunc(double x, ae_state *state)
{
    return (ae_int_t)(x>0 ? ae_ifloor(x,state) : ae_iceil(x,state));
}

/* floor(x), cast to ae_int_t */
ae_int_t ae_ifloor(double x, ae_state *state)
{
    return (ae_int_t)(floor(x));
}

/* ceil(x), cast to ae_int_t */
ae_int_t ae_iceil(double x, ae_state *state)
{
    return (ae_int_t)(ceil(x));
}
2810
/* integer maximum of m1 and m2 */
ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state)
{
    return m1>m2 ? m1 : m2;
}

/* integer minimum of m1 and m2 */
ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state)
{
    return m1>m2 ? m2 : m1;
}

/* real maximum of m1 and m2 (NaN handling follows from the > comparison) */
double ae_maxreal(double m1, double m2, ae_state *state)
{
    return m1>m2 ? m1 : m2;
}

/* real minimum of m1 and m2 */
double ae_minreal(double m1, double m2, ae_state *state)
{
    return m1>m2 ? m2 : m1;
}
2830
/*
 * Returns a uniform pseudorandom real in [0,1), combining two rand()
 * calls so that more than RAND_MAX+1 distinct values are produced:
 * result = (i1 + i2/mx)/mx with mx = RAND_MAX+1.
 *
 * NOTE(review): the volatile temporaries presumably force intermediate
 * results to be rounded to double precision (avoiding extended-precision
 * registers producing a result of exactly 1.0) -- TODO confirm.
 */
double ae_randomreal(ae_state *state)
{
    int i1 = rand();
    int i2 = rand();
    double mx = (double)(RAND_MAX)+1.0;
    volatile double tmp0 = i2/mx;
    volatile double tmp1 = i1+tmp0;
    return tmp1/mx;
}
2840
/*
 * Returns a pseudorandom integer in [0,maxv).
 *
 * NOTE(review): requires maxv>0 (maxv==0 divides by zero), and
 * rand()%maxv is slightly biased when maxv does not evenly divide
 * RAND_MAX+1 -- presumably acceptable for ALGLIB's internal uses.
 */
ae_int_t ae_randominteger(ae_int_t maxv, ae_state *state)
{
    return rand()%maxv;
}
2845
/*
 * Transcendental-function wrappers: thin forwards to the C math library.
 * The state argument is unused; it exists only for interface uniformity
 * with the rest of the ALGLIB math kernel.
 */
double ae_sin(double x, ae_state *state)
{
    return sin(x);
}

double ae_cos(double x, ae_state *state)
{
    return cos(x);
}

double ae_tan(double x, ae_state *state)
{
    return tan(x);
}

double ae_sinh(double x, ae_state *state)
{
    return sinh(x);
}

double ae_cosh(double x, ae_state *state)
{
    return cosh(x);
}

double ae_tanh(double x, ae_state *state)
{
    return tanh(x);
}

double ae_asin(double x, ae_state *state)
{
    return asin(x);
}

double ae_acos(double x, ae_state *state)
{
    return acos(x);
}

double ae_atan(double x, ae_state *state)
{
    return atan(x);
}

/* two-argument arctangent; note the y-before-x argument order, as in atan2() */
double ae_atan2(double y, double x, ae_state *state)
{
    return atan2(y,x);
}

double ae_log(double x, ae_state *state)
{
    return log(x);
}

double ae_pow(double x, double y, ae_state *state)
{
    return pow(x,y);
}

double ae_exp(double x, ae_state *state)
{
    return exp(x);
}
2909
2910 /************************************************************************
2911 Symmetric/Hermitian properties: check and force
2912 ************************************************************************/
/*
 * Splits problem length n into two parts n1+n2=n for recursive processing:
 * - if n<=nb, no split is needed: n1=n, n2=0
 * - if n is not a multiple of block size nb, the tail n%nb goes to n2,
 *   so n1 is a multiple of nb
 * - otherwise n is halved, then n1 is rounded up to the nearest multiple
 *   of nb (n2 shrinks correspondingly)
 */
static void x_split_length(ae_int_t n, ae_int_t nb, ae_int_t* n1, ae_int_t* n2)
{
    ae_int_t r;
    if( n<=nb )
    {
        *n1 = n;
        *n2 = 0;
    }
    else
    {
        if( n%nb!=0 )
        {
            *n2 = n%nb;
            *n1 = n-(*n2);
        }
        else
        {
            *n2 = n/2;
            *n1 = n-(*n2);
            if( *n1%nb==0 )
            {
                return;
            }
            /* round n1 up to a multiple of nb, moving the excess out of n2 */
            r = nb-*n1%nb;
            *n1 = *n1+r;
            *n2 = *n2-r;
        }
    }
}
/*
 * Computes sqrt(x^2+y^2) without spurious overflow/underflow by factoring
 * out the larger magnitude before squaring the ratio.
 */
static double x_safepythag2(double x, double y)
{
    double ax = fabs(x);
    double ay = fabs(y);
    double big   = ax>ay ? ax : ay;
    double small = ax>ay ? ay : ax;
    double ratio;

    if( small==0 )
        return big;
    ratio = small/big;
    return big*sqrt(1+ratio*ratio);
}
2961 /*
2962 * this function checks difference between offdiagonal blocks BL and BU
2963 * (see below). Block BL is specified by offsets (offset0,offset1) and
2964 * sizes (len0,len1).
2965 *
2966 * [ . ]
2967 * [ A0 BU ]
2968 * A = [ BL A1 ]
2969 * [ . ]
2970 *
2971 * this subroutine updates current values of:
2972 * a) mx maximum value of A[i,j] found so far
2973 * b) err componentwise difference between elements of BL and BU^T
2974 *
2975 */
static void is_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            /* split along the longer dimension (rows of BL) */
            x_split_length(len0, x_nb, &n1, &n2);
            is_symmetric_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state);
            is_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
        }
        else
        {
            /* split along columns of BL */
            x_split_length(len1, x_nb, &n1, &n2);
            is_symmetric_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state);
            is_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
        }
        return;
    }
    else
    {
        /* base case: compare BL[i][j] (prow) against BU[j][i] (pcol) elementwise */
        double *p1, *p2, *prow, *pcol;
        double v;
        ae_int_t i, j;

        p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;   /* top-left of BL */
        p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;   /* top-left of BU */
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;              /* walks down the i-th column of BU */
            prow = p1+i*a->stride;    /* walks along the i-th row of BL */
            for(j=0; j<len1; j++)
            {
                if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
                {
                    *nonfinite = ae_true;
                }
                else
                {
                    /* update running magnitude maximum and asymmetry error */
                    v = fabs(*pcol);
                    *mx =  *mx>v ? *mx : v;
                    v = fabs(*prow);
                    *mx =  *mx>v ? *mx : v;
                    v = fabs(*pcol-*prow);
                    *err = *err>v ? *err : v;
                }
                pcol += a->stride;
                prow++;
            }
        }
    }
}
3030 /*
3031 * this function checks that diagonal block A0 is symmetric.
3032 * Block A0 is specified by its offset and size.
3033 *
3034 * [ . ]
3035 * [ A0 ]
3036 * A = [ . ]
3037 * [ . ]
3038 *
3039 * this subroutine updates current values of:
3040 * a) mx maximum value of A[i,j] found so far
3041 * b) err componentwise difference between A0 and A0^T
3042 *
3043 */
is_symmetric_rec_diag_stat(x_matrix * a,ae_int_t offset,ae_int_t len,ae_bool * nonfinite,double * mx,double * err,ae_state * _state)3044 static void is_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
3045 {
3046 double *p, *prow, *pcol;
3047 double v;
3048 ae_int_t i, j;
3049
3050 /* try to split problem into two smaller ones */
3051 if( len>x_nb )
3052 {
3053 ae_int_t n1, n2;
3054 x_split_length(len, x_nb, &n1, &n2);
3055 is_symmetric_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
3056 is_symmetric_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
3057 is_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
3058 return;
3059 }
3060
3061 /* base case */
3062 p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
3063 for(i=0; i<len; i++)
3064 {
3065 pcol = p+i;
3066 prow = p+i*a->stride;
3067 for(j=0; j<i; j++,pcol+=a->stride,prow++)
3068 {
3069 if( !ae_isfinite(*pcol,_state) || !ae_isfinite(*prow,_state) )
3070 {
3071 *nonfinite = ae_true;
3072 }
3073 else
3074 {
3075 v = fabs(*pcol);
3076 *mx = *mx>v ? *mx : v;
3077 v = fabs(*prow);
3078 *mx = *mx>v ? *mx : v;
3079 v = fabs(*pcol-*prow);
3080 *err = *err>v ? *err : v;
3081 }
3082 }
3083 v = fabs(p[i+i*a->stride]);
3084 *mx = *mx>v ? *mx : v;
3085 }
3086 }
3087 /*
3088 * this function checks difference between offdiagonal blocks BL and BU
3089 * (see below). Block BL is specified by offsets (offset0,offset1) and
3090 * sizes (len0,len1).
3091 *
3092 * [ . ]
3093 * [ A0 BU ]
3094 * A = [ BL A1 ]
3095 * [ . ]
3096 *
3097 * this subroutine updates current values of:
3098 * a) mx maximum value of A[i,j] found so far
3099 * b) err componentwise difference between elements of BL and BU^H
3100 *
3101 */
static void is_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            /* split along the longer dimension (rows of BL) */
            x_split_length(len0, x_nb, &n1, &n2);
            is_hermitian_rec_off_stat(a, offset0, offset1, n1, len1, nonfinite, mx, err, _state);
            is_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1, nonfinite, mx, err, _state);
        }
        else
        {
            /* split along columns of BL */
            x_split_length(len1, x_nb, &n1, &n2);
            is_hermitian_rec_off_stat(a, offset0, offset1, len0, n1, nonfinite, mx, err, _state);
            is_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2, nonfinite, mx, err, _state);
        }
        return;
    }
    else
    {
        /* base case: compare BL[i][j] (prow) against BU[j][i] (pcol) elementwise */
        ae_complex *p1, *p2, *prow, *pcol;
        double v;
        ae_int_t i, j;

        p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;   /* top-left of BL */
        p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;   /* top-left of BU */
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;              /* walks down the i-th column of BU */
            prow = p1+i*a->stride;    /* walks along the i-th row of BL */
            for(j=0; j<len1; j++)
            {
                if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
                {
                    *nonfinite = ae_true;
                }
                else
                {
                    v = x_safepythag2(pcol->x, pcol->y);
                    *mx =  *mx>v ? *mx : v;
                    v = x_safepythag2(prow->x, prow->y);
                    *mx =  *mx>v ? *mx : v;
                    /*
                     * Hermitian symmetry requires BU[j][i]==conj(BL[i][j]),
                     * hence the real parts are subtracted while the
                     * imaginary parts are SUMMED in the error term.
                     */
                    v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
                    *err = *err>v ? *err : v;
                }
                pcol += a->stride;
                prow++;
            }
        }
    }
}
3156 /*
3157 * this function checks that diagonal block A0 is Hermitian.
3158 * Block A0 is specified by its offset and size.
3159 *
3160 * [ . ]
3161 * [ A0 ]
3162 * A = [ . ]
3163 * [ . ]
3164 *
3165 * this subroutine updates current values of:
3166 * a) mx maximum value of A[i,j] found so far
3167 * b) err componentwise difference between A0 and A0^H
3168 *
3169 */
static void is_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len, ae_bool *nonfinite, double *mx, double *err, ae_state *_state)
{
    ae_complex *p, *prow, *pcol;
    double v;
    ae_int_t i, j;

    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        is_hermitian_rec_diag_stat(a, offset, n1, nonfinite, mx, err, _state);
        is_hermitian_rec_diag_stat(a, offset+n1, n2, nonfinite, mx, err, _state);
        /* also check the offdiagonal block coupling the two halves */
        is_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1, nonfinite, mx, err, _state);
        return;
    }

    /* base case: compare strictly-lower part (prow) against upper part (pcol) */
    p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;               /* walks down the i-th column (upper triangle) */
        prow = p+i*a->stride;     /* walks along the i-th row (lower triangle) */
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
        {
            if( !ae_isfinite(pcol->x, _state) || !ae_isfinite(pcol->y, _state) || !ae_isfinite(prow->x, _state) || !ae_isfinite(prow->y, _state) )
            {
                *nonfinite = ae_true;
            }
            else
            {
                v = x_safepythag2(pcol->x, pcol->y);
                *mx =  *mx>v ? *mx : v;
                v = x_safepythag2(prow->x, prow->y);
                *mx =  *mx>v ? *mx : v;
                /* Hermitian: real parts subtracted, imaginary parts summed */
                v = x_safepythag2(pcol->x-prow->x, pcol->y+prow->y);
                *err = *err>v ? *err : v;
            }
        }
        /* diagonal of a Hermitian matrix must be finite and purely real:
           |Re| feeds the magnitude maximum, |Im| feeds the error */
        if( !ae_isfinite(p[i+i*a->stride].x, _state) || !ae_isfinite(p[i+i*a->stride].y, _state) )
        {
            *nonfinite = ae_true;
        }
        else
        {
            v = fabs(p[i+i*a->stride].x);
            *mx =  *mx>v ? *mx : v;
            v = fabs(p[i+i*a->stride].y);
            *err =  *err>v ? *err : v;
        }
    }
}
3222 /*
3223 * this function copies offdiagonal block BL to its symmetric counterpart
3224 * BU (see below). Block BL is specified by offsets (offset0,offset1)
3225 * and sizes (len0,len1).
3226 *
3227 * [ . ]
3228 * [ A0 BU ]
3229 * A = [ BL A1 ]
3230 * [ . ]
3231 *
3232 */
static void force_symmetric_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
{
    /* try to split problem into two smaller ones */
    if( len0>x_nb || len1>x_nb )
    {
        ae_int_t n1, n2;
        if( len0>len1 )
        {
            /* split along the longer dimension (rows of BL) */
            x_split_length(len0, x_nb, &n1, &n2);
            force_symmetric_rec_off_stat(a, offset0, offset1, n1, len1);
            force_symmetric_rec_off_stat(a, offset0+n1, offset1, n2, len1);
        }
        else
        {
            /* split along columns of BL */
            x_split_length(len1, x_nb, &n1, &n2);
            force_symmetric_rec_off_stat(a, offset0, offset1, len0, n1);
            force_symmetric_rec_off_stat(a, offset0, offset1+n1, len0, n2);
        }
        return;
    }
    else
    {
        /* base case: BU[j][i] = BL[i][j] */
        double *p1, *p2, *prow, *pcol;
        ae_int_t i, j;

        p1 = (double*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;   /* top-left of BL (source) */
        p2 = (double*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;   /* top-left of BU (target) */
        for(i=0; i<len0; i++)
        {
            pcol = p2+i;              /* walks down the i-th column of BU */
            prow = p1+i*a->stride;    /* walks along the i-th row of BL */
            for(j=0; j<len1; j++)
            {
                *pcol = *prow;
                pcol += a->stride;
                prow++;
            }
        }
    }
}
3274 /*
3275 * this function copies lower part of diagonal block A0 to its upper part
3276 * Block is specified by offset and size.
3277 *
3278 * [ . ]
3279 * [ A0 ]
3280 * A = [ . ]
3281 * [ . ]
3282 *
3283 */
static void force_symmetric_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
{
    double *p, *prow, *pcol;
    ae_int_t i, j;

    /* try to split problem into two smaller ones */
    if( len>x_nb )
    {
        ae_int_t n1, n2;
        x_split_length(len, x_nb, &n1, &n2);
        force_symmetric_rec_diag_stat(a, offset, n1);
        force_symmetric_rec_diag_stat(a, offset+n1, n2);
        /* also mirror the offdiagonal block coupling the two halves */
        force_symmetric_rec_off_stat(a, offset+n1, offset, n2, n1);
        return;
    }

    /* base case: copy the strictly-lower triangle into the upper triangle */
    p = (double*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
    for(i=0; i<len; i++)
    {
        pcol = p+i;               /* upper-triangle target: column i */
        prow = p+i*a->stride;     /* lower-triangle source: row i */
        for(j=0; j<i; j++,pcol+=a->stride,prow++)
            *pcol = *prow;
    }
}
3310 /*
3311 * this function copies Hermitian transpose of offdiagonal block BL to
3312 * its symmetric counterpart BU (see below). Block BL is specified by
3313 * offsets (offset0,offset1) and sizes (len0,len1).
3314 *
3315 * [ . ]
3316 * [ A0 BU ]
3317 * A = [ BL A1 ]
3318 * [ . ]
3319 */
force_hermitian_rec_off_stat(x_matrix * a,ae_int_t offset0,ae_int_t offset1,ae_int_t len0,ae_int_t len1)3320 static void force_hermitian_rec_off_stat(x_matrix *a, ae_int_t offset0, ae_int_t offset1, ae_int_t len0, ae_int_t len1)
3321 {
3322 /* try to split problem into two smaller ones */
3323 if( len0>x_nb || len1>x_nb )
3324 {
3325 ae_int_t n1, n2;
3326 if( len0>len1 )
3327 {
3328 x_split_length(len0, x_nb, &n1, &n2);
3329 force_hermitian_rec_off_stat(a, offset0, offset1, n1, len1);
3330 force_hermitian_rec_off_stat(a, offset0+n1, offset1, n2, len1);
3331 }
3332 else
3333 {
3334 x_split_length(len1, x_nb, &n1, &n2);
3335 force_hermitian_rec_off_stat(a, offset0, offset1, len0, n1);
3336 force_hermitian_rec_off_stat(a, offset0, offset1+n1, len0, n2);
3337 }
3338 return;
3339 }
3340 else
3341 {
3342 /* base case */
3343 ae_complex *p1, *p2, *prow, *pcol;
3344 ae_int_t i, j;
3345
3346 p1 = (ae_complex*)(a->x_ptr.p_ptr)+offset0*a->stride+offset1;
3347 p2 = (ae_complex*)(a->x_ptr.p_ptr)+offset1*a->stride+offset0;
3348 for(i=0; i<len0; i++)
3349 {
3350 pcol = p2+i;
3351 prow = p1+i*a->stride;
3352 for(j=0; j<len1; j++)
3353 {
3354 *pcol = *prow;
3355 pcol += a->stride;
3356 prow++;
3357 }
3358 }
3359 }
3360 }
3361 /*
3362 * this function copies Hermitian transpose of lower part of
3363 * diagonal block A0 to its upper part Block is specified by offset and size.
3364 *
3365 * [ . ]
3366 * [ A0 ]
3367 * A = [ . ]
3368 * [ . ]
3369 *
3370 */
force_hermitian_rec_diag_stat(x_matrix * a,ae_int_t offset,ae_int_t len)3371 static void force_hermitian_rec_diag_stat(x_matrix *a, ae_int_t offset, ae_int_t len)
3372 {
3373 ae_complex *p, *prow, *pcol;
3374 ae_int_t i, j;
3375
3376 /* try to split problem into two smaller ones */
3377 if( len>x_nb )
3378 {
3379 ae_int_t n1, n2;
3380 x_split_length(len, x_nb, &n1, &n2);
3381 force_hermitian_rec_diag_stat(a, offset, n1);
3382 force_hermitian_rec_diag_stat(a, offset+n1, n2);
3383 force_hermitian_rec_off_stat(a, offset+n1, offset, n2, n1);
3384 return;
3385 }
3386
3387 /* base case */
3388 p = (ae_complex*)(a->x_ptr.p_ptr)+offset*a->stride+offset;
3389 for(i=0; i<len; i++)
3390 {
3391 pcol = p+i;
3392 prow = p+i*a->stride;
3393 for(j=0; j<i; j++,pcol+=a->stride,prow++)
3394 *pcol = *prow;
3395 }
3396 }
/*
 * Returns ae_true iff matrix a is real, square, has only finite elements,
 * and is symmetric to within relative tolerance 1.0E-14.
 */
ae_bool x_is_symmetric(x_matrix *a)
{
    double mx, err;
    ae_bool nonfinite;
    ae_state _alglib_env_state;
    /* only square real matrices can be symmetric */
    if( a->datatype!=DT_REAL )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    /* empty matrix is trivially symmetric */
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    /* NOTE(review): _alglib_env_state is never ae_state_clear()-ed here;
       presumably init performs no allocation on this path -- TODO confirm */
    ae_state_init(&_alglib_env_state);
    mx = 0;
    err = 0;
    nonfinite = ae_false;
    is_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
    if( nonfinite )
        return ae_false;
    /* all-zero matrix: symmetric by definition */
    if( mx==0 )
        return ae_true;
    /* relative tolerance test on the worst componentwise asymmetry */
    return err/mx<=1.0E-14;
}
/*
 * Returns ae_true iff matrix a is complex, square, has only finite
 * elements, and is Hermitian to within relative tolerance 1.0E-14.
 */
ae_bool x_is_hermitian(x_matrix *a)
{
    double mx, err;
    ae_bool nonfinite;
    ae_state _alglib_env_state;
    /* only square complex matrices can be Hermitian */
    if( a->datatype!=DT_COMPLEX )
        return ae_false;
    if( a->cols!=a->rows )
        return ae_false;
    /* empty matrix is trivially Hermitian */
    if( a->cols==0 || a->rows==0 )
        return ae_true;
    /* NOTE(review): _alglib_env_state is never ae_state_clear()-ed here;
       presumably init performs no allocation on this path -- TODO confirm */
    ae_state_init(&_alglib_env_state);
    mx = 0;
    err = 0;
    nonfinite = ae_false;
    is_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows, &nonfinite, &mx, &err, &_alglib_env_state);
    if( nonfinite )
        return ae_false;
    /* all-zero matrix: Hermitian by definition */
    if( mx==0 )
        return ae_true;
    /* relative tolerance test on the worst componentwise deviation */
    return err/mx<=1.0E-14;
}
x_force_symmetric(x_matrix * a)3441 ae_bool x_force_symmetric(x_matrix *a)
3442 {
3443 if( a->datatype!=DT_REAL )
3444 return ae_false;
3445 if( a->cols!=a->rows )
3446 return ae_false;
3447 if( a->cols==0 || a->rows==0 )
3448 return ae_true;
3449 force_symmetric_rec_diag_stat(a, 0, (ae_int_t)a->rows);
3450 return ae_true;
3451 }
x_force_hermitian(x_matrix * a)3452 ae_bool x_force_hermitian(x_matrix *a)
3453 {
3454 if( a->datatype!=DT_COMPLEX )
3455 return ae_false;
3456 if( a->cols!=a->rows )
3457 return ae_false;
3458 if( a->cols==0 || a->rows==0 )
3459 return ae_true;
3460 force_hermitian_rec_diag_stat(a, 0, (ae_int_t)a->rows);
3461 return ae_true;
3462 }
3463
/*
 * ae_matrix front-ends for the symmetry/Hermitian check/force routines.
 * Each wrapper attaches a non-owning x_matrix header to the ae_matrix
 * (owner=OWN_CALLER, so no memory changes hands) and delegates to the
 * corresponding x_* implementation above.
 */
ae_bool ae_is_symmetric(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_is_symmetric(&x);
}

ae_bool ae_is_hermitian(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_is_hermitian(&x);
}

ae_bool ae_force_symmetric(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_force_symmetric(&x);
}

ae_bool ae_force_hermitian(ae_matrix *a)
{
    x_matrix x;
    x.owner = OWN_CALLER;
    ae_x_attach_to_matrix(&x, a);
    return x_force_hermitian(&x);
}
3495
3496 /************************************************************************
3497 This function converts six-bit value (from 0 to 63) to character (only
3498 digits, lowercase and uppercase letters, minus and underscore are used).
3499
3500 If v is negative or greater than 63, this function returns '?'.
3501 ************************************************************************/
3502 static char _sixbits2char_tbl[64] = {
3503 '0', '1', '2', '3', '4', '5', '6', '7',
3504 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
3505 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
3506 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
3507 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
3508 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
3509 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
3510 'u', 'v', 'w', 'x', 'y', 'z', '-', '_' };
3511
ae_sixbits2char(ae_int_t v)3512 char ae_sixbits2char(ae_int_t v)
3513 {
3514
3515 if( v<0 || v>63 )
3516 return '?';
3517 return _sixbits2char_tbl[v];
3518
3519 /* v is correct, process it */
3520 /*if( v<10 )
3521 return '0'+v;
3522 v -= 10;
3523 if( v<26 )
3524 return 'A'+v;
3525 v -= 26;
3526 if( v<26 )
3527 return 'a'+v;
3528 v -= 26;
3529 return v==0 ? '-' : '_';*/
3530 }
3531
3532 /************************************************************************
3533 This function converts character to six-bit value (from 0 to 63).
3534
3535 This function is inverse of ae_sixbits2char()
3536 If c is not correct character, this function returns -1.
3537 ************************************************************************/
3538 static ae_int_t _ae_char2sixbits_tbl[] = {
3539 -1, -1, -1, -1, -1, -1, -1, -1,
3540 -1, -1, -1, -1, -1, -1, -1, -1,
3541 -1, -1, -1, -1, -1, -1, -1, -1,
3542 -1, -1, -1, -1, -1, -1, -1, -1,
3543 -1, -1, -1, -1, -1, -1, -1, -1,
3544 -1, -1, -1, -1, -1, 62, -1, -1,
3545 0, 1, 2, 3, 4, 5, 6, 7,
3546 8, 9, -1, -1, -1, -1, -1, -1,
3547 -1, 10, 11, 12, 13, 14, 15, 16,
3548 17, 18, 19, 20, 21, 22, 23, 24,
3549 25, 26, 27, 28, 29, 30, 31, 32,
3550 33, 34, 35, -1, -1, -1, -1, 63,
3551 -1, 36, 37, 38, 39, 40, 41, 42,
3552 43, 44, 45, 46, 47, 48, 49, 50,
3553 51, 52, 53, 54, 55, 56, 57, 58,
3554 59, 60, 61, -1, -1, -1, -1, -1 };
ae_char2sixbits(char c)3555 ae_int_t ae_char2sixbits(char c)
3556 {
3557 return (c>=0 && c<127) ? _ae_char2sixbits_tbl[(int)c] : -1;
3558 }
3559
3560 /************************************************************************
3561 This function converts three bytes (24 bits) to four six-bit values
3562 (24 bits again).
3563
3564 src pointer to three bytes
3565 dst pointer to four ints
3566 ************************************************************************/
ae_threebytes2foursixbits(const unsigned char * src,ae_int_t * dst)3567 void ae_threebytes2foursixbits(const unsigned char *src, ae_int_t *dst)
3568 {
3569 dst[0] = src[0] & 0x3F;
3570 dst[1] = (src[0]>>6) | ((src[1]&0x0F)<<2);
3571 dst[2] = (src[1]>>4) | ((src[2]&0x03)<<4);
3572 dst[3] = src[2]>>2;
3573 }
3574
3575 /************************************************************************
3576 This function converts four six-bit values (24 bits) to three bytes
3577 (24 bits again).
3578
3579 src pointer to four ints
3580 dst pointer to three bytes
3581 ************************************************************************/
ae_foursixbits2threebytes(const ae_int_t * src,unsigned char * dst)3582 void ae_foursixbits2threebytes(const ae_int_t *src, unsigned char *dst)
3583 {
3584 dst[0] = (unsigned char)( src[0] | ((src[1]&0x03)<<6));
3585 dst[1] = (unsigned char)((src[1]>>2) | ((src[2]&0x0F)<<4));
3586 dst[2] = (unsigned char)((src[2]>>4) | (src[3]<<2));
3587 }
3588
3589 /************************************************************************
3590 This function serializes boolean value into buffer
3591
3592 v boolean value to be serialized
3593 buf buffer, at least 12 characters wide
3594 (11 chars for value, one for trailing zero)
3595 state ALGLIB environment state
3596 ************************************************************************/
ae_bool2str(ae_bool v,char * buf,ae_state * state)3597 void ae_bool2str(ae_bool v, char *buf, ae_state *state)
3598 {
3599 char c = v ? '1' : '0';
3600 ae_int_t i;
3601 for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3602 buf[i] = c;
3603 buf[AE_SER_ENTRY_LENGTH] = 0;
3604 }
3605
3606 /************************************************************************
3607 This function unserializes boolean value from buffer
3608
3609 buf buffer which contains value; leading spaces/tabs/newlines are
            ignored, trailing spaces/tabs/newlines are treated as end of
3611 the boolean value.
3612 state ALGLIB environment state
3613
3614 This function raises an error in case unexpected symbol is found
3615 ************************************************************************/
ae_str2bool(const char * buf,ae_state * state,const char ** pasttheend)3616 ae_bool ae_str2bool(const char *buf, ae_state *state, const char **pasttheend)
3617 {
3618 ae_bool was0, was1;
3619 const char *emsg = "ALGLIB: unable to read boolean value from stream";
3620
3621 was0 = ae_false;
3622 was1 = ae_false;
3623 while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3624 buf++;
3625 while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3626 {
3627 if( *buf=='0' )
3628 {
3629 was0 = ae_true;
3630 buf++;
3631 continue;
3632 }
3633 if( *buf=='1' )
3634 {
3635 was1 = ae_true;
3636 buf++;
3637 continue;
3638 }
3639 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3640 }
3641 *pasttheend = buf;
3642 if( (!was0) && (!was1) )
3643 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3644 if( was0 && was1 )
3645 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3646 return was1 ? ae_true : ae_false;
3647 }
3648
3649 /************************************************************************
3650 This function serializes integer value into buffer
3651
3652 v integer value to be serialized
3653 buf buffer, at least 12 characters wide
3654 (11 chars for value, one for trailing zero)
3655 state ALGLIB environment state
3656 ************************************************************************/
void ae_int2str(ae_int_t v, char *buf, ae_state *state)
{
    union _u
    {
        ae_int_t ival;
        unsigned char bytes[9];
    } u;
    ae_int_t i;
    ae_int_t sixbits[12];
    unsigned char c;

    /*
     * copy v to array of chars, sign extending it and
     * converting to little endian order
     *
     * because we don't want to mention size of ae_int_t explicitly,
     * we do it as follows:
     * 1. we fill u.bytes by zeros or ones (depending on sign of v)
     * 2. we copy v to u.ival
     * 3. if we run on big endian architecture, we reorder u.bytes
     * 4. now we have signed 64-bit representation of v stored in u.bytes
     * 5. additionally, we set 9th byte of u.bytes to zero in order to
     *    simplify conversion to six-bit representation
     */
    c = v<0 ? (unsigned char)0xFF : (unsigned char)0x00;
    u.ival = v;
    for(i=sizeof(ae_int_t); i<=8; i++) /* <=8 is preferred because it avoids unnecessary compiler warnings*/
        u.bytes[i] = c;
    u.bytes[8] = 0;
    if( state->endianness==AE_BIG_ENDIAN )
    {
        /* reverse the sizeof(ae_int_t) bytes actually written by step 2,
           turning native big endian order into the little endian layout
           expected by the six-bit converter */
        for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
        {
            unsigned char tc;
            tc = u.bytes[i];
            u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
            u.bytes[sizeof(ae_int_t)-1-i] = tc;
        }
    }

    /*
     * convert to six-bit representation, output
     *
     * NOTE: last 12th element of sixbits is always zero, we do not output it
     */
    ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
    ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
    ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
    for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
        buf[i] = ae_sixbits2char(sixbits[i]);
    buf[AE_SER_ENTRY_LENGTH] = 0x00;
}
3709
3710 /************************************************************************
3711 This function serializes 64-bit integer value into buffer
3712
3713 v integer value to be serialized
3714 buf buffer, at least 12 characters wide
3715 (11 chars for value, one for trailing zero)
3716 state ALGLIB environment state
3717 ************************************************************************/
ae_int642str(ae_int64_t v,char * buf,ae_state * state)3718 void ae_int642str(ae_int64_t v, char *buf, ae_state *state)
3719 {
3720 unsigned char bytes[9];
3721 ae_int_t i;
3722 ae_int_t sixbits[12];
3723
3724 /*
3725 * copy v to array of chars, sign extending it and
3726 * converting to little endian order
3727 *
3728 * because we don't want to mention size of ae_int_t explicitly,
3729 * we do it as follows:
3730 * 1. we fill bytes by zeros or ones (depending on sign of v)
3731 * 2. we memmove v to bytes
3732 * 3. if we run on big endian architecture, we reorder bytes
3733 * 4. now we have signed 64-bit representation of v stored in bytes
3734 * 5. additionally, we set 9th byte of bytes to zero in order to
3735 * simplify conversion to six-bit representation
3736 */
3737 memset(bytes, v<0 ? 0xFF : 0x00, 8);
3738 memmove(bytes, &v, 8);
3739 bytes[8] = 0;
3740 if( state->endianness==AE_BIG_ENDIAN )
3741 {
3742 for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3743 {
3744 unsigned char tc;
3745 tc = bytes[i];
3746 bytes[i] = bytes[sizeof(ae_int_t)-1-i];
3747 bytes[sizeof(ae_int_t)-1-i] = tc;
3748 }
3749 }
3750
3751 /*
3752 * convert to six-bit representation, output
3753 *
3754 * NOTE: last 12th element of sixbits is always zero, we do not output it
3755 */
3756 ae_threebytes2foursixbits(bytes+0, sixbits+0);
3757 ae_threebytes2foursixbits(bytes+3, sixbits+4);
3758 ae_threebytes2foursixbits(bytes+6, sixbits+8);
3759 for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3760 buf[i] = ae_sixbits2char(sixbits[i]);
3761 buf[AE_SER_ENTRY_LENGTH] = 0x00;
3762 }
3763
3764 /************************************************************************
3765 This function unserializes integer value from string
3766
3767 buf buffer which contains value; leading spaces/tabs/newlines are
3768 ignored, traling spaces/tabs/newlines are treated as end of
3769 the boolean value.
3770 state ALGLIB environment state
3771
3772 This function raises an error in case unexpected symbol is found
3773 ************************************************************************/
ae_str2int(const char * buf,ae_state * state,const char ** pasttheend)3774 ae_int_t ae_str2int(const char *buf, ae_state *state, const char **pasttheend)
3775 {
3776 const char *emsg = "ALGLIB: unable to read integer value from stream";
3777 ae_int_t sixbits[12];
3778 ae_int_t sixbitsread, i;
3779 union _u
3780 {
3781 ae_int_t ival;
3782 unsigned char bytes[9];
3783 } u;
3784 /*
3785 * 1. skip leading spaces
3786 * 2. read and decode six-bit digits
3787 * 3. set trailing digits to zeros
3788 * 4. convert to little endian 64-bit integer representation
3789 * 5. convert to big endian representation, if needed
3790 */
3791 while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3792 buf++;
3793 sixbitsread = 0;
3794 while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3795 {
3796 ae_int_t d;
3797 d = ae_char2sixbits(*buf);
3798 if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
3799 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3800 sixbits[sixbitsread] = d;
3801 sixbitsread++;
3802 buf++;
3803 }
3804 *pasttheend = buf;
3805 if( sixbitsread==0 )
3806 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3807 for(i=sixbitsread; i<12; i++)
3808 sixbits[i] = 0;
3809 ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
3810 ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
3811 ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
3812 if( state->endianness==AE_BIG_ENDIAN )
3813 {
3814 for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3815 {
3816 unsigned char tc;
3817 tc = u.bytes[i];
3818 u.bytes[i] = u.bytes[sizeof(ae_int_t)-1-i];
3819 u.bytes[sizeof(ae_int_t)-1-i] = tc;
3820 }
3821 }
3822 return u.ival;
3823 }
3824
3825 /************************************************************************
3826 This function unserializes 64-bit integer value from string
3827
3828 buf buffer which contains value; leading spaces/tabs/newlines are
3829 ignored, traling spaces/tabs/newlines are treated as end of
3830 the boolean value.
3831 state ALGLIB environment state
3832
3833 This function raises an error in case unexpected symbol is found
3834 ************************************************************************/
ae_str2int64(const char * buf,ae_state * state,const char ** pasttheend)3835 ae_int64_t ae_str2int64(const char *buf, ae_state *state, const char **pasttheend)
3836 {
3837 const char *emsg = "ALGLIB: unable to read integer value from stream";
3838 ae_int_t sixbits[12];
3839 ae_int_t sixbitsread, i;
3840 unsigned char bytes[9];
3841 ae_int64_t result;
3842
3843 /*
3844 * 1. skip leading spaces
3845 * 2. read and decode six-bit digits
3846 * 3. set trailing digits to zeros
3847 * 4. convert to little endian 64-bit integer representation
3848 * 5. convert to big endian representation, if needed
3849 */
3850 while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3851 buf++;
3852 sixbitsread = 0;
3853 while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
3854 {
3855 ae_int_t d;
3856 d = ae_char2sixbits(*buf);
3857 if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
3858 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3859 sixbits[sixbitsread] = d;
3860 sixbitsread++;
3861 buf++;
3862 }
3863 *pasttheend = buf;
3864 if( sixbitsread==0 )
3865 ae_break(state, ERR_ASSERTION_FAILED, emsg);
3866 for(i=sixbitsread; i<12; i++)
3867 sixbits[i] = 0;
3868 ae_foursixbits2threebytes(sixbits+0, bytes+0);
3869 ae_foursixbits2threebytes(sixbits+4, bytes+3);
3870 ae_foursixbits2threebytes(sixbits+8, bytes+6);
3871 if( state->endianness==AE_BIG_ENDIAN )
3872 {
3873 for(i=0; i<(ae_int_t)(sizeof(ae_int_t)/2); i++)
3874 {
3875 unsigned char tc;
3876 tc = bytes[i];
3877 bytes[i] = bytes[sizeof(ae_int_t)-1-i];
3878 bytes[sizeof(ae_int_t)-1-i] = tc;
3879 }
3880 }
3881 memmove(&result, bytes, sizeof(result));
3882 return result;
3883 }
3884
3885
3886 /************************************************************************
3887 This function serializes double value into buffer
3888
3889 v double value to be serialized
3890 buf buffer, at least 12 characters wide
3891 (11 chars for value, one for trailing zero)
3892 state ALGLIB environment state
3893 ************************************************************************/
ae_double2str(double v,char * buf,ae_state * state)3894 void ae_double2str(double v, char *buf, ae_state *state)
3895 {
3896 union _u
3897 {
3898 double dval;
3899 unsigned char bytes[9];
3900 } u;
3901 ae_int_t i;
3902 ae_int_t sixbits[12];
3903
3904 /*
3905 * handle special quantities
3906 */
3907 if( ae_isnan(v, state) )
3908 {
3909 const char *s = ".nan_______";
3910 memmove(buf, s, strlen(s)+1);
3911 return;
3912 }
3913 if( ae_isposinf(v, state) )
3914 {
3915 const char *s = ".posinf____";
3916 memmove(buf, s, strlen(s)+1);
3917 return;
3918 }
3919 if( ae_isneginf(v, state) )
3920 {
3921 const char *s = ".neginf____";
3922 memmove(buf, s, strlen(s)+1);
3923 return;
3924 }
3925
3926 /*
3927 * process general case:
3928 * 1. copy v to array of chars
3929 * 2. set 9th byte of u.bytes to zero in order to
3930 * simplify conversion to six-bit representation
3931 * 3. convert to little endian (if needed)
3932 * 4. convert to six-bit representation
3933 * (last 12th element of sixbits is always zero, we do not output it)
3934 */
3935 u.dval = v;
3936 u.bytes[8] = 0;
3937 if( state->endianness==AE_BIG_ENDIAN )
3938 {
3939 for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
3940 {
3941 unsigned char tc;
3942 tc = u.bytes[i];
3943 u.bytes[i] = u.bytes[sizeof(double)-1-i];
3944 u.bytes[sizeof(double)-1-i] = tc;
3945 }
3946 }
3947 ae_threebytes2foursixbits(u.bytes+0, sixbits+0);
3948 ae_threebytes2foursixbits(u.bytes+3, sixbits+4);
3949 ae_threebytes2foursixbits(u.bytes+6, sixbits+8);
3950 for(i=0; i<AE_SER_ENTRY_LENGTH; i++)
3951 buf[i] = ae_sixbits2char(sixbits[i]);
3952 buf[AE_SER_ENTRY_LENGTH] = 0x00;
3953 }
3954
3955 /************************************************************************
3956 This function unserializes double value from string
3957
3958 buf buffer which contains value; leading spaces/tabs/newlines are
3959 ignored, traling spaces/tabs/newlines are treated as end of
3960 the boolean value.
3961 state ALGLIB environment state
3962
3963 This function raises an error in case unexpected symbol is found
3964 ************************************************************************/
ae_str2double(const char * buf,ae_state * state,const char ** pasttheend)3965 double ae_str2double(const char *buf, ae_state *state, const char **pasttheend)
3966 {
3967 const char *emsg = "ALGLIB: unable to read double value from stream";
3968 ae_int_t sixbits[12];
3969 ae_int_t sixbitsread, i;
3970 union _u
3971 {
3972 double dval;
3973 unsigned char bytes[9];
3974 } u;
3975
3976
3977 /*
3978 * skip leading spaces
3979 */
3980 while( *buf==' ' || *buf=='\t' || *buf=='\n' || *buf=='\r' )
3981 buf++;
3982
3983 /*
3984 * Handle special cases
3985 */
3986 if( *buf=='.' )
3987 {
3988 const char *s_nan = ".nan_______";
3989 const char *s_posinf = ".posinf____";
3990 const char *s_neginf = ".neginf____";
3991 if( strncmp(buf, s_nan, strlen(s_nan))==0 )
3992 {
3993 *pasttheend = buf+strlen(s_nan);
3994 return state->v_nan;
3995 }
3996 if( strncmp(buf, s_posinf, strlen(s_posinf))==0 )
3997 {
3998 *pasttheend = buf+strlen(s_posinf);
3999 return state->v_posinf;
4000 }
4001 if( strncmp(buf, s_neginf, strlen(s_neginf))==0 )
4002 {
4003 *pasttheend = buf+strlen(s_neginf);
4004 return state->v_neginf;
4005 }
4006 ae_break(state, ERR_ASSERTION_FAILED, emsg);
4007 }
4008
4009 /*
4010 * General case:
4011 * 1. read and decode six-bit digits
4012 * 2. check that all 11 digits were read
4013 * 3. set last 12th digit to zero (needed for simplicity of conversion)
4014 * 4. convert to 8 bytes
4015 * 5. convert to big endian representation, if needed
4016 */
4017 sixbitsread = 0;
4018 while( *buf!=' ' && *buf!='\t' && *buf!='\n' && *buf!='\r' && *buf!=0 )
4019 {
4020 ae_int_t d;
4021 d = ae_char2sixbits(*buf);
4022 if( d<0 || sixbitsread>=AE_SER_ENTRY_LENGTH )
4023 ae_break(state, ERR_ASSERTION_FAILED, emsg);
4024 sixbits[sixbitsread] = d;
4025 sixbitsread++;
4026 buf++;
4027 }
4028 *pasttheend = buf;
4029 if( sixbitsread!=AE_SER_ENTRY_LENGTH )
4030 ae_break(state, ERR_ASSERTION_FAILED, emsg);
4031 sixbits[AE_SER_ENTRY_LENGTH] = 0;
4032 ae_foursixbits2threebytes(sixbits+0, u.bytes+0);
4033 ae_foursixbits2threebytes(sixbits+4, u.bytes+3);
4034 ae_foursixbits2threebytes(sixbits+8, u.bytes+6);
4035 if( state->endianness==AE_BIG_ENDIAN )
4036 {
4037 for(i=0; i<(ae_int_t)(sizeof(double)/2); i++)
4038 {
4039 unsigned char tc;
4040 tc = u.bytes[i];
4041 u.bytes[i] = u.bytes[sizeof(double)-1-i];
4042 u.bytes[sizeof(double)-1-i] = tc;
4043 }
4044 }
4045 return u.dval;
4046 }
4047
4048
4049 /************************************************************************
4050 This function performs given number of spin-wait iterations
4051 ************************************************************************/
ae_spin_wait(ae_int_t cnt)4052 void ae_spin_wait(ae_int_t cnt)
4053 {
4054 /*
4055 * these strange operations with ae_never_change_it are necessary to
4056 * prevent compiler optimization of the loop.
4057 */
4058 volatile ae_int_t i;
4059
4060 /* very unlikely because no one will wait for such amount of cycles */
4061 if( cnt>0x12345678 )
4062 ae_never_change_it = cnt%10;
4063
4064 /* spin wait, test condition which will never be true */
4065 for(i=0; i<cnt; i++)
4066 if( ae_never_change_it>0 )
4067 ae_never_change_it--;
4068 }
4069
4070
/************************************************************************
This function causes the calling thread to relinquish the CPU. The thread
is moved to the end of the queue and some other thread gets to run.

NOTE: this function should NOT be called when AE_OS is AE_UNKNOWN - the
      whole program will be abnormally terminated.
************************************************************************/
void ae_yield()
{
#if AE_OS==AE_WINDOWS
    /* SwitchToThread() returns zero when no other thread was ready to
       run; fall back to Sleep(0), which also relinquishes the slice */
    if( !SwitchToThread() )
        Sleep(0);
#elif AE_OS==AE_POSIX
    sched_yield();
#else
    /* no OS support compiled in: yielding is impossible, terminate */
    abort();
#endif
}
4089
/************************************************************************
This function initializes _lock structure which is internally used by
ae_lock high-level structure.

_lock structure is statically allocated, no malloc() calls is performed
during its allocation. However, you have to call  _ae_free_lock_raw()  in
order to deallocate this lock properly.
************************************************************************/
void _ae_init_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    /* place the spinlock word at an aligned offset inside the embedded
       buffer and mark it as free (0) */
    p->p_lock = (ae_int_t*)ae_align((void*)(&p->buf),AE_LOCK_ALIGNMENT);
    p->p_lock[0] = 0;
#elif AE_OS==AE_POSIX
    pthread_mutex_init(&p->mutex, NULL);
#else
    /* OS-less build: a plain flag, no real synchronization available */
    p->is_locked = ae_false;
#endif
}
4109
4110
/************************************************************************
This function acquires _lock structure.

It is low-level workhorse utilized by ae_acquire_lock().
************************************************************************/
void _ae_acquire_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    ae_int_t cnt = 0;
#ifdef AE_SMP_DEBUGCOUNTERS
    InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_acquisitions);
#endif
    /* spin on an interlocked compare-exchange: 0 means free, 1 means taken */
    for(;;)
    {
        if( InterlockedCompareExchange((LONG volatile *)p->p_lock, 1, 0)==0 )
            return;
        /* lock is busy: burn a few cycles before the next attempt */
        ae_spin_wait(AE_LOCK_CYCLES);
#ifdef AE_SMP_DEBUGCOUNTERS
        InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_spinwaits);
#endif
        cnt++;
        /* after AE_LOCK_TESTS_BEFORE_YIELD failed rounds, give the CPU away */
        if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
        {
#ifdef AE_SMP_DEBUGCOUNTERS
            InterlockedIncrement((LONG volatile *)&_ae_dbg_lock_yields);
#endif
            ae_yield();
        }
    }
#elif AE_OS==AE_POSIX
    ae_int_t cnt = 0;
    /* busy-wait on pthread_mutex_trylock(), yielding periodically so
       the holder gets a chance to run */
    for(;;)
    {
        if( pthread_mutex_trylock(&p->mutex)==0 )
            return;
        ae_spin_wait(AE_LOCK_CYCLES);
        cnt++;
        if( cnt%AE_LOCK_TESTS_BEFORE_YIELD==0 )
            ae_yield();
    }
    ;
#else
    /* single-threaded build: acquiring an already-taken lock can never
       succeed, so it is treated as a critical failure */
    AE_CRITICAL_ASSERT(!p->is_locked);
    p->is_locked = ae_true;
#endif
}
4157
4158
/************************************************************************
This function releases _lock structure.

It is low-level lock function which is used by ae_release_lock.
************************************************************************/
void _ae_release_lock_raw(_lock *p)
{
#if AE_OS==AE_WINDOWS
    /* atomically reset spinlock word to 0 (free) with a full barrier */
    InterlockedExchange((LONG volatile *)p->p_lock, 0);
#elif AE_OS==AE_POSIX
    pthread_mutex_unlock(&p->mutex);
#else
    p->is_locked = ae_false;
#endif
}
4174
4175
/************************************************************************
This function frees _lock structure.
************************************************************************/
void _ae_free_lock_raw(_lock *p)
{
    /* only the POSIX variant owns an OS object that needs cleanup;
       the Windows spinlock and the OS-less flag need no teardown */
#if AE_OS==AE_POSIX
    pthread_mutex_destroy(&p->mutex);
#endif
}
4185
4186
4187 /************************************************************************
4188 This function initializes ae_lock structure.
4189
4190 INPUT PARAMETERS:
4191 lock - pointer to lock structure, must be zero-filled
4192 state - pointer to state structure, used for exception
4193 handling and management of automatic objects.
4194 make_automatic - if true, lock object is added to automatic
4195 memory management list.
4196
4197 NOTE: as a special exception, this function allows you to specify NULL
4198 state pointer. In this case all exception arising during construction
4199 are handled as critical failures, with abort() being called.
4200 make_automatic must be false on such calls.
4201 ************************************************************************/
ae_init_lock(ae_lock * lock,ae_state * state,ae_bool make_automatic)4202 void ae_init_lock(ae_lock *lock, ae_state *state, ae_bool make_automatic)
4203 {
4204 _lock *p;
4205 AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
4206 if(state==NULL)
4207 {
4208 ae_state _tmp_state;
4209 AE_CRITICAL_ASSERT(!make_automatic);
4210 ae_state_init(&_tmp_state);
4211 ae_init_lock(lock, &_tmp_state, ae_false);
4212 ae_state_clear(&_tmp_state);
4213 return;
4214 }
4215 lock->eternal = ae_false;
4216 ae_db_init(&lock->db, sizeof(_lock), state, make_automatic);
4217 lock->lock_ptr = lock->db.ptr;
4218 p = (_lock*)lock->lock_ptr;
4219 _ae_init_lock_raw(p);
4220 }
4221
/************************************************************************
This function initializes "eternal" ae_lock structure which  is  expected
to persist until the end of the execution of the program.  Eternal  locks
can not be deallocated (cleared) and  do  not  increase  debug  allocation
counters.  Errors  during  allocation  of eternal  locks  are  considered
critical exceptions and handled by calling abort().

INPUT PARAMETERS:
    lock                -   pointer to lock structure, must be zero-filled
************************************************************************/
void ae_init_lock_eternal(ae_lock *lock)
{
    _lock *p;
    AE_CRITICAL_ASSERT(ae_check_zeros(lock,sizeof(*lock)));
    lock->eternal = ae_true;
    /* NOTE(review): eternal_malloc() result is used without a NULL check;
       presumably it aborts on allocation failure - confirm against its
       definition elsewhere in this file */
    lock->lock_ptr = eternal_malloc(sizeof(_lock));
    p = (_lock*)lock->lock_ptr;
    _ae_init_lock_raw(p);
}
4246
4247
4248 /************************************************************************
4249 This function acquires lock. In case lock is busy, we perform several
4250 iterations inside tight loop before trying again.
4251 ************************************************************************/
ae_acquire_lock(ae_lock * lock)4252 void ae_acquire_lock(ae_lock *lock)
4253 {
4254 _lock *p;
4255 p = (_lock*)lock->lock_ptr;
4256 _ae_acquire_lock_raw(p);
4257 }
4258
4259
4260 /************************************************************************
4261 This function releases lock.
4262 ************************************************************************/
ae_release_lock(ae_lock * lock)4263 void ae_release_lock(ae_lock *lock)
4264 {
4265 _lock *p;
4266 p = (_lock*)lock->lock_ptr;
4267 _ae_release_lock_raw(p);
4268 }
4269
4270
4271 /************************************************************************
4272 This function frees ae_lock structure.
4273 ************************************************************************/
ae_free_lock(ae_lock * lock)4274 void ae_free_lock(ae_lock *lock)
4275 {
4276 _lock *p;
4277 AE_CRITICAL_ASSERT(!lock->eternal);
4278 p = (_lock*)lock->lock_ptr;
4279 if( p!=NULL )
4280 _ae_free_lock_raw(p);
4281 ae_db_free(&lock->db);
4282 }
4283
4284
4285 /************************************************************************
4286 This function creates ae_shared_pool structure.
4287
4288 dst destination shared pool, must be zero-filled
4289 already allocated, but not initialized.
4290 state pointer to current state structure. Can not be NULL.
4291 used for exception handling (say, allocation error results
4292 in longjmp call).
4293 make_automatic if true, vector will be registered in the current frame
4294 of the state structure;
4295
4296 Error handling:
4297 * on failure calls ae_break() with NULL state pointer. Usually it results
4298 in abort() call.
4299
4300 dst is assumed to be uninitialized, its fields are ignored.
4301 ************************************************************************/
ae_shared_pool_init(void * _dst,ae_state * state,ae_bool make_automatic)4302 void ae_shared_pool_init(void *_dst, ae_state *state, ae_bool make_automatic)
4303 {
4304 ae_shared_pool *dst;
4305
4306 AE_CRITICAL_ASSERT(state!=NULL);
4307 dst = (ae_shared_pool*)_dst;
4308 AE_CRITICAL_ASSERT(ae_check_zeros(dst,sizeof(*dst)));
4309
4310 /* init */
4311 dst->seed_object = NULL;
4312 dst->recycled_objects = NULL;
4313 dst->recycled_entries = NULL;
4314 dst->enumeration_counter = NULL;
4315 dst->size_of_object = 0;
4316 dst->init = NULL;
4317 dst->init_copy = NULL;
4318 dst->destroy = NULL;
4319 dst->frame_entry.deallocator = ae_shared_pool_destroy;
4320 dst->frame_entry.ptr = dst;
4321 if( make_automatic )
4322 ae_db_attach(&dst->frame_entry, state);
4323 ae_init_lock(&dst->pool_lock, state, ae_false);
4324 }
4325
4326
4327 /************************************************************************
4328 This function clears all dynamically allocated fields of the pool except
4329 for the lock. It does NOT try to acquire pool_lock.
4330
4331 NOTE: this function is NOT thread-safe, it is not protected by lock.
4332 ************************************************************************/
ae_shared_pool_internalclear(ae_shared_pool * dst)4333 static void ae_shared_pool_internalclear(ae_shared_pool *dst)
4334 {
4335 ae_shared_pool_entry *ptr, *tmp;
4336
4337 /* destroy seed */
4338 if( dst->seed_object!=NULL )
4339 {
4340 dst->destroy((void*)dst->seed_object);
4341 ae_free((void*)dst->seed_object);
4342 dst->seed_object = NULL;
4343 }
4344
4345 /* destroy recycled objects */
4346 for(ptr=dst->recycled_objects; ptr!=NULL;)
4347 {
4348 tmp = (ae_shared_pool_entry*)ptr->next_entry;
4349 dst->destroy(ptr->obj);
4350 ae_free(ptr->obj);
4351 ae_free(ptr);
4352 ptr = tmp;
4353 }
4354 dst->recycled_objects = NULL;
4355
4356 /* destroy recycled entries */
4357 for(ptr=dst->recycled_entries; ptr!=NULL;)
4358 {
4359 tmp = (ae_shared_pool_entry*)ptr->next_entry;
4360 ae_free(ptr);
4361 ptr = tmp;
4362 }
4363 dst->recycled_entries = NULL;
4364 }
4365
4366
/************************************************************************
This function creates copy of ae_shared_pool.

dst                 destination pool, must be zero-filled
src                 source pool
state               pointer to current state structure. Can not be NULL.
                    used for exception handling (say, allocation error results
                    in longjmp call).
make_automatic      if true, pool will be registered in the current frame
                    of the state structure;

dst is assumed to be uninitialized, its fields are ignored.

NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
      you should NOT call it when lock can be used by another thread.
************************************************************************/
void ae_shared_pool_init_copy(void *_dst, void *_src, ae_state *state, ae_bool make_automatic)
{
    ae_shared_pool *dst, *src;
    ae_shared_pool_entry *ptr;
    
    /* state!=NULL, allocation errors result in exception */
    /* AE_CRITICAL_ASSERT(state!=NULL); */
    
    dst = (ae_shared_pool*)_dst;
    src = (ae_shared_pool*)_src;
    ae_shared_pool_init(dst, state, make_automatic);
    
    /* copy non-pointer fields */
    dst->size_of_object = src->size_of_object;
    dst->init = src->init;
    dst->init_copy = src->init_copy;
    dst->destroy = src->destroy;
    
    /* copy seed object (zero-filled first so init_copy() starts from a
       well-defined state) */
    if( src->seed_object!=NULL )
    {
        dst->seed_object = ae_malloc(dst->size_of_object, state);
        memset(dst->seed_object, 0, dst->size_of_object);
        dst->init_copy(dst->seed_object, src->seed_object, state, ae_false);
    }
    
    /* copy recycled objects */
    dst->recycled_objects = NULL;
    for(ptr=src->recycled_objects; ptr!=NULL; ptr=(ae_shared_pool_entry*)ptr->next_entry)
    {
        ae_shared_pool_entry *tmp;
        
        /* allocate entry, immediately add to the recycled list
           (we do not want to lose it in case of future malloc failures) */
        tmp = (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
        memset(tmp, 0, sizeof(*tmp));
        tmp->next_entry = dst->recycled_objects;
        dst->recycled_objects = tmp;
        
        /* prepare place for object, init_copy() it */
        tmp->obj =  ae_malloc(dst->size_of_object, state);
        memset(tmp->obj, 0, dst->size_of_object);
        dst->init_copy(tmp->obj, ptr->obj, state, ae_false);
    }
    
    /* recycled entries are not copied because they do not store any information */
    dst->recycled_entries = NULL;
    
    /* enumeration counter is reset on copying */
    dst->enumeration_counter = NULL;
    
    /* initialize frame record */
    dst->frame_entry.deallocator = ae_shared_pool_destroy;
    dst->frame_entry.ptr = dst;
}
4438
4439
4440 /************************************************************************
4441 This function performs destruction of the pool object.
4442
4443 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4444 you should NOT call it when pool can be used by another thread.
4445 ************************************************************************/
ae_shared_pool_clear(void * _dst)4446 void ae_shared_pool_clear(void *_dst)
4447 {
4448 ae_shared_pool *dst = (ae_shared_pool*)_dst;
4449
4450 /* clear seed and lists */
4451 ae_shared_pool_internalclear(dst);
4452
4453 /* clear fields */
4454 dst->seed_object = NULL;
4455 dst->recycled_objects = NULL;
4456 dst->recycled_entries = NULL;
4457 dst->enumeration_counter = NULL;
4458 dst->size_of_object = 0;
4459 dst->init = NULL;
4460 dst->init_copy = NULL;
4461 dst->destroy = NULL;
4462 }
4463
ae_shared_pool_destroy(void * _dst)4464 void ae_shared_pool_destroy(void *_dst)
4465 {
4466 ae_shared_pool *dst = (ae_shared_pool*)_dst;
4467 ae_shared_pool_clear(_dst);
4468 ae_free_lock(&dst->pool_lock);
4469 }
4470
4471
4472 /************************************************************************
4473 This function returns True, if internal seed object was set. It returns
4474 False for un-seeded pool.
4475
4476 dst destination pool (initialized by constructor function)
4477
4478 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4479 you should NOT call it when lock can be used by another thread.
4480 ************************************************************************/
ae_shared_pool_is_initialized(void * _dst)4481 ae_bool ae_shared_pool_is_initialized(void *_dst)
4482 {
4483 ae_shared_pool *dst = (ae_shared_pool*)_dst;
4484 return dst->seed_object!=NULL;
4485 }
4486
4487
4488 /************************************************************************
4489 This function sets internal seed object. All objects owned by the pool
4490 (current seed object, recycled objects) are automatically freed.
4491
4492 dst destination pool (initialized by constructor function)
4493 seed_object new seed object
4494 size_of_object sizeof(), used to allocate memory
4495 init constructor function
4496 init_copy copy constructor
4497 clear destructor function
4498 state ALGLIB environment state
4499
4500 NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
4501 you should NOT call it when lock can be used by another thread.
4502 ************************************************************************/
ae_shared_pool_set_seed(ae_shared_pool * dst,void * seed_object,ae_int_t size_of_object,void (* init)(void * dst,ae_state * state,ae_bool make_automatic),void (* init_copy)(void * dst,void * src,ae_state * state,ae_bool make_automatic),void (* destroy)(void * ptr),ae_state * state)4503 void ae_shared_pool_set_seed(
4504 ae_shared_pool *dst,
4505 void *seed_object,
4506 ae_int_t size_of_object,
4507 void (*init)(void* dst, ae_state* state, ae_bool make_automatic),
4508 void (*init_copy)(void* dst, void* src, ae_state* state, ae_bool make_automatic),
4509 void (*destroy)(void* ptr),
4510 ae_state *state)
4511 {
4512 /* state!=NULL, allocation errors result in exception */
4513 AE_CRITICAL_ASSERT(state!=NULL);
4514
4515 /* destroy internal objects */
4516 ae_shared_pool_internalclear(dst);
4517
4518 /* set non-pointer fields */
4519 dst->size_of_object = size_of_object;
4520 dst->init = init;
4521 dst->init_copy = init_copy;
4522 dst->destroy = destroy;
4523
4524 /* set seed object */
4525 dst->seed_object = ae_malloc(size_of_object, state);
4526 memset(dst->seed_object, 0, size_of_object);
4527 init_copy(dst->seed_object, seed_object, state, ae_false);
4528 }
4529
4530
/************************************************************************
This function retrieves a copy of the seed object from the pool and
stores it to target smart pointer ptr.

In case target pointer owns non-NULL value, it is deallocated before
storing value retrieved from pool. Target pointer becomes owner of the
value which was retrieved from pool.

pool                pool
pptr                pointer to ae_smart_ptr structure
state               ALGLIB environment state

NOTE: this function IS thread-safe. It acquires pool lock during its
      operation and can be used simultaneously from several threads.
************************************************************************/
void ae_shared_pool_retrieve(
    ae_shared_pool  *pool,
    ae_smart_ptr    *pptr,
    ae_state        *state)
{
    void *new_obj;
    
    /* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
    AE_CRITICAL_ASSERT(state!=NULL);
    
    /* assert that pool was seeded */
    ae_assert(
        pool->seed_object!=NULL,
        "ALGLIB: shared pool is not seeded, PoolRetrieve() failed",
        state);
    
    /* acquire lock */
    ae_acquire_lock(&pool->pool_lock);
    
    /* try to reuse recycled objects */
    if( pool->recycled_objects!=NULL )
    {
        ae_shared_pool_entry *result;
        
        /* retrieve entry/object from list of recycled objects */
        result = pool->recycled_objects;
        pool->recycled_objects = (ae_shared_pool_entry*)pool->recycled_objects->next_entry;
        new_obj = result->obj;
        result->obj = NULL;
        
        /* move entry to list of recycled entries, so the node itself
           can be reused by a later PoolRecycle() without allocation */
        result->next_entry = pool->recycled_entries;
        pool->recycled_entries = result;
        
        /* release lock */
        ae_release_lock(&pool->pool_lock);
        
        /* assign object to smart pointer; pptr takes ownership and will
           use pool->destroy as deallocator */
        ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
        return;
    }
        
    /* release lock; we do not need it anymore because copy constructor does not modify source variable */
    ae_release_lock(&pool->pool_lock);
    
    /* create new object from seed, immediately assign  object  to  smart
       pointer (do not want to lose it in case of future failures) */
    new_obj = ae_malloc(pool->size_of_object, state);
    memset(new_obj, 0, pool->size_of_object);
    ae_smart_ptr_assign(pptr, new_obj, ae_true, ae_true, pool->destroy);
    
    /* perform actual copying; before this line smartptr points to zero-filled instance */
    pool->init_copy(new_obj, pool->seed_object, state, ae_false);
}
4600
4601
/************************************************************************
This function recycles object owned by smart  pointer  by  moving  it  to
internal storage of the shared pool.

Source pointer must own the object. After function is over, it owns NULL
pointer.

pool                pool
pptr                pointer to ae_smart_ptr structure
state               ALGLIB environment state

NOTE: this function IS thread-safe. It acquires pool lock during its
      operation and can be used simultaneously from several threads.
************************************************************************/
void ae_shared_pool_recycle(
    ae_shared_pool  *pool,
    ae_smart_ptr    *pptr,
    ae_state        *state)
{
    ae_shared_pool_entry *new_entry;
    
    /* state!=NULL, allocation errors are handled by throwing exception from ae_malloc() */
    AE_CRITICAL_ASSERT(state!=NULL);
    
    /* assert that pool was seeded */
    ae_assert(
        pool->seed_object!=NULL,
        "ALGLIB: shared pool is not seeded, PoolRecycle() failed",
        state);
    
    /* assert that pointer non-null and owns the object */
    ae_assert(pptr->is_owner,  "ALGLIB: pptr in ae_shared_pool_recycle() does not own its pointer", state);
    ae_assert(pptr->ptr!=NULL, "ALGLIB: pptr in ae_shared_pool_recycle() is NULL", state);
    
    /* acquire lock */
    ae_acquire_lock(&pool->pool_lock);
    
    /* acquire shared pool entry (reuse one from recycled_entries or allocate new one) */
    if( pool->recycled_entries!=NULL )
    {
        /* reuse previously allocated entry */
        new_entry = pool->recycled_entries;
        pool->recycled_entries = (ae_shared_pool_entry*)new_entry->next_entry;
    }
    else
    {
        /*
         * Allocate memory for new entry.
         *
         * NOTE: we release pool lock during allocation because ae_malloc() may raise
         *       exception and we do not want our pool to be left in the locked state.
         */
        ae_release_lock(&pool->pool_lock);
        new_entry =  (ae_shared_pool_entry*)ae_malloc(sizeof(ae_shared_pool_entry), state);
        ae_acquire_lock(&pool->pool_lock);
    }
    
    /* add object to the list of recycled objects */
    new_entry->obj = pptr->ptr;
    new_entry->next_entry = pool->recycled_objects;
    pool->recycled_objects = new_entry;
    
    /* release lock object */
    ae_release_lock(&pool->pool_lock);
    
    /* release source pointer: pool now owns the object, pptr holds NULL */
    ae_smart_ptr_release(pptr);
}
4670
4671
/************************************************************************
This function clears the internal list of recycled objects, but does not
change the seed object managed by the pool.

pool                pool
state               ALGLIB environment state

NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
      you should NOT call it when the lock can be used by another thread.
************************************************************************/
ae_shared_pool_clear_recycled(ae_shared_pool * pool,ae_state * state)4682 void ae_shared_pool_clear_recycled(
4683 ae_shared_pool *pool,
4684 ae_state *state)
4685 {
4686 ae_shared_pool_entry *ptr, *tmp;
4687
4688 /* clear recycled objects */
4689 for(ptr=pool->recycled_objects; ptr!=NULL;)
4690 {
4691 tmp = (ae_shared_pool_entry*)ptr->next_entry;
4692 pool->destroy(ptr->obj);
4693 ae_free(ptr->obj);
4694 ae_free(ptr);
4695 ptr = tmp;
4696 }
4697 pool->recycled_objects = NULL;
4698 }
4699
4700
/************************************************************************
This function allows to enumerate recycled elements of the shared pool.
It stores a pointer to the first recycled object in the smart pointer.

IMPORTANT:
* in case the target pointer owns a non-NULL value, it is deallocated
  before storing the value retrieved from the pool.
* the recycled object IS NOT removed from the pool
* the target pointer DOES NOT become owner of the new value
* this function IS NOT thread-safe
* you SHOULD NOT modify the shared pool during enumeration (although you
  can modify the state of the objects retrieved from the pool)
* in case there are no recycled objects in the pool, NULL is stored to pptr
* in case the pool is not seeded, NULL is stored to pptr

pool                pool
pptr                pointer to ae_smart_ptr structure
state               ALGLIB environment state
************************************************************************/
ae_shared_pool_first_recycled(ae_shared_pool * pool,ae_smart_ptr * pptr,ae_state * state)4720 void ae_shared_pool_first_recycled(
4721 ae_shared_pool *pool,
4722 ae_smart_ptr *pptr,
4723 ae_state *state)
4724 {
4725 /* modify internal enumeration counter */
4726 pool->enumeration_counter = pool->recycled_objects;
4727
4728 /* exit on empty list */
4729 if( pool->enumeration_counter==NULL )
4730 {
4731 ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4732 return;
4733 }
4734
4735 /* assign object to smart pointer */
4736 ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
4737 }
4738
4739
/************************************************************************
This function allows to enumerate recycled elements of the shared pool.
It stores a pointer to the next recycled object in the smart pointer.

IMPORTANT:
* in case the target pointer owns a non-NULL value, it is deallocated
  before storing the value retrieved from the pool.
* the recycled object IS NOT removed from the pool
* the target pointer DOES NOT become owner of the new value
* this function IS NOT thread-safe
* you SHOULD NOT modify the shared pool during enumeration (although you
  can modify the state of the objects retrieved from the pool)
* in case there are no recycled objects left in the pool, NULL is stored.
* in case the pool is not seeded, NULL is stored.

pool                pool
pptr                pointer to ae_smart_ptr structure
state               ALGLIB environment state
************************************************************************/
ae_shared_pool_next_recycled(ae_shared_pool * pool,ae_smart_ptr * pptr,ae_state * state)4759 void ae_shared_pool_next_recycled(
4760 ae_shared_pool *pool,
4761 ae_smart_ptr *pptr,
4762 ae_state *state)
4763 {
4764 /* exit on end of list */
4765 if( pool->enumeration_counter==NULL )
4766 {
4767 ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4768 return;
4769 }
4770
4771 /* modify internal enumeration counter */
4772 pool->enumeration_counter = (ae_shared_pool_entry*)pool->enumeration_counter->next_entry;
4773
4774 /* exit on empty list */
4775 if( pool->enumeration_counter==NULL )
4776 {
4777 ae_smart_ptr_assign(pptr, NULL, ae_false, ae_false, NULL);
4778 return;
4779 }
4780
4781 /* assign object to smart pointer */
4782 ae_smart_ptr_assign(pptr, pool->enumeration_counter->obj, ae_false, ae_false, pool->destroy);
4783 }
4784
4785
4786
/************************************************************************
This function clears the internal list of recycled objects and the seed
object. However, the pool can still be used (after initialization with
another seed).

pool                pool
state               ALGLIB environment state

NOTE: this function is NOT thread-safe. It does not acquire pool lock, so
      you should NOT call it when the lock can be used by another thread.
************************************************************************/
ae_shared_pool_reset(ae_shared_pool * pool,ae_state * state)4797 void ae_shared_pool_reset(
4798 ae_shared_pool *pool,
4799 ae_state *state)
4800 {
4801 /* clear seed and lists */
4802 ae_shared_pool_internalclear(pool);
4803
4804 /* clear fields */
4805 pool->seed_object = NULL;
4806 pool->recycled_objects = NULL;
4807 pool->recycled_entries = NULL;
4808 pool->enumeration_counter = NULL;
4809 pool->size_of_object = 0;
4810 pool->init = NULL;
4811 pool->init_copy = NULL;
4812 pool->destroy = NULL;
4813 }
4814
4815
/************************************************************************
This function initializes the serializer
************************************************************************/
ae_serializer_init(ae_serializer * serializer)4819 void ae_serializer_init(ae_serializer *serializer)
4820 {
4821 serializer->mode = AE_SM_DEFAULT;
4822 serializer->entries_needed = 0;
4823 serializer->bytes_asked = 0;
4824 }
4825
void ae_serializer_clear(ae_serializer *serializer)
{
    /* intentionally empty: ae_serializer owns no dynamically allocated state */
}
4829
ae_serializer_alloc_start(ae_serializer * serializer)4830 void ae_serializer_alloc_start(ae_serializer *serializer)
4831 {
4832 serializer->entries_needed = 0;
4833 serializer->bytes_asked = 0;
4834 serializer->mode = AE_SM_ALLOC;
4835 }
4836
ae_serializer_alloc_entry(ae_serializer * serializer)4837 void ae_serializer_alloc_entry(ae_serializer *serializer)
4838 {
4839 serializer->entries_needed++;
4840 }
4841
ae_serializer_alloc_byte_array(ae_serializer * serializer,ae_vector * bytes)4842 void ae_serializer_alloc_byte_array(ae_serializer *serializer, ae_vector *bytes)
4843 {
4844 ae_int_t n;
4845 n = bytes->cnt;
4846 n = n/8 + (n%8>0 ? 1 : 0);
4847 serializer->entries_needed += 1+n;
4848 }
4849
/************************************************************************
After the allocation phase is done, this function returns the required
size of the output string buffer (including the trailing zero symbol).
The actual size of the data being stored can be a few characters smaller
than requested.
************************************************************************/
ae_serializer_get_alloc_size(ae_serializer * serializer)4855 ae_int_t ae_serializer_get_alloc_size(ae_serializer *serializer)
4856 {
4857 ae_int_t rows, lastrowsize, result;
4858
4859 serializer->mode = AE_SM_READY2S;
4860
4861 /* if no entries needes (degenerate case) */
4862 if( serializer->entries_needed==0 )
4863 {
4864 serializer->bytes_asked = 4; /* a pair of chars for \r\n, one for dot, one for trailing zero */
4865 return serializer->bytes_asked;
4866 }
4867
4868 /* non-degenerate case */
4869 rows = serializer->entries_needed/AE_SER_ENTRIES_PER_ROW;
4870 lastrowsize = AE_SER_ENTRIES_PER_ROW;
4871 if( serializer->entries_needed%AE_SER_ENTRIES_PER_ROW )
4872 {
4873 lastrowsize = serializer->entries_needed%AE_SER_ENTRIES_PER_ROW;
4874 rows++;
4875 }
4876
4877 /* calculate result size */
4878 result = ((rows-1)*AE_SER_ENTRIES_PER_ROW+lastrowsize)*AE_SER_ENTRY_LENGTH; /* data size */
4879 result += (rows-1)*(AE_SER_ENTRIES_PER_ROW-1)+(lastrowsize-1); /* space symbols */
4880 result += rows*2; /* newline symbols */
4881 result += 1; /* trailing dot */
4882 result += 1; /* trailing zero */
4883 serializer->bytes_asked = result;
4884 return result;
4885 }
4886
4887 #ifdef AE_USE_CPP_SERIALIZATION
ae_serializer_sstart_str(ae_serializer * serializer,std::string * buf)4888 void ae_serializer_sstart_str(ae_serializer *serializer, std::string *buf)
4889 {
4890 serializer->mode = AE_SM_TO_CPPSTRING;
4891 serializer->out_cppstr = buf;
4892 serializer->entries_saved = 0;
4893 serializer->bytes_written = 0;
4894 }
4895
ae_serializer_ustart_str(ae_serializer * serializer,const std::string * buf)4896 void ae_serializer_ustart_str(ae_serializer *serializer, const std::string *buf)
4897 {
4898 serializer->mode = AE_SM_FROM_STRING;
4899 serializer->in_str = buf->c_str();
4900 }
4901
static char cpp_writer(const char *p_string, ae_int_t aux)
{
    /* aux carries a std::ostream* encoded as an integer (see ae_serializer_sstart_stream) */
    std::ostream *out = reinterpret_cast<std::ostream*>(aux);
    out->write(p_string, strlen(p_string));
    /* non-zero return reports failure to the serializer core */
    if( out->bad() )
        return 1;
    return 0;
}
4908
cpp_reader(ae_int_t aux,ae_int_t cnt,char * p_buf)4909 static char cpp_reader(ae_int_t aux, ae_int_t cnt, char *p_buf)
4910 {
4911 std::istream *stream = reinterpret_cast<std::istream*>(aux);
4912 int c;
4913 if( cnt<=0 )
4914 return 1; /* unexpected cnt */
4915 for(;;)
4916 {
4917 c = stream->get();
4918 if( c<0 || c>255 )
4919 return 1; /* failure! */
4920 if( c!=' ' && c!='\t' && c!='\n' && c!='\r' )
4921 break;
4922 }
4923 p_buf[0] = (char)c;
4924 for(int k=1; k<cnt; k++)
4925 {
4926 c = stream->get();
4927 if( c<0 || c>255 || c==' ' || c=='\t' || c=='\n' || c=='\r' )
4928 return 1; /* failure! */
4929 p_buf[k] = (char)c;
4930 }
4931 p_buf[cnt] = 0;
4932 return 0; /* success */
4933 }
4934
ae_serializer_sstart_stream(ae_serializer * serializer,std::ostream * stream)4935 void ae_serializer_sstart_stream(ae_serializer *serializer, std::ostream *stream)
4936 {
4937 serializer->mode = AE_SM_TO_STREAM;
4938 serializer->stream_writer = cpp_writer;
4939 serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
4940 serializer->entries_saved = 0;
4941 serializer->bytes_written = 0;
4942 }
4943
ae_serializer_ustart_stream(ae_serializer * serializer,const std::istream * stream)4944 void ae_serializer_ustart_stream(ae_serializer *serializer, const std::istream *stream)
4945 {
4946 serializer->mode = AE_SM_FROM_STREAM;
4947 serializer->stream_reader = cpp_reader;
4948 serializer->stream_aux = reinterpret_cast<ae_int_t>(stream);
4949 }
4950 #endif
4951
ae_serializer_sstart_str(ae_serializer * serializer,char * buf)4952 void ae_serializer_sstart_str(ae_serializer *serializer, char *buf)
4953 {
4954 serializer->mode = AE_SM_TO_STRING;
4955 serializer->out_str = buf;
4956 serializer->out_str[0] = 0;
4957 serializer->entries_saved = 0;
4958 serializer->bytes_written = 0;
4959 }
4960
ae_serializer_ustart_str(ae_serializer * serializer,const char * buf)4961 void ae_serializer_ustart_str(ae_serializer *serializer, const char *buf)
4962 {
4963 serializer->mode = AE_SM_FROM_STRING;
4964 serializer->in_str = buf;
4965 }
4966
ae_serializer_sstart_stream(ae_serializer * serializer,ae_stream_writer writer,ae_int_t aux)4967 void ae_serializer_sstart_stream(ae_serializer *serializer, ae_stream_writer writer, ae_int_t aux)
4968 {
4969 serializer->mode = AE_SM_TO_STREAM;
4970 serializer->stream_writer = writer;
4971 serializer->stream_aux = aux;
4972 serializer->entries_saved = 0;
4973 serializer->bytes_written = 0;
4974 }
4975
ae_serializer_ustart_stream(ae_serializer * serializer,ae_stream_reader reader,ae_int_t aux)4976 void ae_serializer_ustart_stream(ae_serializer *serializer, ae_stream_reader reader, ae_int_t aux)
4977 {
4978 serializer->mode = AE_SM_FROM_STREAM;
4979 serializer->stream_reader = reader;
4980 serializer->stream_aux = aux;
4981 }
4982
ae_serializer_serialize_bool(ae_serializer * serializer,ae_bool v,ae_state * state)4983 void ae_serializer_serialize_bool(ae_serializer *serializer, ae_bool v, ae_state *state)
4984 {
4985 char buf[AE_SER_ENTRY_LENGTH+2+1];
4986 const char *emsg = "ALGLIB: serialization integrity error";
4987 ae_int_t bytes_appended;
4988
4989 /* prepare serialization, check consistency */
4990 ae_bool2str(v, buf, state);
4991 serializer->entries_saved++;
4992 if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
4993 strcat(buf, " ");
4994 else
4995 strcat(buf, "\r\n");
4996 bytes_appended = (ae_int_t)strlen(buf);
4997 ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
4998 serializer->bytes_written += bytes_appended;
4999
5000 /* append to buffer */
5001 #ifdef AE_USE_CPP_SERIALIZATION
5002 if( serializer->mode==AE_SM_TO_CPPSTRING )
5003 {
5004 *(serializer->out_cppstr) += buf;
5005 return;
5006 }
5007 #endif
5008 if( serializer->mode==AE_SM_TO_STRING )
5009 {
5010 strcat(serializer->out_str, buf);
5011 serializer->out_str += bytes_appended;
5012 return;
5013 }
5014 if( serializer->mode==AE_SM_TO_STREAM )
5015 {
5016 ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5017 return;
5018 }
5019 ae_break(state, ERR_ASSERTION_FAILED, emsg);
5020 }
5021
ae_serializer_serialize_int(ae_serializer * serializer,ae_int_t v,ae_state * state)5022 void ae_serializer_serialize_int(ae_serializer *serializer, ae_int_t v, ae_state *state)
5023 {
5024 char buf[AE_SER_ENTRY_LENGTH+2+1];
5025 const char *emsg = "ALGLIB: serialization integrity error";
5026 ae_int_t bytes_appended;
5027
5028 /* prepare serialization, check consistency */
5029 ae_int2str(v, buf, state);
5030 serializer->entries_saved++;
5031 if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5032 strcat(buf, " ");
5033 else
5034 strcat(buf, "\r\n");
5035 bytes_appended = (ae_int_t)strlen(buf);
5036 ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5037 serializer->bytes_written += bytes_appended;
5038
5039 /* append to buffer */
5040 #ifdef AE_USE_CPP_SERIALIZATION
5041 if( serializer->mode==AE_SM_TO_CPPSTRING )
5042 {
5043 *(serializer->out_cppstr) += buf;
5044 return;
5045 }
5046 #endif
5047 if( serializer->mode==AE_SM_TO_STRING )
5048 {
5049 strcat(serializer->out_str, buf);
5050 serializer->out_str += bytes_appended;
5051 return;
5052 }
5053 if( serializer->mode==AE_SM_TO_STREAM )
5054 {
5055 ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5056 return;
5057 }
5058 ae_break(state, ERR_ASSERTION_FAILED, emsg);
5059 }
5060
ae_serializer_serialize_int64(ae_serializer * serializer,ae_int64_t v,ae_state * state)5061 void ae_serializer_serialize_int64(ae_serializer *serializer, ae_int64_t v, ae_state *state)
5062 {
5063 char buf[AE_SER_ENTRY_LENGTH+2+1];
5064 const char *emsg = "ALGLIB: serialization integrity error";
5065 ae_int_t bytes_appended;
5066
5067 /* prepare serialization, check consistency */
5068 ae_int642str(v, buf, state);
5069 serializer->entries_saved++;
5070 if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5071 strcat(buf, " ");
5072 else
5073 strcat(buf, "\r\n");
5074 bytes_appended = (ae_int_t)strlen(buf);
5075 ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5076 serializer->bytes_written += bytes_appended;
5077
5078 /* append to buffer */
5079 #ifdef AE_USE_CPP_SERIALIZATION
5080 if( serializer->mode==AE_SM_TO_CPPSTRING )
5081 {
5082 *(serializer->out_cppstr) += buf;
5083 return;
5084 }
5085 #endif
5086 if( serializer->mode==AE_SM_TO_STRING )
5087 {
5088 strcat(serializer->out_str, buf);
5089 serializer->out_str += bytes_appended;
5090 return;
5091 }
5092 if( serializer->mode==AE_SM_TO_STREAM )
5093 {
5094 ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5095 return;
5096 }
5097 ae_break(state, ERR_ASSERTION_FAILED, emsg);
5098 }
5099
ae_serializer_serialize_double(ae_serializer * serializer,double v,ae_state * state)5100 void ae_serializer_serialize_double(ae_serializer *serializer, double v, ae_state *state)
5101 {
5102 char buf[AE_SER_ENTRY_LENGTH+2+1];
5103 const char *emsg = "ALGLIB: serialization integrity error";
5104 ae_int_t bytes_appended;
5105
5106 /* prepare serialization, check consistency */
5107 ae_double2str(v, buf, state);
5108 serializer->entries_saved++;
5109 if( serializer->entries_saved%AE_SER_ENTRIES_PER_ROW )
5110 strcat(buf, " ");
5111 else
5112 strcat(buf, "\r\n");
5113 bytes_appended = (ae_int_t)strlen(buf);
5114 ae_assert(serializer->bytes_written+bytes_appended<serializer->bytes_asked, emsg, state); /* strict "less" because we need space for trailing zero */
5115 serializer->bytes_written += bytes_appended;
5116
5117 /* append to buffer */
5118 #ifdef AE_USE_CPP_SERIALIZATION
5119 if( serializer->mode==AE_SM_TO_CPPSTRING )
5120 {
5121 *(serializer->out_cppstr) += buf;
5122 return;
5123 }
5124 #endif
5125 if( serializer->mode==AE_SM_TO_STRING )
5126 {
5127 strcat(serializer->out_str, buf);
5128 serializer->out_str += bytes_appended;
5129 return;
5130 }
5131 if( serializer->mode==AE_SM_TO_STREAM )
5132 {
5133 ae_assert(serializer->stream_writer(buf, serializer->stream_aux)==0, "serializer: error writing to stream", state);
5134 return;
5135 }
5136 ae_break(state, ERR_ASSERTION_FAILED, emsg);
5137 }
5138
ae_serializer_serialize_byte_array(ae_serializer * serializer,ae_vector * bytes,ae_state * state)5139 void ae_serializer_serialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
5140 {
5141 ae_int_t chunk_size, entries_count;
5142
5143 chunk_size = 8;
5144
5145 /* save array length */
5146 ae_serializer_serialize_int(serializer, bytes->cnt, state);
5147
5148 /* determine entries count */
5149 entries_count = bytes->cnt/chunk_size + (bytes->cnt%chunk_size>0 ? 1 : 0);
5150 for(ae_int_t eidx=0; eidx<entries_count; eidx++)
5151 {
5152 ae_int64_t tmpi;
5153 ae_int_t elen;
5154 elen = bytes->cnt - eidx*chunk_size;
5155 elen = elen>chunk_size ? chunk_size : elen;
5156 memset(&tmpi, 0, sizeof(tmpi));
5157 memmove(&tmpi, bytes->ptr.p_ubyte + eidx*chunk_size, elen);
5158 ae_serializer_serialize_int64(serializer, tmpi, state);
5159 }
5160 }
5161
ae_serializer_unserialize_bool(ae_serializer * serializer,ae_bool * v,ae_state * state)5162 void ae_serializer_unserialize_bool(ae_serializer *serializer, ae_bool *v, ae_state *state)
5163 {
5164 if( serializer->mode==AE_SM_FROM_STRING )
5165 {
5166 *v = ae_str2bool(serializer->in_str, state, &serializer->in_str);
5167 return;
5168 }
5169 if( serializer->mode==AE_SM_FROM_STREAM )
5170 {
5171 char buf[AE_SER_ENTRY_LENGTH+2+1];
5172 const char *p = buf;
5173 ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5174 *v = ae_str2bool(buf, state, &p);
5175 return;
5176 }
5177 ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5178 }
5179
ae_serializer_unserialize_int(ae_serializer * serializer,ae_int_t * v,ae_state * state)5180 void ae_serializer_unserialize_int(ae_serializer *serializer, ae_int_t *v, ae_state *state)
5181 {
5182 if( serializer->mode==AE_SM_FROM_STRING )
5183 {
5184 *v = ae_str2int(serializer->in_str, state, &serializer->in_str);
5185 return;
5186 }
5187 if( serializer->mode==AE_SM_FROM_STREAM )
5188 {
5189 char buf[AE_SER_ENTRY_LENGTH+2+1];
5190 const char *p = buf;
5191 ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5192 *v = ae_str2int(buf, state, &p);
5193 return;
5194 }
5195 ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5196 }
5197
ae_serializer_unserialize_int64(ae_serializer * serializer,ae_int64_t * v,ae_state * state)5198 void ae_serializer_unserialize_int64(ae_serializer *serializer, ae_int64_t *v, ae_state *state)
5199 {
5200 if( serializer->mode==AE_SM_FROM_STRING )
5201 {
5202 *v = ae_str2int64(serializer->in_str, state, &serializer->in_str);
5203 return;
5204 }
5205 if( serializer->mode==AE_SM_FROM_STREAM )
5206 {
5207 char buf[AE_SER_ENTRY_LENGTH+2+1];
5208 const char *p = buf;
5209 ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5210 *v = ae_str2int64(buf, state, &p);
5211 return;
5212 }
5213 ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5214 }
5215
ae_serializer_unserialize_double(ae_serializer * serializer,double * v,ae_state * state)5216 void ae_serializer_unserialize_double(ae_serializer *serializer, double *v, ae_state *state)
5217 {
5218 if( serializer->mode==AE_SM_FROM_STRING )
5219 {
5220 *v = ae_str2double(serializer->in_str, state, &serializer->in_str);
5221 return;
5222 }
5223 if( serializer->mode==AE_SM_FROM_STREAM )
5224 {
5225 char buf[AE_SER_ENTRY_LENGTH+2+1];
5226 const char *p = buf;
5227 ae_assert(serializer->stream_reader(serializer->stream_aux, AE_SER_ENTRY_LENGTH, buf)==0, "serializer: error reading from stream", state);
5228 *v = ae_str2double(buf, state, &p);
5229 return;
5230 }
5231 ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
5232 }
5233
ae_serializer_unserialize_byte_array(ae_serializer * serializer,ae_vector * bytes,ae_state * state)5234 void ae_serializer_unserialize_byte_array(ae_serializer *serializer, ae_vector *bytes, ae_state *state)
5235 {
5236 ae_int_t chunk_size, n, entries_count;
5237
5238 chunk_size = 8;
5239
5240 /* read array length, allocate output */
5241 ae_serializer_unserialize_int(serializer, &n, state);
5242 ae_vector_set_length(bytes, n, state);
5243
5244 /* determine entries count, read entries */
5245 entries_count = n/chunk_size + (n%chunk_size>0 ? 1 : 0);
5246 for(ae_int_t eidx=0; eidx<entries_count; eidx++)
5247 {
5248 ae_int_t elen;
5249 ae_int64_t tmp64;
5250
5251 elen = n-eidx*chunk_size;
5252 elen = elen>chunk_size ? chunk_size : elen;
5253 ae_serializer_unserialize_int64(serializer, &tmp64, state);
5254 memmove(bytes->ptr.p_ubyte+eidx*chunk_size, &tmp64, elen);
5255 }
5256 }
5257
/************************************************************************
Finalizes (un)serialization. For output modes the trailing dot is
appended to the target; for input streams it is consumed and checked;
for input strings it is ignored (pre-3.11 compatibility, see below).
************************************************************************/
void ae_serializer_stop(ae_serializer *serializer, ae_state *state)
{
#ifdef AE_USE_CPP_SERIALIZATION
    if( serializer->mode==AE_SM_TO_CPPSTRING )
    {
        ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state);/* strict "less" because we need space for trailing zero */
        serializer->bytes_written++;
        *(serializer->out_cppstr) += ".";
        return;
    }
#endif
    if( serializer->mode==AE_SM_TO_STRING )
    {
        ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
        serializer->bytes_written++;
        strcat(serializer->out_str, ".");
        serializer->out_str += 1;
        return;
    }
    if( serializer->mode==AE_SM_TO_STREAM )
    {
        ae_assert(serializer->bytes_written+1<serializer->bytes_asked, "ae_serializer: integrity check failed", state); /* strict "less" because we need space for trailing zero */
        serializer->bytes_written++;
        ae_assert(serializer->stream_writer(".", serializer->stream_aux)==0, "ae_serializer: error writing to stream", state);
        return;
    }
    if( serializer->mode==AE_SM_FROM_STRING )
    {
        /*
         * because input string may be from pre-3.11 serializer,
         * which does not include trailing dot, we do not test
         * string for presence of "." symbol. Anyway, because string
         * is not stream, we do not have to read ALL trailing symbols.
         */
        return;
    }
    if( serializer->mode==AE_SM_FROM_STREAM )
    {
        /*
         * Read trailing dot, perform integrity check.
         * buf has room for one character plus the reader's terminating zero.
         */
        char buf[2];
        ae_assert(serializer->stream_reader(serializer->stream_aux, 1, buf)==0, "ae_serializer: error reading from stream", state);
        ae_assert(buf[0]=='.', "ae_serializer: trailing . is not found in the stream", state);
        return;
    }
    /* unknown mode: report integrity failure */
    ae_break(state, ERR_ASSERTION_FAILED, "ae_serializer: integrity check failed");
}
5306
5307
/************************************************************************
Complex math functions
************************************************************************/
ae_complex_from_i(ae_int_t v)5311 ae_complex ae_complex_from_i(ae_int_t v)
5312 {
5313 ae_complex r;
5314 r.x = (double)v;
5315 r.y = 0.0;
5316 return r;
5317 }
5318
ae_complex_from_d(double v)5319 ae_complex ae_complex_from_d(double v)
5320 {
5321 ae_complex r;
5322 r.x = v;
5323 r.y = 0.0;
5324 return r;
5325 }
5326
ae_c_neg(ae_complex lhs)5327 ae_complex ae_c_neg(ae_complex lhs)
5328 {
5329 ae_complex result;
5330 result.x = -lhs.x;
5331 result.y = -lhs.y;
5332 return result;
5333 }
5334
ae_c_conj(ae_complex lhs,ae_state * state)5335 ae_complex ae_c_conj(ae_complex lhs, ae_state *state)
5336 {
5337 ae_complex result;
5338 result.x = +lhs.x;
5339 result.y = -lhs.y;
5340 return result;
5341 }
5342
ae_c_sqr(ae_complex lhs,ae_state * state)5343 ae_complex ae_c_sqr(ae_complex lhs, ae_state *state)
5344 {
5345 ae_complex result;
5346 result.x = lhs.x*lhs.x-lhs.y*lhs.y;
5347 result.y = 2*lhs.x*lhs.y;
5348 return result;
5349 }
5350
ae_c_abs(ae_complex z,ae_state * state)5351 double ae_c_abs(ae_complex z, ae_state *state)
5352 {
5353 double w;
5354 double xabs;
5355 double yabs;
5356 double v;
5357
5358 xabs = fabs(z.x);
5359 yabs = fabs(z.y);
5360 w = xabs>yabs ? xabs : yabs;
5361 v = xabs<yabs ? xabs : yabs;
5362 if( v==0 )
5363 return w;
5364 else
5365 {
5366 double t = v/w;
5367 return w*sqrt(1+t*t);
5368 }
5369 }
5370
ae_c_eq(ae_complex lhs,ae_complex rhs)5371 ae_bool ae_c_eq(ae_complex lhs, ae_complex rhs)
5372 {
5373 volatile double x1 = lhs.x;
5374 volatile double x2 = rhs.x;
5375 volatile double y1 = lhs.y;
5376 volatile double y2 = rhs.y;
5377 return x1==x2 && y1==y2;
5378 }
5379
ae_c_neq(ae_complex lhs,ae_complex rhs)5380 ae_bool ae_c_neq(ae_complex lhs, ae_complex rhs)
5381 {
5382 volatile double x1 = lhs.x;
5383 volatile double x2 = rhs.x;
5384 volatile double y1 = lhs.y;
5385 volatile double y2 = rhs.y;
5386 return x1!=x2 || y1!=y2;
5387 }
5388
ae_c_add(ae_complex lhs,ae_complex rhs)5389 ae_complex ae_c_add(ae_complex lhs, ae_complex rhs)
5390 {
5391 ae_complex result;
5392 result.x = lhs.x+rhs.x;
5393 result.y = lhs.y+rhs.y;
5394 return result;
5395 }
5396
ae_c_mul(ae_complex lhs,ae_complex rhs)5397 ae_complex ae_c_mul(ae_complex lhs, ae_complex rhs)
5398 {
5399 ae_complex result;
5400 result.x = lhs.x*rhs.x-lhs.y*rhs.y;
5401 result.y = lhs.x*rhs.y+lhs.y*rhs.x;
5402 return result;
5403 }
5404
ae_c_sub(ae_complex lhs,ae_complex rhs)5405 ae_complex ae_c_sub(ae_complex lhs, ae_complex rhs)
5406 {
5407 ae_complex result;
5408 result.x = lhs.x-rhs.x;
5409 result.y = lhs.y-rhs.y;
5410 return result;
5411 }
5412
ae_c_div(ae_complex lhs,ae_complex rhs)5413 ae_complex ae_c_div(ae_complex lhs, ae_complex rhs)
5414 {
5415 ae_complex result;
5416 double e;
5417 double f;
5418 if( fabs(rhs.y)<fabs(rhs.x) )
5419 {
5420 e = rhs.y/rhs.x;
5421 f = rhs.x+rhs.y*e;
5422 result.x = (lhs.x+lhs.y*e)/f;
5423 result.y = (lhs.y-lhs.x*e)/f;
5424 }
5425 else
5426 {
5427 e = rhs.x/rhs.y;
5428 f = rhs.y+rhs.x*e;
5429 result.x = (lhs.y+lhs.x*e)/f;
5430 result.y = (-lhs.x+lhs.y*e)/f;
5431 }
5432 return result;
5433 }
5434
ae_c_eq_d(ae_complex lhs,double rhs)5435 ae_bool ae_c_eq_d(ae_complex lhs, double rhs)
5436 {
5437 volatile double x1 = lhs.x;
5438 volatile double x2 = rhs;
5439 volatile double y1 = lhs.y;
5440 volatile double y2 = 0;
5441 return x1==x2 && y1==y2;
5442 }
5443
ae_c_neq_d(ae_complex lhs,double rhs)5444 ae_bool ae_c_neq_d(ae_complex lhs, double rhs)
5445 {
5446 volatile double x1 = lhs.x;
5447 volatile double x2 = rhs;
5448 volatile double y1 = lhs.y;
5449 volatile double y2 = 0;
5450 return x1!=x2 || y1!=y2;
5451 }
5452
ae_c_add_d(ae_complex lhs,double rhs)5453 ae_complex ae_c_add_d(ae_complex lhs, double rhs)
5454 {
5455 ae_complex result;
5456 result.x = lhs.x+rhs;
5457 result.y = lhs.y;
5458 return result;
5459 }
5460
ae_c_mul_d(ae_complex lhs,double rhs)5461 ae_complex ae_c_mul_d(ae_complex lhs, double rhs)
5462 {
5463 ae_complex result;
5464 result.x = lhs.x*rhs;
5465 result.y = lhs.y*rhs;
5466 return result;
5467 }
5468
ae_c_sub_d(ae_complex lhs,double rhs)5469 ae_complex ae_c_sub_d(ae_complex lhs, double rhs)
5470 {
5471 ae_complex result;
5472 result.x = lhs.x-rhs;
5473 result.y = lhs.y;
5474 return result;
5475 }
5476
ae_c_d_sub(double lhs,ae_complex rhs)5477 ae_complex ae_c_d_sub(double lhs, ae_complex rhs)
5478 {
5479 ae_complex result;
5480 result.x = lhs-rhs.x;
5481 result.y = -rhs.y;
5482 return result;
5483 }
5484
ae_c_div_d(ae_complex lhs,double rhs)5485 ae_complex ae_c_div_d(ae_complex lhs, double rhs)
5486 {
5487 ae_complex result;
5488 result.x = lhs.x/rhs;
5489 result.y = lhs.y/rhs;
5490 return result;
5491 }
5492
ae_c_d_div(double lhs,ae_complex rhs)5493 ae_complex ae_c_d_div(double lhs, ae_complex rhs)
5494 {
5495 ae_complex result;
5496 double e;
5497 double f;
5498 if( fabs(rhs.y)<fabs(rhs.x) )
5499 {
5500 e = rhs.y/rhs.x;
5501 f = rhs.x+rhs.y*e;
5502 result.x = lhs/f;
5503 result.y = -lhs*e/f;
5504 }
5505 else
5506 {
5507 e = rhs.x/rhs.y;
5508 f = rhs.y+rhs.x*e;
5509 result.x = lhs*e/f;
5510 result.y = -lhs/f;
5511 }
5512 return result;
5513 }
5514
5515
/************************************************************************
Complex BLAS operations
************************************************************************/
/************************************************************************
Dot product of two complex vectors (n elements, arbitrary strides).

conj0/conj1 select whether v0/v1 enter the product as-is ("N"/"n") or
conjugated (any other value). The four loops below are identical except
for the signs applied to the imaginary parts; they are kept separate so
the conjugation test stays out of the inner loop.
************************************************************************/
ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
{
    double rx = 0, ry = 0;
    ae_int_t i;
    ae_bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
    ae_bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
    ae_complex result;
    /* conj(v0) . conj(v1) */
    if( bconj0 && bconj1 )
    {
        double v0x, v0y, v1x, v1y;
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            v0x = v0->x;
            v0y = -v0->y;
            v1x = v1->x;
            v1y = -v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    /* v0 . conj(v1) */
    if( !bconj0 && bconj1 )
    {
        double v0x, v0y, v1x, v1y;
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            v0x = v0->x;
            v0y = v0->y;
            v1x = v1->x;
            v1y = -v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    /* conj(v0) . v1 */
    if( bconj0 && !bconj1 )
    {
        double v0x, v0y, v1x, v1y;
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            v0x = v0->x;
            v0y = -v0->y;
            v1x = v1->x;
            v1y = v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    /* v0 . v1 */
    if( !bconj0 && !bconj1 )
    {
        double v0x, v0y, v1x, v1y;
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            v0x = v0->x;
            v0y = v0->y;
            v1x = v1->x;
            v1y = v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    result.x = rx;
    result.y = ry;
    return result;
}
5582
/************************************************************************
Copies n complex elements from vsrc to vdst with the given strides.
conj_src!="N"/"n" stores conjugated values. The stride-1/stride-1 case
is special-cased so the compiler sees unit-stride pointer increments.
************************************************************************/
void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
{
    ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
    ae_int_t i;
    if( stride_dst!=1 || stride_src!=1 )
    {
        /*
         * general unoptimized case
         */
        if( bconj )
        {
            /* conjugated copy: negate imaginary part */
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
            {
                vdst->x = vsrc->x;
                vdst->y = -vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
                *vdst = *vsrc;
        }
    }
    else
    {
        /*
         * optimized case: both vectors are contiguous
         */
        if( bconj )
        {
            /* conjugated copy: negate imaginary part */
            for(i=0; i<n; i++, vdst++, vsrc++)
            {
                vdst->x = vsrc->x;
                vdst->y = -vsrc->y;
            }
        }
        else
        {
            for(i=0; i<n; i++, vdst++, vsrc++)
                *vdst = *vsrc;
        }
    }
}
5626
ae_v_cmoveneg(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)5627 void ae_v_cmoveneg(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5628 {
5629 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5630 ae_int_t i;
5631 if( stride_dst!=1 || stride_src!=1 )
5632 {
5633 /*
5634 * general unoptimized case
5635 */
5636 if( bconj )
5637 {
5638 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5639 {
5640 vdst->x = -vsrc->x;
5641 vdst->y = vsrc->y;
5642 }
5643 }
5644 else
5645 {
5646 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5647 {
5648 vdst->x = -vsrc->x;
5649 vdst->y = -vsrc->y;
5650 }
5651 }
5652 }
5653 else
5654 {
5655 /*
5656 * optimized case
5657 */
5658 if( bconj )
5659 {
5660 for(i=0; i<n; i++, vdst++, vsrc++)
5661 {
5662 vdst->x = -vsrc->x;
5663 vdst->y = vsrc->y;
5664 }
5665 }
5666 else
5667 {
5668 for(i=0; i<n; i++, vdst++, vsrc++)
5669 {
5670 vdst->x = -vsrc->x;
5671 vdst->y = -vsrc->y;
5672 }
5673 }
5674 }
5675 }
5676
ae_v_cmoved(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,double alpha)5677 void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5678 {
5679 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5680 ae_int_t i;
5681 if( stride_dst!=1 || stride_src!=1 )
5682 {
5683 /*
5684 * general unoptimized case
5685 */
5686 if( bconj )
5687 {
5688 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5689 {
5690 vdst->x = alpha*vsrc->x;
5691 vdst->y = -alpha*vsrc->y;
5692 }
5693 }
5694 else
5695 {
5696 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5697 {
5698 vdst->x = alpha*vsrc->x;
5699 vdst->y = alpha*vsrc->y;
5700 }
5701 }
5702 }
5703 else
5704 {
5705 /*
5706 * optimized case
5707 */
5708 if( bconj )
5709 {
5710 for(i=0; i<n; i++, vdst++, vsrc++)
5711 {
5712 vdst->x = alpha*vsrc->x;
5713 vdst->y = -alpha*vsrc->y;
5714 }
5715 }
5716 else
5717 {
5718 for(i=0; i<n; i++, vdst++, vsrc++)
5719 {
5720 vdst->x = alpha*vsrc->x;
5721 vdst->y = alpha*vsrc->y;
5722 }
5723 }
5724 }
5725 }
5726
ae_v_cmovec(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,ae_complex alpha)5727 void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5728 {
5729 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5730 ae_int_t i;
5731 if( stride_dst!=1 || stride_src!=1 )
5732 {
5733 /*
5734 * general unoptimized case
5735 */
5736 if( bconj )
5737 {
5738 double ax = alpha.x, ay = alpha.y;
5739 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5740 {
5741 vdst->x = ax*vsrc->x+ay*vsrc->y;
5742 vdst->y = -ax*vsrc->y+ay*vsrc->x;
5743 }
5744 }
5745 else
5746 {
5747 double ax = alpha.x, ay = alpha.y;
5748 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5749 {
5750 vdst->x = ax*vsrc->x-ay*vsrc->y;
5751 vdst->y = ax*vsrc->y+ay*vsrc->x;
5752 }
5753 }
5754 }
5755 else
5756 {
5757 /*
5758 * highly optimized case
5759 */
5760 if( bconj )
5761 {
5762 double ax = alpha.x, ay = alpha.y;
5763 for(i=0; i<n; i++, vdst++, vsrc++)
5764 {
5765 vdst->x = ax*vsrc->x+ay*vsrc->y;
5766 vdst->y = -ax*vsrc->y+ay*vsrc->x;
5767 }
5768 }
5769 else
5770 {
5771 double ax = alpha.x, ay = alpha.y;
5772 for(i=0; i<n; i++, vdst++, vsrc++)
5773 {
5774 vdst->x = ax*vsrc->x-ay*vsrc->y;
5775 vdst->y = ax*vsrc->y+ay*vsrc->x;
5776 }
5777 }
5778 }
5779 }
5780
ae_v_cadd(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)5781 void ae_v_cadd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5782 {
5783 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5784 ae_int_t i;
5785 if( stride_dst!=1 || stride_src!=1 )
5786 {
5787 /*
5788 * general unoptimized case
5789 */
5790 if( bconj )
5791 {
5792 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5793 {
5794 vdst->x += vsrc->x;
5795 vdst->y -= vsrc->y;
5796 }
5797 }
5798 else
5799 {
5800 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5801 {
5802 vdst->x += vsrc->x;
5803 vdst->y += vsrc->y;
5804 }
5805 }
5806 }
5807 else
5808 {
5809 /*
5810 * optimized case
5811 */
5812 if( bconj )
5813 {
5814 for(i=0; i<n; i++, vdst++, vsrc++)
5815 {
5816 vdst->x += vsrc->x;
5817 vdst->y -= vsrc->y;
5818 }
5819 }
5820 else
5821 {
5822 for(i=0; i<n; i++, vdst++, vsrc++)
5823 {
5824 vdst->x += vsrc->x;
5825 vdst->y += vsrc->y;
5826 }
5827 }
5828 }
5829 }
5830
ae_v_caddd(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,double alpha)5831 void ae_v_caddd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5832 {
5833 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5834 ae_int_t i;
5835 if( stride_dst!=1 || stride_src!=1 )
5836 {
5837 /*
5838 * general unoptimized case
5839 */
5840 if( bconj )
5841 {
5842 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5843 {
5844 vdst->x += alpha*vsrc->x;
5845 vdst->y -= alpha*vsrc->y;
5846 }
5847 }
5848 else
5849 {
5850 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5851 {
5852 vdst->x += alpha*vsrc->x;
5853 vdst->y += alpha*vsrc->y;
5854 }
5855 }
5856 }
5857 else
5858 {
5859 /*
5860 * optimized case
5861 */
5862 if( bconj )
5863 {
5864 for(i=0; i<n; i++, vdst++, vsrc++)
5865 {
5866 vdst->x += alpha*vsrc->x;
5867 vdst->y -= alpha*vsrc->y;
5868 }
5869 }
5870 else
5871 {
5872 for(i=0; i<n; i++, vdst++, vsrc++)
5873 {
5874 vdst->x += alpha*vsrc->x;
5875 vdst->y += alpha*vsrc->y;
5876 }
5877 }
5878 }
5879 }
5880
ae_v_caddc(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,ae_complex alpha)5881 void ae_v_caddc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5882 {
5883 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5884 ae_int_t i;
5885 if( stride_dst!=1 || stride_src!=1 )
5886 {
5887 /*
5888 * general unoptimized case
5889 */
5890 double ax = alpha.x, ay = alpha.y;
5891 if( bconj )
5892 {
5893 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5894 {
5895 vdst->x += ax*vsrc->x+ay*vsrc->y;
5896 vdst->y -= ax*vsrc->y-ay*vsrc->x;
5897 }
5898 }
5899 else
5900 {
5901 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5902 {
5903 vdst->x += ax*vsrc->x-ay*vsrc->y;
5904 vdst->y += ax*vsrc->y+ay*vsrc->x;
5905 }
5906 }
5907 }
5908 else
5909 {
5910 /*
5911 * highly optimized case
5912 */
5913 double ax = alpha.x, ay = alpha.y;
5914 if( bconj )
5915 {
5916 for(i=0; i<n; i++, vdst++, vsrc++)
5917 {
5918 vdst->x += ax*vsrc->x+ay*vsrc->y;
5919 vdst->y -= ax*vsrc->y-ay*vsrc->x;
5920 }
5921 }
5922 else
5923 {
5924 for(i=0; i<n; i++, vdst++, vsrc++)
5925 {
5926 vdst->x += ax*vsrc->x-ay*vsrc->y;
5927 vdst->y += ax*vsrc->y+ay*vsrc->x;
5928 }
5929 }
5930 }
5931 }
5932
ae_v_csub(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)5933 void ae_v_csub(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
5934 {
5935 ae_bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
5936 ae_int_t i;
5937 if( stride_dst!=1 || stride_src!=1 )
5938 {
5939 /*
5940 * general unoptimized case
5941 */
5942 if( bconj )
5943 {
5944 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5945 {
5946 vdst->x -= vsrc->x;
5947 vdst->y += vsrc->y;
5948 }
5949 }
5950 else
5951 {
5952 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
5953 {
5954 vdst->x -= vsrc->x;
5955 vdst->y -= vsrc->y;
5956 }
5957 }
5958 }
5959 else
5960 {
5961 /*
5962 * highly optimized case
5963 */
5964 if( bconj )
5965 {
5966 for(i=0; i<n; i++, vdst++, vsrc++)
5967 {
5968 vdst->x -= vsrc->x;
5969 vdst->y += vsrc->y;
5970 }
5971 }
5972 else
5973 {
5974 for(i=0; i<n; i++, vdst++, vsrc++)
5975 {
5976 vdst->x -= vsrc->x;
5977 vdst->y -= vsrc->y;
5978 }
5979 }
5980 }
5981 }
5982
ae_v_csubd(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,double alpha)5983 void ae_v_csubd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
5984 {
5985 ae_v_caddd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
5986 }
5987
ae_v_csubc(ae_complex * vdst,ae_int_t stride_dst,const ae_complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,ae_complex alpha)5988 void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
5989 {
5990 alpha.x = -alpha.x;
5991 alpha.y = -alpha.y;
5992 ae_v_caddc(vdst, stride_dst, vsrc, stride_src, conj_src, n, alpha);
5993 }
5994
ae_v_cmuld(ae_complex * vdst,ae_int_t stride_dst,ae_int_t n,double alpha)5995 void ae_v_cmuld(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
5996 {
5997 ae_int_t i;
5998 if( stride_dst!=1 )
5999 {
6000 /*
6001 * general unoptimized case
6002 */
6003 for(i=0; i<n; i++, vdst+=stride_dst)
6004 {
6005 vdst->x *= alpha;
6006 vdst->y *= alpha;
6007 }
6008 }
6009 else
6010 {
6011 /*
6012 * optimized case
6013 */
6014 for(i=0; i<n; i++, vdst++)
6015 {
6016 vdst->x *= alpha;
6017 vdst->y *= alpha;
6018 }
6019 }
6020 }
6021
ae_v_cmulc(ae_complex * vdst,ae_int_t stride_dst,ae_int_t n,ae_complex alpha)6022 void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha)
6023 {
6024 ae_int_t i;
6025 if( stride_dst!=1 )
6026 {
6027 /*
6028 * general unoptimized case
6029 */
6030 double ax = alpha.x, ay = alpha.y;
6031 for(i=0; i<n; i++, vdst+=stride_dst)
6032 {
6033 double dstx = vdst->x, dsty = vdst->y;
6034 vdst->x = ax*dstx-ay*dsty;
6035 vdst->y = ax*dsty+ay*dstx;
6036 }
6037 }
6038 else
6039 {
6040 /*
6041 * highly optimized case
6042 */
6043 double ax = alpha.x, ay = alpha.y;
6044 for(i=0; i<n; i++, vdst++)
6045 {
6046 double dstx = vdst->x, dsty = vdst->y;
6047 vdst->x = ax*dstx-ay*dsty;
6048 vdst->y = ax*dsty+ay*dstx;
6049 }
6050 }
6051 }
6052
6053 /************************************************************************
6054 Real BLAS operations
6055 ************************************************************************/
ae_v_dotproduct(const double * v0,ae_int_t stride0,const double * v1,ae_int_t stride1,ae_int_t n)6056 double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
6057 {
6058 double result = 0;
6059 ae_int_t i;
6060 if( stride0!=1 || stride1!=1 )
6061 {
6062 /*
6063 * slow general code
6064 */
6065 for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6066 result += (*v0)*(*v1);
6067 }
6068 else
6069 {
6070 /*
6071 * optimized code for stride=1
6072 */
6073 ae_int_t n4 = n/4;
6074 ae_int_t nleft = n%4;
6075 for(i=0; i<n4; i++, v0+=4, v1+=4)
6076 result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
6077 for(i=0; i<nleft; i++, v0++, v1++)
6078 result += v0[0]*v1[0];
6079 }
6080 return result;
6081 }
6082
ae_v_move(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)6083 void ae_v_move(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6084 {
6085 ae_int_t i;
6086 if( stride_dst!=1 || stride_src!=1 )
6087 {
6088 /*
6089 * general unoptimized case
6090 */
6091 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6092 *vdst = *vsrc;
6093 }
6094 else
6095 {
6096 /*
6097 * optimized case
6098 */
6099 ae_int_t n2 = n/2;
6100 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6101 {
6102 vdst[0] = vsrc[0];
6103 vdst[1] = vsrc[1];
6104 }
6105 if( n%2!=0 )
6106 vdst[0] = vsrc[0];
6107 }
6108 }
6109
ae_v_moveneg(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)6110 void ae_v_moveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6111 {
6112 ae_int_t i;
6113 if( stride_dst!=1 || stride_src!=1 )
6114 {
6115 /*
6116 * general unoptimized case
6117 */
6118 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6119 *vdst = -*vsrc;
6120 }
6121 else
6122 {
6123 /*
6124 * optimized case
6125 */
6126 ae_int_t n2 = n/2;
6127 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6128 {
6129 vdst[0] = -vsrc[0];
6130 vdst[1] = -vsrc[1];
6131 }
6132 if( n%2!=0 )
6133 vdst[0] = -vsrc[0];
6134 }
6135 }
6136
ae_v_moved(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n,double alpha)6137 void ae_v_moved(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6138 {
6139 ae_int_t i;
6140 if( stride_dst!=1 || stride_src!=1 )
6141 {
6142 /*
6143 * general unoptimized case
6144 */
6145 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6146 *vdst = alpha*(*vsrc);
6147 }
6148 else
6149 {
6150 /*
6151 * optimized case
6152 */
6153 ae_int_t n2 = n/2;
6154 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6155 {
6156 vdst[0] = alpha*vsrc[0];
6157 vdst[1] = alpha*vsrc[1];
6158 }
6159 if( n%2!=0 )
6160 vdst[0] = alpha*vsrc[0];
6161 }
6162 }
6163
ae_v_add(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)6164 void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6165 {
6166 ae_int_t i;
6167 if( stride_dst!=1 || stride_src!=1 )
6168 {
6169 /*
6170 * general unoptimized case
6171 */
6172 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6173 *vdst += *vsrc;
6174 }
6175 else
6176 {
6177 /*
6178 * optimized case
6179 */
6180 ae_int_t n2 = n/2;
6181 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6182 {
6183 vdst[0] += vsrc[0];
6184 vdst[1] += vsrc[1];
6185 }
6186 if( n%2!=0 )
6187 vdst[0] += vsrc[0];
6188 }
6189 }
6190
ae_v_addd(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n,double alpha)6191 void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6192 {
6193 ae_int_t i;
6194 if( stride_dst!=1 || stride_src!=1 )
6195 {
6196 /*
6197 * general unoptimized case
6198 */
6199 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6200 *vdst += alpha*(*vsrc);
6201 }
6202 else
6203 {
6204 /*
6205 * optimized case
6206 */
6207 ae_int_t n2 = n/2;
6208 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6209 {
6210 vdst[0] += alpha*vsrc[0];
6211 vdst[1] += alpha*vsrc[1];
6212 }
6213 if( n%2!=0 )
6214 vdst[0] += alpha*vsrc[0];
6215 }
6216 }
6217
ae_v_sub(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)6218 void ae_v_sub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
6219 {
6220 ae_int_t i;
6221 if( stride_dst!=1 || stride_src!=1 )
6222 {
6223 /*
6224 * general unoptimized case
6225 */
6226 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6227 *vdst -= *vsrc;
6228 }
6229 else
6230 {
6231 /*
6232 * highly optimized case
6233 */
6234 ae_int_t n2 = n/2;
6235 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6236 {
6237 vdst[0] -= vsrc[0];
6238 vdst[1] -= vsrc[1];
6239 }
6240 if( n%2!=0 )
6241 vdst[0] -= vsrc[0];
6242 }
6243 }
6244
ae_v_subd(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n,double alpha)6245 void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
6246 {
6247 ae_v_addd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
6248 }
6249
ae_v_muld(double * vdst,ae_int_t stride_dst,ae_int_t n,double alpha)6250 void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
6251 {
6252 ae_int_t i;
6253 if( stride_dst!=1 )
6254 {
6255 /*
6256 * general unoptimized case
6257 */
6258 for(i=0; i<n; i++, vdst+=stride_dst)
6259 *vdst *= alpha;
6260 }
6261 else
6262 {
6263 /*
6264 * highly optimized case
6265 */
6266 for(i=0; i<n; i++, vdst++)
6267 *vdst *= alpha;
6268 }
6269 }
6270
6271 /************************************************************************
6272 Other functions
6273 ************************************************************************/
ae_v_len(ae_int_t a,ae_int_t b)6274 ae_int_t ae_v_len(ae_int_t a, ae_int_t b)
6275 {
6276 return b-a+1;
6277 }
6278
6279 /************************************************************************
6280 RComm functions
6281 ************************************************************************/
/* Initializes a fresh reverse-communication state: zero-fills the four
   payload vectors (bool/int/real/complex) so ae_vector_init sees clean
   memory, then constructs each of them empty. */
void _rcommstate_init(rcommstate* p, ae_state *_state, ae_bool make_automatic)
{
    /* initial zero-filling */
    memset(&p->ba, 0, sizeof(p->ba));
    memset(&p->ia, 0, sizeof(p->ia));
    memset(&p->ra, 0, sizeof(p->ra));
    memset(&p->ca, 0, sizeof(p->ca));

    /* initialization */
    ae_vector_init(&p->ba, 0, DT_BOOL, _state, make_automatic);
    ae_vector_init(&p->ia, 0, DT_INT, _state, make_automatic);
    ae_vector_init(&p->ra, 0, DT_REAL, _state, make_automatic);
    ae_vector_init(&p->ca, 0, DT_COMPLEX, _state, make_automatic);
}
6296
/* Initializes dst as a deep copy of src: payload vectors are copied via
   ae_vector_init_copy and the saved resume point (stage) is carried over. */
void _rcommstate_init_copy(rcommstate* dst, rcommstate* src, ae_state *_state, ae_bool make_automatic)
{
    /* initial zero-filling */
    memset(&dst->ba, 0, sizeof(dst->ba));
    memset(&dst->ia, 0, sizeof(dst->ia));
    memset(&dst->ra, 0, sizeof(dst->ra));
    memset(&dst->ca, 0, sizeof(dst->ca));

    /* initialization */
    ae_vector_init_copy(&dst->ba, &src->ba, _state, make_automatic);
    ae_vector_init_copy(&dst->ia, &src->ia, _state, make_automatic);
    ae_vector_init_copy(&dst->ra, &src->ra, _state, make_automatic);
    ae_vector_init_copy(&dst->ca, &src->ca, _state, make_automatic);
    dst->stage = src->stage;
}
6312
/* Releases the four payload vectors of a reverse-communication state. */
void _rcommstate_clear(rcommstate* p)
{
    ae_vector_clear(&p->ba);
    ae_vector_clear(&p->ia);
    ae_vector_clear(&p->ra);
    ae_vector_clear(&p->ca);
}
6320
/* Destruction is identical to clearing: no extra resources to release. */
void _rcommstate_destroy(rcommstate* p)
{
    _rcommstate_clear(p);
}
6325
6326
6327 }
6328
6329 /////////////////////////////////////////////////////////////////////////
6330 //
6331 // THIS SECTION CONTAINS C++ RELATED FUNCTIONALITY
6332 //
6333 /////////////////////////////////////////////////////////////////////////
6334 /********************************************************************
6335 Internal forwards
6336 ********************************************************************/
namespace alglib
{
// generators for IEEE special values (definitions appear later in this file)
double get_aenv_nan();
double get_aenv_posinf();
double get_aenv_neginf();
// case-insensitive comparison; presumably locale-independent -- see definition
ae_int_t my_stricmp(const char *s1, const char *s2);
// NOTE(review): returns a newly allocated string -- caller is presumably
// responsible for freeing it; confirm against the definition below
char* filter_spaces(const char *s);
// split textual representations of vectors/matrices into element pointers
void str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec);
void str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat);

// parsers for delimiter-terminated scalar values
ae_bool parse_bool_delim(const char *s, const char *delim);
ae_int_t parse_int_delim(const char *s, const char *delim);
bool _parse_real_delim(const char *s, const char *delim, double *result, const char **new_s);
double parse_real_delim(const char *s, const char *delim);
alglib::complex parse_complex_delim(const char *s, const char *delim);

// formatters: array -> textual representation (dps = digits of precision)
std::string arraytostring(const bool *ptr, ae_int_t n);
std::string arraytostring(const ae_int_t *ptr, ae_int_t n);
std::string arraytostring(const double *ptr, ae_int_t n, int dps);
std::string arraytostring(const alglib::complex *ptr, ae_int_t n, int dps);
}
6358
6359 /********************************************************************
6360 Global and local constants/variables
6361 ********************************************************************/
// fundamental numeric limits used throughout the library
const double alglib::machineepsilon = 5E-16;
const double alglib::maxrealnumber = 1E300;
const double alglib::minrealnumber = 1E-300;
// computed once during static initialization
const alglib::ae_int_t alglib::endianness = alglib_impl::ae_get_endianness();
const double alglib::fp_nan = alglib::get_aenv_nan();
const double alglib::fp_posinf = alglib::get_aenv_posinf();
const double alglib::fp_neginf = alglib::get_aenv_neginf();
#if defined(AE_NO_EXCEPTIONS)
// last error message for the exception-free reporting mode (see
// set_error_flag/get_error_flag below)
static const char *_alglib_last_error = NULL;
#endif
// threading-mode settings: each public xparams reference aliases a 64-bit
// flag word (NOTE(review): relies on xparams being layout-compatible with
// ae_uint64_t -- confirm against the declaration in ap.h)
static const alglib_impl::ae_uint64_t _i64_xdefault = 0x0;
static const alglib_impl::ae_uint64_t _i64_xserial = _ALGLIB_FLG_THREADING_SERIAL;
static const alglib_impl::ae_uint64_t _i64_xparallel = _ALGLIB_FLG_THREADING_PARALLEL;
const alglib::xparams &alglib::xdefault = *((const alglib::xparams *)(&_i64_xdefault));
const alglib::xparams &alglib::serial = *((const alglib::xparams *)(&_i64_xserial));
const alglib::xparams &alglib::parallel = *((const alglib::xparams *)(&_i64_xparallel));
6378
6379
6380
6381 /********************************************************************
6382 Exception handling
6383 ********************************************************************/
6384 #if !defined(AE_NO_EXCEPTIONS)
// default exception object: empty message
alglib::ap_error::ap_error()
{
}
6388
// exception object carrying a diagnostic message (copied into msg)
alglib::ap_error::ap_error(const char *s)
{
    msg = s;
}
6393
// throws ap_error (via _ALGLIB_CPP_EXCEPTION) with an empty message when
// the clause is false; no-op otherwise
void alglib::ap_error::make_assertion(bool bClause)
{
    if(!bClause)
        _ALGLIB_CPP_EXCEPTION("");
}
6399
// throws ap_error with the supplied message when the clause is false
void alglib::ap_error::make_assertion(bool bClause, const char *p_msg)
{
    if(!bClause)
        _ALGLIB_CPP_EXCEPTION(p_msg);
}
6405 #else
// records an error message for the exception-free mode; a NULL argument
// is replaced by a generic message so that the flag always reads as "set"
void alglib::set_error_flag(const char *s)
{
    if( s==NULL )
        s = "ALGLIB: unknown error";
    _alglib_last_error = s;
}
6412
// returns true iff an error was recorded; optionally reports the message
// through p_msg (pass NULL to just test the flag)
bool alglib::get_error_flag(const char **p_msg)
{
    if( _alglib_last_error==NULL )
        return false;
    if( p_msg!=NULL )
        *p_msg = _alglib_last_error;
    return true;
}
6421
// resets the error flag of the exception-free mode
void alglib::clear_error_flag()
{
    _alglib_last_error = NULL;
}
6426 #endif
6427
6428 /********************************************************************
6429 Complex number with double precision.
6430 ********************************************************************/
// default constructor: zero value (0+0i)
alglib::complex::complex():x(0.0),y(0.0)
{
}
6434
// conversion from real: imaginary part is zero
alglib::complex::complex(const double &_x):x(_x),y(0.0)
{
}
6438
// component-wise constructor: _x + i*_y
alglib::complex::complex(const double &_x, const double &_y):x(_x),y(_y)
{
}
6442
// copy constructor
alglib::complex::complex(const alglib::complex &z):x(z.x),y(z.y)
{
}
6446
// assignment from real: imaginary part is reset to zero
alglib::complex& alglib::complex::operator= (const double& v)
{
    x = v;
    y = 0.0;
    return *this;
}
6453
// add a real scalar: only the real part changes
alglib::complex& alglib::complex::operator+=(const double& v)
{
    x += v;
    return *this;
}
6459
// subtract a real scalar: only the real part changes
alglib::complex& alglib::complex::operator-=(const double& v)
{
    x -= v;
    return *this;
}
6465
// scale by a real scalar: both components are multiplied
alglib::complex& alglib::complex::operator*=(const double& v)
{
    x *= v;
    y *= v;
    return *this;
}
6472
// divide by a real scalar: both components are divided
alglib::complex& alglib::complex::operator/=(const double& v)
{
    x /= v;
    y /= v;
    return *this;
}
6479
// copy assignment (self-assignment is harmless: plain member copies)
alglib::complex& alglib::complex::operator= (const alglib::complex& z)
{
    x = z.x;
    y = z.y;
    return *this;
}
6486
// component-wise complex addition
alglib::complex& alglib::complex::operator+=(const alglib::complex& z)
{
    x += z.x;
    y += z.y;
    return *this;
}
6493
// component-wise complex subtraction
alglib::complex& alglib::complex::operator-=(const alglib::complex& z)
{
    x -= z.x;
    y -= z.y;
    return *this;
}
6500
// in-place complex multiplication; the new real part is computed into a
// temporary first so the update of y below still sees the original x
alglib::complex& alglib::complex::operator*=(const alglib::complex& z)
{
    double t = x*z.x-y*z.y;
    y = x*z.y+y*z.x;
    x = t;
    return *this;
}
6508
// in-place complex division using Smith's formula: the branch on the
// relative magnitudes of z.x and z.y keeps the intermediate ratio e
// bounded by 1, avoiding the overflow/underflow of the naive
// (x*zx+y*zy)/(zx^2+zy^2) evaluation
alglib::complex& alglib::complex::operator/=(const alglib::complex& z)
{
    alglib::complex result;
    double e;
    double f;
    if( fabs(z.y)<fabs(z.x) )
    {
        e = z.y/z.x;
        f = z.x+z.y*e;
        result.x = (x+y*e)/f;
        result.y = (y-x*e)/f;
    }
    else
    {
        e = z.x/z.y;
        f = z.y+z.x*e;
        result.x = (y+x*e)/f;
        result.y = (-x+y*e)/f;
    }
    *this = result;
    return *this;
}
6531
// view this object as the C-core ae_complex; assumes the two types are
// layout-compatible ({double x; double y;})
alglib_impl::ae_complex* alglib::complex::c_ptr()
{
    return (alglib_impl::ae_complex*)this;
}
6536
// const overload of the layout-compatible reinterpretation above
const alglib_impl::ae_complex* alglib::complex::c_ptr() const
{
    return (const alglib_impl::ae_complex*)this;
}
6541
6542 #if !defined(AE_NO_EXCEPTIONS)
tostring(int _dps) const6543 std::string alglib::complex::tostring(int _dps) const
6544 {
6545 char mask[32];
6546 char buf_x[32];
6547 char buf_y[32];
6548 char buf_zero[32];
6549 int dps = _dps>=0 ? _dps : -_dps;
6550 if( dps<=0 || dps>=20 )
6551 _ALGLIB_CPP_EXCEPTION("complex::tostring(): incorrect dps");
6552
6553 // handle IEEE special quantities
6554 if( fp_isnan(x) || fp_isnan(y) )
6555 return "NAN";
6556 if( fp_isinf(x) || fp_isinf(y) )
6557 return "INF";
6558
6559 // generate mask
6560 if( sprintf(mask, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask) )
6561 _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6562
6563 // print |x|, |y| and zero with same mask and compare
6564 if( sprintf(buf_x, mask, (double)(fabs(x)))>=(int)sizeof(buf_x) )
6565 _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6566 if( sprintf(buf_y, mask, (double)(fabs(y)))>=(int)sizeof(buf_y) )
6567 _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6568 if( sprintf(buf_zero, mask, (double)0)>=(int)sizeof(buf_zero) )
6569 _ALGLIB_CPP_EXCEPTION("complex::tostring(): buffer overflow");
6570
6571 // different zero/nonzero patterns
6572 if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)!=0 )
6573 return std::string(x>0 ? "" : "-")+buf_x+(y>0 ? "+" : "-")+buf_y+"i";
6574 if( strcmp(buf_x,buf_zero)!=0 && strcmp(buf_y,buf_zero)==0 )
6575 return std::string(x>0 ? "" : "-")+buf_x;
6576 if( strcmp(buf_x,buf_zero)==0 && strcmp(buf_y,buf_zero)!=0 )
6577 return std::string(y>0 ? "" : "-")+buf_y+"i";
6578 return std::string("0");
6579 }
6580 #endif
6581
// component-wise equality; the volatile locals force the values through
// memory, defeating extended-precision registers/compiler folding so the
// comparison reflects stored double values
bool alglib::operator==(const alglib::complex& lhs, const alglib::complex& rhs)
{
    volatile double x1 = lhs.x;
    volatile double x2 = rhs.x;
    volatile double y1 = lhs.y;
    volatile double y2 = rhs.y;
    return x1==x2 && y1==y2;
}
6590
// inequality in terms of the equality operator above
bool alglib::operator!=(const alglib::complex& lhs, const alglib::complex& rhs)
{ return !(lhs==rhs); }
6593
// unary plus: identity
const alglib::complex alglib::operator+(const alglib::complex& lhs)
{ return lhs; }
6596
// unary minus: negate both components
const alglib::complex alglib::operator-(const alglib::complex& lhs)
{ return alglib::complex(-lhs.x, -lhs.y); }
6599
// complex + complex, delegating to operator+=
const alglib::complex alglib::operator+(const alglib::complex& lhs, const alglib::complex& rhs)
{ alglib::complex r = lhs; r += rhs; return r; }
6602
// complex + real, delegating to operator+=
const alglib::complex alglib::operator+(const alglib::complex& lhs, const double& rhs)
{ alglib::complex r = lhs; r += rhs; return r; }
6605
// real + complex: addition commutes, so accumulate into a copy of rhs
const alglib::complex alglib::operator+(const double& lhs, const alglib::complex& rhs)
{ alglib::complex r = rhs; r += lhs; return r; }
6608
// complex - complex, delegating to operator-=
const alglib::complex alglib::operator-(const alglib::complex& lhs, const alglib::complex& rhs)
{ alglib::complex r = lhs; r -= rhs; return r; }
6611
// complex - real, delegating to operator-=
const alglib::complex alglib::operator-(const alglib::complex& lhs, const double& rhs)
{ alglib::complex r = lhs; r -= rhs; return r; }
6614
// real - complex: lhs is first converted to complex, then rhs subtracted
const alglib::complex alglib::operator-(const double& lhs, const alglib::complex& rhs)
{ alglib::complex r = lhs; r -= rhs; return r; }
6617
// complex * complex via the textbook expansion (a+bi)(c+di)
const alglib::complex alglib::operator*(const alglib::complex& lhs, const alglib::complex& rhs)
{ return alglib::complex(lhs.x*rhs.x - lhs.y*rhs.y, lhs.x*rhs.y + lhs.y*rhs.x); }
6620
// complex * real: component-wise scaling
const alglib::complex alglib::operator*(const alglib::complex& lhs, const double& rhs)
{ return alglib::complex(lhs.x*rhs, lhs.y*rhs); }
6623
// real * complex: component-wise scaling
const alglib::complex alglib::operator*(const double& lhs, const alglib::complex& rhs)
{ return alglib::complex(lhs*rhs.x, lhs*rhs.y); }
6626
// complex / complex using Smith's formula (same scheme as operator/=):
// branching on |rhs.y|<|rhs.x| keeps the ratio e bounded by 1 and avoids
// overflow/underflow in the denominator
const alglib::complex alglib::operator/(const alglib::complex& lhs, const alglib::complex& rhs)
{
    alglib::complex result;
    double e;
    double f;
    if( fabs(rhs.y)<fabs(rhs.x) )
    {
        e = rhs.y/rhs.x;
        f = rhs.x+rhs.y*e;
        result.x = (lhs.x+lhs.y*e)/f;
        result.y = (lhs.y-lhs.x*e)/f;
    }
    else
    {
        e = rhs.x/rhs.y;
        f = rhs.y+rhs.x*e;
        result.x = (lhs.y+lhs.x*e)/f;
        result.y = (-lhs.x+lhs.y*e)/f;
    }
    return result;
}
6648
// real / complex: Smith's formula specialized to a purely real numerator
const alglib::complex alglib::operator/(const double& lhs, const alglib::complex& rhs)
{
    alglib::complex result;
    double e;
    double f;
    if( fabs(rhs.y)<fabs(rhs.x) )
    {
        e = rhs.y/rhs.x;
        f = rhs.x+rhs.y*e;
        result.x = lhs/f;
        result.y = -lhs*e/f;
    }
    else
    {
        e = rhs.x/rhs.y;
        f = rhs.y+rhs.x*e;
        result.x = lhs*e/f;
        result.y = -lhs/f;
    }
    return result;
}
6670
// complex / real: component-wise division
const alglib::complex alglib::operator/(const alglib::complex& lhs, const double& rhs)
{ return alglib::complex(lhs.x/rhs, lhs.y/rhs); }
6673
// |z| computed as w*sqrt(1+(v/w)^2) with w=max(|x|,|y|), v=min(|x|,|y|);
// this avoids the overflow/underflow of the naive sqrt(x^2+y^2)
double alglib::abscomplex(const alglib::complex &z)
{
    double w;
    double xabs;
    double yabs;
    double v;

    xabs = fabs(z.x);
    yabs = fabs(z.y);
    w = xabs>yabs ? xabs : yabs;
    v = xabs<yabs ? xabs : yabs;
    if( v==0 )
        return w;   // also covers z==0, where w==0
    else
    {
        double t = v/w;   // t<=1 by construction, so t*t cannot overflow
        return w*sqrt(1+t*t);
    }
}
6693
conj(const alglib::complex & z)6694 alglib::complex alglib::conj(const alglib::complex &z)
6695 { return alglib::complex(z.x, -z.y); }
6696
csqr(const alglib::complex & z)6697 alglib::complex alglib::csqr(const alglib::complex &z)
6698 { return alglib::complex(z.x*z.x-z.y*z.y, 2*z.x*z.y); }
6699
/*************************************************************************
Sets the number of worker cores ALGLIB may use. No-op in non-HPC builds,
where execution is always serial.
*************************************************************************/
void alglib::setnworkers(alglib::ae_int_t nworkers)
{
#ifdef AE_HPC
    alglib_impl::ae_set_cores_to_use(nworkers);
#endif
}

/*************************************************************************
Sets the global threading policy from an xparams flag set. No-op in
non-HPC builds.
*************************************************************************/
void alglib::setglobalthreading(const alglib::xparams settings)
{
#ifdef AE_HPC
    alglib_impl::ae_set_global_threading(settings.flags);
#endif
}

/*************************************************************************
Returns the configured number of worker cores; always 1 in non-HPC builds.
*************************************************************************/
alglib::ae_int_t alglib::getnworkers()
{
#ifdef AE_HPC
    return alglib_impl::ae_get_cores_to_use();
#else
    return 1;
#endif
}

/*************************************************************************
Internal: number of cores detected by the HPC kernel; 1 in serial builds.
*************************************************************************/
alglib::ae_int_t alglib::_ae_cores_count()
{
#ifdef AE_HPC
    return alglib_impl::ae_cores_count();
#else
    return 1;
#endif
}

/*************************************************************************
Internal: raw setter for the global threading flags. No-op in serial builds.
*************************************************************************/
void alglib::_ae_set_global_threading(alglib_impl::ae_uint64_t flg_value)
{
#ifdef AE_HPC
    alglib_impl::ae_set_global_threading(flg_value);
#endif
}

/*************************************************************************
Internal: raw getter for the global threading flags; serial builds always
report the serial-mode flag.
*************************************************************************/
alglib_impl::ae_uint64_t alglib::_ae_get_global_threading()
{
#ifdef AE_HPC
    return alglib_impl::ae_get_global_threading();
#else
    return _ALGLIB_FLG_THREADING_SERIAL;
#endif
}
6747
6748
6749 /********************************************************************
6750 Level 1 BLAS functions
6751 ********************************************************************/
vdotproduct(const double * v0,ae_int_t stride0,const double * v1,ae_int_t stride1,ae_int_t n)6752 double alglib::vdotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
6753 {
6754 double result = 0;
6755 ae_int_t i;
6756 if( stride0!=1 || stride1!=1 )
6757 {
6758 //
6759 // slow general code
6760 //
6761 for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
6762 result += (*v0)*(*v1);
6763 }
6764 else
6765 {
6766 //
6767 // optimized code for stride=1
6768 //
6769 ae_int_t n4 = n/4;
6770 ae_int_t nleft = n%4;
6771 for(i=0; i<n4; i++, v0+=4, v1+=4)
6772 result += v0[0]*v1[0]+v0[1]*v1[1]+v0[2]*v1[2]+v0[3]*v1[3];
6773 for(i=0; i<nleft; i++, v0++, v1++)
6774 result += v0[0]*v1[0];
6775 }
6776 return result;
6777 }
6778
vdotproduct(const double * v1,const double * v2,ae_int_t N)6779 double alglib::vdotproduct(const double *v1, const double *v2, ae_int_t N)
6780 {
6781 return vdotproduct(v1, 1, v2, 1, N);
6782 }
6783
/*************************************************************************
Dot product of two strided complex vectors.

conj0/conj1 select whether v0/v1 enter the product conjugated: "N" or "n"
means "as is", any other string means "conjugated".

Fix: the four mutually exclusive (bconj0,bconj1) cases were tested with
four independent if-statements, so every call evaluated all four
conditions; they are now an if/else-if chain (identical arithmetic in
each case, exclusivity made explicit).
*************************************************************************/
alglib::complex alglib::vdotproduct(const alglib::complex *v0, ae_int_t stride0, const char *conj0, const alglib::complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
{
    double rx = 0, ry = 0;
    ae_int_t i;
    bool bconj0 = !((conj0[0]=='N') || (conj0[0]=='n'));
    bool bconj1 = !((conj1[0]=='N') || (conj1[0]=='n'));
    if( bconj0 && bconj1 )
    {
        // conj(v0)*conj(v1)
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            double v0x =  v0->x, v0y = -v0->y;
            double v1x =  v1->x, v1y = -v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    else if( !bconj0 && bconj1 )
    {
        // v0*conj(v1)
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            double v0x =  v0->x, v0y =  v0->y;
            double v1x =  v1->x, v1y = -v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    else if( bconj0 && !bconj1 )
    {
        // conj(v0)*v1
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            double v0x =  v0->x, v0y = -v0->y;
            double v1x =  v1->x, v1y =  v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    else
    {
        // v0*v1, no conjugation
        for(i=0; i<n; i++, v0+=stride0, v1+=stride1)
        {
            double v0x = v0->x, v0y = v0->y;
            double v1x = v1->x, v1y = v1->y;
            rx += v0x*v1x-v0y*v1y;
            ry += v0x*v1y+v0y*v1x;
        }
    }
    return alglib::complex(rx,ry);
}

/*************************************************************************
Unit-stride, non-conjugated convenience overload.
*************************************************************************/
alglib::complex alglib::vdotproduct(const alglib::complex *v1, const alglib::complex *v2, ae_int_t N)
{
    return vdotproduct(v1, 1, "N", v2, 1, "N", N);
}
6849
vmove(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)6850 void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6851 {
6852 ae_int_t i;
6853 if( stride_dst!=1 || stride_src!=1 )
6854 {
6855 //
6856 // general unoptimized case
6857 //
6858 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6859 *vdst = *vsrc;
6860 }
6861 else
6862 {
6863 //
6864 // optimized case
6865 //
6866 ae_int_t n2 = n/2;
6867 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6868 {
6869 vdst[0] = vsrc[0];
6870 vdst[1] = vsrc[1];
6871 }
6872 if( n%2!=0 )
6873 vdst[0] = vsrc[0];
6874 }
6875 }
6876
vmove(double * vdst,const double * vsrc,ae_int_t N)6877 void alglib::vmove(double *vdst, const double* vsrc, ae_int_t N)
6878 {
6879 vmove(vdst, 1, vsrc, 1, N);
6880 }
6881
vmove(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)6882 void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
6883 {
6884 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6885 ae_int_t i;
6886 if( stride_dst!=1 || stride_src!=1 )
6887 {
6888 //
6889 // general unoptimized case
6890 //
6891 if( bconj )
6892 {
6893 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6894 {
6895 vdst->x = vsrc->x;
6896 vdst->y = -vsrc->y;
6897 }
6898 }
6899 else
6900 {
6901 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6902 *vdst = *vsrc;
6903 }
6904 }
6905 else
6906 {
6907 //
6908 // optimized case
6909 //
6910 if( bconj )
6911 {
6912 for(i=0; i<n; i++, vdst++, vsrc++)
6913 {
6914 vdst->x = vsrc->x;
6915 vdst->y = -vsrc->y;
6916 }
6917 }
6918 else
6919 {
6920 for(i=0; i<n; i++, vdst++, vsrc++)
6921 *vdst = *vsrc;
6922 }
6923 }
6924 }
6925
vmove(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N)6926 void alglib::vmove(alglib::complex *vdst, const alglib::complex* vsrc, ae_int_t N)
6927 {
6928 vmove(vdst, 1, vsrc, 1, "N", N);
6929 }
6930
vmoveneg(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)6931 void alglib::vmoveneg(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n)
6932 {
6933 ae_int_t i;
6934 if( stride_dst!=1 || stride_src!=1 )
6935 {
6936 //
6937 // general unoptimized case
6938 //
6939 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6940 *vdst = -*vsrc;
6941 }
6942 else
6943 {
6944 //
6945 // optimized case
6946 //
6947 ae_int_t n2 = n/2;
6948 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
6949 {
6950 vdst[0] = -vsrc[0];
6951 vdst[1] = -vsrc[1];
6952 }
6953 if( n%2!=0 )
6954 vdst[0] = -vsrc[0];
6955 }
6956 }
6957
vmoveneg(double * vdst,const double * vsrc,ae_int_t N)6958 void alglib::vmoveneg(double *vdst, const double *vsrc, ae_int_t N)
6959 {
6960 vmoveneg(vdst, 1, vsrc, 1, N);
6961 }
6962
vmoveneg(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)6963 void alglib::vmoveneg(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
6964 {
6965 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
6966 ae_int_t i;
6967 if( stride_dst!=1 || stride_src!=1 )
6968 {
6969 //
6970 // general unoptimized case
6971 //
6972 if( bconj )
6973 {
6974 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6975 {
6976 vdst->x = -vsrc->x;
6977 vdst->y = vsrc->y;
6978 }
6979 }
6980 else
6981 {
6982 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
6983 {
6984 vdst->x = -vsrc->x;
6985 vdst->y = -vsrc->y;
6986 }
6987 }
6988 }
6989 else
6990 {
6991 //
6992 // optimized case
6993 //
6994 if( bconj )
6995 {
6996 for(i=0; i<n; i++, vdst++, vsrc++)
6997 {
6998 vdst->x = -vsrc->x;
6999 vdst->y = vsrc->y;
7000 }
7001 }
7002 else
7003 {
7004 for(i=0; i<n; i++, vdst++, vsrc++)
7005 {
7006 vdst->x = -vsrc->x;
7007 vdst->y = -vsrc->y;
7008 }
7009 }
7010 }
7011 }
7012
vmoveneg(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N)7013 void alglib::vmoveneg(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7014 {
7015 vmoveneg(vdst, 1, vsrc, 1, "N", N);
7016 }
7017
vmove(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n,double alpha)7018 void alglib::vmove(double *vdst, ae_int_t stride_dst, const double* vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7019 {
7020 ae_int_t i;
7021 if( stride_dst!=1 || stride_src!=1 )
7022 {
7023 //
7024 // general unoptimized case
7025 //
7026 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7027 *vdst = alpha*(*vsrc);
7028 }
7029 else
7030 {
7031 //
7032 // optimized case
7033 //
7034 ae_int_t n2 = n/2;
7035 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7036 {
7037 vdst[0] = alpha*vsrc[0];
7038 vdst[1] = alpha*vsrc[1];
7039 }
7040 if( n%2!=0 )
7041 vdst[0] = alpha*vsrc[0];
7042 }
7043 }
7044
vmove(double * vdst,const double * vsrc,ae_int_t N,double alpha)7045 void alglib::vmove(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7046 {
7047 vmove(vdst, 1, vsrc, 1, N, alpha);
7048 }
7049
vmove(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,double alpha)7050 void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7051 {
7052 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7053 ae_int_t i;
7054 if( stride_dst!=1 || stride_src!=1 )
7055 {
7056 //
7057 // general unoptimized case
7058 //
7059 if( bconj )
7060 {
7061 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7062 {
7063 vdst->x = alpha*vsrc->x;
7064 vdst->y = -alpha*vsrc->y;
7065 }
7066 }
7067 else
7068 {
7069 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7070 {
7071 vdst->x = alpha*vsrc->x;
7072 vdst->y = alpha*vsrc->y;
7073 }
7074 }
7075 }
7076 else
7077 {
7078 //
7079 // optimized case
7080 //
7081 if( bconj )
7082 {
7083 for(i=0; i<n; i++, vdst++, vsrc++)
7084 {
7085 vdst->x = alpha*vsrc->x;
7086 vdst->y = -alpha*vsrc->y;
7087 }
7088 }
7089 else
7090 {
7091 for(i=0; i<n; i++, vdst++, vsrc++)
7092 {
7093 vdst->x = alpha*vsrc->x;
7094 vdst->y = alpha*vsrc->y;
7095 }
7096 }
7097 }
7098 }
7099
vmove(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N,double alpha)7100 void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
7101 {
7102 vmove(vdst, 1, vsrc, 1, "N", N, alpha);
7103 }
7104
vmove(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,alglib::complex alpha)7105 void alglib::vmove(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex* vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7106 {
7107 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7108 ae_int_t i;
7109 if( stride_dst!=1 || stride_src!=1 )
7110 {
7111 //
7112 // general unoptimized case
7113 //
7114 if( bconj )
7115 {
7116 double ax = alpha.x, ay = alpha.y;
7117 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7118 {
7119 vdst->x = ax*vsrc->x+ay*vsrc->y;
7120 vdst->y = -ax*vsrc->y+ay*vsrc->x;
7121 }
7122 }
7123 else
7124 {
7125 double ax = alpha.x, ay = alpha.y;
7126 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7127 {
7128 vdst->x = ax*vsrc->x-ay*vsrc->y;
7129 vdst->y = ax*vsrc->y+ay*vsrc->x;
7130 }
7131 }
7132 }
7133 else
7134 {
7135 //
7136 // optimized case
7137 //
7138 if( bconj )
7139 {
7140 double ax = alpha.x, ay = alpha.y;
7141 for(i=0; i<n; i++, vdst++, vsrc++)
7142 {
7143 vdst->x = ax*vsrc->x+ay*vsrc->y;
7144 vdst->y = -ax*vsrc->y+ay*vsrc->x;
7145 }
7146 }
7147 else
7148 {
7149 double ax = alpha.x, ay = alpha.y;
7150 for(i=0; i<n; i++, vdst++, vsrc++)
7151 {
7152 vdst->x = ax*vsrc->x-ay*vsrc->y;
7153 vdst->y = ax*vsrc->y+ay*vsrc->x;
7154 }
7155 }
7156 }
7157 }
7158
vmove(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N,alglib::complex alpha)7159 void alglib::vmove(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
7160 {
7161 vmove(vdst, 1, vsrc, 1, "N", N, alpha);
7162 }
7163
vadd(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)7164 void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
7165 {
7166 ae_int_t i;
7167 if( stride_dst!=1 || stride_src!=1 )
7168 {
7169 //
7170 // general unoptimized case
7171 //
7172 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7173 *vdst += *vsrc;
7174 }
7175 else
7176 {
7177 //
7178 // optimized case
7179 //
7180 ae_int_t n2 = n/2;
7181 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7182 {
7183 vdst[0] += vsrc[0];
7184 vdst[1] += vsrc[1];
7185 }
7186 if( n%2!=0 )
7187 vdst[0] += vsrc[0];
7188 }
7189 }
7190
vadd(double * vdst,const double * vsrc,ae_int_t N)7191 void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N)
7192 {
7193 vadd(vdst, 1, vsrc, 1, N);
7194 }
7195
vadd(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)7196 void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
7197 {
7198 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7199 ae_int_t i;
7200 if( stride_dst!=1 || stride_src!=1 )
7201 {
7202 //
7203 // general unoptimized case
7204 //
7205 if( bconj )
7206 {
7207 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7208 {
7209 vdst->x += vsrc->x;
7210 vdst->y -= vsrc->y;
7211 }
7212 }
7213 else
7214 {
7215 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7216 {
7217 vdst->x += vsrc->x;
7218 vdst->y += vsrc->y;
7219 }
7220 }
7221 }
7222 else
7223 {
7224 //
7225 // optimized case
7226 //
7227 if( bconj )
7228 {
7229 for(i=0; i<n; i++, vdst++, vsrc++)
7230 {
7231 vdst->x += vsrc->x;
7232 vdst->y -= vsrc->y;
7233 }
7234 }
7235 else
7236 {
7237 for(i=0; i<n; i++, vdst++, vsrc++)
7238 {
7239 vdst->x += vsrc->x;
7240 vdst->y += vsrc->y;
7241 }
7242 }
7243 }
7244 }
7245
vadd(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N)7246 void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7247 {
7248 vadd(vdst, 1, vsrc, 1, "N", N);
7249 }
7250
vadd(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n,double alpha)7251 void alglib::vadd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
7252 {
7253 ae_int_t i;
7254 if( stride_dst!=1 || stride_src!=1 )
7255 {
7256 //
7257 // general unoptimized case
7258 //
7259 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7260 *vdst += alpha*(*vsrc);
7261 }
7262 else
7263 {
7264 //
7265 // optimized case
7266 //
7267 ae_int_t n2 = n/2;
7268 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7269 {
7270 vdst[0] += alpha*vsrc[0];
7271 vdst[1] += alpha*vsrc[1];
7272 }
7273 if( n%2!=0 )
7274 vdst[0] += alpha*vsrc[0];
7275 }
7276 }
7277
vadd(double * vdst,const double * vsrc,ae_int_t N,double alpha)7278 void alglib::vadd(double *vdst, const double *vsrc, ae_int_t N, double alpha)
7279 {
7280 vadd(vdst, 1, vsrc, 1, N, alpha);
7281 }
7282
vadd(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,double alpha)7283 void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
7284 {
7285 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7286 ae_int_t i;
7287 if( stride_dst!=1 || stride_src!=1 )
7288 {
7289 //
7290 // general unoptimized case
7291 //
7292 if( bconj )
7293 {
7294 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7295 {
7296 vdst->x += alpha*vsrc->x;
7297 vdst->y -= alpha*vsrc->y;
7298 }
7299 }
7300 else
7301 {
7302 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7303 {
7304 vdst->x += alpha*vsrc->x;
7305 vdst->y += alpha*vsrc->y;
7306 }
7307 }
7308 }
7309 else
7310 {
7311 //
7312 // optimized case
7313 //
7314 if( bconj )
7315 {
7316 for(i=0; i<n; i++, vdst++, vsrc++)
7317 {
7318 vdst->x += alpha*vsrc->x;
7319 vdst->y -= alpha*vsrc->y;
7320 }
7321 }
7322 else
7323 {
7324 for(i=0; i<n; i++, vdst++, vsrc++)
7325 {
7326 vdst->x += alpha*vsrc->x;
7327 vdst->y += alpha*vsrc->y;
7328 }
7329 }
7330 }
7331 }
7332
vadd(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N,double alpha)7333 void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, double alpha)
7334 {
7335 vadd(vdst, 1, vsrc, 1, "N", N, alpha);
7336 }
7337
vadd(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n,alglib::complex alpha)7338 void alglib::vadd(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
7339 {
7340 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7341 ae_int_t i;
7342 if( stride_dst!=1 || stride_src!=1 )
7343 {
7344 //
7345 // general unoptimized case
7346 //
7347 double ax = alpha.x, ay = alpha.y;
7348 if( bconj )
7349 {
7350 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7351 {
7352 vdst->x += ax*vsrc->x+ay*vsrc->y;
7353 vdst->y -= ax*vsrc->y-ay*vsrc->x;
7354 }
7355 }
7356 else
7357 {
7358 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7359 {
7360 vdst->x += ax*vsrc->x-ay*vsrc->y;
7361 vdst->y += ax*vsrc->y+ay*vsrc->x;
7362 }
7363 }
7364 }
7365 else
7366 {
7367 //
7368 // optimized case
7369 //
7370 double ax = alpha.x, ay = alpha.y;
7371 if( bconj )
7372 {
7373 for(i=0; i<n; i++, vdst++, vsrc++)
7374 {
7375 vdst->x += ax*vsrc->x+ay*vsrc->y;
7376 vdst->y -= ax*vsrc->y-ay*vsrc->x;
7377 }
7378 }
7379 else
7380 {
7381 for(i=0; i<n; i++, vdst++, vsrc++)
7382 {
7383 vdst->x += ax*vsrc->x-ay*vsrc->y;
7384 vdst->y += ax*vsrc->y+ay*vsrc->x;
7385 }
7386 }
7387 }
7388 }
7389
vadd(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N,alglib::complex alpha)7390 void alglib::vadd(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N, alglib::complex alpha)
7391 {
7392 vadd(vdst, 1, vsrc, 1, "N", N, alpha);
7393 }
7394
vsub(double * vdst,ae_int_t stride_dst,const double * vsrc,ae_int_t stride_src,ae_int_t n)7395 void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
7396 {
7397 ae_int_t i;
7398 if( stride_dst!=1 || stride_src!=1 )
7399 {
7400 //
7401 // general unoptimized case
7402 //
7403 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7404 *vdst -= *vsrc;
7405 }
7406 else
7407 {
7408 //
7409 // optimized case
7410 //
7411 ae_int_t n2 = n/2;
7412 for(i=0; i<n2; i++, vdst+=2, vsrc+=2)
7413 {
7414 vdst[0] -= vsrc[0];
7415 vdst[1] -= vsrc[1];
7416 }
7417 if( n%2!=0 )
7418 vdst[0] -= vsrc[0];
7419 }
7420 }
7421
vsub(double * vdst,const double * vsrc,ae_int_t N)7422 void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N)
7423 {
7424 vsub(vdst, 1, vsrc, 1, N);
7425 }
7426
vsub(alglib::complex * vdst,ae_int_t stride_dst,const alglib::complex * vsrc,ae_int_t stride_src,const char * conj_src,ae_int_t n)7427 void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
7428 {
7429 bool bconj = !((conj_src[0]=='N') || (conj_src[0]=='n'));
7430 ae_int_t i;
7431 if( stride_dst!=1 || stride_src!=1 )
7432 {
7433 //
7434 // general unoptimized case
7435 //
7436 if( bconj )
7437 {
7438 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7439 {
7440 vdst->x -= vsrc->x;
7441 vdst->y += vsrc->y;
7442 }
7443 }
7444 else
7445 {
7446 for(i=0; i<n; i++, vdst+=stride_dst, vsrc+=stride_src)
7447 {
7448 vdst->x -= vsrc->x;
7449 vdst->y -= vsrc->y;
7450 }
7451 }
7452 }
7453 else
7454 {
7455 //
7456 // optimized case
7457 //
7458 if( bconj )
7459 {
7460 for(i=0; i<n; i++, vdst++, vsrc++)
7461 {
7462 vdst->x -= vsrc->x;
7463 vdst->y += vsrc->y;
7464 }
7465 }
7466 else
7467 {
7468 for(i=0; i<n; i++, vdst++, vsrc++)
7469 {
7470 vdst->x -= vsrc->x;
7471 vdst->y -= vsrc->y;
7472 }
7473 }
7474 }
7475 }
7476
vsub(alglib::complex * vdst,const alglib::complex * vsrc,ae_int_t N)7477 void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t N)
7478 {
7479 vsub(vdst, 1, vsrc, 1, "N", N);
7480 }
7481
/*************************************************************************
Scaled subtraction family: vdst -= alpha*vsrc. All overloads forward to
the corresponding vadd() with the sign of alpha flipped, so there is a
single implementation of the accumulation kernels.
*************************************************************************/
void alglib::vsub(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
{
    vadd(vdst, stride_dst, vsrc, stride_src, n, -alpha);
}

// unit-stride real overload
void alglib::vsub(double *vdst, const double *vsrc, ae_int_t N, double alpha)
{
    vadd(vdst, 1, vsrc, 1, N, -alpha);
}

// strided complex overload, real alpha
void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
{
    vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
}

// unit-stride complex overload, real alpha, no conjugation
void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, double alpha)
{
    vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
}

// strided complex overload, complex alpha
void alglib::vsub(alglib::complex *vdst, ae_int_t stride_dst, const alglib::complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, alglib::complex alpha)
{
    vadd(vdst, stride_dst, vsrc, stride_src, conj_src, n, -alpha);
}

// unit-stride complex overload, complex alpha, no conjugation
void alglib::vsub(alglib::complex *vdst, const alglib::complex *vsrc, ae_int_t n, alglib::complex alpha)
{
    vadd(vdst, 1, vsrc, 1, "N", n, -alpha);
}
vmul(double * vdst,ae_int_t stride_dst,ae_int_t n,double alpha)7511 void alglib::vmul(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
7512 {
7513 ae_int_t i;
7514 if( stride_dst!=1 )
7515 {
7516 //
7517 // general unoptimized case
7518 //
7519 for(i=0; i<n; i++, vdst+=stride_dst)
7520 *vdst *= alpha;
7521 }
7522 else
7523 {
7524 //
7525 // optimized case
7526 //
7527 for(i=0; i<n; i++, vdst++)
7528 *vdst *= alpha;
7529 }
7530 }
7531
vmul(double * vdst,ae_int_t N,double alpha)7532 void alglib::vmul(double *vdst, ae_int_t N, double alpha)
7533 {
7534 vmul(vdst, 1, N, alpha);
7535 }
7536
vmul(alglib::complex * vdst,ae_int_t stride_dst,ae_int_t n,double alpha)7537 void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
7538 {
7539 ae_int_t i;
7540 if( stride_dst!=1 )
7541 {
7542 //
7543 // general unoptimized case
7544 //
7545 for(i=0; i<n; i++, vdst+=stride_dst)
7546 {
7547 vdst->x *= alpha;
7548 vdst->y *= alpha;
7549 }
7550 }
7551 else
7552 {
7553 //
7554 // optimized case
7555 //
7556 for(i=0; i<n; i++, vdst++)
7557 {
7558 vdst->x *= alpha;
7559 vdst->y *= alpha;
7560 }
7561 }
7562 }
7563
vmul(alglib::complex * vdst,ae_int_t N,double alpha)7564 void alglib::vmul(alglib::complex *vdst, ae_int_t N, double alpha)
7565 {
7566 vmul(vdst, 1, N, alpha);
7567 }
7568
vmul(alglib::complex * vdst,ae_int_t stride_dst,ae_int_t n,alglib::complex alpha)7569 void alglib::vmul(alglib::complex *vdst, ae_int_t stride_dst, ae_int_t n, alglib::complex alpha)
7570 {
7571 ae_int_t i;
7572 if( stride_dst!=1 )
7573 {
7574 //
7575 // general unoptimized case
7576 //
7577 double ax = alpha.x, ay = alpha.y;
7578 for(i=0; i<n; i++, vdst+=stride_dst)
7579 {
7580 double dstx = vdst->x, dsty = vdst->y;
7581 vdst->x = ax*dstx-ay*dsty;
7582 vdst->y = ax*dsty+ay*dstx;
7583 }
7584 }
7585 else
7586 {
7587 //
7588 // optimized case
7589 //
7590 double ax = alpha.x, ay = alpha.y;
7591 for(i=0; i<n; i++, vdst++)
7592 {
7593 double dstx = vdst->x, dsty = vdst->y;
7594 vdst->x = ax*dstx-ay*dsty;
7595 vdst->y = ax*dsty+ay*dstx;
7596 }
7597 }
7598 }
7599
vmul(alglib::complex * vdst,ae_int_t N,alglib::complex alpha)7600 void alglib::vmul(alglib::complex *vdst, ae_int_t N, alglib::complex alpha)
7601 {
7602 vmul(vdst, 1, N, alpha);
7603 }
7604
/*************************************************************************
Number of elements in the inclusive index range [n1,n2].
*************************************************************************/
alglib::ae_int_t alglib::vlen(ae_int_t n1, ae_int_t n2)
{
    return n2-n1+1;
}
7609
7610
7611 /********************************************************************
7612 Matrices and vectors
7613 ********************************************************************/
ae_vector_wrapper(alglib_impl::ae_vector * e_ptr,alglib_impl::ae_datatype datatype)7614 alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_vector *e_ptr, alglib_impl::ae_datatype datatype)
7615 {
7616 if( e_ptr==NULL || e_ptr->datatype!=datatype )
7617 {
7618 const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
7619 #if !defined(AE_NO_EXCEPTIONS)
7620 _ALGLIB_CPP_EXCEPTION(msg);
7621 #else
7622 ptr = NULL;
7623 is_frozen_proxy = false;
7624 _ALGLIB_SET_ERROR_FLAG(msg);
7625 return;
7626 #endif
7627 }
7628 ptr = e_ptr;
7629 is_frozen_proxy = true;
7630 }
7631
/*************************************************************************
Constructs an owning, empty (length 0) vector of the given datatype.
Error handling uses the ALGLIB setjmp-based protocol: ae_assert failures
inside the C core longjmp back to _break_jump.
*************************************************************************/
alglib::ae_vector_wrapper::ae_vector_wrapper(alglib_impl::ae_datatype datatype)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // reached via longjmp from the C core on error
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // own the inner storage; zero it before init as the C core expects
    ptr = &inner_vec;
    is_frozen_proxy = false;
    memset(ptr, 0, sizeof(*ptr));
    ae_vector_init(ptr, 0, datatype, &_state, ae_false);
    ae_state_clear(&_state);
}
7656
/*************************************************************************
Copy-constructs an owning vector as a deep copy of rhs. The source must
be initialized and of the expected datatype; violations raise an
exception (or set the error flag in no-exception builds).
*************************************************************************/
alglib::ae_vector_wrapper::ae_vector_wrapper(const ae_vector_wrapper &rhs, alglib_impl::ae_datatype datatype)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // reached via longjmp from the C core on error
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(rhs.ptr!=NULL, "ALGLIB: ae_vector_wrapper source is not initialized", &_state);
    alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_vector_wrapper datatype check failed", &_state);
    // deep copy into our own inner storage
    ptr = &inner_vec;
    is_frozen_proxy = false;
    memset(ptr, 0, sizeof(*ptr));
    ae_vector_init_copy(ptr, rhs.ptr, &_state, ae_false);
    ae_state_clear(&_state);
}
7683
/*************************************************************************
Destructor: releases the vector only when this wrapper owns its inner
storage; frozen proxies attached to external vectors free nothing.
*************************************************************************/
alglib::ae_vector_wrapper::~ae_vector_wrapper()
{
    if( ptr==&inner_vec )
        ae_vector_clear(ptr);
}
7689
/*************************************************************************
Resizes the vector to iLen elements (contents are not preserved by the
underlying ae_vector_set_length). Fails on uninitialized wrappers and on
frozen proxies, which may not be resized.
*************************************************************************/
void alglib::ae_vector_wrapper::setlength(ae_int_t iLen)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // reached via longjmp from the C core on error
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, ptr==NULL (array was not correctly initialized)", &_state);
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, ptr is frozen proxy array", &_state);
    alglib_impl::ae_vector_set_length(ptr, iLen, &_state);
    alglib_impl::ae_state_clear(&_state);
}

/*************************************************************************
Current number of elements; 0 for an uninitialized (detached) wrapper.
*************************************************************************/
alglib::ae_int_t alglib::ae_vector_wrapper::length() const
{
    if( ptr==NULL )
        return 0;
    return ptr->cnt;
}
7717
/*************************************************************************
Attaches this wrapper to external x_vector storage: any previously owned
contents are freed first, then inner_vec is re-initialized as a view of
new_ptr and the wrapper becomes a frozen (non-resizable) proxy.
*************************************************************************/
void alglib::ae_vector_wrapper::attach_to(alglib_impl::x_vector *new_ptr, alglib_impl::ae_state *_state)
{
    // release currently owned storage, if any, before rebinding
    if( ptr==&inner_vec )
        ae_vector_clear(ptr);
    ptr = &inner_vec;
    memset(ptr, 0, sizeof(*ptr));
    ae_vector_init_attach_to_x(ptr, new_ptr, _state, ae_false);
    is_frozen_proxy = true;
}
7727
/*************************************************************************
Assignment helper shared by the typed array classes. Validates that both
sides are initialized and type-compatible, resizes the destination when
allowed (frozen proxies must already match in size), then copies the raw
element storage.

Fix: memcpy() was called unconditionally; for empty vectors (cnt==0)
ptr.p_ptr may be NULL, and passing NULL to memcpy is undefined behavior
even with a zero length. The copy is now skipped when cnt==0.
*************************************************************************/
const alglib::ae_vector_wrapper& alglib::ae_vector_wrapper::assign(const alglib::ae_vector_wrapper &rhs)
{
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    if( this==&rhs )
        return *this;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
        // reached via longjmp from the C core on error
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized destination)", &_state);
    ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment (uninitialized source)", &_state);
    ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types do not match)", &_state);
    if( is_frozen_proxy )
        ae_assert(rhs.ptr->cnt==ptr->cnt, "ALGLIB: incorrect assignment to proxy array (sizes do not match)", &_state);
    if( rhs.ptr->cnt!=ptr->cnt )
        ae_vector_set_length(ptr, rhs.ptr->cnt, &_state);
    if( ptr->cnt>0 )
        memcpy(ptr->ptr.p_ptr, rhs.ptr->ptr.p_ptr, ptr->cnt*alglib_impl::ae_sizeof(ptr->datatype));
    alglib_impl::ae_state_clear(&_state);
    return *this;
}
7756
/*************************************************************************
Read-only access to the underlying C-core ae_vector.
*************************************************************************/
const alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr() const
{
    return ptr;
}

/*************************************************************************
Mutable access to the underlying C-core ae_vector.
*************************************************************************/
alglib_impl::ae_vector* alglib::ae_vector_wrapper::c_ptr()
{
    return ptr;
}
7766
7767 #if !defined(AE_NO_EXCEPTIONS)
/*************************************************************************
Constructs an owning vector by parsing a bracketed, comma-separated
string representation (e.g. "[1,2,3]"). Whitespace is stripped first;
the temporary buffer p is freed on both success and failure paths.
Only available when exceptions are enabled.
*************************************************************************/
alglib::ae_vector_wrapper::ae_vector_wrapper(const char *s, alglib_impl::ae_datatype datatype)
{
    std::vector<const char*> svec;
    size_t i;
    // copy of s with spaces removed; we own this buffer
    char *p = filter_spaces(s);
    if( p==NULL )
        _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
    try
    {
        // split into per-element substrings pointing into p
        str_vector_create(p, true, &svec);
        {
            jmp_buf _break_jump;
            alglib_impl::ae_state _state;
            alglib_impl::ae_state_init(&_state);
            if( setjmp(_break_jump) )
                _ALGLIB_CPP_EXCEPTION(_state.error_msg);
            alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
            ptr = &inner_vec;
            is_frozen_proxy = false;
            memset(ptr, 0, sizeof(*ptr));
            ae_vector_init(ptr, (ae_int_t)(svec.size()), datatype, &_state, ae_false);
            ae_state_clear(&_state);
        }
        // parse each element according to the requested datatype;
        // elements are delimited by ',' or the closing ']'
        for(i=0; i<svec.size(); i++)
        {
            if( datatype==alglib_impl::DT_BOOL )
                ptr->ptr.p_bool[i] = parse_bool_delim(svec[i],",]");
            if( datatype==alglib_impl::DT_INT )
                ptr->ptr.p_int[i] = parse_int_delim(svec[i],",]");
            if( datatype==alglib_impl::DT_REAL )
                ptr->ptr.p_double[i] = parse_real_delim(svec[i],",]");
            if( datatype==alglib_impl::DT_COMPLEX )
            {
                alglib::complex t = parse_complex_delim(svec[i],",]");
                ptr->ptr.p_complex[i].x = t.x;
                ptr->ptr.p_complex[i].y = t.y;
            }
        }
        alglib_impl::ae_free(p);
    }
    catch(...)
    {
        // release the parse buffer before propagating
        alglib_impl::ae_free(p);
        throw;
    }
}
7814 #endif
7815
boolean_1d_array()7816 alglib::boolean_1d_array::boolean_1d_array():ae_vector_wrapper(alglib_impl::DT_BOOL)
7817 {
7818 }
7819
// Copy constructor: deep-copies rhs via the base class.
alglib::boolean_1d_array::boolean_1d_array(const alglib::boolean_1d_array &rhs)
    :ae_vector_wrapper(rhs, alglib_impl::DT_BOOL)
{
}
7823
boolean_1d_array(alglib_impl::ae_vector * p)7824 alglib::boolean_1d_array::boolean_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_BOOL)
7825 {
7826 }
7827
// Assignment delegates to the shared base-class implementation.
const alglib::boolean_1d_array& alglib::boolean_1d_array::operator=(const alglib::boolean_1d_array &rhs)
{
    return static_cast<const alglib::boolean_1d_array&>(this->assign(rhs));
}
7832
~boolean_1d_array()7833 alglib::boolean_1d_array::~boolean_1d_array()
7834 {
7835 }
7836
operator ()(ae_int_t i) const7837 const ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i) const
7838 {
7839 return ptr->ptr.p_bool[i];
7840 }
7841
operator ()(ae_int_t i)7842 ae_bool& alglib::boolean_1d_array::operator()(ae_int_t i)
7843 {
7844 return ptr->ptr.p_bool[i];
7845 }
7846
operator [](ae_int_t i) const7847 const ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i) const
7848 {
7849 return ptr->ptr.p_bool[i];
7850 }
7851
operator [](ae_int_t i)7852 ae_bool& alglib::boolean_1d_array::operator[](ae_int_t i)
7853 {
7854 return ptr->ptr.p_bool[i];
7855 }
7856
setcontent(ae_int_t iLen,const bool * pContent)7857 void alglib::boolean_1d_array::setcontent(ae_int_t iLen, const bool *pContent )
7858 {
7859 ae_int_t i;
7860
7861 // setlength, with exception-free error handling fallback code
7862 setlength(iLen);
7863 if( ptr==NULL || ptr->cnt!=iLen )
7864 return;
7865
7866 // copy
7867 for(i=0; i<iLen; i++)
7868 ptr->ptr.p_bool[i] = pContent[i];
7869 }
7870
getcontent()7871 ae_bool* alglib::boolean_1d_array::getcontent()
7872 {
7873 return ptr->ptr.p_bool;
7874 }
7875
getcontent() const7876 const ae_bool* alglib::boolean_1d_array::getcontent() const
7877 {
7878 return ptr->ptr.p_bool;
7879 }
7880
7881 #if !defined(AE_NO_EXCEPTIONS)
boolean_1d_array(const char * s)7882 alglib::boolean_1d_array::boolean_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_BOOL)
7883 {
7884 }
7885
tostring() const7886 std::string alglib::boolean_1d_array::tostring() const
7887 {
7888 if( length()==0 )
7889 return "[]";
7890 return arraytostring(&(operator()(0)), length());
7891 }
7892 #endif
7893
integer_1d_array()7894 alglib::integer_1d_array::integer_1d_array():ae_vector_wrapper(alglib_impl::DT_INT)
7895 {
7896 }
7897
integer_1d_array(alglib_impl::ae_vector * p)7898 alglib::integer_1d_array::integer_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_INT)
7899 {
7900 }
7901
// Copy constructor: deep-copies rhs via the base class.
alglib::integer_1d_array::integer_1d_array(const alglib::integer_1d_array &rhs)
    :ae_vector_wrapper(rhs, alglib_impl::DT_INT)
{
}
7905
// Assignment delegates to the shared base-class implementation.
const alglib::integer_1d_array& alglib::integer_1d_array::operator=(const alglib::integer_1d_array &rhs)
{
    return static_cast<const alglib::integer_1d_array&>(this->assign(rhs));
}
7910
~integer_1d_array()7911 alglib::integer_1d_array::~integer_1d_array()
7912 {
7913 }
7914
operator ()(ae_int_t i) const7915 const alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i) const
7916 {
7917 return ptr->ptr.p_int[i];
7918 }
7919
operator ()(ae_int_t i)7920 alglib::ae_int_t& alglib::integer_1d_array::operator()(ae_int_t i)
7921 {
7922 return ptr->ptr.p_int[i];
7923 }
7924
operator [](ae_int_t i) const7925 const alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i) const
7926 {
7927 return ptr->ptr.p_int[i];
7928 }
7929
operator [](ae_int_t i)7930 alglib::ae_int_t& alglib::integer_1d_array::operator[](ae_int_t i)
7931 {
7932 return ptr->ptr.p_int[i];
7933 }
7934
setcontent(ae_int_t iLen,const ae_int_t * pContent)7935 void alglib::integer_1d_array::setcontent(ae_int_t iLen, const ae_int_t *pContent )
7936 {
7937 ae_int_t i;
7938
7939 // setlength(), handle possible exception-free errors
7940 setlength(iLen);
7941 if( ptr==NULL || ptr->cnt!=iLen )
7942 return;
7943
7944 // copy
7945 for(i=0; i<iLen; i++)
7946 ptr->ptr.p_int[i] = pContent[i];
7947 }
7948
getcontent()7949 alglib::ae_int_t* alglib::integer_1d_array::getcontent()
7950 {
7951 return ptr->ptr.p_int;
7952 }
7953
getcontent() const7954 const alglib::ae_int_t* alglib::integer_1d_array::getcontent() const
7955 {
7956 return ptr->ptr.p_int;
7957 }
7958
7959 #if !defined(AE_NO_EXCEPTIONS)
integer_1d_array(const char * s)7960 alglib::integer_1d_array::integer_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_INT)
7961 {
7962 }
7963
tostring() const7964 std::string alglib::integer_1d_array::tostring() const
7965 {
7966 if( length()==0 )
7967 return "[]";
7968 return arraytostring(&operator()(0), length());
7969 }
7970 #endif
7971
real_1d_array()7972 alglib::real_1d_array::real_1d_array():ae_vector_wrapper(alglib_impl::DT_REAL)
7973 {
7974 }
7975
real_1d_array(alglib_impl::ae_vector * p)7976 alglib::real_1d_array::real_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_REAL)
7977 {
7978 }
7979
// Copy constructor: deep-copies rhs via the base class.
alglib::real_1d_array::real_1d_array(const alglib::real_1d_array &rhs)
    :ae_vector_wrapper(rhs, alglib_impl::DT_REAL)
{
}
7983
// Assignment delegates to the shared base-class implementation.
const alglib::real_1d_array& alglib::real_1d_array::operator=(const alglib::real_1d_array &rhs)
{
    return static_cast<const alglib::real_1d_array&>(this->assign(rhs));
}
7988
~real_1d_array()7989 alglib::real_1d_array::~real_1d_array()
7990 {
7991 }
7992
operator ()(ae_int_t i) const7993 const double& alglib::real_1d_array::operator()(ae_int_t i) const
7994 {
7995 return ptr->ptr.p_double[i];
7996 }
7997
operator ()(ae_int_t i)7998 double& alglib::real_1d_array::operator()(ae_int_t i)
7999 {
8000 return ptr->ptr.p_double[i];
8001 }
8002
operator [](ae_int_t i) const8003 const double& alglib::real_1d_array::operator[](ae_int_t i) const
8004 {
8005 return ptr->ptr.p_double[i];
8006 }
8007
operator [](ae_int_t i)8008 double& alglib::real_1d_array::operator[](ae_int_t i)
8009 {
8010 return ptr->ptr.p_double[i];
8011 }
8012
setcontent(ae_int_t iLen,const double * pContent)8013 void alglib::real_1d_array::setcontent(ae_int_t iLen, const double *pContent )
8014 {
8015 ae_int_t i;
8016
8017 // setlength(), handle possible exception-free errors
8018 setlength(iLen);
8019 if( ptr==NULL || ptr->cnt!=iLen )
8020 return;
8021
8022 // copy
8023 for(i=0; i<iLen; i++)
8024 ptr->ptr.p_double[i] = pContent[i];
8025 }
8026
void alglib::real_1d_array::attach_to_ptr(ae_int_t iLen, double *pContent ) // TODO: convert to constructor!!!!!!!
{
    // Attaches this array to caller-owned memory (OWN_CALLER: ALGLIB will
    // not free it). No data is copied; pContent must outlive this object.
    alglib_impl::x_vector x;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // exception-free mode: record the error and leave the wrapper unusable
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // a frozen proxy is already bound to external memory and cannot be rebound
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
    alglib_impl::ae_assert(iLen>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
    // describe the external buffer to the C core
    x.cnt = iLen;
    x.datatype = alglib_impl::DT_REAL;
    x.owner = alglib_impl::OWN_CALLER;
    x.last_action = alglib_impl::ACT_UNCHANGED;
    x.x_ptr.p_ptr = pContent;
    attach_to(&x, &_state);
    ae_state_clear(&_state);
}
8056
getcontent()8057 double* alglib::real_1d_array::getcontent()
8058 {
8059 return ptr->ptr.p_double;
8060 }
8061
getcontent() const8062 const double* alglib::real_1d_array::getcontent() const
8063 {
8064 return ptr->ptr.p_double;
8065 }
8066
8067 #if !defined(AE_NO_EXCEPTIONS)
real_1d_array(const char * s)8068 alglib::real_1d_array::real_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_REAL)
8069 {
8070 }
8071
tostring(int dps) const8072 std::string alglib::real_1d_array::tostring(int dps) const
8073 {
8074 if( length()==0 )
8075 return "[]";
8076 return arraytostring(&operator()(0), length(), dps);
8077 }
8078 #endif
8079
complex_1d_array()8080 alglib::complex_1d_array::complex_1d_array():ae_vector_wrapper(alglib_impl::DT_COMPLEX)
8081 {
8082 }
8083
complex_1d_array(alglib_impl::ae_vector * p)8084 alglib::complex_1d_array::complex_1d_array(alglib_impl::ae_vector *p):ae_vector_wrapper(p,alglib_impl::DT_COMPLEX)
8085 {
8086 }
8087
// Copy constructor: deep-copies rhs via the base class.
alglib::complex_1d_array::complex_1d_array(const alglib::complex_1d_array &rhs)
    :ae_vector_wrapper(rhs, alglib_impl::DT_COMPLEX)
{
}
8091
// Assignment delegates to the shared base-class implementation.
const alglib::complex_1d_array& alglib::complex_1d_array::operator=(const alglib::complex_1d_array &rhs)
{
    return static_cast<const alglib::complex_1d_array&>(this->assign(rhs));
}
8096
~complex_1d_array()8097 alglib::complex_1d_array::~complex_1d_array()
8098 {
8099 }
8100
operator ()(ae_int_t i) const8101 const alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i) const
8102 {
8103 return *((const alglib::complex*)(ptr->ptr.p_complex+i));
8104 }
8105
operator ()(ae_int_t i)8106 alglib::complex& alglib::complex_1d_array::operator()(ae_int_t i)
8107 {
8108 return *((alglib::complex*)(ptr->ptr.p_complex+i));
8109 }
8110
operator [](ae_int_t i) const8111 const alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i) const
8112 {
8113 return *((const alglib::complex*)(ptr->ptr.p_complex+i));
8114 }
8115
operator [](ae_int_t i)8116 alglib::complex& alglib::complex_1d_array::operator[](ae_int_t i)
8117 {
8118 return *((alglib::complex*)(ptr->ptr.p_complex+i));
8119 }
8120
setcontent(ae_int_t iLen,const alglib::complex * pContent)8121 void alglib::complex_1d_array::setcontent(ae_int_t iLen, const alglib::complex *pContent )
8122 {
8123 ae_int_t i;
8124
8125 // setlength(), handle possible exception-free errors
8126 setlength(iLen);
8127 if( ptr==NULL || ptr->cnt!=iLen )
8128 return;
8129
8130 // copy
8131 for(i=0; i<iLen; i++)
8132 {
8133 ptr->ptr.p_complex[i].x = pContent[i].x;
8134 ptr->ptr.p_complex[i].y = pContent[i].y;
8135 }
8136 }
8137
getcontent()8138 alglib::complex* alglib::complex_1d_array::getcontent()
8139 {
8140 return (alglib::complex*)ptr->ptr.p_complex;
8141 }
8142
getcontent() const8143 const alglib::complex* alglib::complex_1d_array::getcontent() const
8144 {
8145 return (const alglib::complex*)ptr->ptr.p_complex;
8146 }
8147
8148 #if !defined(AE_NO_EXCEPTIONS)
complex_1d_array(const char * s)8149 alglib::complex_1d_array::complex_1d_array(const char *s):ae_vector_wrapper(s, alglib_impl::DT_COMPLEX)
8150 {
8151 }
8152
tostring(int dps) const8153 std::string alglib::complex_1d_array::tostring(int dps) const
8154 {
8155 if( length()==0 )
8156 return "[]";
8157 return arraytostring(&operator()(0), length(), dps);
8158 }
8159 #endif
8160
ae_matrix_wrapper(alglib_impl::ae_matrix * e_ptr,alglib_impl::ae_datatype datatype)8161 alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_matrix *e_ptr, alglib_impl::ae_datatype datatype)
8162 {
8163 if( e_ptr->datatype!=datatype )
8164 {
8165 const char *msg = "ALGLIB: ae_vector_wrapper datatype check failed";
8166 #if !defined(AE_NO_EXCEPTIONS)
8167 _ALGLIB_CPP_EXCEPTION(msg);
8168 #else
8169 ptr = NULL;
8170 is_frozen_proxy = false;
8171 _ALGLIB_SET_ERROR_FLAG(msg);
8172 return;
8173 #endif
8174 }
8175 ptr = e_ptr;
8176 is_frozen_proxy = true;
8177 }
8178
alglib::ae_matrix_wrapper::ae_matrix_wrapper(alglib_impl::ae_datatype datatype)
{
    // Default constructor: allocates an owned, empty 0x0 matrix of the
    // requested datatype inside inner_mat.
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;

    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // exception-free mode: record the error and leave the wrapper unusable
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ptr = &inner_mat;
    is_frozen_proxy = false;
    memset(ptr, 0, sizeof(*ptr));
    ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
    ae_state_clear(&_state);

}
8204
ae_matrix_wrapper(const ae_matrix_wrapper & rhs,alglib_impl::ae_datatype datatype)8205 alglib::ae_matrix_wrapper::ae_matrix_wrapper(const ae_matrix_wrapper &rhs, alglib_impl::ae_datatype datatype)
8206 {
8207 jmp_buf _break_jump;
8208 alglib_impl::ae_state _state;
8209
8210 alglib_impl::ae_state_init(&_state);
8211 if( setjmp(_break_jump) )
8212 {
8213 #if !defined(AE_NO_EXCEPTIONS)
8214 _ALGLIB_CPP_EXCEPTION(_state.error_msg);
8215 #else
8216 ptr = NULL;
8217 is_frozen_proxy = false;
8218 _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
8219 return;
8220 #endif
8221 }
8222 alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
8223 is_frozen_proxy = false;
8224 ptr = NULL;
8225 alglib_impl::ae_assert(rhs.ptr->datatype==datatype, "ALGLIB: ae_matrix_wrapper datatype check failed", &_state);
8226 if( rhs.ptr!=NULL )
8227 {
8228 ptr = &inner_mat;
8229 memset(ptr, 0, sizeof(*ptr));
8230 ae_matrix_init_copy(ptr, rhs.ptr, &_state, ae_false);
8231 }
8232 ae_state_clear(&_state);
8233 }
8234
~ae_matrix_wrapper()8235 alglib::ae_matrix_wrapper::~ae_matrix_wrapper()
8236 {
8237 if( ptr==&inner_mat )
8238 ae_matrix_clear(ptr);
8239 }
8240
8241 #if !defined(AE_NO_EXCEPTIONS)
alglib::ae_matrix_wrapper::ae_matrix_wrapper(const char *s, alglib_impl::ae_datatype datatype)
{
    std::vector< std::vector<const char*> > smat;
    size_t i, j;
    // work on a whitespace-free copy of "[[a,b],[c,d]]" (ae_malloc'ed)
    char *p = filter_spaces(s);
    if( p==NULL )
        _ALGLIB_CPP_EXCEPTION("ALGLIB: allocation error");
    try
    {
        // split into per-row vectors of pointers to element strings
        str_matrix_create(p, &smat);
        {
            // allocate the destination matrix; C-core errors arrive via
            // longjmp and are converted to C++ exceptions here
            jmp_buf _break_jump;
            alglib_impl::ae_state _state;
            alglib_impl::ae_state_init(&_state);
            if( setjmp(_break_jump) )
                _ALGLIB_CPP_EXCEPTION(_state.error_msg);
            alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
            ptr = &inner_mat;
            is_frozen_proxy = false;
            memset(ptr, 0, sizeof(*ptr));
            // NOTE(review): row 0 determines the column count; presumably
            // str_matrix_create() guarantees a rectangular result — confirm
            if( smat.size()!=0 )
                ae_matrix_init(ptr, (ae_int_t)(smat.size()), (ae_int_t)(smat[0].size()), datatype, &_state, ae_false);
            else
                ae_matrix_init(ptr, 0, 0, datatype, &_state, ae_false);
            ae_state_clear(&_state);
        }
        // parse each element according to the requested datatype
        for(i=0; i<smat.size(); i++)
            for(j=0; j<smat[0].size(); j++)
            {
                if( datatype==alglib_impl::DT_BOOL )
                    ptr->ptr.pp_bool[i][j] = parse_bool_delim(smat[i][j],",]");
                if( datatype==alglib_impl::DT_INT )
                    ptr->ptr.pp_int[i][j] = parse_int_delim(smat[i][j],",]");
                if( datatype==alglib_impl::DT_REAL )
                    ptr->ptr.pp_double[i][j] = parse_real_delim(smat[i][j],",]");
                if( datatype==alglib_impl::DT_COMPLEX )
                {
                    alglib::complex t = parse_complex_delim(smat[i][j],",]");
                    ptr->ptr.pp_complex[i][j].x = t.x;
                    ptr->ptr.pp_complex[i][j].y = t.y;
                }
            }
        alglib_impl::ae_free(p);
    }
    catch(...)
    {
        // release the filtered copy even on parse/allocation failure
        alglib_impl::ae_free(p);
        throw;
    }
}
8292 #endif
8293
void alglib::ae_matrix_wrapper::setlength(ae_int_t rows, ae_int_t cols) // TODO: automatic allocation of NULL ptr!!!!!
{
    // Resizes the matrix to rows x cols. C-core errors arrive via longjmp:
    // with exceptions enabled they are rethrown as C++ exceptions, otherwise
    // the global error flag is set and the method returns silently.
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    alglib_impl::ae_assert(ptr!=NULL, "ALGLIB: setlength() error, p_mat==NULL (array was not correctly initialized)", &_state);
    // proxy (non-owning) matrices cannot be resized through the wrapper
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: setlength() error, attempt to resize proxy array", &_state);
    alglib_impl::ae_matrix_set_length(ptr, rows, cols, &_state);
    alglib_impl::ae_state_clear(&_state);
}
8314
rows() const8315 alglib::ae_int_t alglib::ae_matrix_wrapper::rows() const
8316 {
8317 if( ptr==NULL )
8318 return 0;
8319 return ptr->rows;
8320 }
8321
cols() const8322 alglib::ae_int_t alglib::ae_matrix_wrapper::cols() const
8323 {
8324 if( ptr==NULL )
8325 return 0;
8326 return ptr->cols;
8327 }
8328
isempty() const8329 bool alglib::ae_matrix_wrapper::isempty() const
8330 {
8331 return rows()==0 || cols()==0;
8332 }
8333
getstride() const8334 alglib::ae_int_t alglib::ae_matrix_wrapper::getstride() const
8335 {
8336 if( ptr==NULL )
8337 return 0;
8338 return ptr->stride;
8339 }
8340
void alglib::ae_matrix_wrapper::attach_to(alglib_impl::x_matrix *new_ptr, alglib_impl::ae_state *_state)
{
    // Re-attaches this wrapper to external storage described by new_ptr.
    // If we currently own inner_mat, release its contents first.
    if( ptr==&inner_mat )
        ae_matrix_clear(ptr);
    ptr = &inner_mat;
    memset(ptr, 0, sizeof(*ptr));
    ae_matrix_init_attach_to_x(ptr, new_ptr, _state, ae_false);
    // attached matrices may not be resized/reassigned through this wrapper
    is_frozen_proxy = true;
}
8350
const alglib::ae_matrix_wrapper& alglib::ae_matrix_wrapper::assign(const alglib::ae_matrix_wrapper &rhs)
{
    // Shared assignment implementation for the typed 2D array classes.
    // Failed ae_assert()s longjmp into the handler below.
    ae_int_t i;
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    // self-assignment is a no-op
    if( this==&rhs )
        return *this;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return *this;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    ae_assert(ptr!=NULL, "ALGLIB: incorrect assignment to matrix (uninitialized destination)", &_state);
    ae_assert(rhs.ptr!=NULL, "ALGLIB: incorrect assignment to array (uninitialized source)", &_state);
    ae_assert(rhs.ptr->datatype==ptr->datatype, "ALGLIB: incorrect assignment to array (types dont match)", &_state);
    // proxy (non-owning) destinations cannot be resized, so sizes must match
    if( is_frozen_proxy )
    {
        ae_assert(rhs.ptr->rows==ptr->rows, "ALGLIB: incorrect assignment to proxy array (sizes dont match)", &_state);
        ae_assert(rhs.ptr->cols==ptr->cols, "ALGLIB: incorrect assignment to proxy array (sizes dont match)", &_state);
    }
    if( (rhs.ptr->rows!=ptr->rows) || (rhs.ptr->cols!=ptr->cols) )
        ae_matrix_set_length(ptr, rhs.ptr->rows, rhs.ptr->cols, &_state);
    // copy row by row through the row-pointer table (rows need not be adjacent)
    for(i=0; i<ptr->rows; i++)
        memcpy(ptr->ptr.pp_void[i], rhs.ptr->ptr.pp_void[i], ptr->cols*alglib_impl::ae_sizeof(ptr->datatype));
    alglib_impl::ae_state_clear(&_state);
    return *this;
}
8384
c_ptr() const8385 const alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr() const
8386 {
8387 return ptr;
8388 }
8389
c_ptr()8390 alglib_impl::ae_matrix* alglib::ae_matrix_wrapper::c_ptr()
8391 {
8392 return ptr;
8393 }
8394
boolean_2d_array()8395 alglib::boolean_2d_array::boolean_2d_array():ae_matrix_wrapper(alglib_impl::DT_BOOL)
8396 {
8397 }
8398
// Copy constructor: deep-copies rhs via the base class.
alglib::boolean_2d_array::boolean_2d_array(const alglib::boolean_2d_array &rhs)
    :ae_matrix_wrapper(rhs, alglib_impl::DT_BOOL)
{
}
8402
boolean_2d_array(alglib_impl::ae_matrix * p)8403 alglib::boolean_2d_array::boolean_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_BOOL)
8404 {
8405 }
8406
~boolean_2d_array()8407 alglib::boolean_2d_array::~boolean_2d_array()
8408 {
8409 }
8410
// Assignment delegates to the shared base-class implementation.
const alglib::boolean_2d_array& alglib::boolean_2d_array::operator=(const alglib::boolean_2d_array &rhs)
{
    return static_cast<const boolean_2d_array&>(this->assign(rhs));
}
8415
operator ()(ae_int_t i,ae_int_t j) const8416 const ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j) const
8417 {
8418 return ptr->ptr.pp_bool[i][j];
8419 }
8420
operator ()(ae_int_t i,ae_int_t j)8421 ae_bool& alglib::boolean_2d_array::operator()(ae_int_t i, ae_int_t j)
8422 {
8423 return ptr->ptr.pp_bool[i][j];
8424 }
8425
operator [](ae_int_t i) const8426 const ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i) const
8427 {
8428 return ptr->ptr.pp_bool[i];
8429 }
8430
operator [](ae_int_t i)8431 ae_bool* alglib::boolean_2d_array::operator[](ae_int_t i)
8432 {
8433 return ptr->ptr.pp_bool[i];
8434 }
8435
setcontent(ae_int_t irows,ae_int_t icols,const bool * pContent)8436 void alglib::boolean_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const bool *pContent )
8437 {
8438 ae_int_t i, j;
8439
8440 // setlength(), handle possible exception-free errors
8441 setlength(irows, icols);
8442 if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8443 return;
8444
8445 // copy
8446 for(i=0; i<irows; i++)
8447 for(j=0; j<icols; j++)
8448 ptr->ptr.pp_bool[i][j] = pContent[i*icols+j];
8449 }
8450
8451 #if !defined(AE_NO_EXCEPTIONS)
boolean_2d_array(const char * s)8452 alglib::boolean_2d_array::boolean_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_BOOL)
8453 {
8454 }
8455
tostring() const8456 std::string alglib::boolean_2d_array::tostring() const
8457 {
8458 std::string result;
8459 ae_int_t i;
8460 if( isempty() )
8461 return "[[]]";
8462 result = "[";
8463 for(i=0; i<rows(); i++)
8464 {
8465 if( i!=0 )
8466 result += ",";
8467 result += arraytostring(&operator()(i,0), cols());
8468 }
8469 result += "]";
8470 return result;
8471 }
8472 #endif
8473
integer_2d_array()8474 alglib::integer_2d_array::integer_2d_array():ae_matrix_wrapper(alglib_impl::DT_INT)
8475 {
8476 }
8477
// Copy constructor: deep-copies rhs via the base class.
alglib::integer_2d_array::integer_2d_array(const alglib::integer_2d_array &rhs)
    :ae_matrix_wrapper(rhs, alglib_impl::DT_INT)
{
}
8481
integer_2d_array(alglib_impl::ae_matrix * p)8482 alglib::integer_2d_array::integer_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_INT)
8483 {
8484 }
8485
~integer_2d_array()8486 alglib::integer_2d_array::~integer_2d_array()
8487 {
8488 }
8489
// Assignment delegates to the shared base-class implementation.
const alglib::integer_2d_array& alglib::integer_2d_array::operator=(const alglib::integer_2d_array &rhs)
{
    return static_cast<const integer_2d_array&>(this->assign(rhs));
}
8494
operator ()(ae_int_t i,ae_int_t j) const8495 const alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j) const
8496 {
8497 return ptr->ptr.pp_int[i][j];
8498 }
8499
operator ()(ae_int_t i,ae_int_t j)8500 alglib::ae_int_t& alglib::integer_2d_array::operator()(ae_int_t i, ae_int_t j)
8501 {
8502 return ptr->ptr.pp_int[i][j];
8503 }
8504
operator [](ae_int_t i) const8505 const alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i) const
8506 {
8507 return ptr->ptr.pp_int[i];
8508 }
8509
operator [](ae_int_t i)8510 alglib::ae_int_t* alglib::integer_2d_array::operator[](ae_int_t i)
8511 {
8512 return ptr->ptr.pp_int[i];
8513 }
8514
setcontent(ae_int_t irows,ae_int_t icols,const ae_int_t * pContent)8515 void alglib::integer_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const ae_int_t *pContent )
8516 {
8517 ae_int_t i, j;
8518
8519 // setlength(), handle possible exception-free errors
8520 setlength(irows, icols);
8521 if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8522 return;
8523
8524 // copy
8525 for(i=0; i<irows; i++)
8526 for(j=0; j<icols; j++)
8527 ptr->ptr.pp_int[i][j] = pContent[i*icols+j];
8528 }
8529
8530 #if !defined(AE_NO_EXCEPTIONS)
integer_2d_array(const char * s)8531 alglib::integer_2d_array::integer_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_INT)
8532 {
8533 }
8534
tostring() const8535 std::string alglib::integer_2d_array::tostring() const
8536 {
8537 std::string result;
8538 ae_int_t i;
8539 if( isempty() )
8540 return "[[]]";
8541 result = "[";
8542 for(i=0; i<rows(); i++)
8543 {
8544 if( i!=0 )
8545 result += ",";
8546 result += arraytostring(&operator()(i,0), cols());
8547 }
8548 result += "]";
8549 return result;
8550 }
8551 #endif
8552
real_2d_array()8553 alglib::real_2d_array::real_2d_array():ae_matrix_wrapper(alglib_impl::DT_REAL)
8554 {
8555 }
8556
// Copy constructor: deep-copies rhs via the base class.
alglib::real_2d_array::real_2d_array(const alglib::real_2d_array &rhs)
    :ae_matrix_wrapper(rhs, alglib_impl::DT_REAL)
{
}
8560
real_2d_array(alglib_impl::ae_matrix * p)8561 alglib::real_2d_array::real_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_REAL)
8562 {
8563 }
8564
~real_2d_array()8565 alglib::real_2d_array::~real_2d_array()
8566 {
8567 }
8568
// Assignment delegates to the shared base-class implementation.
const alglib::real_2d_array& alglib::real_2d_array::operator=(const alglib::real_2d_array &rhs)
{
    return static_cast<const real_2d_array&>(this->assign(rhs));
}
8573
operator ()(ae_int_t i,ae_int_t j) const8574 const double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j) const
8575 {
8576 return ptr->ptr.pp_double[i][j];
8577 }
8578
operator ()(ae_int_t i,ae_int_t j)8579 double& alglib::real_2d_array::operator()(ae_int_t i, ae_int_t j)
8580 {
8581 return ptr->ptr.pp_double[i][j];
8582 }
8583
operator [](ae_int_t i) const8584 const double* alglib::real_2d_array::operator[](ae_int_t i) const
8585 {
8586 return ptr->ptr.pp_double[i];
8587 }
8588
operator [](ae_int_t i)8589 double* alglib::real_2d_array::operator[](ae_int_t i)
8590 {
8591 return ptr->ptr.pp_double[i];
8592 }
8593
setcontent(ae_int_t irows,ae_int_t icols,const double * pContent)8594 void alglib::real_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const double *pContent )
8595 {
8596 ae_int_t i, j;
8597
8598 // setlength(), handle possible exception-free errors
8599 setlength(irows, icols);
8600 if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8601 return;
8602
8603 // copy
8604 for(i=0; i<irows; i++)
8605 for(j=0; j<icols; j++)
8606 ptr->ptr.pp_double[i][j] = pContent[i*icols+j];
8607 }
8608
void alglib::real_2d_array::attach_to_ptr(ae_int_t irows, ae_int_t icols, double *pContent )
{
    // Attaches this matrix to caller-owned row-major memory (OWN_CALLER:
    // ALGLIB will not free it). No data is copied; pContent must outlive
    // this object.
    jmp_buf _break_jump;
    alglib_impl::ae_state _state;
    alglib_impl::x_matrix x;
    alglib_impl::ae_state_init(&_state);
    if( setjmp(_break_jump) )
    {
#if !defined(AE_NO_EXCEPTIONS)
        _ALGLIB_CPP_EXCEPTION(_state.error_msg);
#else
        // exception-free mode: record the error and leave the wrapper unusable
        ptr = NULL;
        is_frozen_proxy = false;
        _ALGLIB_SET_ERROR_FLAG(_state.error_msg);
        return;
#endif
    }
    alglib_impl::ae_state_set_break_jump(&_state, &_break_jump);
    // a frozen proxy is already bound to external memory and cannot be rebound
    alglib_impl::ae_assert(!is_frozen_proxy, "ALGLIB: unable to attach proxy object to something else", &_state);
    alglib_impl::ae_assert(irows>0&&icols>0, "ALGLIB: non-positive length for attach_to_ptr()", &_state);
    // describe the external buffer to the C core (densely packed rows)
    x.rows = irows;
    x.cols = icols;
    x.stride = icols;
    x.datatype = alglib_impl::DT_REAL;
    x.owner = alglib_impl::OWN_CALLER;
    x.last_action = alglib_impl::ACT_UNCHANGED;
    x.x_ptr.p_ptr = pContent;
    attach_to(&x, &_state);
    ae_state_clear(&_state);
}
8639
8640 #if !defined(AE_NO_EXCEPTIONS)
real_2d_array(const char * s)8641 alglib::real_2d_array::real_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_REAL)
8642 {
8643 }
8644
tostring(int dps) const8645 std::string alglib::real_2d_array::tostring(int dps) const
8646 {
8647 std::string result;
8648 ae_int_t i;
8649 if( isempty() )
8650 return "[[]]";
8651 result = "[";
8652 for(i=0; i<rows(); i++)
8653 {
8654 if( i!=0 )
8655 result += ",";
8656 result += arraytostring(&operator()(i,0), cols(), dps);
8657 }
8658 result += "]";
8659 return result;
8660 }
8661 #endif
8662
complex_2d_array()8663 alglib::complex_2d_array::complex_2d_array():ae_matrix_wrapper(alglib_impl::DT_COMPLEX)
8664 {
8665 }
8666
// Copy constructor: deep-copies rhs via the base class.
alglib::complex_2d_array::complex_2d_array(const alglib::complex_2d_array &rhs)
    :ae_matrix_wrapper(rhs, alglib_impl::DT_COMPLEX)
{
}
8670
complex_2d_array(alglib_impl::ae_matrix * p)8671 alglib::complex_2d_array::complex_2d_array(alglib_impl::ae_matrix *p):ae_matrix_wrapper(p,alglib_impl::DT_COMPLEX)
8672 {
8673 }
8674
~complex_2d_array()8675 alglib::complex_2d_array::~complex_2d_array()
8676 {
8677 }
8678
// Assignment delegates to the shared base-class implementation.
const alglib::complex_2d_array& alglib::complex_2d_array::operator=(const alglib::complex_2d_array &rhs)
{
    return static_cast<const complex_2d_array&>(this->assign(rhs));
}
8683
operator ()(ae_int_t i,ae_int_t j) const8684 const alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j) const
8685 {
8686 return *((const alglib::complex*)(ptr->ptr.pp_complex[i]+j));
8687 }
8688
operator ()(ae_int_t i,ae_int_t j)8689 alglib::complex& alglib::complex_2d_array::operator()(ae_int_t i, ae_int_t j)
8690 {
8691 return *((alglib::complex*)(ptr->ptr.pp_complex[i]+j));
8692 }
8693
operator [](ae_int_t i) const8694 const alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i) const
8695 {
8696 return (const alglib::complex*)(ptr->ptr.pp_complex[i]);
8697 }
8698
operator [](ae_int_t i)8699 alglib::complex* alglib::complex_2d_array::operator[](ae_int_t i)
8700 {
8701 return (alglib::complex*)(ptr->ptr.pp_complex[i]);
8702 }
8703
setcontent(ae_int_t irows,ae_int_t icols,const alglib::complex * pContent)8704 void alglib::complex_2d_array::setcontent(ae_int_t irows, ae_int_t icols, const alglib::complex *pContent )
8705 {
8706 ae_int_t i, j;
8707
8708 // setlength(), handle possible exception-free errors
8709 setlength(irows, icols);
8710 if( ptr==NULL || ptr->rows!=irows || ptr->cols!=icols )
8711 return;
8712
8713 // copy
8714 for(i=0; i<irows; i++)
8715 for(j=0; j<icols; j++)
8716 {
8717 ptr->ptr.pp_complex[i][j].x = pContent[i*icols+j].x;
8718 ptr->ptr.pp_complex[i][j].y = pContent[i*icols+j].y;
8719 }
8720 }
8721
8722 #if !defined(AE_NO_EXCEPTIONS)
complex_2d_array(const char * s)8723 alglib::complex_2d_array::complex_2d_array(const char *s):ae_matrix_wrapper(s, alglib_impl::DT_COMPLEX)
8724 {
8725 }
8726
tostring(int dps) const8727 std::string alglib::complex_2d_array::tostring(int dps) const
8728 {
8729 std::string result;
8730 ae_int_t i;
8731 if( isempty() )
8732 return "[[]]";
8733 result = "[";
8734 for(i=0; i<rows(); i++)
8735 {
8736 if( i!=0 )
8737 result += ",";
8738 result += arraytostring(&operator()(i,0), cols(), dps);
8739 }
8740 result += "]";
8741 return result;
8742 }
8743 #endif
8744
8745 /********************************************************************
8746 Internal functions
8747 ********************************************************************/
get_aenv_nan()8748 double alglib::get_aenv_nan()
8749 {
8750 double r;
8751 alglib_impl::ae_state _alglib_env_state;
8752 alglib_impl::ae_state_init(&_alglib_env_state);
8753 r = _alglib_env_state.v_nan;
8754 alglib_impl::ae_state_clear(&_alglib_env_state);
8755 return r;
8756 }
8757
get_aenv_posinf()8758 double alglib::get_aenv_posinf()
8759 {
8760 double r;
8761 alglib_impl::ae_state _alglib_env_state;
8762 alglib_impl::ae_state_init(&_alglib_env_state);
8763 r = _alglib_env_state.v_posinf;
8764 alglib_impl::ae_state_clear(&_alglib_env_state);
8765 return r;
8766 }
8767
get_aenv_neginf()8768 double alglib::get_aenv_neginf()
8769 {
8770 double r;
8771 alglib_impl::ae_state _alglib_env_state;
8772 alglib_impl::ae_state_init(&_alglib_env_state);
8773 r = _alglib_env_state.v_neginf;
8774 alglib_impl::ae_state_clear(&_alglib_env_state);
8775 return r;
8776 }
8777
my_stricmp(const char * s1,const char * s2)8778 alglib::ae_int_t alglib::my_stricmp(const char *s1, const char *s2)
8779 {
8780 int c1, c2;
8781
8782 //
8783 // handle special cases
8784 //
8785 if(s1==NULL && s2!=NULL)
8786 return -1;
8787 if(s1!=NULL && s2==NULL)
8788 return +1;
8789 if(s1==NULL && s2==NULL)
8790 return 0;
8791
8792 //
8793 // compare
8794 //
8795 for (;;)
8796 {
8797 c1 = *s1;
8798 c2 = *s2;
8799 s1++;
8800 s2++;
8801 if( c1==0 )
8802 return c2==0 ? 0 : -1;
8803 if( c2==0 )
8804 return c1==0 ? 0 : +1;
8805 c1 = tolower(c1);
8806 c2 = tolower(c2);
8807 if( c1<c2 )
8808 return -1;
8809 if( c1>c2 )
8810 return +1;
8811 }
8812 }
8813
8814 #if !defined(AE_NO_EXCEPTIONS)
8815 //
8816 // This function filters out all spaces from the string.
8817 // It returns string allocated with ae_malloc().
8818 // On allocaction failure returns NULL.
8819 //
filter_spaces(const char * s)8820 char* alglib::filter_spaces(const char *s)
8821 {
8822 size_t i, n;
8823 char *r;
8824 char *r0;
8825 n = strlen(s);
8826 r = (char*)alglib_impl::ae_malloc(n+1,NULL);
8827 if( r==NULL )
8828 return r;
8829 for(i=0,r0=r; i<=n; i++,s++)
8830 if( !isspace(*s) )
8831 {
8832 *r0 = *s;
8833 r0++;
8834 }
8835 return r;
8836 }
8837
//
// Splits a "[a,b,c]"-style string into a vector of pointers, each pointing
// INTO the original buffer at the first character of an element (no copies
// are made and elements are not zero-terminated; callers parse up to the
// next ',' or ']').  If match_head_only is true, trailing characters after
// the closing ']' raise an error; otherwise parsing stops at the first ']'.
// Throws on malformed input.
//
void alglib::str_vector_create(const char *src, bool match_head_only, std::vector<const char*> *p_vec)
{
    //
    // parse beginning of the string.
    // try to handle "[]" string
    //
    p_vec->clear();
    if( *src!='[' )
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
    src++;
    if( *src==']' )
        return;
    // non-empty vector: first element starts right after '['
    p_vec->push_back(src);
    for(;;)
    {
        // premature end of string: no closing ']' seen
        if( *src==0 )
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
        if( *src==']' )
        {
            // accept ']' if it terminates the string or if we are parsing
            // an embedded vector (match_head_only==false)
            if( src[1]==0 || !match_head_only)
                return;
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for vector");
        }
        if( *src==',' )
        {
            // next element starts right after the comma
            p_vec->push_back(src+1);
            src++;
            continue;
        }
        src++;
    }
}
8870
//
// Splits a "[[...],[...]]"-style string into a matrix of element pointers
// (see str_vector_create for the per-row pointer semantics). Verifies that
// the matrix is rectangular and that no garbage follows the closing "]]".
// Throws on malformed input.
//
void alglib::str_matrix_create(const char *src, std::vector< std::vector<const char*> > *p_mat)
{
    p_mat->clear();

    //
    // Try to handle "[[]]" string
    //
    if( strcmp(src, "[[]]")==0 )
        return;

    //
    // Parse non-empty string
    //
    if( *src!='[' )
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
    src++;
    for(;;)
    {
        // parse one row in place; match_head_only=false because the row is
        // embedded in a larger string
        p_mat->push_back(std::vector<const char*>());
        str_vector_create(src, false, &p_mat->back());
        // rectangularity check: every row must match the first row's size
        if( p_mat->back().size()==0 || p_mat->back().size()!=(*p_mat)[0].size() )
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
        // skip past the row's closing ']'
        src = strchr(src, ']');
        if( src==NULL )
            _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
        src++;
        if( *src==',' )
        {
            src++;
            continue;
        }
        if( *src==']' )
            break;
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
    }
    // outer ']' consumed; the string must end here
    src++;
    if( *src!=0 )
        _ALGLIB_CPP_EXCEPTION("Incorrect initializer for matrix");
}
8910
//
// Parses a case-insensitive "true"/"false" literal at the beginning of s.
// The literal MUST be followed by one of the characters in delim - a
// terminating '\0' right after the literal is rejected (note: strchr(delim,0)
// always matches the terminator, hence the explicit ==0 check first).
// Throws on anything that is not a correctly delimited boolean.
//
ae_bool alglib::parse_bool_delim(const char *s, const char *delim)
{
    const char *p;
    char buf[8];

    // try to parse false
    p = "false";
    // copy at most strlen(p) leading chars of s into a zeroed buffer so the
    // case-insensitive comparison sees exactly the candidate prefix
    memset(buf, 0, sizeof(buf));
    strncpy(buf, s, strlen(p));
    if( my_stricmp(buf, p)==0 )
    {
        // literal matched; require a delimiter character right after it
        if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        return ae_false;
    }

    // try to parse true
    p = "true";
    memset(buf, 0, sizeof(buf));
    strncpy(buf, s, strlen(p));
    if( my_stricmp(buf, p)==0 )
    {
        if( s[strlen(p)]==0 || strchr(delim,s[strlen(p)])==NULL )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        return ae_true;
    }

    // error
    _ALGLIB_CPP_EXCEPTION("Cannot parse value");
}
8941
//
// Parses a signed decimal integer at the beginning of s, which must be
// immediately followed by one of the characters in delim. Throws if the
// string is malformed or if the value does not fit into ae_int_t.
//
alglib::ae_int_t alglib::parse_int_delim(const char *s, const char *delim)
{
    const char *p;
    long long_val;
    // volatile prevents the compiler from optimizing away the
    // round-trip used below to detect long -> ae_int_t truncation
    volatile ae_int_t ae_val;

    p = s;

    //
    // check string structure:
    // * leading sign
    // * at least one digit
    // * delimiter
    //
    if( *s=='-' || *s=='+' )
        s++;
    if( *s==0 || strchr("1234567890",*s)==NULL)
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    while( *s!=0 && strchr("1234567890",*s)!=NULL )
        s++;
    if( *s==0 || strchr(delim,*s)==NULL )
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");

    // convert and ensure that value fits into ae_int_t
    s = p;
    long_val = atol(s);
    ae_val = long_val;
    if( ae_val!=long_val )
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    return ae_val;
}
8973
//
// Low-level scanner for one real number at the beginning of s, which must
// be followed by a character from delim. Accepts finite values in
// "[sign][ddd][.][ddd][e|E[sign]ddd]" form as well as case-insensitive
// NAN/INF (with optional sign). On success stores the value into *result,
// points *new_s at the delimiter, and returns true; on any mismatch
// returns false without throwing (*result/*new_s unspecified).
//
bool alglib::_parse_real_delim(const char *s, const char *delim, double *result, const char **new_s)
{
    const char *p;
    char *t;
    bool has_digits;
    char buf[64];
    int isign;
    lconv *loc;

    p = s;

    //
    // check string structure and decide what to do
    //
    isign = 1;
    if( *s=='-' || *s=='+' )
    {
        isign = *s=='-' ? -1 : +1;
        s++;
    }
    // peek at the next three characters to detect NAN/INF literals
    memset(buf, 0, sizeof(buf));
    strncpy(buf, s, 3);
    if( my_stricmp(buf,"nan")!=0 && my_stricmp(buf,"inf")!=0 )
    {
        //
        // [sign] [ddd] [.] [ddd] [e|E[sign]ddd]
        //
        // at least one digit must be present on either side of the '.'
        has_digits = false;
        if( *s!=0 && strchr("1234567890",*s)!=NULL )
        {
            has_digits = true;
            while( *s!=0 && strchr("1234567890",*s)!=NULL )
                s++;
        }
        if( *s=='.' )
            s++;
        if( *s!=0 && strchr("1234567890",*s)!=NULL )
        {
            has_digits = true;
            while( *s!=0 && strchr("1234567890",*s)!=NULL )
                s++;
        }
        if (!has_digits )
            return false;
        // optional exponent: e|E, optional sign, at least one digit
        if( *s=='e' || *s=='E' )
        {
            s++;
            if( *s=='-' || *s=='+' )
                s++;
            if( *s==0 || strchr("1234567890",*s)==NULL )
                return false;
            while( *s!=0 && strchr("1234567890",*s)!=NULL )
                s++;
        }
        // the number must be followed by a delimiter character
        if( *s==0 || strchr(delim,*s)==NULL )
            return false;
        *new_s = s;

        //
        // finite value conversion
        //
        if( *new_s-p>=(int)sizeof(buf) )
            return false;
        strncpy(buf, p, (size_t)(*new_s-p));
        buf[*new_s-p] = 0;
        // atof() is locale-dependent: rewrite '.' as the current locale's
        // decimal point before conversion
        loc = localeconv();
        t = strchr(buf,'.');
        if( t!=NULL )
            *t = *loc->decimal_point;
        *result = atof(buf);
        return true;
    }
    else
    {
        //
        // check delimiter and update *new_s
        //
        s += 3;
        if( *s==0 || strchr(delim,*s)==NULL )
            return false;
        *new_s = s;

        //
        // NAN, INF conversion (sign is ignored for NAN)
        //
        if( my_stricmp(buf,"nan")==0 )
            *result = fp_nan;
        if( my_stricmp(buf,"inf")==0 )
            *result = isign>0 ? fp_posinf : fp_neginf;
        return true;
    }
}
9066
parse_real_delim(const char * s,const char * delim)9067 double alglib::parse_real_delim(const char *s, const char *delim)
9068 {
9069 double result;
9070 const char *new_s;
9071 if( !_parse_real_delim(s, delim, &result, &new_s) )
9072 _ALGLIB_CPP_EXCEPTION("Cannot parse value");
9073 return result;
9074 }
9075
//
// Parses one complex number at the beginning of s, followed by a character
// from delim. Accepted forms: plain real ("1.5"), "a+bi"/"a-bi", "bi" alone,
// and "bi+a"/"bi-a". Throws on anything else.
//
alglib::complex alglib::parse_complex_delim(const char *s, const char *delim)
{
    double d_result;
    const char *new_s;
    alglib::complex c_result;

    // parse as real value
    if( _parse_real_delim(s, delim, &d_result, &new_s) )
        return d_result;

    // parse as "a+bi" or "a-bi"
    // (real part delimited by the sign of the imaginary part)
    if( _parse_real_delim(s, "+-", &c_result.x, &new_s) )
    {
        s = new_s;
        // imaginary part, including its sign, ends at 'i'
        if( !_parse_real_delim(s, "i", &c_result.y, &new_s) )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        s = new_s+1;
        // 'i' must be followed by a caller-specified delimiter
        if( *s==0 || strchr(delim,*s)==NULL )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        return c_result;
    }

    // parse as complex value "bi+a" or "bi-a"
    if( _parse_real_delim(s, "i", &c_result.y, &new_s) )
    {
        s = new_s+1;
        if( *s==0 )
            _ALGLIB_CPP_EXCEPTION("Cannot parse value");
        if( strchr(delim,*s)!=NULL )
        {
            // pure imaginary: "bi" directly followed by a delimiter
            c_result.x = 0;
            return c_result;
        }
        if( strchr("+-",*s)!=NULL )
        {
            // signed real part follows the imaginary part
            if( !_parse_real_delim(s, delim, &c_result.x, &new_s) )
                _ALGLIB_CPP_EXCEPTION("Cannot parse value");
            return c_result;
        }
        _ALGLIB_CPP_EXCEPTION("Cannot parse value");
    }

    // error
    _ALGLIB_CPP_EXCEPTION("Cannot parse value");
}
9121
arraytostring(const bool * ptr,ae_int_t n)9122 std::string alglib::arraytostring(const bool *ptr, ae_int_t n)
9123 {
9124 std::string result;
9125 ae_int_t i;
9126 result = "[";
9127 for(i=0; i<n; i++)
9128 {
9129 if( i!=0 )
9130 result += ",";
9131 result += ptr[i] ? "true" : "false";
9132 }
9133 result += "]";
9134 return result;
9135 }
9136
arraytostring(const ae_int_t * ptr,ae_int_t n)9137 std::string alglib::arraytostring(const ae_int_t *ptr, ae_int_t n)
9138 {
9139 std::string result;
9140 ae_int_t i;
9141 char buf[64];
9142 result = "[";
9143 for(i=0; i<n; i++)
9144 {
9145 if( sprintf(buf, i==0 ? "%ld" : ",%ld", long(ptr[i]))>=(int)sizeof(buf) )
9146 _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
9147 result += buf;
9148 }
9149 result += "]";
9150 return result;
9151 }
9152
//
// Renders a real vector as "[v,v,...]". dps selects the number of digits;
// a non-negative dps means fixed-point ("%f"), a negative one means
// scientific ("%e") with |dps| digits, capped at 50. Non-finite values are
// printed as NAN/+INF/-INF.
//
std::string alglib::arraytostring(const double *ptr, ae_int_t n, int _dps)
{
    std::string result;
    ae_int_t i;
    char buf[64];
    char mask1[64];     // format for the first element: "%.Nf" or "%.Ne"
    char mask2[80];     // format for subsequent elements: same, comma-prefixed
    int dps = _dps>=0 ? _dps : -_dps;
    dps = dps<=50 ? dps : 50;
    result = "[";
    if( sprintf(mask1, "%%.%d%s", dps, _dps>=0 ? "f" : "e")>=(int)sizeof(mask1) )
        _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
    if( sprintf(mask2, ",%s", mask1)>=(int)sizeof(mask2) )
        _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
    for(i=0; i<n; i++)
    {
        buf[0] = 0;
        if( fp_isfinite(ptr[i]) )
        {
            if( sprintf(buf, i==0 ? mask1 : mask2, double(ptr[i]))>=(int)sizeof(buf) )
                _ALGLIB_CPP_EXCEPTION("arraytostring(): buffer overflow");
        }
        else if( fp_isnan(ptr[i]) )
            strcpy(buf, i==0 ? "NAN" : ",NAN");
        else if( fp_isposinf(ptr[i]) )
            strcpy(buf, i==0 ? "+INF" : ",+INF");
        else if( fp_isneginf(ptr[i]) )
            strcpy(buf, i==0 ? "-INF" : ",-INF");
        result += buf;
    }
    result += "]";
    return result;
}
9186
arraytostring(const alglib::complex * ptr,ae_int_t n,int dps)9187 std::string alglib::arraytostring(const alglib::complex *ptr, ae_int_t n, int dps)
9188 {
9189 std::string result;
9190 ae_int_t i;
9191 result = "[";
9192 for(i=0; i<n; i++)
9193 {
9194 if( i!=0 )
9195 result += ",";
9196 result += ptr[i].tostring(dps);
9197 }
9198 result += "]";
9199 return result;
9200 }
9201 #endif
9202
9203
9204 /********************************************************************
9205 standard functions
9206 ********************************************************************/
sign(double x)9207 int alglib::sign(double x)
9208 {
9209 if( x>0 ) return 1;
9210 if( x<0 ) return -1;
9211 return 0;
9212 }
9213
// Returns a pseudorandom real in [0,1) built from two rand() draws, giving
// more resolution than a single RAND_MAX-limited draw.
// volatile forces each intermediate through memory so the compiler cannot
// reassociate the floating-point expression (result stays bit-reproducible
// across optimization levels).
double alglib::randomreal()
{
    int i1 = rand();
    int i2 = rand();
    double mx = (double)(RAND_MAX)+1.0;
    volatile double tmp0 = i2/mx;
    volatile double tmp1 = i1+tmp0;
    return tmp1/mx;
}
9223
// Returns a pseudorandom integer in [0,maxv).
// NOTE(review): assumes maxv>0 (maxv==0 divides by zero) and exhibits the
// usual modulo bias when maxv does not divide RAND_MAX+1 - both are
// long-standing properties of this helper, kept as-is.
alglib::ae_int_t alglib::randominteger(alglib::ae_int_t maxv)
{
    return ((alglib::ae_int_t)rand())%maxv;
}
9228
round(double x)9229 int alglib::round(double x)
9230 { return int(floor(x+0.5)); }
9231
trunc(double x)9232 int alglib::trunc(double x)
9233 { return int(x>0 ? floor(x) : ceil(x)); }
9234
ifloor(double x)9235 int alglib::ifloor(double x)
9236 { return int(floor(x)); }
9237
iceil(double x)9238 int alglib::iceil(double x)
9239 { return int(ceil(x)); }
9240
pi()9241 double alglib::pi()
9242 { return 3.14159265358979323846; }
9243
sqr(double x)9244 double alglib::sqr(double x)
9245 { return x*x; }
9246
maxint(int m1,int m2)9247 int alglib::maxint(int m1, int m2)
9248 {
9249 return m1>m2 ? m1 : m2;
9250 }
9251
minint(int m1,int m2)9252 int alglib::minint(int m1, int m2)
9253 {
9254 return m1>m2 ? m2 : m1;
9255 }
9256
maxreal(double m1,double m2)9257 double alglib::maxreal(double m1, double m2)
9258 {
9259 return m1>m2 ? m1 : m2;
9260 }
9261
minreal(double m1,double m2)9262 double alglib::minreal(double m1, double m2)
9263 {
9264 return m1>m2 ? m2 : m1;
9265 }
9266
fp_eq(double v1,double v2)9267 bool alglib::fp_eq(double v1, double v2)
9268 {
9269 // IEEE-strict floating point comparison
9270 volatile double x = v1;
9271 volatile double y = v2;
9272 return x==y;
9273 }
9274
fp_neq(double v1,double v2)9275 bool alglib::fp_neq(double v1, double v2)
9276 {
9277 // IEEE-strict floating point comparison
9278 return !fp_eq(v1,v2);
9279 }
9280
fp_less(double v1,double v2)9281 bool alglib::fp_less(double v1, double v2)
9282 {
9283 // IEEE-strict floating point comparison
9284 volatile double x = v1;
9285 volatile double y = v2;
9286 return x<y;
9287 }
9288
fp_less_eq(double v1,double v2)9289 bool alglib::fp_less_eq(double v1, double v2)
9290 {
9291 // IEEE-strict floating point comparison
9292 volatile double x = v1;
9293 volatile double y = v2;
9294 return x<=y;
9295 }
9296
fp_greater(double v1,double v2)9297 bool alglib::fp_greater(double v1, double v2)
9298 {
9299 // IEEE-strict floating point comparison
9300 volatile double x = v1;
9301 volatile double y = v2;
9302 return x>y;
9303 }
9304
fp_greater_eq(double v1,double v2)9305 bool alglib::fp_greater_eq(double v1, double v2)
9306 {
9307 // IEEE-strict floating point comparison
9308 volatile double x = v1;
9309 volatile double y = v2;
9310 return x>=y;
9311 }
9312
fp_isnan(double x)9313 bool alglib::fp_isnan(double x)
9314 {
9315 return alglib_impl::ae_isnan_stateless(x,endianness);
9316 }
9317
fp_isposinf(double x)9318 bool alglib::fp_isposinf(double x)
9319 {
9320 return alglib_impl::ae_isposinf_stateless(x,endianness);
9321 }
9322
fp_isneginf(double x)9323 bool alglib::fp_isneginf(double x)
9324 {
9325 return alglib_impl::ae_isneginf_stateless(x,endianness);
9326 }
9327
fp_isinf(double x)9328 bool alglib::fp_isinf(double x)
9329 {
9330 return alglib_impl::ae_isinf_stateless(x,endianness);
9331 }
9332
fp_isfinite(double x)9333 bool alglib::fp_isfinite(double x)
9334 {
9335 return alglib_impl::ae_isfinite_stateless(x,endianness);
9336 }
9337
9338 /********************************************************************
9339 CSV functions
9340 ********************************************************************/
9341 #if !defined(AE_NO_EXCEPTIONS)
//
// Reads a CSV file into a real matrix.
//
// filename  - file to read (opened in binary mode)
// separator - single field-separator character
// flags     - CSV_SKIP_HEADERS skips the first row
// out       - destination matrix; set to 0x0 on empty input
//
// The whole file is slurped into memory, normalized in place, scanned for
// field offsets, and converted with atof(). Fields are expected to be
// numeric; both '.' and ',' inside a field are rewritten to the current
// locale's decimal point before conversion (NOTE: this is why ',' cannot
// be used simultaneously as a separator and a decimal mark).
// Throws on I/O errors and on non-rectangular contents.
//
void alglib::read_csv(const char *filename, char separator, int flags, alglib::real_2d_array &out)
{
    int flag;

    //
    // Parameters
    //
    bool skip_first_row = (flags&CSV_SKIP_HEADERS)!=0;

    //
    // Prepare empty output array
    //
    out.setlength(0,0);

    //
    // Open file, determine size, read contents
    //
    FILE *f_in = fopen(filename, "rb");
    if( f_in==NULL )
        _ALGLIB_CPP_EXCEPTION("read_csv: unable to open input file");
    flag = fseek(f_in, 0, SEEK_END);
    AE_CRITICAL_ASSERT(flag==0);
    long int _filesize = ftell(f_in);
    AE_CRITICAL_ASSERT(_filesize>=0);
    if( _filesize==0 )
    {
        // empty file, return empty array, success
        fclose(f_in);
        return;
    }
    size_t filesize = _filesize;
    std::vector<char> v_buf;
    // +2 bytes reserved for the sentinel '\n' and '\0' appended below
    v_buf.resize(filesize+2, 0);
    char *p_buf = &v_buf[0];
    flag = fseek(f_in, 0, SEEK_SET);
    AE_CRITICAL_ASSERT(flag==0);
    size_t bytes_read = fread ((void*)p_buf, 1, filesize, f_in);
    AE_CRITICAL_ASSERT(bytes_read==filesize);
    fclose(f_in);

    //
    // Normalize file contents:
    // * replace 0x0 by spaces
    // * remove trailing spaces and newlines
    // * append trailing '\n' and '\0' characters
    // Return if file contains only spaces/newlines.
    //
    for(size_t i=0; i<filesize; i++)
        if( p_buf[i]==0 )
            p_buf[i] = ' ';
    for(; filesize>0; )
    {
        char c = p_buf[filesize-1];
        if( c==' ' || c=='\t' || c=='\n' || c=='\r' )
        {
            filesize--;
            continue;
        }
        break;
    }
    if( filesize==0 )
        return;
    p_buf[filesize+0] = '\n';
    p_buf[filesize+1] = '\0';
    filesize+=2;

    //
    // Scan dataset: record offset/length of every field and verify that
    // all rows have the same number of columns.
    //
    size_t rows_count = 0, cols_count = 0, max_length = 0;
    std::vector<size_t> offsets, lengths;
    for(size_t row_start=0; p_buf[row_start]!=0x0; )
    {
        // determine row length
        size_t row_length;
        for(row_length=0; p_buf[row_start+row_length]!='\n'; row_length++);

        // determine cols count, perform integrity check
        size_t cur_cols_cnt=1;
        for(size_t idx=0; idx<row_length; idx++)
            if( p_buf[row_start+idx]==separator )
                cur_cols_cnt++;
        if( cols_count>0 && cols_count!=cur_cols_cnt )
            _ALGLIB_CPP_EXCEPTION("read_csv: non-rectangular contents, rows have different sizes");
        cols_count = cur_cols_cnt;

        // store offsets and lengths of the fields
        size_t cur_offs = 0;
        for(size_t idx=0; idx<row_length+1; idx++)
            if( p_buf[row_start+idx]==separator || p_buf[row_start+idx]=='\n' )
            {
                offsets.push_back(row_start+cur_offs);
                lengths.push_back(idx-cur_offs);
                max_length = idx-cur_offs>max_length ? idx-cur_offs : max_length;
                cur_offs = idx+1;
            }

        // advance row start
        rows_count++;
        row_start = row_start+row_length+1;
    }
    AE_CRITICAL_ASSERT(rows_count>=1);
    AE_CRITICAL_ASSERT(cols_count>=1);
    AE_CRITICAL_ASSERT(cols_count*rows_count==offsets.size());
    AE_CRITICAL_ASSERT(cols_count*rows_count==lengths.size());
    if( rows_count==1 && skip_first_row ) // empty output, return
        return;

    //
    // Convert fields to doubles. Decimal marks ('.' or ',') are rewritten
    // in place to the locale's decimal point so atof() parses correctly.
    //
    size_t row0 = skip_first_row ? 1 : 0;
    size_t row1 = rows_count;
    lconv *loc = localeconv();
    out.setlength(row1-row0, cols_count);
    for(size_t ridx=row0; ridx<row1; ridx++)
        for(size_t cidx=0; cidx<cols_count; cidx++)
        {
            char *p_field = p_buf+offsets[ridx*cols_count+cidx];
            size_t field_len = lengths[ridx*cols_count+cidx];
            for(size_t idx=0; idx<field_len; idx++)
                if( p_field[idx]=='.' || p_field[idx]==',' )
                    p_field[idx] = *loc->decimal_point;
            out[ridx-row0][cidx] = atof(p_field);
        }
}
9468 #endif
9469
9470
9471
9472 /********************************************************************
9473 Trace functions
9474 ********************************************************************/
trace_file(std::string tags,std::string filename)9475 void alglib::trace_file(std::string tags, std::string filename)
9476 {
9477 alglib_impl::ae_trace_file(tags.c_str(), filename.c_str());
9478 }
9479
trace_disable()9480 void alglib::trace_disable()
9481 {
9482 alglib_impl::ae_trace_disable();
9483 }
9484
9485
9486
9487 /////////////////////////////////////////////////////////////////////////
9488 //
9489 // THIS SECTIONS CONTAINS OPTIMIZED LINEAR ALGEBRA CODE
9490 // IT IS SHARED BETWEEN C++ AND PURE C LIBRARIES
9491 //
9492 /////////////////////////////////////////////////////////////////////////
9493 #if defined(_ALGLIB_HAS_SSE2_INTRINSICS)
9494 #include "kernels_sse2.h"
9495 #endif
9496 #if defined(_ALGLIB_HAS_AVX2_INTRINSICS)
9497 #include "kernels_avx2.h"
9498 #endif
9499 #if defined(_ALGLIB_HAS_FMA_INTRINSICS)
9500 #include "kernels_fma.h"
9501 #endif
9502 namespace alglib_impl
9503 {
9504 #define alglib_simd_alignment 16
9505
9506 #define alglib_r_block 32
9507 #define alglib_half_r_block 16
9508 #define alglib_twice_r_block 64
9509
9510 #define alglib_c_block 16
9511 #define alglib_half_c_block 8
9512 #define alglib_twice_c_block 32
9513
9514
9515
9516
9517 /********************************************************************
9518 This subroutine calculates fast 32x32 real matrix-vector product:
9519
9520 y := beta*y + alpha*A*x
9521
9522 using either generic C code or native optimizations (if available)
9523
9524 IMPORTANT:
9525 * A must be stored in row-major order,
9526 stride is alglib_r_block,
9527 aligned on alglib_simd_alignment boundary
9528 * X must be aligned on alglib_simd_alignment boundary
9529 * Y may be non-aligned
9530 ********************************************************************/
void _ialglib_mv_32(const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
{
    ae_int_t i, k;
    const double *pa0, *pa1, *pb;

    // Two rows of A are processed per outer iteration (16 iterations x 2
    // rows = 32), each as a fully unrolled 32-element dot product with x
    // (4 inner iterations x 8 products).
    pa0 = a;
    pa1 = a+alglib_r_block;
    pb = x;
    for(i=0; i<16; i++)
    {
        double v0 = 0, v1 = 0;
        for(k=0; k<4; k++)
        {
            v0 += pa0[0]*pb[0];
            v1 += pa1[0]*pb[0];
            v0 += pa0[1]*pb[1];
            v1 += pa1[1]*pb[1];
            v0 += pa0[2]*pb[2];
            v1 += pa1[2]*pb[2];
            v0 += pa0[3]*pb[3];
            v1 += pa1[3]*pb[3];
            v0 += pa0[4]*pb[4];
            v1 += pa1[4]*pb[4];
            v0 += pa0[5]*pb[5];
            v1 += pa1[5]*pb[5];
            v0 += pa0[6]*pb[6];
            v1 += pa1[6]*pb[6];
            v0 += pa0[7]*pb[7];
            v1 += pa1[7]*pb[7];
            pa0 += 8;
            pa1 += 8;
            pb += 8;
        }
        // y := beta*y + alpha*(A*x) for the two rows just accumulated
        y[0] = beta*y[0]+alpha*v0;
        y[stride] = beta*y[stride]+alpha*v1;

        /*
         * now we've processed rows I and I+1,
         * pa0 and pa1 are pointing to rows I+1 and I+2.
         * move to I+2 and I+3.
         */
        pa0 += alglib_r_block;
        pa1 += alglib_r_block;
        pb = x;
        y+=2*stride;
    }
}
9578
9579
9580 /*************************************************************************
9581 This function calculates MxN real matrix-vector product:
9582
9583 y := beta*y + alpha*A*x
9584
9585 using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
9586
9587 If beta is zero, we do not use previous values of y (they are overwritten
9588 by alpha*A*x without ever being read). If alpha is zero, no matrix-vector
9589 product is calculated (only beta is updated); however, this update is not
9590 efficient and this function should NOT be used for multiplication of
9591 vector and scalar.
9592
9593 IMPORTANT:
9594 * 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
9595 * A must be stored in row-major order with stride equal to alglib_r_block
9596 *************************************************************************/
void _ialglib_rmv(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
{
    /*
     * Handle special cases:
     * - alpha is zero or n is zero (y is only scaled by beta)
     * - m is zero (nothing to do)
     */
    if( m==0 )
        return;
    if( alpha==0.0 || n==0 )
    {
        ae_int_t i;
        if( beta==0.0 )
        {
            // beta==0: overwrite y without reading it (avoids NaN/Inf leaks)
            for(i=0; i<m; i++)
            {
                *y = 0.0;
                y += stride;
            }
        }
        else
        {
            for(i=0; i<m; i++)
            {
                *y *= beta;
                y += stride;
            }
        }
        return;
    }

    /*
     * Handle general case: nonzero alpha, n and m
     *
     */
    if( m==32 && n==32 )
    {
        /*
         * 32x32, may be we have something better than general implementation
         */
        _ialglib_mv_32(a, x, y, stride, alpha, beta);
    }
    else
    {
        ae_int_t i, k, m2, n8, n2, ntrail2;
        const double *pa0, *pa1, *pb;

        /*
         * First M/2 rows of A are processed in pairs.
         * optimized code is used.
         */
        m2 = m/2;
        n8 = n/8;                   // number of fully unrolled 8-wide chunks
        ntrail2 = (n-8*n8)/2;       // remaining 2-wide chunks after the 8s
        for(i=0; i<m2; i++)
        {
            double v0 = 0, v1 = 0;

            /*
             * 'a' points to the part of the matrix which
             * is not processed yet
             */
            pb = x;
            pa0 = a;
            pa1 = a+alglib_r_block;
            a += alglib_twice_r_block;

            /*
             * 8 elements per iteration
             */
            for(k=0; k<n8; k++)
            {
                v0 += pa0[0]*pb[0];
                v1 += pa1[0]*pb[0];
                v0 += pa0[1]*pb[1];
                v1 += pa1[1]*pb[1];
                v0 += pa0[2]*pb[2];
                v1 += pa1[2]*pb[2];
                v0 += pa0[3]*pb[3];
                v1 += pa1[3]*pb[3];
                v0 += pa0[4]*pb[4];
                v1 += pa1[4]*pb[4];
                v0 += pa0[5]*pb[5];
                v1 += pa1[5]*pb[5];
                v0 += pa0[6]*pb[6];
                v1 += pa1[6]*pb[6];
                v0 += pa0[7]*pb[7];
                v1 += pa1[7]*pb[7];
                pa0 += 8;
                pa1 += 8;
                pb += 8;
            }

            /*
             * 2 elements per iteration
             */
            for(k=0; k<ntrail2; k++)
            {
                v0 += pa0[0]*pb[0];
                v1 += pa1[0]*pb[0];
                v0 += pa0[1]*pb[1];
                v1 += pa1[1]*pb[1];
                pa0 += 2;
                pa1 += 2;
                pb += 2;
            }

            /*
             * last element, if needed (odd n)
             */
            if( n%2!=0 )
            {
                v0 += pa0[0]*pb[0];
                v1 += pa1[0]*pb[0];
            }

            /*
             * final update; beta==0 path avoids reading y
             */
            if( beta!=0 )
            {
                y[0] = beta*y[0]+alpha*v0;
                y[stride] = beta*y[stride]+alpha*v1;
            }
            else
            {
                y[0] = alpha*v0;
                y[stride] = alpha*v1;
            }

            /*
             * move to the next pair of elements
             */
            y+=2*stride;
        }


        /*
         * Last (odd) row is processed with less optimized code.
         */
        if( m%2!=0 )
        {
            double v0 = 0;

            /*
             * 'a' points to the part of the matrix which
             * is not processed yet
             */
            pb = x;
            pa0 = a;

            /*
             * 2 elements per iteration
             */
            n2 = n/2;
            for(k=0; k<n2; k++)
            {
                v0 += pa0[0]*pb[0]+pa0[1]*pb[1];
                pa0 += 2;
                pb += 2;
            }

            /*
             * last element, if needed (odd n)
             */
            if( n%2!=0 )
                v0 += pa0[0]*pb[0];

            /*
             * final update; beta==0 path avoids reading y
             */
            if( beta!=0 )
                y[0] = beta*y[0]+alpha*v0;
            else
                y[0] = alpha*v0;
        }
    }
}
9775
9776
9777 /*************************************************************************
9778 This function calculates MxN real matrix-vector product:
9779
9780 y := beta*y + alpha*A*x
9781
9782 using generic C code. It calls _ialglib_mv_32 if both M=32 and N=32.
9783
9784 If beta is zero, we do not use previous values of y (they are overwritten
9785 by alpha*A*x without ever being read). If alpha is zero, no matrix-vector
9786 product is calculated (only beta is updated); however, this update is not
9787 efficient and this function should NOT be used for multiplication of
9788 vector and scalar.
9789
9790 IMPORTANT:
9791 * 0<=M<=alglib_r_block, 0<=N<=alglib_r_block
9792 * A must be stored in row-major order with stride equal to alglib_r_block
9793 * y may be non-aligned
9794 * both A and x must have same offset with respect to 16-byte boundary:
9795 either both are aligned, or both are aligned with offset 8. Function
9796 will crash your system if you try to call it with misaligned or
9797 incorrectly aligned data.
9798
9799 This function supports SSE2; it can be used when:
9800 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
9801 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
9802
9803 If (1) is failed, this function will be undefined. If (2) is failed, call
9804 to this function will probably crash your system.
9805
9806 If you want to know whether it is safe to call it, you should check
9807 results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
9808 and will do its work.
9809 *************************************************************************/
9810 #if defined(AE_HAS_SSE2_INTRINSICS)
_ialglib_rmv_sse2(ae_int_t m,ae_int_t n,const double * a,const double * x,double * y,ae_int_t stride,double alpha,double beta)9811 void _ialglib_rmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, double *y, ae_int_t stride, double alpha, double beta)
9812 {
9813 ae_int_t i, k, n2;
9814 ae_int_t mb3, mtail, nhead, nb8, nb2, ntail;
9815 const double *pa0, *pa1, *pa2, *pb;
9816 __m128d v0, v1, v2, va0, va1, va2, vx, vtmp;
9817
9818 /*
9819 * Handle special cases:
9820 * - alpha is zero or n is zero
9821 * - m is zero
9822 */
9823 if( m==0 )
9824 return;
9825 if( alpha==0.0 || n==0 )
9826 {
9827 if( beta==0.0 )
9828 {
9829 for(i=0; i<m; i++)
9830 {
9831 *y = 0.0;
9832 y += stride;
9833 }
9834 }
9835 else
9836 {
9837 for(i=0; i<m; i++)
9838 {
9839 *y *= beta;
9840 y += stride;
9841 }
9842 }
9843 return;
9844 }
9845
9846 /*
9847 * Handle general case: nonzero alpha, n and m
9848 *
9849 * We divide problem as follows...
9850 *
9851 * Rows M are divided into:
9852 * - mb3 blocks, each 3xN
9853 * - mtail blocks, each 1xN
9854 *
9855 * Within a row, elements are divided into:
9856 * - nhead 1x1 blocks (used to align the rest, either 0 or 1)
9857 * - nb8 1x8 blocks, aligned to 16-byte boundary
9858 * - nb2 1x2 blocks, aligned to 16-byte boundary
9859 * - ntail 1x1 blocks, aligned too (altough we don't rely on it)
9860 *
9861 */
9862 n2 = n/2;
9863 mb3 = m/3;
9864 mtail = m%3;
9865 nhead = ae_misalignment(a,alglib_simd_alignment)==0 ? 0 : 1;
9866 nb8 = (n-nhead)/8;
9867 nb2 = (n-nhead-8*nb8)/2;
9868 ntail = n-nhead-8*nb8-2*nb2;
9869 for(i=0; i<mb3; i++)
9870 {
9871 double row0, row1, row2;
9872 row0 = 0;
9873 row1 = 0;
9874 row2 = 0;
9875 pb = x;
9876 pa0 = a;
9877 pa1 = a+alglib_r_block;
9878 pa2 = a+alglib_twice_r_block;
9879 a += 3*alglib_r_block;
9880 if( nhead==1 )
9881 {
9882 vx = _mm_load_sd(pb);
9883 v0 = _mm_load_sd(pa0);
9884 v1 = _mm_load_sd(pa1);
9885 v2 = _mm_load_sd(pa2);
9886
9887 v0 = _mm_mul_sd(v0,vx);
9888 v1 = _mm_mul_sd(v1,vx);
9889 v2 = _mm_mul_sd(v2,vx);
9890
9891 pa0++;
9892 pa1++;
9893 pa2++;
9894 pb++;
9895 }
9896 else
9897 {
9898 v0 = _mm_setzero_pd();
9899 v1 = _mm_setzero_pd();
9900 v2 = _mm_setzero_pd();
9901 }
9902 for(k=0; k<nb8; k++)
9903 {
9904 /*
9905 * this code is a shuffle of simultaneous dot product.
9906 * see below for commented unshuffled original version.
9907 */
9908 vx = _mm_load_pd(pb);
9909 va0 = _mm_load_pd(pa0);
9910 va1 = _mm_load_pd(pa1);
9911 va0 = _mm_mul_pd(va0,vx);
9912 va2 = _mm_load_pd(pa2);
9913 v0 = _mm_add_pd(va0,v0);
9914 va1 = _mm_mul_pd(va1,vx);
9915 va0 = _mm_load_pd(pa0+2);
9916 v1 = _mm_add_pd(va1,v1);
9917 va2 = _mm_mul_pd(va2,vx);
9918 va1 = _mm_load_pd(pa1+2);
9919 v2 = _mm_add_pd(va2,v2);
9920 vx = _mm_load_pd(pb+2);
9921 va0 = _mm_mul_pd(va0,vx);
9922 va2 = _mm_load_pd(pa2+2);
9923 v0 = _mm_add_pd(va0,v0);
9924 va1 = _mm_mul_pd(va1,vx);
9925 va0 = _mm_load_pd(pa0+4);
9926 v1 = _mm_add_pd(va1,v1);
9927 va2 = _mm_mul_pd(va2,vx);
9928 va1 = _mm_load_pd(pa1+4);
9929 v2 = _mm_add_pd(va2,v2);
9930 vx = _mm_load_pd(pb+4);
9931 va0 = _mm_mul_pd(va0,vx);
9932 va2 = _mm_load_pd(pa2+4);
9933 v0 = _mm_add_pd(va0,v0);
9934 va1 = _mm_mul_pd(va1,vx);
9935 va0 = _mm_load_pd(pa0+6);
9936 v1 = _mm_add_pd(va1,v1);
9937 va2 = _mm_mul_pd(va2,vx);
9938 va1 = _mm_load_pd(pa1+6);
9939 v2 = _mm_add_pd(va2,v2);
9940 vx = _mm_load_pd(pb+6);
9941 va0 = _mm_mul_pd(va0,vx);
9942 v0 = _mm_add_pd(va0,v0);
9943 va2 = _mm_load_pd(pa2+6);
9944 va1 = _mm_mul_pd(va1,vx);
9945 v1 = _mm_add_pd(va1,v1);
9946 va2 = _mm_mul_pd(va2,vx);
9947 v2 = _mm_add_pd(va2,v2);
9948
9949 pa0 += 8;
9950 pa1 += 8;
9951 pa2 += 8;
9952 pb += 8;
9953
9954 /*
9955 this is unshuffled version of code above
9956
9957 vx = _mm_load_pd(pb);
9958 va0 = _mm_load_pd(pa0);
9959 va1 = _mm_load_pd(pa1);
9960 va2 = _mm_load_pd(pa2);
9961
9962 va0 = _mm_mul_pd(va0,vx);
9963 va1 = _mm_mul_pd(va1,vx);
9964 va2 = _mm_mul_pd(va2,vx);
9965
9966 v0 = _mm_add_pd(va0,v0);
9967 v1 = _mm_add_pd(va1,v1);
9968 v2 = _mm_add_pd(va2,v2);
9969
9970 vx = _mm_load_pd(pb+2);
9971 va0 = _mm_load_pd(pa0+2);
9972 va1 = _mm_load_pd(pa1+2);
9973 va2 = _mm_load_pd(pa2+2);
9974
9975 va0 = _mm_mul_pd(va0,vx);
9976 va1 = _mm_mul_pd(va1,vx);
9977 va2 = _mm_mul_pd(va2,vx);
9978
9979 v0 = _mm_add_pd(va0,v0);
9980 v1 = _mm_add_pd(va1,v1);
9981 v2 = _mm_add_pd(va2,v2);
9982
9983 vx = _mm_load_pd(pb+4);
9984 va0 = _mm_load_pd(pa0+4);
9985 va1 = _mm_load_pd(pa1+4);
9986 va2 = _mm_load_pd(pa2+4);
9987
9988 va0 = _mm_mul_pd(va0,vx);
9989 va1 = _mm_mul_pd(va1,vx);
9990 va2 = _mm_mul_pd(va2,vx);
9991
9992 v0 = _mm_add_pd(va0,v0);
9993 v1 = _mm_add_pd(va1,v1);
9994 v2 = _mm_add_pd(va2,v2);
9995
9996 vx = _mm_load_pd(pb+6);
9997 va0 = _mm_load_pd(pa0+6);
9998 va1 = _mm_load_pd(pa1+6);
9999 va2 = _mm_load_pd(pa2+6);
10000
10001 va0 = _mm_mul_pd(va0,vx);
10002 va1 = _mm_mul_pd(va1,vx);
10003 va2 = _mm_mul_pd(va2,vx);
10004
10005 v0 = _mm_add_pd(va0,v0);
10006 v1 = _mm_add_pd(va1,v1);
10007 v2 = _mm_add_pd(va2,v2);
10008 */
10009 }
10010 for(k=0; k<nb2; k++)
10011 {
10012 vx = _mm_load_pd(pb);
10013 va0 = _mm_load_pd(pa0);
10014 va1 = _mm_load_pd(pa1);
10015 va2 = _mm_load_pd(pa2);
10016
10017 va0 = _mm_mul_pd(va0,vx);
10018 v0 = _mm_add_pd(va0,v0);
10019 va1 = _mm_mul_pd(va1,vx);
10020 v1 = _mm_add_pd(va1,v1);
10021 va2 = _mm_mul_pd(va2,vx);
10022 v2 = _mm_add_pd(va2,v2);
10023
10024 pa0 += 2;
10025 pa1 += 2;
10026 pa2 += 2;
10027 pb += 2;
10028 }
10029 for(k=0; k<ntail; k++)
10030 {
10031 vx = _mm_load1_pd(pb);
10032 va0 = _mm_load1_pd(pa0);
10033 va1 = _mm_load1_pd(pa1);
10034 va2 = _mm_load1_pd(pa2);
10035
10036 va0 = _mm_mul_sd(va0,vx);
10037 v0 = _mm_add_sd(v0,va0);
10038 va1 = _mm_mul_sd(va1,vx);
10039 v1 = _mm_add_sd(v1,va1);
10040 va2 = _mm_mul_sd(va2,vx);
10041 v2 = _mm_add_sd(v2,va2);
10042 }
10043 vtmp = _mm_add_pd(_mm_unpacklo_pd(v0,v1),_mm_unpackhi_pd(v0,v1));
10044 _mm_storel_pd(&row0, vtmp);
10045 _mm_storeh_pd(&row1, vtmp);
10046 v2 = _mm_add_sd(_mm_shuffle_pd(v2,v2,1),v2);
10047 _mm_storel_pd(&row2, v2);
10048 if( beta!=0 )
10049 {
10050 y[0] = beta*y[0]+alpha*row0;
10051 y[stride] = beta*y[stride]+alpha*row1;
10052 y[2*stride] = beta*y[2*stride]+alpha*row2;
10053 }
10054 else
10055 {
10056 y[0] = alpha*row0;
10057 y[stride] = alpha*row1;
10058 y[2*stride] = alpha*row2;
10059 }
10060 y+=3*stride;
10061 }
10062 for(i=0; i<mtail; i++)
10063 {
10064 double row0;
10065 row0 = 0;
10066 pb = x;
10067 pa0 = a;
10068 a += alglib_r_block;
10069 for(k=0; k<n2; k++)
10070 {
10071 row0 += pb[0]*pa0[0]+pb[1]*pa0[1];
10072 pa0 += 2;
10073 pb += 2;
10074 }
10075 if( n%2 )
10076 row0 += pb[0]*pa0[0];
10077 if( beta!=0 )
10078 y[0] = beta*y[0]+alpha*row0;
10079 else
10080 y[0] = alpha*row0;
10081 y+=stride;
10082 }
10083 }
10084 #endif
10085
10086
10087 /*************************************************************************
10088 This subroutine calculates fast MxN complex matrix-vector product:
10089
10090 y := beta*y + alpha*A*x
10091
10092 using generic C code, where A, x, y, alpha and beta are complex.
10093
10094 If beta is zero, we do not use previous values of y (they are overwritten
10095 by alpha*A*x without ever being read). However, when alpha is zero, we
10096 still calculate A*x and multiply it by alpha (this distinction can be
10097 important when A or x contain infinities/NANs).
10098
10099 IMPORTANT:
10100 * 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
10101 * A must be stored in row-major order, as sequence of double precision
10102 pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
10103 in doubles).
10104 * Y may be referenced by cy (pointer to ae_complex) or
10105 dy (pointer to array of double precision pair) depending on what type of
10106 output you wish. Pass pointer to Y as one of these parameters,
10107 AND SET OTHER PARAMETER TO NULL.
10108 * both A and x must be aligned; y may be non-aligned.
10109 *************************************************************************/
_ialglib_cmv(ae_int_t m,ae_int_t n,const double * a,const double * x,ae_complex * cy,double * dy,ae_int_t stride,ae_complex alpha,ae_complex beta)10110 void _ialglib_cmv(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
10111 {
10112 ae_int_t i, j;
10113 const double *pa, *parow, *pb;
10114
10115 parow = a;
10116 for(i=0; i<m; i++)
10117 {
10118 double v0 = 0, v1 = 0;
10119 pa = parow;
10120 pb = x;
10121 for(j=0; j<n; j++)
10122 {
10123 v0 += pa[0]*pb[0];
10124 v1 += pa[0]*pb[1];
10125 v0 -= pa[1]*pb[1];
10126 v1 += pa[1]*pb[0];
10127
10128 pa += 2;
10129 pb += 2;
10130 }
10131 if( cy!=NULL )
10132 {
10133 double tx = (beta.x*cy->x-beta.y*cy->y)+(alpha.x*v0-alpha.y*v1);
10134 double ty = (beta.x*cy->y+beta.y*cy->x)+(alpha.x*v1+alpha.y*v0);
10135 cy->x = tx;
10136 cy->y = ty;
10137 cy+=stride;
10138 }
10139 else
10140 {
10141 double tx = (beta.x*dy[0]-beta.y*dy[1])+(alpha.x*v0-alpha.y*v1);
10142 double ty = (beta.x*dy[1]+beta.y*dy[0])+(alpha.x*v1+alpha.y*v0);
10143 dy[0] = tx;
10144 dy[1] = ty;
10145 dy += 2*stride;
10146 }
10147 parow += 2*alglib_c_block;
10148 }
10149 }
10150
10151
10152 /*************************************************************************
10153 This subroutine calculates fast MxN complex matrix-vector product:
10154
10155 y := beta*y + alpha*A*x
10156
using SSE2 intrinsics, where A, x, y, alpha and beta are complex.
10158
10159 If beta is zero, we do not use previous values of y (they are overwritten
10160 by alpha*A*x without ever being read). However, when alpha is zero, we
10161 still calculate A*x and multiply it by alpha (this distinction can be
10162 important when A or x contain infinities/NANs).
10163
10164 IMPORTANT:
10165 * 0<=M<=alglib_c_block, 0<=N<=alglib_c_block
10166 * A must be stored in row-major order, as sequence of double precision
10167 pairs. Stride is alglib_c_block (it is measured in pairs of doubles, not
10168 in doubles).
10169 * Y may be referenced by cy (pointer to ae_complex) or
10170 dy (pointer to array of double precision pair) depending on what type of
10171 output you wish. Pass pointer to Y as one of these parameters,
10172 AND SET OTHER PARAMETER TO NULL.
10173 * both A and x must be aligned; y may be non-aligned.
10174
10175 This function supports SSE2; it can be used when:
10176 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
10177 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
10178
10179 If (1) is failed, this function will be undefined. If (2) is failed, call
10180 to this function will probably crash your system.
10181
10182 If you want to know whether it is safe to call it, you should check
10183 results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
10184 and will do its work.
10185 *************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_cmv_sse2(ae_int_t m, ae_int_t n, const double *a, const double *x, ae_complex *cy, double *dy, ae_int_t stride, ae_complex alpha, ae_complex beta)
{
    ae_int_t i, j, m2;
    const double *pa0, *pa1, *parow, *pb;
    __m128d vbeta, vbetax, vbetay;
    __m128d valpha, valphax, valphay;

    m2 = m/2;
    parow = a;
    /*
     * Unify output addressing: from here on the result is always written
     * through dy as (re,im) double pairs, whichever pointer the caller
     * supplied.
     */
    if( cy!=NULL )
    {
        dy = (double*)cy;
        cy = NULL;
    }
    /*
     * Broadcast beta and alpha: vbetax/valphax carry the real part in
     * both lanes, vbetay/valphay carry the imaginary part in both lanes.
     */
    vbeta = _mm_loadh_pd(_mm_load_sd(&beta.x),&beta.y);
    vbetax = _mm_unpacklo_pd(vbeta,vbeta);
    vbetay = _mm_unpackhi_pd(vbeta,vbeta);
    valpha = _mm_loadh_pd(_mm_load_sd(&alpha.x),&alpha.y);
    valphax = _mm_unpacklo_pd(valpha,valpha);
    valphay = _mm_unpackhi_pd(valpha,valpha);
    /*
     * Main loop: two rows of A per iteration.  Lane 0 of each accumulator
     * belongs to row 2i, lane 1 to row 2i+1.
     */
    for(i=0; i<m2; i++)
    {
        __m128d vx, vy, vt0, vt1, vt2, vt3, vt4, vt5, vrx, vry, vtx, vty;
        pa0 = parow;
        pa1 = parow+2*alglib_c_block;
        pb = x;
        /* vx/vy accumulate real/imaginary parts of the two dot products */
        vx = _mm_setzero_pd();
        vy = _mm_setzero_pd();
        for(j=0; j<n; j++)
        {
            /* vt0 = x[j].re in both lanes, vt1 = x[j].im in both lanes */
            vt0 = _mm_load1_pd(pb);
            vt1 = _mm_load1_pd(pb+1);
            vt2 = _mm_load_pd(pa0);
            vt3 = _mm_load_pd(pa1);
            /*
             * Transpose the two complex loads: vt5/vt2 end up holding the
             * real parts of a[row0][j],a[row1][j]; vt4/vt3 the imaginary
             * parts.
             */
            vt5 = _mm_unpacklo_pd(vt2,vt3);
            vt4 = _mm_unpackhi_pd(vt2,vt3);
            vt2 = vt5;
            vt3 = vt4;

            /*
             * Complex multiply-accumulate per lane:
             *   vx += a.re*x.re - a.im*x.im
             *   vy += a.im*x.re + a.re*x.im
             */
            vt2 = _mm_mul_pd(vt2,vt0);
            vx = _mm_add_pd(vx,vt2);
            vt3 = _mm_mul_pd(vt3,vt1);
            vx = _mm_sub_pd(vx,vt3);
            vt4 = _mm_mul_pd(vt4,vt0);
            vy = _mm_add_pd(vy,vt4);
            vt5 = _mm_mul_pd(vt5,vt1);
            vy = _mm_add_pd(vy,vt5);

            pa0 += 2;
            pa1 += 2;
            pb += 2;
        }
        /*
         * vrx/vry := beta*y for the two destination elements.  When beta
         * is exactly zero the old contents of y are never read, which is
         * the documented beta==0 contract.
         */
        if( beta.x==0.0 && beta.y==0.0 )
        {
            vrx = _mm_setzero_pd();
            vry = _mm_setzero_pd();
        }
        else
        {
            vtx = _mm_loadh_pd(_mm_load_sd(dy+0),dy+2*stride+0);
            vty = _mm_loadh_pd(_mm_load_sd(dy+1),dy+2*stride+1);
            vrx = _mm_sub_pd(_mm_mul_pd(vbetax,vtx),_mm_mul_pd(vbetay,vty));
            vry = _mm_add_pd(_mm_mul_pd(vbetax,vty),_mm_mul_pd(vbetay,vtx));
        }
        /* add alpha*(A*x) and store both complex results (y may be unaligned) */
        vtx = _mm_sub_pd(_mm_mul_pd(valphax,vx),_mm_mul_pd(valphay,vy));
        vty = _mm_add_pd(_mm_mul_pd(valphax,vy),_mm_mul_pd(valphay,vx));
        vrx = _mm_add_pd(vrx,vtx);
        vry = _mm_add_pd(vry,vty);
        _mm_storel_pd(dy+0, vrx);
        _mm_storeh_pd(dy+2*stride+0, vrx);
        _mm_storel_pd(dy+1, vry);
        _mm_storeh_pd(dy+2*stride+1, vry);
        dy += 4*stride;
        parow += 4*alglib_c_block;
    }
    /* scalar tail: last row when m is odd */
    if( m%2 )
    {
        double v0 = 0, v1 = 0;
        double tx, ty;
        pa0 = parow;
        pb = x;
        for(j=0; j<n; j++)
        {
            v0 += pa0[0]*pb[0];
            v1 += pa0[0]*pb[1];
            v0 -= pa0[1]*pb[1];
            v1 += pa0[1]*pb[0];

            pa0 += 2;
            pb += 2;
        }
        if( beta.x==0.0 && beta.y==0.0 )
        {
            tx = 0.0;
            ty = 0.0;
        }
        else
        {
            tx = beta.x*dy[0]-beta.y*dy[1];
            ty = beta.x*dy[1]+beta.y*dy[0];
        }
        tx += alpha.x*v0-alpha.y*v1;
        ty += alpha.x*v1+alpha.y*v0;
        dy[0] = tx;
        dy[1] = ty;
        dy += 2*stride;
        parow += 2*alglib_c_block;
    }
}
#endif
10297
10298 /********************************************************************
10299 This subroutine sets vector to zero
10300 ********************************************************************/
_ialglib_vzero(ae_int_t n,double * p,ae_int_t stride)10301 void _ialglib_vzero(ae_int_t n, double *p, ae_int_t stride)
10302 {
10303 ae_int_t i;
10304 if( stride==1 )
10305 {
10306 for(i=0; i<n; i++,p++)
10307 *p = 0.0;
10308 }
10309 else
10310 {
10311 for(i=0; i<n; i++,p+=stride)
10312 *p = 0.0;
10313 }
10314 }
10315
/********************************************************************
This subroutine sets complex vector to zero
********************************************************************/
_ialglib_vzero_complex(ae_int_t n,ae_complex * p,ae_int_t stride)10319 void _ialglib_vzero_complex(ae_int_t n, ae_complex *p, ae_int_t stride)
10320 {
10321 ae_int_t i;
10322 if( stride==1 )
10323 {
10324 for(i=0; i<n; i++,p++)
10325 {
10326 p->x = 0.0;
10327 p->y = 0.0;
10328 }
10329 }
10330 else
10331 {
10332 for(i=0; i<n; i++,p+=stride)
10333 {
10334 p->x = 0.0;
10335 p->y = 0.0;
10336 }
10337 }
10338 }
10339
10340
10341 /********************************************************************
10342 This subroutine copies unaligned real vector
10343 ********************************************************************/
_ialglib_vcopy(ae_int_t n,const double * a,ae_int_t stridea,double * b,ae_int_t strideb)10344 void _ialglib_vcopy(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb)
10345 {
10346 ae_int_t i, n2;
10347 if( stridea==1 && strideb==1 )
10348 {
10349 n2 = n/2;
10350 for(i=n2; i!=0; i--, a+=2, b+=2)
10351 {
10352 b[0] = a[0];
10353 b[1] = a[1];
10354 }
10355 if( n%2!=0 )
10356 b[0] = a[0];
10357 }
10358 else
10359 {
10360 for(i=0; i<n; i++,a+=stridea,b+=strideb)
10361 *b = *a;
10362 }
10363 }
10364
10365
10366 /********************************************************************
10367 This subroutine copies unaligned complex vector
10368 (passed as ae_complex*)
10369
10370 1. strideb is stride measured in complex numbers, not doubles
10371 2. conj may be "N" (no conj.) or "C" (conj.)
10372 ********************************************************************/
_ialglib_vcopy_complex(ae_int_t n,const ae_complex * a,ae_int_t stridea,double * b,ae_int_t strideb,const char * conj)10373 void _ialglib_vcopy_complex(ae_int_t n, const ae_complex *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
10374 {
10375 ae_int_t i;
10376
10377 /*
10378 * more general case
10379 */
10380 if( conj[0]=='N' || conj[0]=='n' )
10381 {
10382 for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
10383 {
10384 b[0] = a->x;
10385 b[1] = a->y;
10386 }
10387 }
10388 else
10389 {
10390 for(i=0; i<n; i++,a+=stridea,b+=2*strideb)
10391 {
10392 b[0] = a->x;
10393 b[1] = -a->y;
10394 }
10395 }
10396 }
10397
10398
10399 /********************************************************************
10400 This subroutine copies unaligned complex vector (passed as double*)
10401
10402 1. strideb is stride measured in complex numbers, not doubles
10403 2. conj may be "N" (no conj.) or "C" (conj.)
10404 ********************************************************************/
_ialglib_vcopy_dcomplex(ae_int_t n,const double * a,ae_int_t stridea,double * b,ae_int_t strideb,const char * conj)10405 void _ialglib_vcopy_dcomplex(ae_int_t n, const double *a, ae_int_t stridea, double *b, ae_int_t strideb, const char *conj)
10406 {
10407 ae_int_t i;
10408
10409 /*
10410 * more general case
10411 */
10412 if( conj[0]=='N' || conj[0]=='n' )
10413 {
10414 for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
10415 {
10416 b[0] = a[0];
10417 b[1] = a[1];
10418 }
10419 }
10420 else
10421 {
10422 for(i=0; i<n; i++,a+=2*stridea,b+=2*strideb)
10423 {
10424 b[0] = a[0];
10425 b[1] = -a[1];
10426 }
10427 }
10428 }
10429
10430
/********************************************************************
This subroutine copies matrix from non-aligned non-contiguous storage
to aligned contiguous storage

A:
* MxN
* non-aligned
* non-contiguous
* may be transformed during copying (as prescribed by op)

B:
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
* aligned
* stride is alglib_r_block

Transformation types:
* 0 - no transform
* 1 - transposition
********************************************************************/
_ialglib_mcopyblock(ae_int_t m,ae_int_t n,const double * a,ae_int_t op,ae_int_t stride,double * b)10450 void _ialglib_mcopyblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
10451 {
10452 ae_int_t i, j, n2;
10453 const double *psrc;
10454 double *pdst;
10455 if( op==0 )
10456 {
10457 n2 = n/2;
10458 for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc=a)
10459 {
10460 for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
10461 {
10462 pdst[0] = psrc[0];
10463 pdst[1] = psrc[1];
10464 }
10465 if( n%2!=0 )
10466 pdst[0] = psrc[0];
10467 }
10468 }
10469 else
10470 {
10471 n2 = n/2;
10472 for(i=0,psrc=a; i<m; i++,a+=stride,b+=1,psrc=a)
10473 {
10474 for(j=0,pdst=b; j<n2; j++,pdst+=alglib_twice_r_block,psrc+=2)
10475 {
10476 pdst[0] = psrc[0];
10477 pdst[alglib_r_block] = psrc[1];
10478 }
10479 if( n%2!=0 )
10480 pdst[0] = psrc[0];
10481 }
10482 }
10483 }
10484
10485
/********************************************************************
This subroutine copies matrix from non-aligned non-contiguous storage
to aligned contiguous storage

A:
* MxN
* non-aligned
* non-contiguous
* may be transformed during copying (as prescribed by op)

B:
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
* aligned
* stride is alglib_r_block

Transformation types:
* 0 - no transform
* 1 - transposition

This function supports SSE2; it can be used when:
1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)

If (1) is failed, this function will be undefined. If (2) is failed, call
to this function will probably crash your system.

If you want to know whether it is safe to call it, you should check
results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
and will do its work.
********************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mcopyblock_sse2(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_int_t stride, double *b)
{
    ae_int_t i, j, mb2;
    const double *psrc0, *psrc1;
    double *pdst;
    if( op==0 )
    {
        /*
         * op==0: plain copy.  Rows are moved 8 doubles per iteration,
         * with unaligned loads (A is non-aligned by contract) and aligned
         * stores (B is aligned by contract).
         */
        ae_int_t nb8, ntail;
        nb8 = n/8;
        ntail = n-8*nb8;
        for(i=0,psrc0=a; i<m; i++,a+=stride,b+=alglib_r_block,psrc0=a)
        {
            pdst=b;
            for(j=0; j<nb8; j++)
            {
                __m128d v0, v1;
                v0 = _mm_loadu_pd(psrc0);
                _mm_store_pd(pdst, v0);
                v1 = _mm_loadu_pd(psrc0+2);
                _mm_store_pd(pdst+2, v1);
                v1 = _mm_loadu_pd(psrc0+4);
                _mm_store_pd(pdst+4, v1);
                v1 = _mm_loadu_pd(psrc0+6);
                _mm_store_pd(pdst+6, v1);
                pdst+=8;
                psrc0+=8;
            }
            /* scalar copy of up to 7 leftover elements in the row */
            for(j=0; j<ntail; j++)
                pdst[j] = psrc0[j];
        }
    }
    else
    {
        /*
         * op==1: transposed copy.  Two source rows are processed at a
         * time; 2x2 tiles are transposed in registers with
         * unpacklo/unpackhi and stored as two columns of B.
         */
        const double *arow0, *arow1;
        double *bcol0, *bcol1, *pdst0, *pdst1;
        ae_int_t nb4, ntail, n2;

        n2 = n/2;
        mb2 = m/2;
        nb4 = n/4;
        ntail = n-4*nb4;

        arow0 = a;
        arow1 = a+stride;
        bcol0 = b;
        bcol1 = b+1;
        for(i=0; i<mb2; i++)
        {
            psrc0 = arow0;
            psrc1 = arow1;
            pdst0 = bcol0;
            pdst1 = bcol1;
            for(j=0; j<nb4; j++)
            {
                __m128d v0, v1, v2, v3;
                v0 = _mm_loadu_pd(psrc0);
                v1 = _mm_loadu_pd(psrc1);
                v2 = _mm_loadu_pd(psrc0+2);
                v3 = _mm_loadu_pd(psrc1+2);
                /* each store writes one row of B = one column pair of A */
                _mm_store_pd(pdst0, _mm_unpacklo_pd(v0,v1));
                _mm_store_pd(pdst0+alglib_r_block, _mm_unpackhi_pd(v0,v1));
                _mm_store_pd(pdst0+2*alglib_r_block, _mm_unpacklo_pd(v2,v3));
                _mm_store_pd(pdst0+3*alglib_r_block, _mm_unpackhi_pd(v2,v3));

                pdst0 += 4*alglib_r_block;
                pdst1 += 4*alglib_r_block;
                psrc0 += 4;
                psrc1 += 4;
            }
            /* scalar tail: up to 3 leftover columns of the row pair */
            for(j=0; j<ntail; j++)
            {
                pdst0[0] = psrc0[0];
                pdst1[0] = psrc1[0];
                pdst0 += alglib_r_block;
                pdst1 += alglib_r_block;
                psrc0 += 1;
                psrc1 += 1;
            }
            arow0 += 2*stride;
            arow1 += 2*stride;
            bcol0 += 2;
            bcol1 += 2;
        }
        /* last source row when m is odd: scalar transpose */
        if( m%2 )
        {
            psrc0 = arow0;
            pdst0 = bcol0;
            for(j=0; j<n2; j++)
            {
                pdst0[0] = psrc0[0];
                pdst0[alglib_r_block] = psrc0[1];
                pdst0 += alglib_twice_r_block;
                psrc0 += 2;
            }
            if( n%2!=0 )
                pdst0[0] = psrc0[0];
        }
    }
}
#endif
10617
10618
/********************************************************************
This subroutine copies matrix from aligned contiguous storage to non-
aligned non-contiguous storage

A:
* MxN
* aligned
* contiguous
* stride is alglib_r_block
* may be transformed during copying (as prescribed by op)

B:
* alglib_r_block*alglib_r_block (only MxN/NxM submatrix is used)
* non-aligned, non-contiguous

Transformation types:
* 0 - no transform
* 1 - transposition
********************************************************************/
_ialglib_mcopyunblock(ae_int_t m,ae_int_t n,const double * a,ae_int_t op,double * b,ae_int_t stride)10638 void _ialglib_mcopyunblock(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, double *b, ae_int_t stride)
10639 {
10640 ae_int_t i, j, n2;
10641 const double *psrc;
10642 double *pdst;
10643 if( op==0 )
10644 {
10645 n2 = n/2;
10646 for(i=0,psrc=a; i<m; i++,a+=alglib_r_block,b+=stride,psrc=a)
10647 {
10648 for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=2)
10649 {
10650 pdst[0] = psrc[0];
10651 pdst[1] = psrc[1];
10652 }
10653 if( n%2!=0 )
10654 pdst[0] = psrc[0];
10655 }
10656 }
10657 else
10658 {
10659 n2 = n/2;
10660 for(i=0,psrc=a; i<m; i++,a++,b+=stride,psrc=a)
10661 {
10662 for(j=0,pdst=b; j<n2; j++,pdst+=2,psrc+=alglib_twice_r_block)
10663 {
10664 pdst[0] = psrc[0];
10665 pdst[1] = psrc[alglib_r_block];
10666 }
10667 if( n%2!=0 )
10668 pdst[0] = psrc[0];
10669 }
10670 }
10671 }
10672
10673
/********************************************************************
This subroutine copies matrix from non-aligned non-contiguous storage
to aligned contiguous storage

A:
* MxN
* non-aligned
* non-contiguous
* may be transformed during copying (as prescribed by op)
* pointer to ae_complex is passed

B:
* 2*alglib_c_block*alglib_c_block doubles (only MxN/NxM submatrix is used)
* aligned
* stride is alglib_c_block
* pointer to double is passed

Transformation types:
* 0 - no transform
* 1 - transposition
* 2 - conjugate transposition
* 3 - conjugate, but no transposition
********************************************************************/
_ialglib_mcopyblock_complex(ae_int_t m,ae_int_t n,const ae_complex * a,ae_int_t op,ae_int_t stride,double * b)10697 void _ialglib_mcopyblock_complex(ae_int_t m, ae_int_t n, const ae_complex *a, ae_int_t op, ae_int_t stride, double *b)
10698 {
10699 ae_int_t i, j;
10700 const ae_complex *psrc;
10701 double *pdst;
10702 if( op==0 )
10703 {
10704 for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
10705 for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
10706 {
10707 pdst[0] = psrc->x;
10708 pdst[1] = psrc->y;
10709 }
10710 }
10711 if( op==1 )
10712 {
10713 for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
10714 for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
10715 {
10716 pdst[0] = psrc->x;
10717 pdst[1] = psrc->y;
10718 }
10719 }
10720 if( op==2 )
10721 {
10722 for(i=0,psrc=a; i<m; i++,a+=stride,b+=2,psrc=a)
10723 for(j=0,pdst=b; j<n; j++,pdst+=alglib_twice_c_block,psrc++)
10724 {
10725 pdst[0] = psrc->x;
10726 pdst[1] = -psrc->y;
10727 }
10728 }
10729 if( op==3 )
10730 {
10731 for(i=0,psrc=a; i<m; i++,a+=stride,b+=alglib_twice_c_block,psrc=a)
10732 for(j=0,pdst=b; j<n; j++,pdst+=2,psrc++)
10733 {
10734 pdst[0] = psrc->x;
10735 pdst[1] = -psrc->y;
10736 }
10737 }
10738 }
10739
10740
/********************************************************************
This subroutine copies matrix from aligned contiguous storage to
non-aligned non-contiguous storage

A:
* 2*alglib_c_block*alglib_c_block doubles (only MxN submatrix is used)
* aligned
* stride is alglib_c_block
* pointer to double is passed
* may be transformed during copying (as prescribed by op)

B:
* MxN
* non-aligned
* non-contiguous
* pointer to ae_complex is passed

Transformation types:
* 0 - no transform
* 1 - transposition
* 2 - conjugate transposition
* 3 - conjugate, but no transposition
********************************************************************/
_ialglib_mcopyunblock_complex(ae_int_t m,ae_int_t n,const double * a,ae_int_t op,ae_complex * b,ae_int_t stride)10764 void _ialglib_mcopyunblock_complex(ae_int_t m, ae_int_t n, const double *a, ae_int_t op, ae_complex* b, ae_int_t stride)
10765 {
10766 ae_int_t i, j;
10767 const double *psrc;
10768 ae_complex *pdst;
10769 if( op==0 )
10770 {
10771 for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
10772 for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
10773 {
10774 pdst->x = psrc[0];
10775 pdst->y = psrc[1];
10776 }
10777 }
10778 if( op==1 )
10779 {
10780 for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
10781 for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
10782 {
10783 pdst->x = psrc[0];
10784 pdst->y = psrc[1];
10785 }
10786 }
10787 if( op==2 )
10788 {
10789 for(i=0,psrc=a; i<m; i++,a+=2,b+=stride,psrc=a)
10790 for(j=0,pdst=b; j<n; j++,pdst++,psrc+=alglib_twice_c_block)
10791 {
10792 pdst->x = psrc[0];
10793 pdst->y = -psrc[1];
10794 }
10795 }
10796 if( op==3 )
10797 {
10798 for(i=0,psrc=a; i<m; i++,a+=alglib_twice_c_block,b+=stride,psrc=a)
10799 for(j=0,pdst=b; j<n; j++,pdst++,psrc+=2)
10800 {
10801 pdst->x = psrc[0];
10802 pdst->y = -psrc[1];
10803 }
10804 }
10805 }
10806
10807
10808 /********************************************************************
10809 Real GEMM kernel
10810 ********************************************************************/
_ialglib_rmatrixgemm(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,double * _a,ae_int_t _a_stride,ae_int_t optypea,double * _b,ae_int_t _b_stride,ae_int_t optypeb,double beta,double * _c,ae_int_t _c_stride)10811 ae_bool _ialglib_rmatrixgemm(ae_int_t m,
10812 ae_int_t n,
10813 ae_int_t k,
10814 double alpha,
10815 double *_a,
10816 ae_int_t _a_stride,
10817 ae_int_t optypea,
10818 double *_b,
10819 ae_int_t _b_stride,
10820 ae_int_t optypeb,
10821 double beta,
10822 double *_c,
10823 ae_int_t _c_stride)
10824 {
10825 int i;
10826 double *crow;
10827 double _abuf[alglib_r_block+alglib_simd_alignment];
10828 double _bbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
10829 double * const abuf = (double * ) ae_align(_abuf,alglib_simd_alignment);
10830 double * const b = (double * ) ae_align(_bbuf,alglib_simd_alignment);
10831 void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
10832 void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;
10833
10834 if( m>alglib_r_block || n>alglib_r_block || k>alglib_r_block || m<=0 || n<=0 || k<=0 || alpha==0.0 )
10835 return ae_false;
10836
10837 /*
10838 * Check for SSE2 support
10839 */
10840 #ifdef AE_HAS_SSE2_INTRINSICS
10841 if( ae_cpuid() & CPU_SSE2 )
10842 {
10843 rmv = &_ialglib_rmv_sse2;
10844 mcopyblock = &_ialglib_mcopyblock_sse2;
10845 }
10846 #endif
10847
10848 /*
10849 * copy b
10850 */
10851 if( optypeb==0 )
10852 mcopyblock(k, n, _b, 1, _b_stride, b);
10853 else
10854 mcopyblock(n, k, _b, 0, _b_stride, b);
10855
10856 /*
10857 * multiply B by A (from the right, by rows)
10858 * and store result in C
10859 */
10860 crow = _c;
10861 if( optypea==0 )
10862 {
10863 const double *arow = _a;
10864 for(i=0; i<m; i++)
10865 {
10866 _ialglib_vcopy(k, arow, 1, abuf, 1);
10867 if( beta==0 )
10868 _ialglib_vzero(n, crow, 1);
10869 rmv(n, k, b, abuf, crow, 1, alpha, beta);
10870 crow += _c_stride;
10871 arow += _a_stride;
10872 }
10873 }
10874 else
10875 {
10876 const double *acol = _a;
10877 for(i=0; i<m; i++)
10878 {
10879 _ialglib_vcopy(k, acol, _a_stride, abuf, 1);
10880 if( beta==0 )
10881 _ialglib_vzero(n, crow, 1);
10882 rmv(n, k, b, abuf, crow, 1, alpha, beta);
10883 crow += _c_stride;
10884 acol++;
10885 }
10886 }
10887 return ae_true;
10888 }
10889
10890
10891 /********************************************************************
10892 Complex GEMM kernel
10893 ********************************************************************/
_ialglib_cmatrixgemm(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_complex * _a,ae_int_t _a_stride,ae_int_t optypea,ae_complex * _b,ae_int_t _b_stride,ae_int_t optypeb,ae_complex beta,ae_complex * _c,ae_int_t _c_stride)10894 ae_bool _ialglib_cmatrixgemm(ae_int_t m,
10895 ae_int_t n,
10896 ae_int_t k,
10897 ae_complex alpha,
10898 ae_complex *_a,
10899 ae_int_t _a_stride,
10900 ae_int_t optypea,
10901 ae_complex *_b,
10902 ae_int_t _b_stride,
10903 ae_int_t optypeb,
10904 ae_complex beta,
10905 ae_complex *_c,
10906 ae_int_t _c_stride)
10907 {
10908 const ae_complex *arow;
10909 ae_complex *crow;
10910 ae_int_t i;
10911 double _loc_abuf[2*alglib_c_block+alglib_simd_alignment];
10912 double _loc_b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
10913 double * const abuf = (double *)ae_align(_loc_abuf,alglib_simd_alignment);
10914 double * const b = (double *)ae_align(_loc_b, alglib_simd_alignment);
10915 ae_int_t brows;
10916 ae_int_t bcols;
10917 void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;
10918
10919 if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block )
10920 return ae_false;
10921
10922 /*
10923 * Check for SSE2 support
10924 */
10925 #ifdef AE_HAS_SSE2_INTRINSICS
10926 if( ae_cpuid() & CPU_SSE2 )
10927 {
10928 cmv = &_ialglib_cmv_sse2;
10929 }
10930 #endif
10931
10932 /*
10933 * copy b
10934 */
10935 brows = optypeb==0 ? k : n;
10936 bcols = optypeb==0 ? n : k;
10937 if( optypeb==0 )
10938 _ialglib_mcopyblock_complex(brows, bcols, _b, 1, _b_stride, b);
10939 if( optypeb==1 )
10940 _ialglib_mcopyblock_complex(brows, bcols, _b, 0, _b_stride, b);
10941 if( optypeb==2 )
10942 _ialglib_mcopyblock_complex(brows, bcols, _b, 3, _b_stride, b);
10943
10944 /*
10945 * multiply B by A (from the right, by rows)
10946 * and store result in C
10947 */
10948 arow = _a;
10949 crow = _c;
10950 for(i=0; i<m; i++)
10951 {
10952 if( optypea==0 )
10953 {
10954 _ialglib_vcopy_complex(k, arow, 1, abuf, 1, "No conj");
10955 arow += _a_stride;
10956 }
10957 else if( optypea==1 )
10958 {
10959 _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "No conj");
10960 arow++;
10961 }
10962 else
10963 {
10964 _ialglib_vcopy_complex(k, arow, _a_stride, abuf, 1, "Conj");
10965 arow++;
10966 }
10967 if( beta.x==0 && beta.y==0 )
10968 _ialglib_vzero_complex(n, crow, 1);
10969 cmv(n, k, b, abuf, crow, NULL, 1, alpha, beta);
10970 crow += _c_stride;
10971 }
10972 return ae_true;
10973 }
10974
10975
10976 /********************************************************************
10977 complex TRSM kernel
10978 ********************************************************************/
ae_bool _ialglib_cmatrixrighttrsm(ae_int_t m,
     ae_int_t n,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_complex *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers
     */
    double *pdiag;
    ae_int_t i;
    /* stack storage, manually aligned for the SSE2 kernel */
    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double*)ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double*)ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double*)ae_align(_loc_tmpbuf,alglib_simd_alignment);
    ae_bool uppera;
    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;

    /* this kernel handles only operands up to the fixed block size */
    if( m>alglib_c_block || n>alglib_c_block )
        return ae_false;

    /*
     * Check for SSE2 support
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        cmv = &_ialglib_cmv_sse2;
    }
#endif

    /*
     * Prepare: copy op(A) and X into aligned contiguous buffers.  A unit
     * diagonal is materialized explicitly so the solve loop below needs
     * no special case; conjugation/transposition is folded into abuf by
     * mcopyblock_complex.
     */
    _ialglib_mcopyblock_complex(n, n, _a, optype, _a_stride, abuf);
    _ialglib_mcopyblock_complex(m, n, _x, 0, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
        {
            pdiag[0] = 1.0;
            pdiag[1] = 0.0;
        }
    /* transposition flips which triangle of abuf actually holds the data */
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve Y*A^-1=X where A is upper or lower triangular
     */
    if( uppera )
    {
        /*
         * Upper triangular: sweep columns left to right.  One cmv call
         * with alpha=-1/a(i,i), beta=1/a(i,i) computes column i of the
         * result as (X_i - Y_[0..i-1]*a[0..i-1,i]) / a(i,i), where
         * Y_[0..i-1] are the already-solved columns.
         */
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1))
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            /* column i of abuf above the diagonal, made contiguous */
            _ialglib_vcopy_dcomplex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj");
            cmv(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
    }
    else
    {
        /*
         * Lower triangular: sweep columns right to left, using the
         * already-solved columns i+1..n-1 in each update.
         */
        for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            /* column i of abuf below the diagonal, made contiguous */
            _ialglib_vcopy_dcomplex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj");
            cmv(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        _ialglib_mcopyunblock_complex(m, n, xbuf, 0, _x, _x_stride);
    }
    return ae_true;
}
11071
11072
/********************************************************************
real TRSM kernel

Computes X := X*op(A)^-1 where A is an NxN real triangular matrix
(upper or lower, unit or non-unit diagonal) and X is an MxN matrix;
op() is applied while copying A into the local buffer (optype is
forwarded to the block-copy routine).

Returns ae_false without touching the data when M or N exceeds the
internal block size alglib_r_block, so the caller can fall back to
generic ALGLIB code.
********************************************************************/
ae_bool _ialglib_rmatrixrighttrsm(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     double *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers
     *
     * raw stack arrays are oversized by alglib_simd_alignment; ae_align()
     * returns the first suitably aligned address inside each one
     */
    double *pdiag;
    ae_int_t i;
    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
    ae_bool uppera;
    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;

    if( m>alglib_r_block || n>alglib_r_block )
        return ae_false;

    /*
     * Check for SSE2 support (run-time dispatch of mv/copy cores)
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        rmv = &_ialglib_rmv_sse2;
        mcopyblock = &_ialglib_mcopyblock_sse2;
    }
#endif

    /*
     * Prepare: copy op(A) and X into aligned buffers. For unit-diagonal A
     * the diagonal is overwritten with exact 1.0 so the division below can
     * be performed unconditionally.
     */
    mcopyblock(n, n, _a, optype, _a_stride, abuf);
    mcopyblock(m, n, _x, 0, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
            *pdiag = 1.0;
    /* transposition (optype!=0) turns an upper matrix into a lower one */
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve Y*A^-1=X where A is upper or lower triangular.
     * Per column i: beta=1/a[i][i] scales the column, alpha=-beta folds in
     * the contribution of the already-solved columns via rmv().
     */
    if( uppera )
    {
        /* forward sweep over columns 0..n-1, pdiag walks the diagonal */
        for(i=0,pdiag=abuf; i<n; i++,pdiag+=alglib_r_block+1)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            /* gather column i of A above the diagonal into tmpbuf */
            _ialglib_vcopy(i, abuf+i, alglib_r_block, tmpbuf, 1);
            rmv(m, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
        }
        _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
    }
    else
    {
        /* backward sweep over columns n-1..0 */
        for(i=n-1,pdiag=abuf+(n-1)*alglib_r_block+(n-1); i>=0; i--,pdiag-=alglib_r_block+1)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            /* gather column i of A below the diagonal into tmpbuf+i+1 */
            _ialglib_vcopy(n-1-i, pdiag+alglib_r_block, alglib_r_block, tmpbuf+i+1, 1);
            rmv(m, n-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
        }
        _ialglib_mcopyunblock(m, n, xbuf, 0, _x, _x_stride);
    }
    return ae_true;
}
11155
11156
/********************************************************************
complex TRSM kernel (left-hand side variant)

Computes X := op(A)^-1*X where A is an MxM complex triangular matrix
and X is an MxN complex matrix. X is copied in transposed form so the
column-oriented cmv() core (which computes A*x, not x*A) can be reused.

Returns ae_false without touching the data when M or N exceeds
alglib_c_block, so the caller can fall back to generic ALGLIB code.
********************************************************************/
ae_bool _ialglib_cmatrixlefttrsm(ae_int_t m,
     ae_int_t n,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     ae_complex *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers (aligned via ae_align; complex values stored as
     * interleaved re/im doubles, hence the 2*)
     */
    double *pdiag, *arow;
    ae_int_t i;
    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
    ae_bool uppera;
    void (*cmv)(ae_int_t, ae_int_t, const double *, const double *, ae_complex *, double *, ae_int_t, ae_complex, ae_complex) = &_ialglib_cmv;

    if( m>alglib_c_block || n>alglib_c_block )
        return ae_false;

    /*
     * Check for SSE2 support (run-time dispatch of the cmv core)
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        cmv = &_ialglib_cmv_sse2;
    }
#endif

    /*
     * Prepare
     * Transpose X (so we may use mv, which calculates A*x, but not x*A).
     * For unit-diagonal A the diagonal is overwritten with exact 1+0i so
     * the division below can be performed unconditionally.
     */
    _ialglib_mcopyblock_complex(m, m, _a, optype, _a_stride, abuf);
    _ialglib_mcopyblock_complex(m, n, _x, 1, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1))
        {
            pdiag[0] = 1.0;
            pdiag[1] = 0.0;
        }
    /* transposition (optype!=0) turns an upper matrix into a lower one */
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve A^-1*Y^T=X^T where A is upper or lower triangular.
     * Per row i: beta=1/a[i][i] scales the row, alpha=-beta folds in the
     * contribution of the already-solved rows via cmv().
     */
    if( uppera )
    {
        /* backward sweep over rows m-1..0, pdiag walks the diagonal */
        for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1))
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            /* part of row i of A to the right of the diagonal */
            _ialglib_vcopy_dcomplex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj");
            cmv(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        /* copy solution back, undoing the transposition */
        _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
    }
    else
    {
        /* forward sweep over rows 0..m-1; arow tracks the start of row i */
        for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block)
        {
            ae_complex tmp_c;
            ae_complex beta;
            ae_complex alpha;
            tmp_c.x = pdiag[0];
            tmp_c.y = pdiag[1];
            beta = ae_c_d_div(1.0, tmp_c);
            alpha.x = -beta.x;
            alpha.y = -beta.y;
            /* part of row i of A to the left of the diagonal */
            _ialglib_vcopy_dcomplex(i, arow, 1, tmpbuf, 1, "No conj");
            cmv(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta);
        }
        /* copy solution back, undoing the transposition */
        _ialglib_mcopyunblock_complex(m, n, xbuf, 1, _x, _x_stride);
    }
    return ae_true;
}
11252
11253
/********************************************************************
real TRSM kernel (left-hand side variant)

Computes X := op(A)^-1*X where A is an MxM real triangular matrix and
X is an MxN matrix. X is copied in transposed form so the rmv() core
(which calculates A*x, not x*A) can be reused.

Returns ae_false without touching the data when M or N exceeds
alglib_r_block, so the caller can fall back to generic ALGLIB code.
********************************************************************/
ae_bool _ialglib_rmatrixlefttrsm(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     double *_x,
     ae_int_t _x_stride)
{
    /*
     * local buffers (aligned via ae_align)
     */
    double *pdiag, *arow;
    ae_int_t i;
    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_xbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_tmpbuf[alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const xbuf = (double *) ae_align(_loc_xbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);
    ae_bool uppera;
    void (*rmv)(ae_int_t, ae_int_t, const double *, const double *, double *, ae_int_t, double, double) = &_ialglib_rmv;
    void (*mcopyblock)(ae_int_t, ae_int_t, const double *, ae_int_t, ae_int_t, double *) = &_ialglib_mcopyblock;

    if( m>alglib_r_block || n>alglib_r_block )
        return ae_false;

    /*
     * Check for SSE2 support (run-time dispatch of mv/copy cores)
     */
#ifdef AE_HAS_SSE2_INTRINSICS
    if( ae_cpuid() & CPU_SSE2 )
    {
        rmv = &_ialglib_rmv_sse2;
        mcopyblock = &_ialglib_mcopyblock_sse2;
    }
#endif

    /*
     * Prepare
     * Transpose X (so we may use mv, which calculates A*x, but not x*A).
     * For unit-diagonal A the diagonal is overwritten with exact 1.0 so
     * the division below can be performed unconditionally.
     */
    mcopyblock(m, m, _a, optype, _a_stride, abuf);
    mcopyblock(m, n, _x, 1, _x_stride, xbuf);
    if( isunit )
        for(i=0,pdiag=abuf; i<m; i++,pdiag+=alglib_r_block+1)
            *pdiag = 1.0;
    /* transposition (optype!=0) turns an upper matrix into a lower one */
    if( optype==0 )
        uppera = isupper;
    else
        uppera = !isupper;

    /*
     * Solve A^-1*Y^T=X^T where A is upper or lower triangular.
     * Per row i: beta=1/a[i][i] scales the row, alpha=-beta folds in the
     * contribution of the already-solved rows via rmv().
     */
    if( uppera )
    {
        /* backward sweep over rows m-1..0, pdiag walks the diagonal */
        for(i=m-1,pdiag=abuf+(m-1)*alglib_r_block+(m-1); i>=0; i--,pdiag-=alglib_r_block+1)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            /* part of row i of A to the right of the diagonal */
            _ialglib_vcopy(m-1-i, pdiag+1, 1, tmpbuf+i+1, 1);
            rmv(n, m-1-i, xbuf+i+1, tmpbuf+i+1, xbuf+i, alglib_r_block, alpha, beta);
        }
        /* copy solution back, undoing the transposition */
        _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
    }
    else
    {
        /* forward sweep over rows 0..m-1; arow tracks the start of row i */
        for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=alglib_r_block+1,arow+=alglib_r_block)
        {
            double beta = 1.0/(*pdiag);
            double alpha = -beta;
            /* part of row i of A to the left of the diagonal */
            _ialglib_vcopy(i, arow, 1, tmpbuf, 1);
            rmv(n, i, xbuf, tmpbuf, xbuf+i, alglib_r_block, alpha, beta);
        }
        /* copy solution back, undoing the transposition */
        _ialglib_mcopyunblock(m, n, xbuf, 1, _x, _x_stride);
    }
    return ae_true;
}
11336
11337
/********************************************************************
complex HERK (Hermitian rank-K update) kernel

Computes C := alpha*op(A)*op(A)^H + beta*C for the isupper-selected
triangle of the NxN matrix C; op() depends on optypea and is applied
while copying A into the local buffer, transforming the task to
"A*A^H" form (see the note below).

Returns ae_false without touching the data when N or K exceeds
alglib_c_block, so the caller can fall back to generic ALGLIB code.
********************************************************************/
ae_bool _ialglib_cmatrixherk(ae_int_t n,
     ae_int_t k,
     double alpha,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_int_t optypea,
     double beta,
     ae_complex *_c,
     ae_int_t _c_stride,
     ae_bool isupper)
{
    /*
     * local buffers (aligned via ae_align; complex values stored as
     * interleaved re/im doubles, hence the 2*)
     */
    double *arow, *crow;
    ae_complex c_alpha, c_beta;
    ae_int_t i;
    double _loc_abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment];
    double _loc_tmpbuf[2*alglib_c_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const cbuf = (double *) ae_align(_loc_cbuf, alglib_simd_alignment);
    double * const tmpbuf = (double *) ae_align(_loc_tmpbuf,alglib_simd_alignment);

    if( n>alglib_c_block || k>alglib_c_block )
        return ae_false;
    if( n==0 )
        return ae_true;

    /*
     * copy A and C, task is transformed to "A*A^H"-form.
     * if beta==0, then C is filled by zeros (and not referenced)
     *
     * alpha==0 or k==0 are correctly processed (A is not referenced)
     */
    c_alpha.x = alpha;
    c_alpha.y = 0;
    c_beta.x = beta;
    c_beta.y = 0;
    if( alpha==0 )
        k = 0;            /* alpha==0 makes the A-dependent term vanish */
    if( k>0 )
    {
        /* copy-with-transform; mode 3 vs 1 differ only in how op(A) is
           realized for the two optypea cases -- see mcopyblock_complex */
        if( optypea==0 )
            _ialglib_mcopyblock_complex(n, k, _a, 3, _a_stride, abuf);
        else
            _ialglib_mcopyblock_complex(k, n, _a, 1, _a_stride, abuf);
    }
    _ialglib_mcopyblock_complex(n, n, _c, 0, _c_stride, cbuf);
    if( beta==0 )
    {
        /* zero out only the triangle that will be updated */
        for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block)
            if( isupper )
                _ialglib_vzero(2*(n-i), crow+2*i, 1);
            else
                _ialglib_vzero(2*(i+1), crow, 1);
    }


    /*
     * update C, one row of the selected triangle at a time:
     * tmpbuf receives conj(row i of A), then cmv() accumulates the
     * dot products against the remaining rows of A
     */
    if( isupper )
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
        {
            _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
            _ialglib_cmv(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, c_alpha, c_beta);
        }
    }
    else
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block)
        {
            _ialglib_vcopy_dcomplex(k, arow, 1, tmpbuf, 1, "Conj");
            _ialglib_cmv(i+1, k, abuf, tmpbuf, NULL, crow, 1, c_alpha, c_beta);
        }
    }

    /*
     * copy back
     */
    _ialglib_mcopyunblock_complex(n, n, cbuf, 0, _c, _c_stride);

    return ae_true;
}
11427
11428
/********************************************************************
real SYRK (symmetric rank-K update) kernel

Computes C := alpha*op(A)*op(A)^T + beta*C for the isupper-selected
triangle of the NxN matrix C; op() depends on optypea and is applied
while copying A into the local buffer, transforming the task to
"A*A^T" form.

Returns ae_false without touching the data when N or K exceeds
alglib_r_block, so the caller can fall back to generic ALGLIB code.
********************************************************************/
ae_bool _ialglib_rmatrixsyrk(ae_int_t n,
     ae_int_t k,
     double alpha,
     double *_a,
     ae_int_t _a_stride,
     ae_int_t optypea,
     double beta,
     double *_c,
     ae_int_t _c_stride,
     ae_bool isupper)
{
    /*
     * local buffers (aligned via ae_align)
     */
    double *arow, *crow;
    ae_int_t i;
    double _loc_abuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double _loc_cbuf[alglib_r_block*alglib_r_block+alglib_simd_alignment];
    double * const abuf = (double *) ae_align(_loc_abuf, alglib_simd_alignment);
    double * const cbuf = (double *) ae_align(_loc_cbuf, alglib_simd_alignment);

    if( n>alglib_r_block || k>alglib_r_block )
        return ae_false;
    if( n==0 )
        return ae_true;

    /*
     * copy A and C, task is transformed to "A*A^T"-form.
     * if beta==0, then C is filled by zeros (and not referenced)
     *
     * alpha==0 or k==0 are correctly processed (A is not referenced)
     */
    if( alpha==0 )
        k = 0;            /* alpha==0 makes the A-dependent term vanish */
    if( k>0 )
    {
        if( optypea==0 )
            _ialglib_mcopyblock(n, k, _a, 0, _a_stride, abuf);
        else
            _ialglib_mcopyblock(k, n, _a, 1, _a_stride, abuf);
    }
    _ialglib_mcopyblock(n, n, _c, 0, _c_stride, cbuf);
    if( beta==0 )
    {
        /* zero out only the triangle that will be updated */
        for(i=0,crow=cbuf; i<n; i++,crow+=alglib_r_block)
            if( isupper )
                _ialglib_vzero(n-i, crow+i, 1);
            else
                _ialglib_vzero(i+1, crow, 1);
    }


    /*
     * update C, one row of the selected triangle at a time:
     * rmv() accumulates dot products of row i of A against the
     * remaining rows of A
     */
    if( isupper )
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
        {
            _ialglib_rmv(n-i, k, arow, arow, crow+i, 1, alpha, beta);
        }
    }
    else
    {
        for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=alglib_r_block,crow+=alglib_r_block)
        {
            _ialglib_rmv(i+1, k, abuf, arow, crow, 1, alpha, beta);
        }
    }

    /*
     * copy back
     */
    _ialglib_mcopyunblock(n, n, cbuf, 0, _c, _c_stride);

    return ae_true;
}
11509
11510
/********************************************************************
complex rank-1 kernel

Performs the rank-1 update A := A + u*v^T on an MxN complex matrix
stored with row stride _a_stride (no conjugation -- the complex
products below use v as-is).

Returns ae_false for empty problems (M<=0 or N<=0) so the caller can
treat them separately; otherwise updates A in place and returns
ae_true.
********************************************************************/
ae_bool _ialglib_cmatrixrank1(ae_int_t m,
     ae_int_t n,
     ae_complex *_a,
     ae_int_t _a_stride,
     ae_complex *_u,
     ae_complex *_v)
{
    /*
     * Locals
     */
    ae_complex *arow, *pu, *pv, *vtmp, *dst;
    ae_int_t n2 = n/2;       /* number of column pairs processed per row */
    ae_int_t i, j;

    /*
     * Quick exit
     */
    if( m<=0 || n<=0 )
        return ae_false;


    /*
     * update pairs of rows
     */
    arow = _a;
    pu = _u;
    vtmp = _v;
    for(i=0; i<m; i++, arow+=_a_stride, pu++)
    {
        /*
         * update by two: each iteration handles columns 2j and 2j+1,
         * with the complex multiply written out on scalar doubles
         */
        for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2)
        {
            double ux  = pu[0].x;
            double uy  = pu[0].y;
            double v0x = pv[0].x;
            double v0y = pv[0].y;
            double v1x = pv[1].x;
            double v1y = pv[1].y;
            dst[0].x += ux*v0x-uy*v0y;
            dst[0].y += ux*v0y+uy*v0x;
            dst[1].x += ux*v1x-uy*v1y;
            dst[1].y += ux*v1y+uy*v1x;
        }

        /*
         * final update: odd N leaves one trailing column;
         * pv/dst already point at it after the paired loop
         */
        if( n%2!=0 )
        {
            double ux = pu[0].x;
            double uy = pu[0].y;
            double vx = pv[0].x;
            double vy = pv[0].y;
            dst[0].x += ux*vx-uy*vy;
            dst[0].y += ux*vy+uy*vx;
        }
    }
    return ae_true;
}
11575
11576
/********************************************************************
real rank-1 kernel
deprecated version

Performs the rank-1 update A := A + u*v^T on an MxN real matrix with
row stride _a_stride, processing the matrix in 2x2 tiles (two rows,
two columns at a time) with scalar tail handling for odd M/N.

Returns ae_false for empty problems (M<=0 or N<=0) so the caller can
treat them separately; otherwise updates A in place and returns
ae_true.
********************************************************************/
ae_bool _ialglib_rmatrixrank1(ae_int_t m,
     ae_int_t n,
     double *_a,
     ae_int_t _a_stride,
     double *_u,
     double *_v)
{
    /*
     * Locals
     */
    double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
    ae_int_t m2 = m/2;            /* number of row pairs    */
    ae_int_t n2 = n/2;            /* number of column pairs */
    ae_int_t stride  = _a_stride;
    ae_int_t stride2 = 2*_a_stride;
    ae_int_t i, j;

    /*
     * Quick exit
     */
    if( m<=0 || n<=0 )
        return ae_false;

    /*
     * update pairs of rows (arow0/arow1 track rows 2i and 2i+1)
     */
    arow0 = _a;
    arow1 = arow0+stride;
    pu = _u;
    vtmp = _v;
    for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
    {
        /*
         * update by two columns at a time
         */
        for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
        {
            dst0[0] += pu[0]*pv[0];
            dst0[1] += pu[0]*pv[1];
            dst1[0] += pu[1]*pv[0];
            dst1[1] += pu[1]*pv[1];
        }

        /*
         * final update: odd N leaves one trailing column;
         * pv/dst0/dst1 already point at it after the paired loop
         */
        if( n%2!=0 )
        {
            dst0[0] += pu[0]*pv[0];
            dst1[0] += pu[1]*pv[0];
        }
    }

    /*
     * update last row (odd M leaves one unpaired row; arow0/pu already
     * point at it after the paired loop)
     */
    if( m%2!=0 )
    {
        /*
         * update by two columns at a time
         */
        for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
        {
            dst0[0] += pu[0]*pv[0];
            dst0[1] += pu[0]*pv[1];
        }

        /*
         * final update for the bottom-right corner element
         */
        if( n%2!=0 )
            dst0[0] += pu[0]*pv[0];
    }
    return ae_true;
}
11656
11657
11658
11659 /********************************************************************
11660 real rank-1 kernel
11661 deprecated version
11662 ********************************************************************/
_ialglib_rmatrixger(ae_int_t m,ae_int_t n,double * _a,ae_int_t _a_stride,double alpha,double * _u,double * _v)11663 ae_bool _ialglib_rmatrixger(ae_int_t m,
11664 ae_int_t n,
11665 double *_a,
11666 ae_int_t _a_stride,
11667 double alpha,
11668 double *_u,
11669 double *_v)
11670 {
11671 /*
11672 * Locals
11673 */
11674 double *arow0, *arow1, *pu, *pv, *vtmp, *dst0, *dst1;
11675 ae_int_t m2 = m/2;
11676 ae_int_t n2 = n/2;
11677 ae_int_t stride = _a_stride;
11678 ae_int_t stride2 = 2*_a_stride;
11679 ae_int_t i, j;
11680
11681 /*
11682 * Quick exit
11683 */
11684 if( m<=0 || n<=0 || alpha==0.0 )
11685 return ae_false;
11686
11687 /*
11688 * update pairs of rows
11689 */
11690 arow0 = _a;
11691 arow1 = arow0+stride;
11692 pu = _u;
11693 vtmp = _v;
11694 for(i=0; i<m2; i++,arow0+=stride2,arow1+=stride2,pu+=2)
11695 {
11696 double au0 = alpha*pu[0];
11697 double au1 = alpha*pu[1];
11698
11699 /*
11700 * update by two
11701 */
11702 for(j=0,pv=vtmp, dst0=arow0, dst1=arow1; j<n2; j++, dst0+=2, dst1+=2, pv+=2)
11703 {
11704 dst0[0] += au0*pv[0];
11705 dst0[1] += au0*pv[1];
11706 dst1[0] += au1*pv[0];
11707 dst1[1] += au1*pv[1];
11708 }
11709
11710 /*
11711 * final update
11712 */
11713 if( n%2!=0 )
11714 {
11715 dst0[0] += au0*pv[0];
11716 dst1[0] += au1*pv[0];
11717 }
11718 }
11719
11720 /*
11721 * update last row
11722 */
11723 if( m%2!=0 )
11724 {
11725 double au0 = alpha*pu[0];
11726
11727 /*
11728 * update by two
11729 */
11730 for(j=0,pv=vtmp, dst0=arow0; j<n2; j++, dst0+=2, pv+=2)
11731 {
11732 dst0[0] += au0*pv[0];
11733 dst0[1] += au0*pv[1];
11734 }
11735
11736 /*
11737 * final update
11738 */
11739 if( n%2!=0 )
11740 dst0[0] += au0*pv[0];
11741 }
11742 return ae_true;
11743 }
11744
11745 /********************************************************************
11746 Interface functions for efficient kernels
11747 ********************************************************************/
_ialglib_i_rmatrixgemmf(ae_int_t m,ae_int_t n,ae_int_t k,double alpha,ae_matrix * _a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * _b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,double beta,ae_matrix * _c,ae_int_t ic,ae_int_t jc)11748 ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m,
11749 ae_int_t n,
11750 ae_int_t k,
11751 double alpha,
11752 ae_matrix *_a,
11753 ae_int_t ia,
11754 ae_int_t ja,
11755 ae_int_t optypea,
11756 ae_matrix *_b,
11757 ae_int_t ib,
11758 ae_int_t jb,
11759 ae_int_t optypeb,
11760 double beta,
11761 ae_matrix *_c,
11762 ae_int_t ic,
11763 ae_int_t jc)
11764 {
11765 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11766 if( alpha==0.0 || k==0 || n==0 || m==0)
11767 return ae_false;
11768
11769 /* handle with optimized ALGLIB kernel */
11770 return _ialglib_rmatrixgemm(m, n, k, alpha, _a->ptr.pp_double[ia]+ja, _a->stride, optypea, _b->ptr.pp_double[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_double[ic]+jc, _c->stride);
11771 }
11772
_ialglib_i_cmatrixgemmf(ae_int_t m,ae_int_t n,ae_int_t k,ae_complex alpha,ae_matrix * _a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,ae_matrix * _b,ae_int_t ib,ae_int_t jb,ae_int_t optypeb,ae_complex beta,ae_matrix * _c,ae_int_t ic,ae_int_t jc)11773 ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m,
11774 ae_int_t n,
11775 ae_int_t k,
11776 ae_complex alpha,
11777 ae_matrix *_a,
11778 ae_int_t ia,
11779 ae_int_t ja,
11780 ae_int_t optypea,
11781 ae_matrix *_b,
11782 ae_int_t ib,
11783 ae_int_t jb,
11784 ae_int_t optypeb,
11785 ae_complex beta,
11786 ae_matrix *_c,
11787 ae_int_t ic,
11788 ae_int_t jc)
11789 {
11790 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11791 if( (alpha.x==0.0 && alpha.y==0) || k==0 || n==0 || m==0 )
11792 return ae_false;
11793
11794 /* handle with optimized ALGLIB kernel */
11795 return _ialglib_cmatrixgemm(m, n, k, alpha, _a->ptr.pp_complex[ia]+ja, _a->stride, optypea, _b->ptr.pp_complex[ib]+jb, _b->stride, optypeb, beta, _c->ptr.pp_complex[ic]+jc, _c->stride);
11796 }
11797
_ialglib_i_cmatrixrighttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2)11798 ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m,
11799 ae_int_t n,
11800 ae_matrix *a,
11801 ae_int_t i1,
11802 ae_int_t j1,
11803 ae_bool isupper,
11804 ae_bool isunit,
11805 ae_int_t optype,
11806 ae_matrix *x,
11807 ae_int_t i2,
11808 ae_int_t j2)
11809 {
11810 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11811 if( m==0 || n==0)
11812 return ae_false;
11813
11814 /* handle with optimized ALGLIB kernel */
11815 return _ialglib_cmatrixrighttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
11816 }
11817
_ialglib_i_rmatrixrighttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2)11818 ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m,
11819 ae_int_t n,
11820 ae_matrix *a,
11821 ae_int_t i1,
11822 ae_int_t j1,
11823 ae_bool isupper,
11824 ae_bool isunit,
11825 ae_int_t optype,
11826 ae_matrix *x,
11827 ae_int_t i2,
11828 ae_int_t j2)
11829 {
11830 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11831 if( m==0 || n==0)
11832 return ae_false;
11833
11834 /* handle with optimized ALGLIB kernel */
11835 return _ialglib_rmatrixrighttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
11836 }
11837
_ialglib_i_cmatrixlefttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2)11838 ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m,
11839 ae_int_t n,
11840 ae_matrix *a,
11841 ae_int_t i1,
11842 ae_int_t j1,
11843 ae_bool isupper,
11844 ae_bool isunit,
11845 ae_int_t optype,
11846 ae_matrix *x,
11847 ae_int_t i2,
11848 ae_int_t j2)
11849 {
11850 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11851 if( m==0 || n==0)
11852 return ae_false;
11853
11854 /* handle with optimized ALGLIB kernel */
11855 return _ialglib_cmatrixlefttrsm(m, n, &a->ptr.pp_complex[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_complex[i2][j2], x->stride);
11856 }
11857
_ialglib_i_rmatrixlefttrsmf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t i1,ae_int_t j1,ae_bool isupper,ae_bool isunit,ae_int_t optype,ae_matrix * x,ae_int_t i2,ae_int_t j2)11858 ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m,
11859 ae_int_t n,
11860 ae_matrix *a,
11861 ae_int_t i1,
11862 ae_int_t j1,
11863 ae_bool isupper,
11864 ae_bool isunit,
11865 ae_int_t optype,
11866 ae_matrix *x,
11867 ae_int_t i2,
11868 ae_int_t j2)
11869 {
11870 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11871 if( m==0 || n==0)
11872 return ae_false;
11873
11874 /* handle with optimized ALGLIB kernel */
11875 return _ialglib_rmatrixlefttrsm(m, n, &a->ptr.pp_double[i1][j1], a->stride, isupper, isunit, optype, &x->ptr.pp_double[i2][j2], x->stride);
11876 }
11877
_ialglib_i_cmatrixherkf(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper)11878 ae_bool _ialglib_i_cmatrixherkf(ae_int_t n,
11879 ae_int_t k,
11880 double alpha,
11881 ae_matrix *a,
11882 ae_int_t ia,
11883 ae_int_t ja,
11884 ae_int_t optypea,
11885 double beta,
11886 ae_matrix *c,
11887 ae_int_t ic,
11888 ae_int_t jc,
11889 ae_bool isupper)
11890 {
11891 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11892 if( alpha==0.0 || k==0 || n==0)
11893 return ae_false;
11894
11895 /* ALGLIB kernel */
11896 return _ialglib_cmatrixherk(n, k, alpha, &a->ptr.pp_complex[ia][ja], a->stride, optypea, beta, &c->ptr.pp_complex[ic][jc], c->stride, isupper);
11897 }
11898
_ialglib_i_rmatrixsyrkf(ae_int_t n,ae_int_t k,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t optypea,double beta,ae_matrix * c,ae_int_t ic,ae_int_t jc,ae_bool isupper)11899 ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n,
11900 ae_int_t k,
11901 double alpha,
11902 ae_matrix *a,
11903 ae_int_t ia,
11904 ae_int_t ja,
11905 ae_int_t optypea,
11906 double beta,
11907 ae_matrix *c,
11908 ae_int_t ic,
11909 ae_int_t jc,
11910 ae_bool isupper)
11911 {
11912 /* handle degenerate cases like zero matrices by ALGLIB - greatly simplifies passing data to ALGLIB kernel */
11913 if( alpha==0.0 || k==0 || n==0)
11914 return ae_false;
11915
11916 /* ALGLIB kernel */
11917 return _ialglib_rmatrixsyrk(n, k, alpha, &a->ptr.pp_double[ia][ja], a->stride, optypea, beta, &c->ptr.pp_double[ic][jc], c->stride, isupper);
11918 }
11919
_ialglib_i_cmatrixrank1f(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t uoffs,ae_vector * v,ae_int_t voffs)11920 ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m,
11921 ae_int_t n,
11922 ae_matrix *a,
11923 ae_int_t ia,
11924 ae_int_t ja,
11925 ae_vector *u,
11926 ae_int_t uoffs,
11927 ae_vector *v,
11928 ae_int_t voffs)
11929 {
11930 return _ialglib_cmatrixrank1(m, n, &a->ptr.pp_complex[ia][ja], a->stride, &u->ptr.p_complex[uoffs], &v->ptr.p_complex[voffs]);
11931 }
11932
_ialglib_i_rmatrixrank1f(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_vector * u,ae_int_t uoffs,ae_vector * v,ae_int_t voffs)11933 ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m,
11934 ae_int_t n,
11935 ae_matrix *a,
11936 ae_int_t ia,
11937 ae_int_t ja,
11938 ae_vector *u,
11939 ae_int_t uoffs,
11940 ae_vector *v,
11941 ae_int_t voffs)
11942 {
11943 return _ialglib_rmatrixrank1(m, n, &a->ptr.pp_double[ia][ja], a->stride, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
11944 }
11945
_ialglib_i_rmatrixgerf(ae_int_t m,ae_int_t n,ae_matrix * a,ae_int_t ia,ae_int_t ja,double alpha,ae_vector * u,ae_int_t uoffs,ae_vector * v,ae_int_t voffs)11946 ae_bool _ialglib_i_rmatrixgerf(ae_int_t m,
11947 ae_int_t n,
11948 ae_matrix *a,
11949 ae_int_t ia,
11950 ae_int_t ja,
11951 double alpha,
11952 ae_vector *u,
11953 ae_int_t uoffs,
11954 ae_vector *v,
11955 ae_int_t voffs)
11956 {
11957 return _ialglib_rmatrixger(m, n, &a->ptr.pp_double[ia][ja], a->stride, alpha, &u->ptr.p_double[uoffs], &v->ptr.p_double[voffs]);
11958 }
11959
11960
11961
11962
11963 /********************************************************************
11964 This function reads rectangular matrix A given by two column pointers
11965 col0 and col1 and stride src_stride and moves it into contiguous row-
11966 by-row storage given by dst.
11967
11968 It can handle following special cases:
11969 * col1==NULL in this case second column of A is filled by zeros
11970 ********************************************************************/
_ialglib_pack_n2(double * col0,double * col1,ae_int_t n,ae_int_t src_stride,double * dst)11971 void _ialglib_pack_n2(
11972 double *col0,
11973 double *col1,
11974 ae_int_t n,
11975 ae_int_t src_stride,
11976 double *dst)
11977 {
11978 ae_int_t n2, j, stride2;
11979
11980 /*
11981 * handle special case
11982 */
11983 if( col1==NULL )
11984 {
11985 for(j=0; j<n; j++)
11986 {
11987 dst[0] = *col0;
11988 dst[1] = 0.0;
11989 col0 += src_stride;
11990 dst += 2;
11991 }
11992 return;
11993 }
11994
11995 /*
11996 * handle general case
11997 */
11998 n2 = n/2;
11999 stride2 = src_stride*2;
12000 for(j=0; j<n2; j++)
12001 {
12002 dst[0] = *col0;
12003 dst[1] = *col1;
12004 dst[2] = col0[src_stride];
12005 dst[3] = col1[src_stride];
12006 col0 += stride2;
12007 col1 += stride2;
12008 dst += 4;
12009 }
12010 if( n%2 )
12011 {
12012 dst[0] = *col0;
12013 dst[1] = *col1;
12014 }
12015 }
12016
12017 /*************************************************************************
12018 This function reads rectangular matrix A given by two column pointers col0
12019 and col1 and stride src_stride and moves it into contiguous row-by-row
12020 storage given by dst.
12021
12022 dst must be aligned, col0 and col1 may be non-aligned.
12023
12024 It can handle following special cases:
12025 * col1==NULL in this case second column of A is filled by zeros
12026 * src_stride==1 efficient SSE-based code is used
12027 * col1-col0==1 efficient SSE-based code is used
12028
12029 This function supports SSE2; it can be used when:
12030 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
12031 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
12032
12033 If you want to know whether it is safe to call it, you should check
12034 results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
12035 and will do its work.
12036 *************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_pack_n2_sse2(
     double *col0,
     double *col1,
     ae_int_t n,
     ae_int_t src_stride,
     double *dst)
{
    ae_int_t n2, j, stride2;

    /*
     * handle special case: col1==NULL.
     * Second column is zero-filled; plain scalar code is used since
     * half of every store is a constant zero.
     */
    if( col1==NULL )
    {
        for(j=0; j<n; j++)
        {
            dst[0] = *col0;
            dst[1] = 0.0;
            col0 += src_stride;
            dst += 2;
        }
        return;
    }

    /*
     * handle unit stride: both columns are contiguous, so two elements
     * of each column are loaded at once (unaligned loadu - col0/col1
     * may be unaligned) and interleaved into rows with unpacklo/unpackhi;
     * dst is aligned, hence aligned stores.
     */
    if( src_stride==1 )
    {
        __m128d v0, v1;
        n2 = n/2;
        for(j=0; j<n2; j++)
        {
            v0 = _mm_loadu_pd(col0);
            col0 += 2;
            v1 = _mm_loadu_pd(col1);
            col1 += 2;
            _mm_store_pd(dst, _mm_unpacklo_pd(v0,v1));
            _mm_store_pd(dst+2,_mm_unpackhi_pd(v0,v1));
            dst += 4;
        }
        if( n%2 )
        {
            /* odd N: copy the last row with scalar code */
            dst[0] = *col0;
            dst[1] = *col1;
        }
        return;
    }

    /*
     * handle col1-col0==1: each row [col0[i], col1[i]] already forms a
     * contiguous pair in memory, so one 128-bit load moves a whole row
     */
    if( col1-col0==1 )
    {
        __m128d v0, v1;
        n2 = n/2;
        stride2 = 2*src_stride;
        for(j=0; j<n2; j++)
        {
            v0 = _mm_loadu_pd(col0);
            v1 = _mm_loadu_pd(col0+src_stride);
            _mm_store_pd(dst, v0);
            _mm_store_pd(dst+2,v1);
            col0 += stride2;
            dst += 4;
        }
        if( n%2 )
        {
            /* odd N: col0[1] is same element as col1[0] */
            dst[0] = col0[0];
            dst[1] = col0[1];
        }
        return;
    }

    /*
     * handle general case with scalar code (strided gathers do not
     * benefit from SSE2), two rows per iteration plus odd tail
     */
    n2 = n/2;
    stride2 = src_stride*2;
    for(j=0; j<n2; j++)
    {
        dst[0] = *col0;
        dst[1] = *col1;
        dst[2] = col0[src_stride];
        dst[3] = col1[src_stride];
        col0 += stride2;
        col1 += stride2;
        dst += 4;
    }
    if( n%2 )
    {
        dst[0] = *col0;
        dst[1] = *col1;
    }
}
#endif
12134
12135
12136 /********************************************************************
12137 This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
12138 matrices stored in contiguous row-by-row storage, R is 2x2 matrix
12139 stored in non-contiguous row-by-row storage.
12140
12141 A and B must be aligned; R may be non-aligned.
12142
12143 If beta is zero, contents of R is ignored (not multiplied by zero -
12144 just ignored).
12145
12146 However, when alpha is zero, we still calculate A'*B, which is
12147 multiplied by zero afterwards.
12148
12149 Function accepts additional parameter store_mode:
12150 * if 0, full R is stored
12151 * if 1, only first row of R is stored
12152 * if 2, only first column of R is stored
12153 * if 3, only top left element of R is stored
12154 ********************************************************************/
_ialglib_mm22(double alpha,const double * a,const double * b,ae_int_t k,double beta,double * r,ae_int_t stride,ae_int_t store_mode)12155 void _ialglib_mm22(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
12156 {
12157 double v00, v01, v10, v11;
12158 ae_int_t t;
12159 v00 = 0.0;
12160 v01 = 0.0;
12161 v10 = 0.0;
12162 v11 = 0.0;
12163 for(t=0; t<k; t++)
12164 {
12165 v00 += a[0]*b[0];
12166 v01 += a[0]*b[1];
12167 v10 += a[1]*b[0];
12168 v11 += a[1]*b[1];
12169 a+=2;
12170 b+=2;
12171 }
12172 if( store_mode==0 )
12173 {
12174 if( beta==0 )
12175 {
12176 r[0] = alpha*v00;
12177 r[1] = alpha*v01;
12178 r[stride+0] = alpha*v10;
12179 r[stride+1] = alpha*v11;
12180 }
12181 else
12182 {
12183 r[0] = beta*r[0] + alpha*v00;
12184 r[1] = beta*r[1] + alpha*v01;
12185 r[stride+0] = beta*r[stride+0] + alpha*v10;
12186 r[stride+1] = beta*r[stride+1] + alpha*v11;
12187 }
12188 return;
12189 }
12190 if( store_mode==1 )
12191 {
12192 if( beta==0 )
12193 {
12194 r[0] = alpha*v00;
12195 r[1] = alpha*v01;
12196 }
12197 else
12198 {
12199 r[0] = beta*r[0] + alpha*v00;
12200 r[1] = beta*r[1] + alpha*v01;
12201 }
12202 return;
12203 }
12204 if( store_mode==2 )
12205 {
12206 if( beta==0 )
12207 {
12208 r[0] =alpha*v00;
12209 r[stride+0] = alpha*v10;
12210 }
12211 else
12212 {
12213 r[0] = beta*r[0] + alpha*v00;
12214 r[stride+0] = beta*r[stride+0] + alpha*v10;
12215 }
12216 return;
12217 }
12218 if( store_mode==3 )
12219 {
12220 if( beta==0 )
12221 {
12222 r[0] = alpha*v00;
12223 }
12224 else
12225 {
12226 r[0] = beta*r[0] + alpha*v00;
12227 }
12228 return;
12229 }
12230 }
12231
12232
12233 /********************************************************************
12234 This function calculates R := alpha*A'*B+beta*R where A and B are Kx2
12235 matrices stored in contiguous row-by-row storage, R is 2x2 matrix
12236 stored in non-contiguous row-by-row storage.
12237
12238 A and B must be aligned; R may be non-aligned.
12239
12240 If beta is zero, contents of R is ignored (not multiplied by zero -
12241 just ignored).
12242
12243 However, when alpha is zero, we still calculate A'*B, which is
12244 multiplied by zero afterwards.
12245
12246 Function accepts additional parameter store_mode:
12247 * if 0, full R is stored
12248 * if 1, only first row of R is stored
12249 * if 2, only first column of R is stored
12250 * if 3, only top left element of R is stored
12251
12252 This function supports SSE2; it can be used when:
12253 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
12254 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
12255
12256 If (1) is failed, this function will still be defined and callable, but it
12257 will do nothing. If (2) is failed , call to this function will probably
12258 crash your system.
12259
12260 If you want to know whether it is safe to call it, you should check
12261 results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
12262 and will do its work.
12263 ********************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mm22_sse2(double alpha, const double *a, const double *b, ae_int_t k, double beta, double *r, ae_int_t stride, ae_int_t store_mode)
{
    /*
     * We calculate product of two Kx2 matrices (result is 2x2).
     * VA and VB store result as follows:
     *
     *        [ VD[0]  VE[0] ]
     * A'*B = [              ]
     *        [ VE[1]  VD[1] ]
     *
     */
    __m128d va, vb, vd, ve, vt, r0, r1, valpha, vbeta;
    ae_int_t t, k2;

    /*
     * calculate product: the loop is unrolled by two (two rows of A/B
     * per iteration).  For each row, vb accumulates the "direct" terms
     * into vd, while vt - a swapped copy of the B row (shuffle with
     * mask 1) - accumulates the cross terms into ve.  A and B are
     * aligned, hence aligned loads.
     */
    k2 = k/2;
    vd = _mm_setzero_pd();
    ve = _mm_setzero_pd();
    for(t=0; t<k2; t++)
    {
        vb = _mm_load_pd(b);
        va = _mm_load_pd(a);
        vt = vb;
        vb = _mm_mul_pd(va,vb);
        vt = _mm_shuffle_pd(vt, vt, 1);
        vd = _mm_add_pd(vb,vd);
        vt = _mm_mul_pd(va,vt);
        vb = _mm_load_pd(b+2);
        ve = _mm_add_pd(vt,ve);
        va = _mm_load_pd(a+2);
        vt = vb;
        vb = _mm_mul_pd(va,vb);
        vt = _mm_shuffle_pd(vt, vt, 1);
        vd = _mm_add_pd(vb,vd);
        vt = _mm_mul_pd(va,vt);
        ve = _mm_add_pd(vt,ve);
        a+=4;
        b+=4;
    }
    if( k%2 )
    {
        /* odd K: fold in the last row of A and B */
        va = _mm_load_pd(a);
        vb = _mm_load_pd(b);
        vt = _mm_shuffle_pd(vb, vb, 1);
        vd = _mm_add_pd(_mm_mul_pd(va,vb),vd);
        ve = _mm_add_pd(_mm_mul_pd(va,vt),ve);
    }

    /*
     * r0 is first row of alpha*A'*B, r1 is second row
     * (unpacklo/unpackhi rearrange the vd/ve diagonals back into rows)
     */
    valpha = _mm_load1_pd(&alpha);
    r0 = _mm_mul_pd(_mm_unpacklo_pd(vd,ve),valpha);
    r1 = _mm_mul_pd(_mm_unpackhi_pd(ve,vd),valpha);

    /*
     * store according to store_mode; R may be non-aligned, hence
     * loadu/storeu.  beta==0 means "overwrite R" (old contents are
     * never read), otherwise R is scaled by beta before the add.
     */
    if( store_mode==0 )
    {
        if( beta==0 )
        {
            _mm_storeu_pd(r,r0);
            _mm_storeu_pd(r+stride,r1);
        }
        else
        {
            vbeta = _mm_load1_pd(&beta);
            _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r0));
            _mm_storeu_pd(r+stride,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r1));
        }
        return;
    }
    if( store_mode==1 )
    {
        /* first row of R only */
        if( beta==0 )
            _mm_storeu_pd(r,r0);
        else
            _mm_storeu_pd(r,_mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),_mm_load1_pd(&beta)),r0));
        return;
    }
    if( store_mode==2 )
    {
        /* first column of R only: spill vector rows to a scalar
           buffer, then store the two leading elements */
        double buf[4];
        _mm_storeu_pd(buf,r0);
        _mm_storeu_pd(buf+2,r1);
        if( beta==0 )
        {
            r[0] =buf[0];
            r[stride+0] = buf[2];
        }
        else
        {
            r[0] = beta*r[0] + buf[0];
            r[stride+0] = beta*r[stride+0] + buf[2];
        }
        return;
    }
    if( store_mode==3 )
    {
        /* top-left element of R only */
        double buf[2];
        _mm_storeu_pd(buf,r0);
        if( beta==0 )
            r[0] = buf[0];
        else
            r[0] = beta*r[0] + buf[0];
        return;
    }
}
#endif
12377
12378
12379 /*************************************************************************
12380 This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1
12381 are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix
12382 stored in non-contiguous row-by-row storage.
12383
12384 A, B0 and B1 must be aligned; R may be non-aligned.
12385
12386 Note that B0 and B1 are two separate matrices stored in different
12387 locations.
12388
12389 If beta is zero, contents of R is ignored (not multiplied by zero - just
12390 ignored).
12391
12392 However, when alpha is zero , we still calculate MM product, which is
12393 multiplied by zero afterwards.
12394
12395 Unlike mm22 functions, this function does NOT support partial output of R
12396 - we always store full 2x4 matrix.
12397 *************************************************************************/
_ialglib_mm22x2(double alpha,const double * a,const double * b0,const double * b1,ae_int_t k,double beta,double * r,ae_int_t stride)12398 void _ialglib_mm22x2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
12399 {
12400 _ialglib_mm22(alpha, a, b0, k, beta, r, stride, 0);
12401 _ialglib_mm22(alpha, a, b1, k, beta, r+2, stride, 0);
12402 }
12403
12404 /*************************************************************************
12405 This function calculates R := alpha*A'*(B0|B1)+beta*R where A, B0 and B1
12406 are Kx2 matrices stored in contiguous row-by-row storage, R is 2x4 matrix
12407 stored in non-contiguous row-by-row storage.
12408
12409 A, B0 and B1 must be aligned; R may be non-aligned.
12410
12411 Note that B0 and B1 are two separate matrices stored in different
12412 locations.
12413
12414 If beta is zero, contents of R is ignored (not multiplied by zero - just
12415 ignored).
12416
12417 However, when alpha is zero , we still calculate MM product, which is
12418 multiplied by zero afterwards.
12419
12420 Unlike mm22 functions, this function does NOT support partial output of R
12421 - we always store full 2x4 matrix.
12422
12423 This function supports SSE2; it can be used when:
12424 1. AE_HAS_SSE2_INTRINSICS was defined (checked at compile-time)
12425 2. ae_cpuid() result contains CPU_SSE2 (checked at run-time)
12426
12427 If (1) is failed, this function will still be defined and callable, but it
12428 will do nothing. If (2) is failed , call to this function will probably
12429 crash your system.
12430
12431 If you want to know whether it is safe to call it, you should check
12432 results of ae_cpuid(). If CPU_SSE2 bit is set, this function is callable
12433 and will do its work.
12434 *************************************************************************/
#if defined(AE_HAS_SSE2_INTRINSICS)
void _ialglib_mm22x2_sse2(double alpha, const double *a, const double *b0, const double *b1, ae_int_t k, double beta, double *r, ae_int_t stride)
{
    /*
     * We calculate product of two Kx2 matrices (result is 2x2).
     * V0, V1, V2, V3 store result as follows:
     *
     *     [ V0[0]  V1[1] V2[0]  V3[1] ]
     * R = [                           ]
     *     [ V1[0]  V0[1] V3[0]  V2[1] ]
     *
     * VA0 stores current 1x2 block of A, VA1 stores shuffle of VA0,
     * VB0 and VB1 are used to store two copies of 1x2 block of B0 or B1
     * (both vars store same data - either B0 or B1). Results from multiplication
     * by VA0/VA1 are stored in VB0/VB1 too.
     *
     */
    __m128d v0, v1, v2, v3, va0, va1, vb0, vb1;
    __m128d r00, r01, r10, r11, valpha, vbeta;
    ae_int_t t;

    /*
     * one row of A, B0 and B1 is consumed per iteration; loads are
     * aligned because A/B0/B1 are required to be aligned
     */
    v0 = _mm_setzero_pd();
    v1 = _mm_setzero_pd();
    v2 = _mm_setzero_pd();
    v3 = _mm_setzero_pd();
    for(t=0; t<k; t++)
    {
        va0 = _mm_load_pd(a);
        vb0 = _mm_load_pd(b0);
        va1 = _mm_load_pd(a);

        vb0 = _mm_mul_pd(va0,vb0);
        vb1 = _mm_load_pd(b0);
        v0 = _mm_add_pd(v0,vb0);
        vb1 = _mm_mul_pd(va1,vb1);
        vb0 = _mm_load_pd(b1);
        v1 = _mm_add_pd(v1,vb1);

        vb0 = _mm_mul_pd(va0,vb0);
        vb1 = _mm_load_pd(b1);
        v2 = _mm_add_pd(v2,vb0);
        vb1 = _mm_mul_pd(va1,vb1);
        v3 = _mm_add_pd(v3,vb1);

        a+=2;
        b0+=2;
        b1+=2;
    }

    /*
     * shuffle V1 and V3 (conversion to more convenient storage format):
     *
     *     [ V0[0]  V1[0] V2[0]  V3[0] ]
     * R = [                           ]
     *     [ V1[1]  V0[1] V3[1]  V2[1] ]
     *
     * unpack results to
     *
     * [ r00 r01 ]
     * [ r10 r11 ]
     *
     */
    valpha = _mm_load1_pd(&alpha);
    v1 = _mm_shuffle_pd(v1, v1, 1);
    v3 = _mm_shuffle_pd(v3, v3, 1);
    r00 = _mm_mul_pd(_mm_unpacklo_pd(v0,v1),valpha);
    r10 = _mm_mul_pd(_mm_unpackhi_pd(v1,v0),valpha);
    r01 = _mm_mul_pd(_mm_unpacklo_pd(v2,v3),valpha);
    r11 = _mm_mul_pd(_mm_unpackhi_pd(v3,v2),valpha);

    /*
     * store full 2x4 result; R may be non-aligned (storeu).
     * beta==0 overwrites R, otherwise R is scaled by beta first.
     */
    if( beta==0 )
    {
        _mm_storeu_pd(r,r00);
        _mm_storeu_pd(r+2,r01);
        _mm_storeu_pd(r+stride,r10);
        _mm_storeu_pd(r+stride+2,r11);
    }
    else
    {
        vbeta = _mm_load1_pd(&beta);
        _mm_storeu_pd(r,          _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r),vbeta),r00));
        _mm_storeu_pd(r+2,        _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+2),vbeta),r01));
        _mm_storeu_pd(r+stride,   _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride),vbeta),r10));
        _mm_storeu_pd(r+stride+2, _mm_add_pd(_mm_mul_pd(_mm_loadu_pd(r+stride+2),vbeta),r11));
    }
}
#endif
12525
12526 #if !defined(ALGLIB_NO_FAST_KERNELS)
12527
12528 /*************************************************************************
12529 Computes dot product (X,Y) for elements [0,N) of X[] and Y[]
12530
12531 INPUT PARAMETERS:
12532 N - vector length
12533 X - array[N], vector to process
12534 Y - array[N], vector to process
12535
12536 RESULT:
12537 (X,Y)
12538
12539 -- ALGLIB --
12540 Copyright 20.01.2020 by Bochkanov Sergey
12541 *************************************************************************/
rdotv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)12542 double rdotv(ae_int_t n,
12543 /* Real */ ae_vector* x,
12544 /* Real */ ae_vector* y,
12545 ae_state *_state)
12546 {
12547 ae_int_t i;
12548 double result;
12549
12550 /*
12551 * Try fast kernels.
12552 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12553 */
12554 if( n>=_ABLASF_KERNEL_SIZE1 )
12555 _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv,(n,x->ptr.p_double,y->ptr.p_double,_state)) /* use _ALGLIB_KERNEL_VOID_ for a kernel that does not return result */
12556
12557 /*
12558 * Original generic C implementation
12559 */
12560 result = (double)(0);
12561 for(i=0; i<=n-1; i++)
12562 {
12563 result = result+x->ptr.p_double[i]*y->ptr.p_double[i];
12564 }
12565 return result;
12566 }
12567
12568
12569
12570 /*************************************************************************
12571 Computes dot product (X,A[i]) for elements [0,N) of vector X[] and row A[i,*]
12572
12573 INPUT PARAMETERS:
12574 N - vector length
12575 X - array[N], vector to process
12576 A - array[?,N], matrix to process
12577 I - row index
12578
12579 RESULT:
12580 (X,Ai)
12581
12582 -- ALGLIB --
12583 Copyright 20.01.2020 by Bochkanov Sergey
12584 *************************************************************************/
rdotvr(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t i,ae_state * _state)12585 double rdotvr(ae_int_t n,
12586 /* Real */ ae_vector* x,
12587 /* Real */ ae_matrix* a,
12588 ae_int_t i,
12589 ae_state *_state)
12590 {
12591 ae_int_t j;
12592 double result;
12593
12594 /*
12595 * Try fast kernels.
12596 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12597 */
12598 if( n>=_ABLASF_KERNEL_SIZE1 )
12599 _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv,(n,x->ptr.p_double,a->ptr.pp_double[i],_state))
12600
12601 result = (double)(0);
12602 for(j=0; j<=n-1; j++)
12603 {
12604 result = result+x->ptr.p_double[j]*a->ptr.pp_double[i][j];
12605 }
12606 return result;
12607 }
12608
12609
12610 /*************************************************************************
Computes dot product (A[ia],B[ib]) of rows A[ia,*] and B[ib,*]

INPUT PARAMETERS:
    N       -   vector length
    A       -   array[?,N], first matrix to process
    IA      -   row index in A
    B       -   array[?,N], second matrix to process
    IB      -   row index in B

RESULT:
    (A[ia],B[ib])
12621
12622 -- ALGLIB --
12623 Copyright 20.01.2020 by Bochkanov Sergey
12624 *************************************************************************/
rdotrr(ae_int_t n,ae_matrix * a,ae_int_t ia,ae_matrix * b,ae_int_t ib,ae_state * _state)12625 double rdotrr(ae_int_t n,
12626 /* Real */ ae_matrix* a,
12627 ae_int_t ia,
12628 /* Real */ ae_matrix* b,
12629 ae_int_t ib,
12630 ae_state *_state)
12631 {
12632 ae_int_t j;
12633 double result;
12634
12635 /*
12636 * Try fast kernels.
12637 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12638 */
12639 if( n>=_ABLASF_KERNEL_SIZE1 )
12640 _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv,(n,a->ptr.pp_double[ia],b->ptr.pp_double[ib],_state))
12641
12642 result = (double)(0);
12643 for(j=0; j<=n-1; j++)
12644 {
12645 result = result+a->ptr.pp_double[ia][j]*b->ptr.pp_double[ib][j];
12646 }
12647 return result;
12648 }
12649
12650
12651 /*************************************************************************
12652 Computes dot product (X,X) for elements [0,N) of X[]
12653
12654 INPUT PARAMETERS:
12655 N - vector length
12656 X - array[N], vector to process
12657
12658 RESULT:
12659 (X,X)
12660
12661 -- ALGLIB --
12662 Copyright 20.01.2020 by Bochkanov Sergey
12663 *************************************************************************/
rdotv2(ae_int_t n,ae_vector * x,ae_state * _state)12664 double rdotv2(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
12665 {
12666 ae_int_t i;
12667 double v;
12668 double result;
12669
12670 /*
12671 * Try fast kernels.
12672 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12673 */
12674 if( n>=_ABLASF_KERNEL_SIZE1 )
12675 _ALGLIB_KERNEL_RETURN_SSE2_AVX2_FMA(rdotv2,(n,x->ptr.p_double,_state))
12676
12677 result = (double)(0);
12678 for(i=0; i<=n-1; i++)
12679 {
12680 v = x->ptr.p_double[i];
12681 result = result+v*v;
12682 }
12683 return result;
12684 }
12685
12686
12687 /*************************************************************************
12688 Copies vector X[] to Y[]
12689
12690 INPUT PARAMETERS:
12691 N - vector length
12692 X - array[N], source
12693 Y - preallocated array[N]
12694
12695 OUTPUT PARAMETERS:
12696 Y - leading N elements are replaced by X
12697
12698
12699 NOTE: destination and source should NOT overlap
12700
12701 -- ALGLIB --
12702 Copyright 20.01.2020 by Bochkanov Sergey
12703 *************************************************************************/
rcopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)12704 void rcopyv(ae_int_t n,
12705 /* Real */ ae_vector* x,
12706 /* Real */ ae_vector* y,
12707 ae_state *_state)
12708 {
12709 ae_int_t j;
12710
12711 /*
12712 * Try fast kernels.
12713 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12714 */
12715 if( n>=_ABLASF_KERNEL_SIZE1 )
12716 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12717 (n,x->ptr.p_double,y->ptr.p_double,_state))
12718
12719
12720 for(j=0; j<=n-1; j++)
12721 {
12722 y->ptr.p_double[j] = x->ptr.p_double[j];
12723 }
12724 }
12725
12726 /*************************************************************************
12727 Copies vector X[] to row I of A[,]
12728
12729 INPUT PARAMETERS:
12730 N - vector length
12731 X - array[N], source
12732 A - preallocated 2D array large enough to store result
12733 I - destination row index
12734
12735 OUTPUT PARAMETERS:
12736 A - leading N elements of I-th row are replaced by X
12737
12738 -- ALGLIB --
12739 Copyright 20.01.2020 by Bochkanov Sergey
12740 *************************************************************************/
rcopyvr(ae_int_t n,ae_vector * x,ae_matrix * a,ae_int_t i,ae_state * _state)12741 void rcopyvr(ae_int_t n,
12742 /* Real */ ae_vector* x,
12743 /* Real */ ae_matrix* a,
12744 ae_int_t i,
12745 ae_state *_state)
12746 {
12747 ae_int_t j;
12748
12749 /*
12750 * Try fast kernels.
12751 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12752 */
12753 if( n>=_ABLASF_KERNEL_SIZE1 )
12754 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12755 (n, x->ptr.p_double, a->ptr.pp_double[i], _state))
12756
12757 for(j=0; j<=n-1; j++)
12758 {
12759 a->ptr.pp_double[i][j] = x->ptr.p_double[j];
12760 }
12761 }
12762
12763
12764 /*************************************************************************
12765 Copies row I of A[,] to vector X[]
12766
12767 INPUT PARAMETERS:
12768 N - vector length
12769 A - 2D array, source
12770 I - source row index
12771 X - preallocated destination
12772
12773 OUTPUT PARAMETERS:
12774 X - array[N], destination
12775
12776 -- ALGLIB --
12777 Copyright 20.01.2020 by Bochkanov Sergey
12778 *************************************************************************/
rcopyrv(ae_int_t n,ae_matrix * a,ae_int_t i,ae_vector * x,ae_state * _state)12779 void rcopyrv(ae_int_t n,
12780 /* Real */ ae_matrix* a,
12781 ae_int_t i,
12782 /* Real */ ae_vector* x,
12783 ae_state *_state)
12784 {
12785 ae_int_t j;
12786
12787 /*
12788 * Try fast kernels.
12789 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12790 */
12791 if( n>=_ABLASF_KERNEL_SIZE1 )
12792 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12793 (n, a->ptr.pp_double[i], x->ptr.p_double, _state))
12794
12795 for(j=0; j<=n-1; j++)
12796 {
12797 x->ptr.p_double[j] = a->ptr.pp_double[i][j];
12798 }
12799 }
12800
12801
12802 /*************************************************************************
12803 Copies row I of A[,] to row K of B[,].
12804
12805 A[i,...] and B[k,...] may overlap.
12806
12807 INPUT PARAMETERS:
12808 N - vector length
12809 A - 2D array, source
12810 I - source row index
12811 B - preallocated destination
12812 K - destination row index
12813
12814 OUTPUT PARAMETERS:
12815 B - row K overwritten
12816
12817 -- ALGLIB --
12818 Copyright 20.01.2020 by Bochkanov Sergey
12819 *************************************************************************/
rcopyrr(ae_int_t n,ae_matrix * a,ae_int_t i,ae_matrix * b,ae_int_t k,ae_state * _state)12820 void rcopyrr(ae_int_t n,
12821 /* Real */ ae_matrix* a,
12822 ae_int_t i,
12823 /* Real */ ae_matrix* b,
12824 ae_int_t k,
12825 ae_state *_state)
12826 {
12827 ae_int_t j;
12828
12829 /*
12830 * Try fast kernels.
12831 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12832 */
12833 if( n>=_ABLASF_KERNEL_SIZE1 )
12834 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyv,
12835 (n, a->ptr.pp_double[i], b->ptr.pp_double[k], _state))
12836
12837 for(j=0; j<=n-1; j++)
12838 {
12839 b->ptr.pp_double[k][j] = a->ptr.pp_double[i][j];
12840 }
12841 }
12842
12843 /*************************************************************************
12844 Performs copying with multiplication of V*X[] to Y[]
12845
12846 INPUT PARAMETERS:
12847 N - vector length
12848 V - multiplier
12849 X - array[N], source
12850 Y - preallocated array[N]
12851
12852 OUTPUT PARAMETERS:
12853 Y - array[N], Y = V*X
12854
12855 -- ALGLIB --
12856 Copyright 20.01.2020 by Bochkanov Sergey
12857 *************************************************************************/
rcopymulv(ae_int_t n,double v,ae_vector * x,ae_vector * y,ae_state * _state)12858 void rcopymulv(ae_int_t n,
12859 double v,
12860 /* Real */ ae_vector* x,
12861 /* Real */ ae_vector* y,
12862 ae_state *_state)
12863 {
12864 ae_int_t i;
12865
12866 /*
12867 * Try fast kernels.
12868 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12869 */
12870 if( n>=_ABLASF_KERNEL_SIZE1 )
12871 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopymulv,
12872 (n,v,x->ptr.p_double,y->ptr.p_double,_state))
12873
12874 for(i=0; i<=n-1; i++)
12875 {
12876 y->ptr.p_double[i] = v*x->ptr.p_double[i];
12877 }
12878 }
12879
12880
12881 /*************************************************************************
12882 Performs copying with multiplication of V*X[] to Y[I,*]
12883
12884 INPUT PARAMETERS:
12885 N - vector length
12886 V - multiplier
12887 X - array[N], source
12888 Y - preallocated array[?,N]
12889 RIdx - destination row index
12890
12891 OUTPUT PARAMETERS:
12892 Y - Y[RIdx,...] = V*X
12893
12894 -- ALGLIB --
12895 Copyright 20.01.2020 by Bochkanov Sergey
12896 *************************************************************************/
rcopymulvr(ae_int_t n,double v,ae_vector * x,ae_matrix * y,ae_int_t ridx,ae_state * _state)12897 void rcopymulvr(ae_int_t n,
12898 double v,
12899 /* Real */ ae_vector* x,
12900 /* Real */ ae_matrix* y,
12901 ae_int_t ridx,
12902 ae_state *_state)
12903 {
12904 ae_int_t i;
12905
12906 /*
12907 * Try fast kernels.
12908 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12909 */
12910 if( n>=_ABLASF_KERNEL_SIZE1 )
12911 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopymulv,
12912 (n,v,x->ptr.p_double,y->ptr.pp_double[ridx],_state))
12913
12914 for(i=0; i<=n-1; i++)
12915 {
12916 y->ptr.pp_double[ridx][i] = v*x->ptr.p_double[i];
12917 }
12918 }
12919
12920 /*************************************************************************
12921 Copies vector X[] to Y[]
12922
12923 INPUT PARAMETERS:
12924 N - vector length
12925 X - source array
12926 Y - preallocated array[N]
12927
12928 OUTPUT PARAMETERS:
12929 Y - X copied to Y
12930
12931 -- ALGLIB --
12932 Copyright 20.01.2020 by Bochkanov Sergey
12933 *************************************************************************/
icopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)12934 void icopyv(ae_int_t n,
12935 /* Integer */ ae_vector* x,
12936 /* Integer */ ae_vector* y,
12937 ae_state *_state)
12938 {
12939 ae_int_t j;
12940
12941 /*
12942 * Try fast kernels.
12943 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12944 */
12945 if( n>=_ABLASF_KERNEL_SIZE1 )
12946 _ALGLIB_KERNEL_VOID_SSE2_AVX2(icopyv,
12947 (n, x->ptr.p_int, y->ptr.p_int, _state))
12948
12949 for(j=0; j<=n-1; j++)
12950 {
12951 y->ptr.p_int[j] = x->ptr.p_int[j];
12952 }
12953 }
12954
12955 /*************************************************************************
12956 Copies vector X[] to Y[]
12957
12958 INPUT PARAMETERS:
12959 N - vector length
12960 X - array[N], source
12961 Y - preallocated array[N]
12962
12963 OUTPUT PARAMETERS:
12964 Y - leading N elements are replaced by X
12965
12966
12967 NOTE: destination and source should NOT overlap
12968
12969 -- ALGLIB --
12970 Copyright 20.01.2020 by Bochkanov Sergey
12971 *************************************************************************/
bcopyv(ae_int_t n,ae_vector * x,ae_vector * y,ae_state * _state)12972 void bcopyv(ae_int_t n,
12973 /* Boolean */ ae_vector* x,
12974 /* Boolean */ ae_vector* y,
12975 ae_state *_state)
12976 {
12977 ae_int_t j;
12978
12979 /*
12980 * Try fast kernels.
12981 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
12982 */
12983 if( n>=_ABLASF_KERNEL_SIZE1*8 )
12984 _ALGLIB_KERNEL_VOID_SSE2_AVX2(bcopyv,
12985 (n, x->ptr.p_bool, y->ptr.p_bool, _state))
12986
12987 for(j=0; j<=n-1; j++)
12988 {
12989 y->ptr.p_bool[j] = x->ptr.p_bool[j];
12990 }
12991 }
12992
12993
12994 /*************************************************************************
12995 Sets vector X[] to V
12996
12997 INPUT PARAMETERS:
12998 N - vector length
12999 V - value to set
13000 X - array[N]
13001
13002 OUTPUT PARAMETERS:
13003 X - leading N elements are replaced by V
13004
13005 -- ALGLIB --
13006 Copyright 20.01.2020 by Bochkanov Sergey
13007 *************************************************************************/
rsetv(ae_int_t n,double v,ae_vector * x,ae_state * _state)13008 void rsetv(ae_int_t n,
13009 double v,
13010 /* Real */ ae_vector* x,
13011 ae_state *_state)
13012 {
13013 ae_int_t j;
13014
13015 /*
13016 * Try fast kernels.
13017 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13018 */
13019 if( n>=_ABLASF_KERNEL_SIZE1 )
13020 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetv,
13021 (n, v, x->ptr.p_double, _state))
13022
13023 for(j=0; j<=n-1; j++)
13024 {
13025 x->ptr.p_double[j] = v;
13026 }
13027 }
13028
13029 /*************************************************************************
13030 Sets row I of A[,] to V
13031
13032 INPUT PARAMETERS:
13033 N - vector length
13034 V - value to set
13035 A - array[N,N] or larger
13036 I - row index
13037
13038 OUTPUT PARAMETERS:
13039 A - leading N elements of I-th row are replaced by V
13040
13041 -- ALGLIB --
13042 Copyright 20.01.2020 by Bochkanov Sergey
13043 *************************************************************************/
rsetr(ae_int_t n,double v,ae_matrix * a,ae_int_t i,ae_state * _state)13044 void rsetr(ae_int_t n,
13045 double v,
13046 /* Real */ ae_matrix* a,
13047 ae_int_t i,
13048 ae_state *_state)
13049 {
13050 ae_int_t j;
13051
13052 /*
13053 * Try fast kernels.
13054 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13055 */
13056 if( n>=_ABLASF_KERNEL_SIZE1 )
13057 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetv,
13058 (n, v, a->ptr.pp_double[i], _state))
13059
13060 for(j=0; j<=n-1; j++)
13061 {
13062 a->ptr.pp_double[i][j] = v;
13063 }
13064 }
13065
13066
13067 /*************************************************************************
13068 Sets X[OffsX:OffsX+N-1] to V
13069
13070 INPUT PARAMETERS:
13071 N - subvector length
13072 V - value to set
13073 X - array[N]
13074
13075 OUTPUT PARAMETERS:
13076 X - X[OffsX:OffsX+N-1] is replaced by V
13077
13078 -- ALGLIB --
13079 Copyright 20.01.2020 by Bochkanov Sergey
13080 *************************************************************************/
rsetvx(ae_int_t n,double v,ae_vector * x,ae_int_t offsx,ae_state * _state)13081 void rsetvx(ae_int_t n,
13082 double v,
13083 /* Real */ ae_vector* x,
13084 ae_int_t offsx,
13085 ae_state *_state)
13086 {
13087 ae_int_t j;
13088
13089 /*
13090 * Try fast kernels.
13091 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13092 */
13093 if( n>=_ABLASF_KERNEL_SIZE1 )
13094 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetvx,
13095 (n, v, x->ptr.p_double+offsx, _state))
13096
13097 for(j=0; j<=n-1; j++)
13098 {
13099 x->ptr.p_double[offsx+j] = v;
13100 }
13101 }
13102
13103
13104 /*************************************************************************
13105 Sets matrix A[] to V
13106
13107 INPUT PARAMETERS:
13108 M, N - rows/cols count
13109 V - value to set
13110 A - array[M,N]
13111
13112 OUTPUT PARAMETERS:
13113 A - leading M rows, N cols are replaced by V
13114
13115 -- ALGLIB --
13116 Copyright 20.01.2020 by Bochkanov Sergey
13117 *************************************************************************/
rsetm_simd(const ae_int_t n,const double v,double * pDest,ae_state * _state)13118 static void rsetm_simd(const ae_int_t n, const double v, double *pDest, ae_state *_state)
13119 {
13120 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rsetv, (n, v, pDest, _state));
13121
13122 ae_int_t j;
13123 for(j=0; j<=n-1; j++) {
13124 pDest[j] = v;
13125 }
13126 }
13127
rsetm(ae_int_t m,ae_int_t n,double v,ae_matrix * a,ae_state * _state)13128 void rsetm(ae_int_t m,
13129 ae_int_t n,
13130 double v,
13131 /* Real */ ae_matrix* a,
13132 ae_state *_state)
13133 {
13134 ae_int_t i;
13135 ae_int_t j;
13136
13137 /*
13138 * Try fast kernels.
13139 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13140 */
13141 if( n >=_ABLASF_KERNEL_SIZE1 ) {
13142 for(i=0; i<m; i++) {
13143 rsetm_simd(n, v, a->ptr.pp_double[i], _state);
13144 }
13145 return;
13146 }
13147
13148 for(i=0; i<=m-1; i++)
13149 {
13150 for(j=0; j<=n-1; j++)
13151 {
13152 a->ptr.pp_double[i][j] = v;
13153 }
13154 }
13155 }
13156
13157
13158 /*************************************************************************
13159 Sets vector X[] to V
13160
13161 INPUT PARAMETERS:
13162 N - vector length
13163 V - value to set
13164 X - array[N]
13165
13166 OUTPUT PARAMETERS:
13167 X - leading N elements are replaced by V
13168
13169 -- ALGLIB --
13170 Copyright 20.01.2020 by Bochkanov Sergey
13171 *************************************************************************/
isetv(ae_int_t n,ae_int_t v,ae_vector * x,ae_state * _state)13172 void isetv(ae_int_t n,
13173 ae_int_t v,
13174 /* Integer */ ae_vector* x,
13175 ae_state *_state)
13176 {
13177 ae_int_t j;
13178
13179 /*
13180 * Try fast kernels.
13181 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13182 */
13183 if( n>=_ABLASF_KERNEL_SIZE1 )
13184 _ALGLIB_KERNEL_VOID_SSE2_AVX2(isetv,
13185 (n, v, x->ptr.p_int, _state))
13186
13187 for(j=0; j<=n-1; j++)
13188 {
13189 x->ptr.p_int[j] = v;
13190 }
13191 }
13192
13193 /*************************************************************************
13194 Sets vector X[] to V
13195
13196 INPUT PARAMETERS:
13197 N - vector length
13198 V - value to set
13199 X - array[N]
13200
13201 OUTPUT PARAMETERS:
13202 X - leading N elements are replaced by V
13203
13204 -- ALGLIB --
13205 Copyright 20.01.2020 by Bochkanov Sergey
13206 *************************************************************************/
bsetv(ae_int_t n,ae_bool v,ae_vector * x,ae_state * _state)13207 void bsetv(ae_int_t n,
13208 ae_bool v,
13209 /* Boolean */ ae_vector* x,
13210 ae_state *_state)
13211 {
13212 ae_int_t j;
13213
13214 /*
13215 * Try fast kernels.
13216 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13217 */
13218 if( n>=_ABLASF_KERNEL_SIZE1*8 )
13219 _ALGLIB_KERNEL_VOID_SSE2_AVX2(bsetv,
13220 (n, v, x->ptr.p_bool, _state))
13221
13222 for(j=0; j<=n-1; j++)
13223 {
13224 x->ptr.p_bool[j] = v;
13225 }
13226 }
13227
13228
13229 /*************************************************************************
13230 Performs inplace multiplication of X[] by V
13231
13232 INPUT PARAMETERS:
13233 N - vector length
13234 X - array[N], vector to process
13235 V - multiplier
13236
13237 OUTPUT PARAMETERS:
13238 X - elements 0...N-1 multiplied by V
13239
13240 -- ALGLIB --
13241 Copyright 20.01.2020 by Bochkanov Sergey
13242 *************************************************************************/
rmulv(ae_int_t n,double v,ae_vector * x,ae_state * _state)13243 void rmulv(ae_int_t n,
13244 double v,
13245 /* Real */ ae_vector* x,
13246 ae_state *_state)
13247 {
13248 ae_int_t i;
13249
13250 /*
13251 * Try fast kernels.
13252 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13253 */
13254 if( n>=_ABLASF_KERNEL_SIZE1 )
13255 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmulv,
13256 (n,v,x->ptr.p_double,_state))
13257
13258 for(i=0; i<=n-1; i++)
13259 {
13260 x->ptr.p_double[i] = x->ptr.p_double[i]*v;
13261 }
13262 }
13263
13264
13265 /*************************************************************************
13266 Performs inplace multiplication of X[] by V
13267
13268 INPUT PARAMETERS:
13269 N - row length
13270 X - array[?,N], row to process
13271 V - multiplier
13272
13273 OUTPUT PARAMETERS:
13274 X - elements 0...N-1 of row RowIdx are multiplied by V
13275
13276 -- ALGLIB --
13277 Copyright 20.01.2020 by Bochkanov Sergey
13278 *************************************************************************/
rmulr(ae_int_t n,double v,ae_matrix * x,ae_int_t rowidx,ae_state * _state)13279 void rmulr(ae_int_t n,
13280 double v,
13281 /* Real */ ae_matrix* x,
13282 ae_int_t rowidx,
13283 ae_state *_state)
13284 {
13285 ae_int_t i;
13286
13287 /*
13288 * Try fast kernels.
13289 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13290 */
13291 if( n>=_ABLASF_KERNEL_SIZE1 )
13292 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmulv,
13293 (n, v, x->ptr.pp_double[rowidx], _state))
13294
13295 for(i=0; i<=n-1; i++)
13296 {
13297 x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*v;
13298 }
13299 }
13300
13301
13302 /*************************************************************************
13303 Performs inplace multiplication of X[OffsX:OffsX+N-1] by V
13304
13305 INPUT PARAMETERS:
13306 N - subvector length
13307 X - vector to process
13308 V - multiplier
13309
13310 OUTPUT PARAMETERS:
13311 X - elements OffsX:OffsX+N-1 multiplied by V
13312
13313 -- ALGLIB --
13314 Copyright 20.01.2020 by Bochkanov Sergey
13315 *************************************************************************/
rmulvx(ae_int_t n,double v,ae_vector * x,ae_int_t offsx,ae_state * _state)13316 void rmulvx(ae_int_t n,
13317 double v,
13318 /* Real */ ae_vector* x,
13319 ae_int_t offsx,
13320 ae_state *_state)
13321 {
13322 ae_int_t i;
13323
13324 /*
13325 * Try fast kernels.
13326 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13327 */
13328 if( n>=_ABLASF_KERNEL_SIZE1 )
13329 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmulvx,
13330 (n, v, x->ptr.p_double+offsx, _state))
13331
13332 for(i=0; i<=n-1; i++)
13333 {
13334 x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]*v;
13335 }
13336 }
13337
13338
13339 /*************************************************************************
13340 Performs inplace addition of Y[] to X[]
13341
13342 INPUT PARAMETERS:
13343 N - vector length
13344 Alpha - multiplier
13345 Y - array[N], vector to process
13346 X - array[N], vector to process
13347
13348 RESULT:
13349 X := X + alpha*Y
13350
13351 -- ALGLIB --
13352 Copyright 20.01.2020 by Bochkanov Sergey
13353 *************************************************************************/
raddv(ae_int_t n,double alpha,ae_vector * y,ae_vector * x,ae_state * _state)13354 void raddv(ae_int_t n,
13355 double alpha,
13356 /* Real */ ae_vector* y,
13357 /* Real */ ae_vector* x,
13358 ae_state *_state)
13359 {
13360 ae_int_t i;
13361
13362 /*
13363 * Try fast kernels.
13364 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13365 */
13366 if( n>=_ABLASF_KERNEL_SIZE1 )
13367 _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13368 (n,alpha,y->ptr.p_double,x->ptr.p_double,_state))
13369
13370
13371 for(i=0; i<=n-1; i++)
13372 {
13373 x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.p_double[i];
13374 }
13375 }
13376
13377
13378 /*************************************************************************
13379 Performs inplace addition of vector Y[] to row X[]
13380
13381 INPUT PARAMETERS:
13382 N - vector length
13383 Alpha - multiplier
13384 Y - vector to add
13385 X - target row RowIdx
13386
13387 RESULT:
13388 X := X + alpha*Y
13389
13390 -- ALGLIB --
13391 Copyright 20.01.2020 by Bochkanov Sergey
13392 *************************************************************************/
raddvr(ae_int_t n,double alpha,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)13393 void raddvr(ae_int_t n,
13394 double alpha,
13395 /* Real */ ae_vector* y,
13396 /* Real */ ae_matrix* x,
13397 ae_int_t rowidx,
13398 ae_state *_state)
13399 {
13400 ae_int_t i;
13401
13402 /*
13403 * Try fast kernels.
13404 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13405 */
13406 if( n>=_ABLASF_KERNEL_SIZE1 )
13407 _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13408 (n,alpha,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13409
13410
13411 for(i=0; i<=n-1; i++)
13412 {
13413 x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]+alpha*y->ptr.p_double[i];
13414 }
13415 }
13416
13417
13418 /*************************************************************************
13419 Performs inplace addition of Y[RIdx,...] to X[]
13420
13421 INPUT PARAMETERS:
13422 N - vector length
13423 Alpha - multiplier
13424 Y - array[?,N], matrix whose RIdx-th row is added
13425 RIdx - row index
13426 X - array[N], vector to process
13427
13428 RESULT:
13429 X := X + alpha*Y
13430
13431 -- ALGLIB --
13432 Copyright 20.01.2020 by Bochkanov Sergey
13433 *************************************************************************/
raddrv(ae_int_t n,double alpha,ae_matrix * y,ae_int_t ridx,ae_vector * x,ae_state * _state)13434 void raddrv(ae_int_t n,
13435 double alpha,
13436 /* Real */ ae_matrix* y,
13437 ae_int_t ridx,
13438 /* Real */ ae_vector* x,
13439 ae_state *_state)
13440 {
13441 ae_int_t i;
13442
13443 /*
13444 * Try fast kernels.
13445 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13446 */
13447 if( n>=_ABLASF_KERNEL_SIZE1 )
13448 _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13449 (n,alpha,y->ptr.pp_double[ridx],x->ptr.p_double,_state))
13450
13451 for(i=0; i<=n-1; i++)
13452 {
13453 x->ptr.p_double[i] = x->ptr.p_double[i]+alpha*y->ptr.pp_double[ridx][i];
13454 }
13455 }
13456
13457
13458 /*************************************************************************
13459 Performs inplace addition of Y[RIdx,...] to X[RIdxDst]
13460
13461 INPUT PARAMETERS:
13462 N - vector length
13463 Alpha - multiplier
13464 Y - array[?,N], matrix whose RIdxSrc-th row is added
13465 RIdxSrc - source row index
13466 X - array[?,N], matrix whose RIdxDst-th row is target
13467 RIdxDst - destination row index
13468
13469 RESULT:
13470 X := X + alpha*Y
13471
13472 -- ALGLIB --
13473 Copyright 20.01.2020 by Bochkanov Sergey
13474 *************************************************************************/
raddrr(ae_int_t n,double alpha,ae_matrix * y,ae_int_t ridxsrc,ae_matrix * x,ae_int_t ridxdst,ae_state * _state)13475 void raddrr(ae_int_t n,
13476 double alpha,
13477 /* Real */ ae_matrix* y,
13478 ae_int_t ridxsrc,
13479 /* Real */ ae_matrix* x,
13480 ae_int_t ridxdst,
13481 ae_state *_state)
13482 {
13483 ae_int_t i;
13484
13485 /*
13486 * Try fast kernels.
13487 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13488 */
13489 if( n>=_ABLASF_KERNEL_SIZE1 )
13490 _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddv,
13491 (n,alpha,y->ptr.pp_double[ridxsrc],x->ptr.pp_double[ridxdst],_state))
13492
13493 for(i=0; i<=n-1; i++)
13494 {
13495 x->ptr.pp_double[ridxdst][i] = x->ptr.pp_double[ridxdst][i]+alpha*y->ptr.pp_double[ridxsrc][i];
13496 }
13497 }
13498
13499
13500 /*************************************************************************
13501 Performs inplace addition of Y[] to X[]
13502
13503 INPUT PARAMETERS:
13504 N - vector length
13505 Alpha - multiplier
13506 Y - source vector
13507 OffsY - source offset
13508 X - destination vector
13509 OffsX - destination offset
13510
13511 RESULT:
13512 X := X + alpha*Y
13513
13514 -- ALGLIB --
13515 Copyright 20.01.2020 by Bochkanov Sergey
13516 *************************************************************************/
raddvx(ae_int_t n,double alpha,ae_vector * y,ae_int_t offsy,ae_vector * x,ae_int_t offsx,ae_state * _state)13517 void raddvx(ae_int_t n,
13518 double alpha,
13519 /* Real */ ae_vector* y,
13520 ae_int_t offsy,
13521 /* Real */ ae_vector* x,
13522 ae_int_t offsx,
13523 ae_state *_state)
13524 {
13525 ae_int_t i;
13526
13527 /*
13528 * Try fast kernels.
13529 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13530 */
13531 if( n>=_ABLASF_KERNEL_SIZE1 )
13532 _ALGLIB_KERNEL_VOID_SSE2_AVX2_FMA(raddvx,
13533 (n, alpha, y->ptr.p_double+offsy, x->ptr.p_double+offsx, _state))
13534
13535 for(i=0; i<=n-1; i++)
13536 {
13537 x->ptr.p_double[offsx+i] = x->ptr.p_double[offsx+i]+alpha*y->ptr.p_double[offsy+i];
13538 }
13539 }
13540
13541
13542 /*************************************************************************
13543 Performs componentwise multiplication of vector X[] by vector Y[]
13544
13545 INPUT PARAMETERS:
13546 N - vector length
13547 Y - vector to multiply by
13548 X - target vector
13549
13550 RESULT:
13551 X := componentwise(X*Y)
13552
13553 -- ALGLIB --
13554 Copyright 20.01.2020 by Bochkanov Sergey
13555 *************************************************************************/
rmergemulv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)13556 void rmergemulv(ae_int_t n,
13557 /* Real */ ae_vector* y,
13558 /* Real */ ae_vector* x,
13559 ae_state *_state)
13560 {
13561 ae_int_t i;
13562
13563 /*
13564 * Try fast kernels.
13565 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13566 */
13567 if( n>=_ABLASF_KERNEL_SIZE1 )
13568 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemulv,
13569 (n,y->ptr.p_double,x->ptr.p_double,_state))
13570
13571
13572 for(i=0; i<=n-1; i++)
13573 {
13574 x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.p_double[i];
13575 }
13576 }
13577
13578
13579 /*************************************************************************
13580 Performs componentwise multiplication of row X[] by vector Y[]
13581
13582 INPUT PARAMETERS:
13583 N - vector length
13584 Y - vector to multiply by
13585 X - target row RowIdx
13586
13587 RESULT:
13588 X := componentwise(X*Y)
13589
13590 -- ALGLIB --
13591 Copyright 20.01.2020 by Bochkanov Sergey
13592 *************************************************************************/
rmergemulvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)13593 void rmergemulvr(ae_int_t n,
13594 /* Real */ ae_vector* y,
13595 /* Real */ ae_matrix* x,
13596 ae_int_t rowidx,
13597 ae_state *_state)
13598 {
13599 ae_int_t i;
13600
13601 /*
13602 * Try fast kernels.
13603 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13604 */
13605 if( n>=_ABLASF_KERNEL_SIZE1 )
13606 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemulv,
13607 (n,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13608
13609
13610 for(i=0; i<=n-1; i++)
13611 {
13612 x->ptr.pp_double[rowidx][i] = x->ptr.pp_double[rowidx][i]*y->ptr.p_double[i];
13613 }
13614 }
13615
13616
13617 /*************************************************************************
13618 Performs componentwise multiplication of row X[] by vector Y[]
13619
13620 INPUT PARAMETERS:
13621 N - vector length
13622 Y - vector to multiply by
13623 X - target row RowIdx
13624
13625 RESULT:
13626 X := componentwise(X*Y)
13627
13628 -- ALGLIB --
13629 Copyright 20.01.2020 by Bochkanov Sergey
13630 *************************************************************************/
rmergemulrv(ae_int_t n,ae_matrix * y,ae_int_t rowidx,ae_vector * x,ae_state * _state)13631 void rmergemulrv(ae_int_t n,
13632 /* Real */ ae_matrix* y,
13633 ae_int_t rowidx,
13634 /* Real */ ae_vector* x,
13635 ae_state *_state)
13636 {
13637 ae_int_t i;
13638
13639 /*
13640 * Try fast kernels.
13641 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13642 */
13643 if( n>=_ABLASF_KERNEL_SIZE1 )
13644 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemulv,
13645 (n,y->ptr.pp_double[rowidx],x->ptr.p_double,_state))
13646
13647 for(i=0; i<=n-1; i++)
13648 {
13649 x->ptr.p_double[i] = x->ptr.p_double[i]*y->ptr.pp_double[rowidx][i];
13650 }
13651 }
13652
13653 /*************************************************************************
13654 Performs componentwise max of vector X[] and vector Y[]
13655
13656 INPUT PARAMETERS:
13657 N - vector length
13658 Y - vector to multiply by
13659 X - target vector
13660
13661 RESULT:
13662 X := componentwise_max(X,Y)
13663
13664 -- ALGLIB --
13665 Copyright 20.01.2020 by Bochkanov Sergey
13666 *************************************************************************/
rmergemaxv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)13667 void rmergemaxv(ae_int_t n,
13668 /* Real */ ae_vector* y,
13669 /* Real */ ae_vector* x,
13670 ae_state *_state)
13671 {
13672 ae_int_t i;
13673
13674 /*
13675 * Try fast kernels.
13676 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13677 */
13678 if( n>=_ABLASF_KERNEL_SIZE1 )
13679 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemaxv,
13680 (n,y->ptr.p_double,x->ptr.p_double,_state))
13681
13682 for(i=0; i<=n-1; i++)
13683 {
13684 x->ptr.p_double[i] = ae_maxreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
13685 }
13686 }
13687
13688
13689 /*************************************************************************
13690 Performs componentwise max of row X[] and vector Y[]
13691
13692 INPUT PARAMETERS:
13693 N - vector length
13694 Y - vector to multiply by
13695 X - target row RowIdx
13696
13697 RESULT:
13698 X := componentwise_max(X,Y)
13699
13700 -- ALGLIB --
13701 Copyright 20.01.2020 by Bochkanov Sergey
13702 *************************************************************************/
rmergemaxvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)13703 void rmergemaxvr(ae_int_t n,
13704 /* Real */ ae_vector* y,
13705 /* Real */ ae_matrix* x,
13706 ae_int_t rowidx,
13707 ae_state *_state)
13708 {
13709 ae_int_t i;
13710
13711 /*
13712 * Try fast kernels.
13713 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13714 */
13715 if( n>=_ABLASF_KERNEL_SIZE1 )
13716 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemaxv,
13717 (n,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13718
13719 for(i=0; i<=n-1; i++)
13720 {
13721 x->ptr.pp_double[rowidx][i] = ae_maxreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
13722 }
13723 }
13724
13725
13726 /*************************************************************************
13727 Performs componentwise max of row X[I] and vector Y[]
13728
13729 INPUT PARAMETERS:
13730 N - vector length
13731 X - matrix, I-th row is source
13732 rowidx - target row RowIdx
13733
13734 RESULT:
13735 Y := componentwise_max(X,Y)
13736
13737 -- ALGLIB --
13738 Copyright 20.01.2020 by Bochkanov Sergey
13739 *************************************************************************/
rmergemaxrv(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_vector * y,ae_state * _state)13740 void rmergemaxrv(ae_int_t n,
13741 /* Real */ ae_matrix* x,
13742 ae_int_t rowidx,
13743 /* Real */ ae_vector* y,
13744 ae_state *_state)
13745 {
13746 ae_int_t i;
13747
13748 /*
13749 * Try fast kernels.
13750 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13751 */
13752 if( n>=_ABLASF_KERNEL_SIZE1 )
13753 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergemaxv,
13754 (n,x->ptr.pp_double[rowidx],y->ptr.p_double,_state))
13755
13756 for(i=0; i<=n-1; i++)
13757 {
13758 y->ptr.p_double[i] = ae_maxreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
13759 }
13760 }
13761
13762 /*************************************************************************
13763 Performs componentwise min of vector X[] and vector Y[]
13764
13765 INPUT PARAMETERS:
13766 N - vector length
13767 Y - source vector
13768 X - target vector
13769
13770 RESULT:
13771 X := componentwise_max(X,Y)
13772
13773 -- ALGLIB --
13774 Copyright 20.01.2020 by Bochkanov Sergey
13775 *************************************************************************/
rmergeminv(ae_int_t n,ae_vector * y,ae_vector * x,ae_state * _state)13776 void rmergeminv(ae_int_t n,
13777 /* Real */ ae_vector* y,
13778 /* Real */ ae_vector* x,
13779 ae_state *_state)
13780 {
13781 ae_int_t i;
13782
13783 /*
13784 * Try fast kernels.
13785 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13786 */
13787 if( n>=_ABLASF_KERNEL_SIZE1 )
13788 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergeminv,
13789 (n,y->ptr.p_double,x->ptr.p_double,_state))
13790
13791 for(i=0; i<=n-1; i++)
13792 {
13793 x->ptr.p_double[i] = ae_minreal(x->ptr.p_double[i], y->ptr.p_double[i], _state);
13794 }
13795 }
13796
13797
13798 /*************************************************************************
13799 Performs componentwise max of row X[] and vector Y[]
13800
13801 INPUT PARAMETERS:
13802 N - vector length
13803 Y - vector to multiply by
13804 X - target row RowIdx
13805
13806 RESULT:
13807 X := componentwise_max(X,Y)
13808
13809 -- ALGLIB --
13810 Copyright 20.01.2020 by Bochkanov Sergey
13811 *************************************************************************/
rmergeminvr(ae_int_t n,ae_vector * y,ae_matrix * x,ae_int_t rowidx,ae_state * _state)13812 void rmergeminvr(ae_int_t n,
13813 /* Real */ ae_vector* y,
13814 /* Real */ ae_matrix* x,
13815 ae_int_t rowidx,
13816 ae_state *_state)
13817 {
13818 ae_int_t i;
13819
13820 /*
13821 * Try fast kernels.
13822 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13823 */
13824 if( n>=_ABLASF_KERNEL_SIZE1 )
13825 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergeminv,
13826 (n,y->ptr.p_double,x->ptr.pp_double[rowidx],_state))
13827
13828 for(i=0; i<=n-1; i++)
13829 {
13830 x->ptr.pp_double[rowidx][i] = ae_minreal(x->ptr.pp_double[rowidx][i], y->ptr.p_double[i], _state);
13831 }
13832 }
13833
13834
13835 /*************************************************************************
13836 Performs componentwise max of row X[I] and vector Y[]
13837
13838 INPUT PARAMETERS:
13839 N - vector length
13840 X - matrix, I-th row is source
13841 X - target row RowIdx
13842
13843 RESULT:
13844 X := componentwise_max(X,Y)
13845
13846 -- ALGLIB --
13847 Copyright 20.01.2020 by Bochkanov Sergey
13848 *************************************************************************/
rmergeminrv(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_vector * y,ae_state * _state)13849 void rmergeminrv(ae_int_t n,
13850 /* Real */ ae_matrix* x,
13851 ae_int_t rowidx,
13852 /* Real */ ae_vector* y,
13853 ae_state *_state)
13854 {
13855 ae_int_t i;
13856
13857 /*
13858 * Try fast kernels.
13859 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13860 */
13861 if( n>=_ABLASF_KERNEL_SIZE1 )
13862 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rmergeminv,
13863 (n,x->ptr.pp_double[rowidx],y->ptr.p_double,_state))
13864
13865 for(i=0; i<=n-1; i++)
13866 {
13867 y->ptr.p_double[i] = ae_minreal(y->ptr.p_double[i], x->ptr.pp_double[rowidx][i], _state);
13868 }
13869 }
13870 /*************************************************************************
13871 Returns maximum X
13872
13873 INPUT PARAMETERS:
13874 N - vector length
13875 X - array[N], vector to process
13876
13877 OUTPUT PARAMETERS:
13878 max(X[i])
13879 zero for N=0
13880
13881 -- ALGLIB --
13882 Copyright 20.01.2020 by Bochkanov Sergey
13883 *************************************************************************/
rmaxv(ae_int_t n,ae_vector * x,ae_state * _state)13884 double rmaxv(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
13885 {
13886 ae_int_t i;
13887 double v;
13888 double result;
13889
13890 /*
13891 * Try fast kernels.
13892 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13893 */
13894 if( n>=_ABLASF_KERNEL_SIZE1 )
13895 _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxv, (n, x->ptr.p_double, _state));
13896
13897 if(n == 0)
13898 return 0.0;
13899 result = x->ptr.p_double[0];
13900 for(i=1; i<=n-1; i++)
13901 {
13902 v = x->ptr.p_double[i];
13903 if( v>result )
13904 {
13905 result = v;
13906 }
13907 }
13908 return result;
13909 }
13910
13911 /*************************************************************************
13912 Returns maximum X
13913
13914 INPUT PARAMETERS:
13915 N - vector length
13916 X - matrix to process, RowIdx-th row is processed
13917
13918 OUTPUT PARAMETERS:
13919 max(X[RowIdx,i])
13920 zero for N=0
13921
13922 -- ALGLIB --
13923 Copyright 20.01.2020 by Bochkanov Sergey
13924 *************************************************************************/
rmaxr(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_state * _state)13925 double rmaxr(ae_int_t n,
13926 /* Real */ ae_matrix* x,
13927 ae_int_t rowidx,
13928 ae_state *_state)
13929 {
13930 ae_int_t i;
13931 double v;
13932 double result;
13933
13934 /*
13935 * Try fast kernels.
13936 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13937 */
13938 if( n>=_ABLASF_KERNEL_SIZE1 )
13939 _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxv,(n, x->ptr.pp_double[rowidx], _state))
13940
13941 if(n == 0)
13942 return 0.0;
13943 result = x->ptr.pp_double[rowidx][0];
13944 for(i=1; i<=n-1; i++)
13945 {
13946 v = x->ptr.pp_double[rowidx][i];
13947 if( v>result )
13948 {
13949 result = v;
13950 }
13951 }
13952 return result;
13953 }
13954
13955 /*************************************************************************
13956 Returns maximum |X|
13957
13958 INPUT PARAMETERS:
13959 N - vector length
13960 X - array[N], vector to process
13961
13962 OUTPUT PARAMETERS:
13963 max(|X[i]|)
13964 zero for N=0
13965
13966 -- ALGLIB --
13967 Copyright 20.01.2020 by Bochkanov Sergey
13968 *************************************************************************/
rmaxabsv(ae_int_t n,ae_vector * x,ae_state * _state)13969 double rmaxabsv(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
13970 {
13971 ae_int_t i;
13972 double v;
13973 double result;
13974
13975 /*
13976 * Try fast kernels.
13977 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
13978 */
13979 if( n>=_ABLASF_KERNEL_SIZE1 )
13980 _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxabsv, (n, x->ptr.p_double, _state))
13981
13982 result = (double)(0);
13983 for(i=0; i<=n-1; i++)
13984 {
13985 v = ae_fabs(x->ptr.p_double[i], _state);
13986 if( v>result )
13987 {
13988 result = v;
13989 }
13990 }
13991 return result;
13992 }
13993
13994
13995 /*************************************************************************
13996 Returns maximum |X|
13997
13998 INPUT PARAMETERS:
13999 N - vector length
14000 X - matrix to process, RowIdx-th row is processed
14001
14002 OUTPUT PARAMETERS:
14003 max(|X[RowIdx,i]|)
14004 zero for N=0
14005
14006 -- ALGLIB --
14007 Copyright 20.01.2020 by Bochkanov Sergey
14008 *************************************************************************/
rmaxabsr(ae_int_t n,ae_matrix * x,ae_int_t rowidx,ae_state * _state)14009 double rmaxabsr(ae_int_t n,
14010 /* Real */ ae_matrix* x,
14011 ae_int_t rowidx,
14012 ae_state *_state)
14013 {
14014 ae_int_t i;
14015 double v;
14016 double result;
14017
14018 /*
14019 * Try fast kernels.
14020 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14021 */
14022 if( n>=_ABLASF_KERNEL_SIZE1 )
14023 _ALGLIB_KERNEL_RETURN_SSE2_AVX2(rmaxabsv,(n, x->ptr.pp_double[rowidx], _state))
14024
14025 result = (double)(0);
14026 for(i=0; i<=n-1; i++)
14027 {
14028 v = ae_fabs(x->ptr.pp_double[rowidx][i], _state);
14029 if( v>result )
14030 {
14031 result = v;
14032 }
14033 }
14034 return result;
14035 }
14036
14037 /*************************************************************************
14038 Copies vector X[] to Y[], extended version
14039
14040 INPUT PARAMETERS:
14041 N - vector length
14042 X - source array
14043 OffsX - source offset
14044 Y - preallocated array[N]
14045 OffsY - destination offset
14046
14047 OUTPUT PARAMETERS:
14048 Y - N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
14049
14050 NOTE: destination and source should NOT overlap
14051
14052 -- ALGLIB --
14053 Copyright 20.01.2020 by Bochkanov Sergey
14054 *************************************************************************/
rcopyvx(ae_int_t n,ae_vector * x,ae_int_t offsx,ae_vector * y,ae_int_t offsy,ae_state * _state)14055 void rcopyvx(ae_int_t n,
14056 /* Real */ ae_vector* x,
14057 ae_int_t offsx,
14058 /* Real */ ae_vector* y,
14059 ae_int_t offsy,
14060 ae_state *_state)
14061 {
14062 ae_int_t j;
14063
14064 /*
14065 * Try fast kernels.
14066 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14067 */
14068 if( n>=_ABLASF_KERNEL_SIZE1 )
14069 _ALGLIB_KERNEL_VOID_SSE2_AVX2(rcopyvx,(n, x->ptr.p_double+offsx, y->ptr.p_double+offsy, _state))
14070
14071 for(j=0; j<=n-1; j++)
14072 {
14073 y->ptr.p_double[offsy+j] = x->ptr.p_double[offsx+j];
14074 }
14075 }
14076
14077 /*************************************************************************
14078 Copies vector X[] to Y[], extended version
14079
14080 INPUT PARAMETERS:
14081 N - vector length
14082 X - source array
14083 OffsX - source offset
14084 Y - preallocated array[N]
14085 OffsY - destination offset
14086
14087 OUTPUT PARAMETERS:
14088 Y - N elements starting from OffsY are replaced by X[OffsX:OffsX+N-1]
14089
14090 NOTE: destination and source should NOT overlap
14091
14092 -- ALGLIB --
14093 Copyright 20.01.2020 by Bochkanov Sergey
14094 *************************************************************************/
icopyvx(ae_int_t n,ae_vector * x,ae_int_t offsx,ae_vector * y,ae_int_t offsy,ae_state * _state)14095 void icopyvx(ae_int_t n,
14096 /* Integer */ ae_vector* x,
14097 ae_int_t offsx,
14098 /* Integer */ ae_vector* y,
14099 ae_int_t offsy,
14100 ae_state *_state)
14101 {
14102 ae_int_t j;
14103
14104 /*
14105 * Try fast kernels.
14106 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14107 */
14108 if( n>=_ABLASF_KERNEL_SIZE1 )
14109 _ALGLIB_KERNEL_VOID_SSE2_AVX2(icopyvx,(n, x->ptr.p_int+offsx, y->ptr.p_int+offsy, _state))
14110
14111 for(j=0; j<=n-1; j++)
14112 {
14113 y->ptr.p_int[offsy+j] = x->ptr.p_int[offsx+j];
14114 }
14115 }
14116
14117 /*************************************************************************
14118 Matrix-vector product: y := alpha*op(A)*x + beta*y
14119
14120 NOTE: this function expects Y to be large enough to store result. No
14121 automatic preallocation happens for smaller arrays. No integrity
14122 checks is performed for sizes of A, x, y.
14123
14124 INPUT PARAMETERS:
14125 M - number of rows of op(A)
14126 N - number of columns of op(A)
14127 Alpha- coefficient
14128 A - source matrix
14129 OpA - operation type:
14130 * OpA=0 => op(A) = A
14131 * OpA=1 => op(A) = A^T
14132 X - input vector, has at least N elements
14133 Beta- coefficient
14134 Y - preallocated output array, has at least M elements
14135
14136 OUTPUT PARAMETERS:
14137 Y - vector which stores result
14138
14139 HANDLING OF SPECIAL CASES:
14140 * if M=0, then subroutine does nothing. It does not even touch arrays.
14141 * if N=0 or Alpha=0.0, then:
14142 * if Beta=0, then Y is filled by zeros. A and X are not referenced
14143 at all. Initial values of Y are ignored (we do not multiply Y by
14144 zero, we just rewrite it by zeros)
14145 * if Beta<>0, then Y is replaced by Beta*Y
14146 * if M>0, N>0, Alpha<>0, but Beta=0, then Y is replaced by A*x;
14147 initial state of Y is ignored (rewritten by A*x, without initial
14148 multiplication by zeros).
14149
14150
14151 -- ALGLIB routine --
14152
14153 01.09.2021
14154 Bochkanov Sergey
14155 *************************************************************************/
rgemv(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t opa,ae_vector * x,double beta,ae_vector * y,ae_state * _state)14156 void rgemv(ae_int_t m,
14157 ae_int_t n,
14158 double alpha,
14159 /* Real */ ae_matrix* a,
14160 ae_int_t opa,
14161 /* Real */ ae_vector* x,
14162 double beta,
14163 /* Real */ ae_vector* y,
14164 ae_state *_state)
14165 {
14166 ae_int_t i;
14167 ae_int_t j;
14168 double v;
14169
14170
14171
14172 /*
14173 * Properly premultiply Y by Beta.
14174 *
14175 * Quick exit for M=0, N=0 or Alpha=0.
14176 * After this block we have M>0, N>0, Alpha<>0.
14177 */
14178 if( m<=0 )
14179 {
14180 return;
14181 }
14182 if( ae_fp_neq(beta,(double)(0)) )
14183 {
14184 rmulv(m, beta, y, _state);
14185 }
14186 else
14187 {
14188 rsetv(m, 0.0, y, _state);
14189 }
14190 if( n<=0||ae_fp_eq(alpha,0.0) )
14191 {
14192 return;
14193 }
14194
14195 /*
14196 * Straight or transposed?
14197 */
14198 if( opa==0 )
14199 {
14200 /*
14201 * Try SIMD code
14202 */
14203 if( n>=_ABLASF_KERNEL_SIZE2 )
14204 _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemv_straight, (m, n, alpha, a,
14205 x->ptr.p_double, y->ptr.p_double, _state))
14206
14207 /*
14208 * Generic C version: y += A*x
14209 */
14210 for(i=0; i<=m-1; i++)
14211 {
14212 v = (double)(0);
14213 for(j=0; j<=n-1; j++)
14214 {
14215 v = v+a->ptr.pp_double[i][j]*x->ptr.p_double[j];
14216 }
14217 y->ptr.p_double[i] = alpha*v+y->ptr.p_double[i];
14218 }
14219 return;
14220 }
14221 if( opa==1 )
14222 {
14223 /*
14224 * Try SIMD code
14225 */
14226 if( m>=_ABLASF_KERNEL_SIZE2 )
14227 _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemv_transposed, (m, n, alpha, a,
14228 x->ptr.p_double, y->ptr.p_double, _state))
14229
14230
14231 /*
14232 * Generic C version: y += A^T*x
14233 */
14234 for(i=0; i<=n-1; i++)
14235 {
14236 v = alpha*x->ptr.p_double[i];
14237 for(j=0; j<=m-1; j++)
14238 {
14239 y->ptr.p_double[j] = y->ptr.p_double[j]+v*a->ptr.pp_double[i][j];
14240 }
14241 }
14242 return;
14243 }
14244 }
14245
14246
14247 /*************************************************************************
14248 Matrix-vector product: y := alpha*op(A)*x + beta*y
14249
14250 Here x, y, A are subvectors/submatrices of larger vectors/matrices.
14251
14252 NOTE: this function expects Y to be large enough to store result. No
14253 automatic preallocation happens for smaller arrays. No integrity
14254 checks is performed for sizes of A, x, y.
14255
14256 INPUT PARAMETERS:
14257 M - number of rows of op(A)
14258 N - number of columns of op(A)
14259 Alpha- coefficient
14260 A - source matrix
14261 IA - submatrix offset (row index)
14262 JA - submatrix offset (column index)
14263 OpA - operation type:
14264 * OpA=0 => op(A) = A
14265 * OpA=1 => op(A) = A^T
14266 X - input vector, has at least N+IX elements
14267 IX - subvector offset
14268 Beta- coefficient
14269 Y - preallocated output array, has at least M+IY elements
14270 IY - subvector offset
14271
14272 OUTPUT PARAMETERS:
14273 Y - vector which stores result
14274
14275 HANDLING OF SPECIAL CASES:
14276 * if M=0, then subroutine does nothing. It does not even touch arrays.
14277 * if N=0 or Alpha=0.0, then:
14278 * if Beta=0, then Y is filled by zeros. A and X are not referenced
14279 at all. Initial values of Y are ignored (we do not multiply Y by
14280 zero, we just rewrite it by zeros)
14281 * if Beta<>0, then Y is replaced by Beta*Y
14282 * if M>0, N>0, Alpha<>0, but Beta=0, then Y is replaced by A*x;
14283 initial state of Y is ignored (rewritten by A*x, without initial
14284 multiplication by zeros).
14285
14286
14287 -- ALGLIB routine --
14288
14289 01.09.2021
14290 Bochkanov Sergey
14291 *************************************************************************/
rgemvx(ae_int_t m,ae_int_t n,double alpha,ae_matrix * a,ae_int_t ia,ae_int_t ja,ae_int_t opa,ae_vector * x,ae_int_t ix,double beta,ae_vector * y,ae_int_t iy,ae_state * _state)14292 void rgemvx(ae_int_t m,
14293 ae_int_t n,
14294 double alpha,
14295 /* Real */ ae_matrix* a,
14296 ae_int_t ia,
14297 ae_int_t ja,
14298 ae_int_t opa,
14299 /* Real */ ae_vector* x,
14300 ae_int_t ix,
14301 double beta,
14302 /* Real */ ae_vector* y,
14303 ae_int_t iy,
14304 ae_state *_state)
14305 {
14306 ae_int_t i;
14307 ae_int_t j;
14308 double v;
14309
14310
14311
14312 /*
14313 * Properly premultiply Y by Beta.
14314 *
14315 * Quick exit for M=0, N=0 or Alpha=0.
14316 * After this block we have M>0, N>0, Alpha<>0.
14317 */
14318 if( m<=0 )
14319 {
14320 return;
14321 }
14322 if( ae_fp_neq(beta,(double)(0)) )
14323 {
14324 rmulvx(m, beta, y, iy, _state);
14325 }
14326 else
14327 {
14328 rsetvx(m, 0.0, y, iy, _state);
14329 }
14330 if( n<=0||ae_fp_eq(alpha,0.0) )
14331 {
14332 return;
14333 }
14334
14335 /*
14336 * Straight or transposed?
14337 */
14338 if( opa==0 )
14339 {
14340 /*
14341 * Try SIMD code
14342 */
14343 if( n>=_ABLASF_KERNEL_SIZE2 )
14344 _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemvx_straight, (m, n, alpha, a, ia, ja,
14345 x->ptr.p_double + ix, y->ptr.p_double + iy, _state))
14346
14347
14348 /*
14349 * Generic C code: y += A*x
14350 */
14351 for(i=0; i<=m-1; i++)
14352 {
14353 v = (double)(0);
14354 for(j=0; j<=n-1; j++)
14355 {
14356 v = v+a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
14357 }
14358 y->ptr.p_double[iy+i] = alpha*v+y->ptr.p_double[iy+i];
14359 }
14360 return;
14361 }
14362 if( opa==1 )
14363 {
14364 /*
14365 * Try SIMD code
14366 */
14367 if( m>=_ABLASF_KERNEL_SIZE2 )
14368 _ALGLIB_KERNEL_VOID_AVX2_FMA(rgemvx_transposed, (m, n, alpha, a, ia, ja,
14369 x->ptr.p_double+ix, y->ptr.p_double+iy, _state))
14370
14371 /*
14372 * Generic C code: y += A^T*x
14373 */
14374 for(i=0; i<=n-1; i++)
14375 {
14376 v = alpha*x->ptr.p_double[ix+i];
14377 for(j=0; j<=m-1; j++)
14378 {
14379 y->ptr.p_double[iy+j] = y->ptr.p_double[iy+j]+v*a->ptr.pp_double[ia+i][ja+j];
14380 }
14381 }
14382 return;
14383 }
14384 }
14385
14386
14387 /*************************************************************************
14388 Rank-1 correction: A := A + alpha*u*v'
14389
14390 NOTE: this function expects A to be large enough to store result. No
14391 automatic preallocation happens for smaller arrays. No integrity
14392 checks is performed for sizes of A, u, v.
14393
14394 INPUT PARAMETERS:
14395 M - number of rows
14396 N - number of columns
14397 A - target MxN matrix
14398 Alpha- coefficient
14399 U - vector #1
14400 V - vector #2
14401
14402
14403 -- ALGLIB routine --
14404 07.09.2021
14405 Bochkanov Sergey
14406 *************************************************************************/
rger(ae_int_t m,ae_int_t n,double alpha,ae_vector * u,ae_vector * v,ae_matrix * a,ae_state * _state)14407 void rger(ae_int_t m,
14408 ae_int_t n,
14409 double alpha,
14410 /* Real */ ae_vector* u,
14411 /* Real */ ae_vector* v,
14412 /* Real */ ae_matrix* a,
14413 ae_state *_state)
14414 {
14415 ae_int_t i;
14416 ae_int_t j;
14417 double s;
14418
14419
14420 if( (m<=0||n<=0)||ae_fp_eq(alpha,(double)(0)) )
14421 {
14422 return;
14423 }
14424 for(i=0; i<=m-1; i++)
14425 {
14426 s = alpha*u->ptr.p_double[i];
14427 for(j=0; j<=n-1; j++)
14428 {
14429 a->ptr.pp_double[i][j] = a->ptr.pp_double[i][j]+s*v->ptr.p_double[j];
14430 }
14431 }
14432 }
14433
14434
/*************************************************************************
This subroutine solves linear system op(A)*x=b where:
* A is NxN upper/lower triangular/unitriangular matrix
* X and B are Nx1 vectors
* "op" may be identity transformation or transposition

Solution replaces X.

IMPORTANT: * no overflow/underflow/degeneracy tests are performed.
           * no integrity checks for operand sizes, out-of-bounds accesses
             and so on are performed

INPUT PARAMETERS
    N   -   matrix size, N>=0
    A   -   matrix, actual matrix is stored in A[IA:IA+N-1,JA:JA+N-1]
    IA  -   submatrix offset
    JA  -   submatrix offset
    IsUpper -   whether matrix is upper triangular
    IsUnit  -   whether matrix is unitriangular
    OpType  -   transformation type:
                * 0 - no transformation
                * 1 - transposition
    X   -   right part, actual vector is stored in X[IX:IX+N-1]
    IX  -   offset

OUTPUT PARAMETERS
    X   -   solution replaces elements X[IX:IX+N-1]

  -- ALGLIB routine --
     (c) 07.09.2021 Bochkanov Sergey
*************************************************************************/
void rtrsvx(ae_int_t n,
     /* Real */ ae_matrix* a,
     ae_int_t ia,
     ae_int_t ja,
     ae_bool isupper,
     ae_bool isunit,
     ae_int_t optype,
     /* Real */ ae_vector* x,
     ae_int_t ix,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    double v;


    if( n<=0 )
    {
        return;
    }
    if( optype==0&&isupper )
    {
        /*
         * A*x=b with upper triangular A: back substitution. Rows are
         * processed from the last to the first; X[IX+i] depends only on
         * the already-final components X[IX+i+1:IX+N-1].
         */
        for(i=n-1; i>=0; i--)
        {
            v = x->ptr.p_double[ix+i];
            for(j=i+1; j<=n-1; j++)
            {
                v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
            }
            if( !isunit )
            {
                /*
                 * unitriangular matrices have implicit unit diagonal,
                 * so the division is skipped for them
                 */
                v = v/a->ptr.pp_double[ia+i][ja+i];
            }
            x->ptr.p_double[ix+i] = v;
        }
        return;
    }
    if( optype==0&&!isupper )
    {
        /*
         * A*x=b with lower triangular A: forward substitution, rows are
         * processed from the first to the last.
         */
        for(i=0; i<=n-1; i++)
        {
            v = x->ptr.p_double[ix+i];
            for(j=0; j<=i-1; j++)
            {
                v = v-a->ptr.pp_double[ia+i][ja+j]*x->ptr.p_double[ix+j];
            }
            if( !isunit )
            {
                v = v/a->ptr.pp_double[ia+i][ja+i];
            }
            x->ptr.p_double[ix+i] = v;
        }
        return;
    }
    if( optype==1&&isupper )
    {
        /*
         * A^T*x=b with upper triangular A (a lower triangular system):
         * column-oriented forward substitution - as soon as X[IX+i] is
         * final, it is subtracted from the remaining right part.
         */
        for(i=0; i<=n-1; i++)
        {
            v = x->ptr.p_double[ix+i];
            if( !isunit )
            {
                v = v/a->ptr.pp_double[ia+i][ja+i];
            }
            x->ptr.p_double[ix+i] = v;
            if( v==0 )
            {
                /*
                 * exact zero contributes nothing to the remaining
                 * components - skip the update pass
                 */
                continue;
            }
            for(j=i+1; j<=n-1; j++)
            {
                x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
            }
        }
        return;
    }
    if( optype==1&&!isupper )
    {
        /*
         * A^T*x=b with lower triangular A (an upper triangular system):
         * column-oriented back substitution, processed from the last row.
         */
        for(i=n-1; i>=0; i--)
        {
            v = x->ptr.p_double[ix+i];
            if( !isunit )
            {
                v = v/a->ptr.pp_double[ia+i][ja+i];
            }
            x->ptr.p_double[ix+i] = v;
            if( v==0 )
            {
                continue;
            }
            for(j=0; j<=i-1; j++)
            {
                x->ptr.p_double[ix+j] = x->ptr.p_double[ix+j]-v*a->ptr.pp_double[ia+i][ja+j];
            }
        }
        return;
    }
    ae_assert(ae_false, "rTRSVX: unexpected operation type", _state);
}
14564
/*************************************************************************
Fast rGEMM kernel with AVX2/FMA support

Computes C := alpha*op(A)*op(B) + beta*C for blocks which fit into the
_ABLASF_BLOCK_SIZE limit on every dimension.

Return value:
* ae_true if the product was computed by this kernel
* ae_false if the kernel refused the problem (AVX2 intrinsics not
  compiled in, CPU has no AVX2, or block is too large); the caller must
  fall back to generic code

  -- ALGLIB routine --
     19.09.2021
     Bochkanov Sergey
*************************************************************************/
ae_bool ablasf_rgemm32basecase(
     ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     double alpha,
     /* Real */ ae_matrix* _a,
     ae_int_t ia,
     ae_int_t ja,
     ae_int_t optypea,
     /* Real */ ae_matrix* _b,
     ae_int_t ib,
     ae_int_t jb,
     ae_int_t optypeb,
     double beta,
     /* Real */ ae_matrix* _c,
     ae_int_t ic,
     ae_int_t jc,
     ae_state *_state)
{
#if !defined(_ALGLIB_HAS_AVX2_INTRINSICS)
    return ae_false;
#else
    const ae_int_t block_size = _ABLASF_BLOCK_SIZE;
    const ae_int_t micro_size = _ABLASF_MICRO_SIZE;
    ae_int_t out0, out1;
    double *c;
    ae_int_t stride_c;
    ae_int_t cpu_id = ae_cpuid();
    /*
     * Kernel selection: the packer has a specialized variant for the case
     * when K exactly fills a 32-wide block; dot-product and daxpby kernels
     * default to AVX2 versions (the dot kernel may be upgraded to the FMA
     * version below).
     */
    ae_int_t (*ablasf_packblk)(const double*, ae_int_t, ae_int_t, ae_int_t, ae_int_t, double*, ae_int_t, ae_int_t) = (k==32 && block_size==32) ? ablasf_packblkh32_avx2 : ablasf_packblkh_avx2;
    void (*ablasf_dotblk)(const double *, const double *, ae_int_t, ae_int_t, ae_int_t, double *, ae_int_t) = ablasf_dotblkh_avx2;
    void (*ablasf_daxpby)(ae_int_t, double, const double *, double, double*) = ablasf_daxpby_avx2;

    /*
     * Determine CPU and kernel support: refuse oversized/empty problems
     * and CPUs without AVX2
     */
    if( m>block_size || n>block_size || k>block_size || m==0 || n==0 || !(cpu_id&CPU_AVX2) )
        return ae_false;
#if defined(_ALGLIB_HAS_FMA_INTRINSICS)
    if( cpu_id&CPU_FMA )
        ablasf_dotblk = ablasf_dotblkh_fma;
#endif

    /*
     * Prepare C: raw pointer to the top-left element of the target
     * submatrix and its row stride
     */
    c = _c->ptr.pp_double[ic]+jc;
    stride_c = _c->stride;

    /*
     * Do we have alpha*A*B ?
     */
    if( alpha!=0 && k>0 )
    {
        /*
         * Prepare structures.
         *
         * Stack buffers are overallocated by _ALGLIB_SIMD_ALIGNMENT_DOUBLES
         * so that ae_align() can round the working pointers up to the SIMD
         * alignment boundary.
         */
        ae_int_t base0, base1, offs0;
        double *a = _a->ptr.pp_double[ia]+ja;
        double *b = _b->ptr.pp_double[ib]+jb;
        ae_int_t stride_a = _a->stride;
        ae_int_t stride_b = _b->stride;
        double      _blka[_ABLASF_BLOCK_SIZE*_ABLASF_MICRO_SIZE+_ALGLIB_SIMD_ALIGNMENT_DOUBLES];
        double _blkb_long[_ABLASF_BLOCK_SIZE*_ABLASF_BLOCK_SIZE+_ALGLIB_SIMD_ALIGNMENT_DOUBLES];
        double      _blkc[_ABLASF_MICRO_SIZE*_ABLASF_BLOCK_SIZE+_ALGLIB_SIMD_ALIGNMENT_DOUBLES];
        double *blka          = (double*)ae_align(_blka,     _ALGLIB_SIMD_ALIGNMENT_BYTES);
        double *storageb_long = (double*)ae_align(_blkb_long,_ALGLIB_SIMD_ALIGNMENT_BYTES);
        double *blkc          = (double*)ae_align(_blkc,     _ALGLIB_SIMD_ALIGNMENT_BYTES);

        /*
         * Pack transform(B) into precomputed block form, micro_size
         * columns at a time (the last group may be narrower)
         */
        for(base1=0; base1<n; base1+=micro_size)
        {
            const ae_int_t lim1 = n-base1<micro_size ? n-base1 : micro_size;
            double *curb = storageb_long+base1*block_size;
            ablasf_packblk(
                b + (optypeb==0 ? base1 : base1*stride_b), stride_b, optypeb==0 ? 1 : 0, k, lim1,
                curb, block_size, micro_size);
        }

        /*
         * Output
         */
        for(base0=0; base0<m; base0+=micro_size)
        {
            /*
             * Load block row of transform(A)
             */
            const ae_int_t lim0    = m-base0<micro_size ? m-base0 : micro_size;
            const ae_int_t round_k = ablasf_packblk(
                a + (optypea==0 ? base0*stride_a : base0), stride_a, optypea, k, lim0,
                blka, block_size, micro_size);

            /*
             * Compute block(A)'*entire(B)
             */
            for(base1=0; base1<n; base1+=micro_size)
                ablasf_dotblk(blka, storageb_long+base1*block_size, round_k, block_size, micro_size, blkc+base1, block_size);

            /*
             * Output block row of block(A)'*entire(B):
             * C_row := alpha*blkc_row + beta*C_row
             */
            for(offs0=0; offs0<lim0; offs0++)
                ablasf_daxpby(n, alpha, blkc+offs0*block_size, beta, c+(base0+offs0)*stride_c);
        }
    }
    else
    {
        /*
         * No A*B, just beta*C (degenerate case, not optimized)
         */
        if( beta==0 )
        {
            /*
             * overwrite with zeros rather than multiply, so garbage/NAN
             * initial contents of C are handled correctly
             */
            for(out0=0; out0<m; out0++)
                for(out1=0; out1<n; out1++)
                    c[out0*stride_c+out1] = 0.0;
        }
        else if( beta!=1 )
        {
            for(out0=0; out0<m; out0++)
                for(out1=0; out1<n; out1++)
                    c[out0*stride_c+out1] *= beta;
        }
    }
    return ae_true;
#endif
}
14699
14700
14701 /*************************************************************************
14702 Returns recommended width of the SIMD-friendly buffer
14703 *************************************************************************/
spchol_spsymmgetmaxsimd(ae_state * _state)14704 ae_int_t spchol_spsymmgetmaxsimd(ae_state *_state)
14705 {
14706 #if AE_CPU==AE_INTEL
14707 return 4;
14708 #else
14709 return 1;
14710 #endif
14711 }
14712
/*************************************************************************
Solving linear system: propagating computed supernode.

Propagates computed supernode to the rest of the RHS using SIMD-friendly
RHS storage format.

INPUT PARAMETERS:
    X           -   solution vector; elements X[Cols0:Cols0+BlockSize-1]
                    hold the just-computed supernode components
    Cols0       -   offset of the supernode columns in X
    BlockSize   -   supernode width (number of columns)
    SuperRowIdx -   row indices; elements [RBase:RBase+OffDiagSize-1] map
                    off-diagonal rows of the supernode to RHS rows
    RBase       -   offset in SuperRowIdx
    OffDiagSize -   number of off-diagonal rows to propagate
    RowStorage  -   supernodal storage; the off-diagonal part of the
                    current supernode starts at OffsS+BlockSize*SStride
    OffsS       -   supernode offset in RowStorage
    SStride     -   row stride of the supernode in RowStorage
    SimdBuf     -   SIMD-friendly RHS buffer; this function reads/writes
                    element SimdBuf[I*SimdWidth] for each affected row I
    SimdWidth   -   row width of SimdBuf

OUTPUT PARAMETERS:
    SimdBuf     -   elements corresponding to off-diagonal rows of the
                    supernode are updated

  -- ALGLIB routine --
     08.09.2021
     Bochkanov Sergey
*************************************************************************/
void spchol_propagatefwd(/* Real */ ae_vector* x,
     ae_int_t cols0,
     ae_int_t blocksize,
     /* Integer */ ae_vector* superrowidx,
     ae_int_t rbase,
     ae_int_t offdiagsize,
     /* Real */ ae_vector* rowstorage,
     ae_int_t offss,
     ae_int_t sstride,
     /* Real */ ae_vector* simdbuf,
     ae_int_t simdwidth,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t k;
    ae_int_t baseoffs;
    double v;

    /*
     * Try SIMD kernels (FMA version handles stride-4 supernodes and
     * the 2x2 case); the kernel performs the whole propagation.
     */
#if defined(_ALGLIB_HAS_FMA_INTRINSICS)
    if( sstride==4 || (blocksize==2 && sstride==2) )
        if( ae_cpuid()&CPU_FMA )
        {
            spchol_propagatefwd_fma(x, cols0, blocksize, superrowidx, rbase, offdiagsize, rowstorage, offss, sstride, simdbuf, simdwidth, _state);
            return;
        }
#endif

    /*
     * Propagate rank-1 node (can not be accelerated with SIMD)
     */
    if( blocksize==1 && sstride==1 )
    {
        /*
         * blocksize is 1, stride is 1: raw-pointer loop over the
         * off-diagonal column. The +1*1 term (blocksize*sstride) skips
         * the 1x1 diagonal block.
         */
        double vx = x->ptr.p_double[cols0];
        double *p_mat_row  = rowstorage->ptr.p_double+offss+1*1;
        double *p_simd_buf = simdbuf->ptr.p_double;
        ae_int_t *p_rowidx = superrowidx->ptr.p_int+rbase;
        if( simdwidth==4 )
        {
            /*
             * separate loop with compile-time constant stride 4 - lets
             * the compiler fold the multiplication into addressing
             */
            for(k=0; k<offdiagsize; k++)
                p_simd_buf[p_rowidx[k]*4] -= p_mat_row[k]*vx;
        }
        else
        {
            for(k=0; k<offdiagsize; k++)
                p_simd_buf[p_rowidx[k]*simdwidth] -= p_mat_row[k]*vx;
        }
        return;
    }

    /*
     * Generic C code for generic propagate: for each off-diagonal row K,
     * subtract the dot product of that row of the supernode with the
     * computed X components from the mapped RHS element.
     */
    for(k=0; k<=offdiagsize-1; k++)
    {
        i = superrowidx->ptr.p_int[rbase+k];
        baseoffs = offss+(k+blocksize)*sstride;
        v = simdbuf->ptr.p_double[i*simdwidth];
        for(j=0; j<=blocksize-1; j++)
        {
            v = v-rowstorage->ptr.p_double[baseoffs+j]*x->ptr.p_double[cols0+j];
        }
        simdbuf->ptr.p_double[i*simdwidth] = v;
    }
}
14798
14799
14800 /*************************************************************************
14801 Fast kernels for small supernodal updates: special 4x4x4x4 function.
14802
14803 ! See comments on UpdateSupernode() for information on generic supernodal
14804 ! updates, including notation used below.
14805
14806 The generic update has following form:
14807
14808 S := S - scatter(U*D*Uc')
14809
14810 This specialized function performs AxBxCx4 update, i.e.:
14811 * S is a tHeight*A matrix with row stride equal to 4 (usually it means that
14812 it has 3 or 4 columns)
14813 * U is a uHeight*B matrix
14814 * Uc' is a B*C matrix, with C<=A
14815 * scatter() scatters rows and columns of U*Uc'
14816
14817 Return value:
14818 * True if update was applied
14819 * False if kernel refused to perform an update (quick exit for unsupported
14820 combinations of input sizes)
14821
14822 -- ALGLIB routine --
14823 20.09.2020
14824 Bochkanov Sergey
14825 *************************************************************************/
spchol_updatekernelabc4(ae_vector * rowstorage,ae_int_t offss,ae_int_t twidth,ae_int_t offsu,ae_int_t uheight,ae_int_t urank,ae_int_t urowstride,ae_int_t uwidth,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)14826 ae_bool spchol_updatekernelabc4(/* Real */ ae_vector* rowstorage,
14827 ae_int_t offss,
14828 ae_int_t twidth,
14829 ae_int_t offsu,
14830 ae_int_t uheight,
14831 ae_int_t urank,
14832 ae_int_t urowstride,
14833 ae_int_t uwidth,
14834 /* Real */ ae_vector* diagd,
14835 ae_int_t offsd,
14836 /* Integer */ ae_vector* raw2smap,
14837 /* Integer */ ae_vector* superrowidx,
14838 ae_int_t urbase,
14839 ae_state *_state)
14840 {
14841 /*
14842 * Try fast kernels.
14843 * On success this macro will return, on failure to find kernel it will pass execution to the generic C implementation
14844 */
14845 _ALGLIB_KERNEL_RETURN_AVX2_FMA(spchol_updatekernelabc4,(rowstorage->ptr.p_double, offss, twidth, offsu, uheight, urank, urowstride, uwidth, diagd->ptr.p_double, offsd, raw2smap->ptr.p_int, superrowidx->ptr.p_int, urbase, _state))
14846
14847 /*
14848 * Generic code
14849 */
14850 ae_int_t k;
14851 ae_int_t targetrow;
14852 ae_int_t targetcol;
14853 ae_int_t offsk;
14854 double d0;
14855 double d1;
14856 double d2;
14857 double d3;
14858 double u00;
14859 double u01;
14860 double u02;
14861 double u03;
14862 double u10;
14863 double u11;
14864 double u12;
14865 double u13;
14866 double u20;
14867 double u21;
14868 double u22;
14869 double u23;
14870 double u30;
14871 double u31;
14872 double u32;
14873 double u33;
14874 double uk0;
14875 double uk1;
14876 double uk2;
14877 double uk3;
14878 ae_int_t srccol0;
14879 ae_int_t srccol1;
14880 ae_int_t srccol2;
14881 ae_int_t srccol3;
14882 ae_bool result;
14883
14884
14885
14886 /*
14887 * Filter out unsupported combinations (ones that are too sparse for the non-SIMD code)
14888 */
14889 result = ae_false;
14890 if( twidth<3||twidth>4 )
14891 {
14892 return result;
14893 }
14894 if( uwidth<1||uwidth>4 )
14895 {
14896 return result;
14897 }
14898 if( urank>4 )
14899 {
14900 return result;
14901 }
14902
14903 /*
14904 * Determine source columns for target columns, -1 if target column
14905 * is not updated.
14906 */
14907 srccol0 = -1;
14908 srccol1 = -1;
14909 srccol2 = -1;
14910 srccol3 = -1;
14911 for(k=0; k<=uwidth-1; k++)
14912 {
14913 targetcol = raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]];
14914 if( targetcol==0 )
14915 {
14916 srccol0 = k;
14917 }
14918 if( targetcol==1 )
14919 {
14920 srccol1 = k;
14921 }
14922 if( targetcol==2 )
14923 {
14924 srccol2 = k;
14925 }
14926 if( targetcol==3 )
14927 {
14928 srccol3 = k;
14929 }
14930 }
14931
14932 /*
14933 * Load update matrix into aligned/rearranged 4x4 storage
14934 */
14935 d0 = (double)(0);
14936 d1 = (double)(0);
14937 d2 = (double)(0);
14938 d3 = (double)(0);
14939 u00 = (double)(0);
14940 u01 = (double)(0);
14941 u02 = (double)(0);
14942 u03 = (double)(0);
14943 u10 = (double)(0);
14944 u11 = (double)(0);
14945 u12 = (double)(0);
14946 u13 = (double)(0);
14947 u20 = (double)(0);
14948 u21 = (double)(0);
14949 u22 = (double)(0);
14950 u23 = (double)(0);
14951 u30 = (double)(0);
14952 u31 = (double)(0);
14953 u32 = (double)(0);
14954 u33 = (double)(0);
14955 if( urank>=1 )
14956 {
14957 d0 = diagd->ptr.p_double[offsd+0];
14958 }
14959 if( urank>=2 )
14960 {
14961 d1 = diagd->ptr.p_double[offsd+1];
14962 }
14963 if( urank>=3 )
14964 {
14965 d2 = diagd->ptr.p_double[offsd+2];
14966 }
14967 if( urank>=4 )
14968 {
14969 d3 = diagd->ptr.p_double[offsd+3];
14970 }
14971 if( srccol0>=0 )
14972 {
14973 if( urank>=1 )
14974 {
14975 u00 = d0*rowstorage->ptr.p_double[offsu+srccol0*urowstride+0];
14976 }
14977 if( urank>=2 )
14978 {
14979 u01 = d1*rowstorage->ptr.p_double[offsu+srccol0*urowstride+1];
14980 }
14981 if( urank>=3 )
14982 {
14983 u02 = d2*rowstorage->ptr.p_double[offsu+srccol0*urowstride+2];
14984 }
14985 if( urank>=4 )
14986 {
14987 u03 = d3*rowstorage->ptr.p_double[offsu+srccol0*urowstride+3];
14988 }
14989 }
14990 if( srccol1>=0 )
14991 {
14992 if( urank>=1 )
14993 {
14994 u10 = d0*rowstorage->ptr.p_double[offsu+srccol1*urowstride+0];
14995 }
14996 if( urank>=2 )
14997 {
14998 u11 = d1*rowstorage->ptr.p_double[offsu+srccol1*urowstride+1];
14999 }
15000 if( urank>=3 )
15001 {
15002 u12 = d2*rowstorage->ptr.p_double[offsu+srccol1*urowstride+2];
15003 }
15004 if( urank>=4 )
15005 {
15006 u13 = d3*rowstorage->ptr.p_double[offsu+srccol1*urowstride+3];
15007 }
15008 }
15009 if( srccol2>=0 )
15010 {
15011 if( urank>=1 )
15012 {
15013 u20 = d0*rowstorage->ptr.p_double[offsu+srccol2*urowstride+0];
15014 }
15015 if( urank>=2 )
15016 {
15017 u21 = d1*rowstorage->ptr.p_double[offsu+srccol2*urowstride+1];
15018 }
15019 if( urank>=3 )
15020 {
15021 u22 = d2*rowstorage->ptr.p_double[offsu+srccol2*urowstride+2];
15022 }
15023 if( urank>=4 )
15024 {
15025 u23 = d3*rowstorage->ptr.p_double[offsu+srccol2*urowstride+3];
15026 }
15027 }
15028 if( srccol3>=0 )
15029 {
15030 if( urank>=1 )
15031 {
15032 u30 = d0*rowstorage->ptr.p_double[offsu+srccol3*urowstride+0];
15033 }
15034 if( urank>=2 )
15035 {
15036 u31 = d1*rowstorage->ptr.p_double[offsu+srccol3*urowstride+1];
15037 }
15038 if( urank>=3 )
15039 {
15040 u32 = d2*rowstorage->ptr.p_double[offsu+srccol3*urowstride+2];
15041 }
15042 if( urank>=4 )
15043 {
15044 u33 = d3*rowstorage->ptr.p_double[offsu+srccol3*urowstride+3];
15045 }
15046 }
15047
15048 /*
15049 * Run update
15050 */
15051 if( urank==1 )
15052 {
15053 for(k=0; k<=uheight-1; k++)
15054 {
15055 targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15056 offsk = offsu+k*urowstride;
15057 uk0 = rowstorage->ptr.p_double[offsk+0];
15058 rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0;
15059 rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0;
15060 rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0;
15061 rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0;
15062 }
15063 }
15064 if( urank==2 )
15065 {
15066 for(k=0; k<=uheight-1; k++)
15067 {
15068 targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15069 offsk = offsu+k*urowstride;
15070 uk0 = rowstorage->ptr.p_double[offsk+0];
15071 uk1 = rowstorage->ptr.p_double[offsk+1];
15072 rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1;
15073 rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1;
15074 rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1;
15075 rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1;
15076 }
15077 }
15078 if( urank==3 )
15079 {
15080 for(k=0; k<=uheight-1; k++)
15081 {
15082 targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15083 offsk = offsu+k*urowstride;
15084 uk0 = rowstorage->ptr.p_double[offsk+0];
15085 uk1 = rowstorage->ptr.p_double[offsk+1];
15086 uk2 = rowstorage->ptr.p_double[offsk+2];
15087 rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2;
15088 rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2;
15089 rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2;
15090 rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2;
15091 }
15092 }
15093 if( urank==4 )
15094 {
15095 for(k=0; k<=uheight-1; k++)
15096 {
15097 targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
15098 offsk = offsu+k*urowstride;
15099 uk0 = rowstorage->ptr.p_double[offsk+0];
15100 uk1 = rowstorage->ptr.p_double[offsk+1];
15101 uk2 = rowstorage->ptr.p_double[offsk+2];
15102 uk3 = rowstorage->ptr.p_double[offsk+3];
15103 rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
15104 rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
15105 rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
15106 rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
15107 }
15108 }
15109 result = ae_true;
15110 return result;
15111 }
15112
15113
15114 /*************************************************************************
15115 Fast kernels for small supernodal updates: special 4x4x4x4 function.
15116
15117 ! See comments on UpdateSupernode() for information on generic supernodal
15118 ! updates, including notation used below.
15119
15120 The generic update has following form:
15121
15122 S := S - scatter(U*D*Uc')
15123
15124 This specialized function performs 4x4x4x4 update, i.e.:
15125 * S is a tHeight*4 matrix
15126 * U is a uHeight*4 matrix
15127 * Uc' is a 4*4 matrix
15128 * scatter() scatters rows of U*Uc', but does not scatter columns (they are
15129 densely packed).
15130
15131 Return value:
15132 * True if update was applied
15133 * False if kernel refused to perform an update.
15134
15135 -- ALGLIB routine --
15136 20.09.2020
15137 Bochkanov Sergey
15138 *************************************************************************/
spchol_updatekernel4444(ae_vector * rowstorage,ae_int_t offss,ae_int_t sheight,ae_int_t offsu,ae_int_t uheight,ae_vector * diagd,ae_int_t offsd,ae_vector * raw2smap,ae_vector * superrowidx,ae_int_t urbase,ae_state * _state)15139 ae_bool spchol_updatekernel4444(/* Real */ ae_vector* rowstorage,
15140 ae_int_t offss,
15141 ae_int_t sheight,
15142 ae_int_t offsu,
15143 ae_int_t uheight,
     /* Real */ ae_vector* diagd,
     ae_int_t offsd,
     /* Integer */ ae_vector* raw2smap,
     /* Integer */ ae_vector* superrowidx,
     ae_int_t urbase,
     ae_state *_state)
{
    ae_int_t k;
    ae_int_t targetrow;
    ae_int_t offsk;
    /* d0..d3: diagonal scaling factors D[offsd+0..offsd+3] of the updating supernode */
    double d0;
    double d1;
    double d2;
    double d3;
    /* u00..u33: 4x4 block U*D, i.e. the first four rows of the update matrix
       (stored at rowstorage[offsu...]) pre-multiplied column-wise by d0..d3 */
    double u00;
    double u01;
    double u02;
    double u03;
    double u10;
    double u11;
    double u12;
    double u13;
    double u20;
    double u21;
    double u22;
    double u23;
    double u30;
    double u31;
    double u32;
    double u33;
    /* uk0..uk3: k-th row of the raw (unscaled) update matrix */
    double uk0;
    double uk1;
    double uk2;
    double uk3;
    ae_bool result;


    /*
     * Try fast kernels first.
     * On success this macro returns from the function; if no suitable AVX2/FMA
     * kernel is available it falls through to the generic C implementation below.
     */
    _ALGLIB_KERNEL_RETURN_AVX2_FMA(spchol_updatekernel4444,(rowstorage->ptr.p_double, offss, sheight, offsu, uheight, diagd->ptr.p_double, offsd, raw2smap->ptr.p_int, superrowidx->ptr.p_int, urbase, _state))

    /*
     * Generic C fallback code
     */
    /* Load the four diagonal entries used to scale the update columns */
    d0 = diagd->ptr.p_double[offsd+0];
    d1 = diagd->ptr.p_double[offsd+1];
    d2 = diagd->ptr.p_double[offsd+2];
    d3 = diagd->ptr.p_double[offsd+3];
    /* Precompute the scaled 4x4 top block: u[i][j] = d[j]*U[i][j].
       Rows of the update matrix are stored contiguously with stride 4. */
    u00 = d0*rowstorage->ptr.p_double[offsu+0*4+0];
    u01 = d1*rowstorage->ptr.p_double[offsu+0*4+1];
    u02 = d2*rowstorage->ptr.p_double[offsu+0*4+2];
    u03 = d3*rowstorage->ptr.p_double[offsu+0*4+3];
    u10 = d0*rowstorage->ptr.p_double[offsu+1*4+0];
    u11 = d1*rowstorage->ptr.p_double[offsu+1*4+1];
    u12 = d2*rowstorage->ptr.p_double[offsu+1*4+2];
    u13 = d3*rowstorage->ptr.p_double[offsu+1*4+3];
    u20 = d0*rowstorage->ptr.p_double[offsu+2*4+0];
    u21 = d1*rowstorage->ptr.p_double[offsu+2*4+1];
    u22 = d2*rowstorage->ptr.p_double[offsu+2*4+2];
    u23 = d3*rowstorage->ptr.p_double[offsu+2*4+3];
    u30 = d0*rowstorage->ptr.p_double[offsu+3*4+0];
    u31 = d1*rowstorage->ptr.p_double[offsu+3*4+1];
    u32 = d2*rowstorage->ptr.p_double[offsu+3*4+2];
    u33 = d3*rowstorage->ptr.p_double[offsu+3*4+3];
    if( sheight==uheight )
    {
        /*
         * No row scatter (update and target supernodes have identical row sets):
         * the most efficient code — target row k maps directly to update row k.
         */
        for(k=0; k<=uheight-1; k++)
        {
            targetrow = offss+k*4;
            offsk = offsu+k*4;
            /* Fetch row k of the raw update matrix once */
            uk0 = rowstorage->ptr.p_double[offsk+0];
            uk1 = rowstorage->ptr.p_double[offsk+1];
            uk2 = rowstorage->ptr.p_double[offsk+2];
            uk3 = rowstorage->ptr.p_double[offsk+3];
            /* target[k] -= (U*D) * U[k]^T, expanded as four dot products */
            rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
            rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
            rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
            rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
        }
    }
    else
    {
        /*
         * Row scatter is performed, less efficient code using double mapping to
         * determine target row index: update row k corresponds to raw row index
         * superrowidx[urbase+k], which raw2smap[] maps into the target supernode.
         */
        for(k=0; k<=uheight-1; k++)
        {
            targetrow = offss+raw2smap->ptr.p_int[superrowidx->ptr.p_int[urbase+k]]*4;
            offsk = offsu+k*4;
            uk0 = rowstorage->ptr.p_double[offsk+0];
            uk1 = rowstorage->ptr.p_double[offsk+1];
            uk2 = rowstorage->ptr.p_double[offsk+2];
            uk3 = rowstorage->ptr.p_double[offsk+3];
            rowstorage->ptr.p_double[targetrow+0] = rowstorage->ptr.p_double[targetrow+0]-u00*uk0-u01*uk1-u02*uk2-u03*uk3;
            rowstorage->ptr.p_double[targetrow+1] = rowstorage->ptr.p_double[targetrow+1]-u10*uk0-u11*uk1-u12*uk2-u13*uk3;
            rowstorage->ptr.p_double[targetrow+2] = rowstorage->ptr.p_double[targetrow+2]-u20*uk0-u21*uk1-u22*uk2-u23*uk3;
            rowstorage->ptr.p_double[targetrow+3] = rowstorage->ptr.p_double[targetrow+3]-u30*uk0-u31*uk1-u32*uk2-u33*uk3;
        }
    }
    /* NOTE(review): the function unconditionally reports success; presumably the
       return value exists for interface uniformity with other kernels — confirm. */
    result = ae_true;
    return result;
}
15251
15252 /* ALGLIB_NO_FAST_KERNELS */
15253 #endif
15254
15255
15256
15257 }
15258
15259
15260 /////////////////////////////////////////////////////////////////////////
15261 //
15262 // THIS SECTION CONTAINS PARALLEL SUBROUTINES
15263 //
15264 /////////////////////////////////////////////////////////////////////////
namespace alglib_impl
{
// Intentionally empty in this build/configuration of the generated source:
// no parallel subroutines are emitted here. The namespace is kept so the
// section layout of the generated file stays uniform.

}
15270
15271