1 /*******************************************************************************
2 * Copyright 2016-2019 Intel Corporation.
3 *
4 * This software and the related documents are Intel copyrighted materials, and
5 * your use of them is governed by the express license under which they were
6 * provided to you (License). Unless the License provides otherwise, you may not
7 * use, modify, copy, publish, distribute, disclose or transmit this software or
8 * the related documents without Intel's prior written permission.
9 *
10 * This software and the related documents are provided as is, with no express
11 * or implied warranties, other than those that are expressly stated in the
12 * License.
13 *******************************************************************************/
14
15 #include "iw_own.h"
16
17 #if defined _WIN32
18 #include <windows.h>
19 #include <intrin.h>
20 #else
21 #if IW_ENABLE_TLS
22 #ifndef _GNU_SOURCE
23 #define _GNU_SOURCE 1 /* for PTHREAD_MUTEX_RECURSIVE */
24 #endif
25 #ifndef __USE_UNIX98
26 #define __USE_UNIX98 1 /* for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
27 #endif
28 #include <pthread.h>
29 #endif
30 #include <stdlib.h>
31 #ifndef __APPLE__
32 #include <malloc.h>
33 #endif
34 #endif
35
36 #if IW_ENABLE_THREADING_LAYER
37 #include "omp.h"
38 #endif
39
40 #define OWN_ENABLE_BUFFER_POOL 0
41
42 /* /////////////////////////////////////////////////////////////////////////////
43 // Global initialization state
44 ///////////////////////////////////////////////////////////////////////////// */
45 #define OWN_STATE_NOT_INITIALIZED 0
46 #define OWN_STATE_INITIALIZATION 1
47 #define OWN_STATE_INITIALIZED 2
48
/* Returns the address of the library-wide initialization state.
   The state is a function-local static that starts at 0
   (OWN_STATE_NOT_INITIALIZED). */
static int* ownGlobalInitState(void)
{
    static int s_initState = 0;
    int *pState = &s_initState;

    return pState;
}
/* Reads the current library initialization state (an OWN_STATE_* value). */
static int ownGlobalGetInitState(void)
{
    const int *pState = ownGlobalInitState();

    return *pState;
}
/* Overwrites the library initialization state with the given OWN_STATE_* value. */
static void ownGlobalSetInitState(int state)
{
    int *pState = ownGlobalInitState();

    *pState = state;
}
62
63 /* /////////////////////////////////////////////////////////////////////////////
64 // ownAlignedMalloc
65 ///////////////////////////////////////////////////////////////////////////// */
ownAlignedMalloc(size_t iSize,size_t iAlign)66 void* IPP_STDCALL ownAlignedMalloc(size_t iSize, size_t iAlign)
67 {
68 #if defined _WIN32
69 return _aligned_malloc(iSize, iAlign);
70 #elif defined __APPLE__
71 if(iAlign <= 1)
72 return malloc(iSize);
73 else
74 {
75 void *pBuffer = malloc(iSize + (iAlign - 1) + sizeof(void*));
76 char *pABuffer = ((char*)pBuffer) + sizeof(void*);
77
78 pABuffer += (iAlign - (((size_t)pABuffer) & (iAlign - 1)));
79
80 ((void**)pABuffer)[-1] = pBuffer;
81 return pABuffer;
82 }
83 #else
84 return memalign(iAlign, iSize);
85 #endif
86 }
ownAlignedFree(void * pBuffer)87 void IPP_STDCALL ownAlignedFree(void* pBuffer)
88 {
89 #if defined _WIN32
90 _aligned_free(pBuffer);
91 #elif defined __APPLE__
92 free(((void**)pBuffer)[-1]);
93 #else
94 free(pBuffer);
95 #endif
96 }
97
98 /* /////////////////////////////////////////////////////////////////////////////
99 // OwnMutex
100 ///////////////////////////////////////////////////////////////////////////// */
101 #if IW_ENABLE_TLS
/* Thin wrapper that hides the platform mutex type behind one name. */
typedef struct _OwnMutex
{
#ifdef _WIN32
    CRITICAL_SECTION mutex; /* Win32 critical section object */
#else
    pthread_mutex_t  mutex; /* POSIX mutex, created as recursive by ownMutex_Init */
#endif
} OwnMutex;
110
/* Creates the platform mutex inside pMutex.
   On POSIX the mutex is created with the PTHREAD_MUTEX_RECURSIVE type.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownMutex_Init(OwnMutex *pMutex)
{
#if defined _WIN32
#if _WIN32_WINNT >= 0x0600
    return (InitializeCriticalSectionEx(&pMutex->mutex, 0, 0) != 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#else
    int status = OWN_STATUS_OK;
    __try
    {
        InitializeCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
    return status;
#endif
#else
    pthread_mutexattr_t attr;

    if(pthread_mutexattr_init(&attr) != 0)
        return OWN_STATUS_FAIL;

    if(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) == 0 &&
       pthread_mutex_init(&pMutex->mutex, &attr) == 0)
    {
        /* Mutex exists; the attribute object is no longer needed. */
        if(pthread_mutexattr_destroy(&attr) == 0)
            return OWN_STATUS_OK;

        pthread_mutex_destroy(&pMutex->mutex);
        return OWN_STATUS_FAIL;
    }

    /* Either the type could not be set or the mutex could not be created. */
    pthread_mutexattr_destroy(&attr);
    return OWN_STATUS_FAIL;
#endif
}
151
/* Destroys the platform mutex held by pMutex.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownMutex_Release(OwnMutex *pMutex)
{
#if defined _WIN32
    int status = OWN_STATUS_OK;
    __try
    {
        DeleteCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
    return status;
#else
    return (pthread_mutex_destroy(&pMutex->mutex) == 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#endif
}
170
/* Acquires the mutex, blocking until it is available.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownMutex_Lock(OwnMutex *pMutex)
{
#if defined _WIN32
    int status = OWN_STATUS_OK;
    __try
    {
        EnterCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
    return status;
#else
    return (pthread_mutex_lock(&pMutex->mutex) == 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#endif
}
189
/* Releases a mutex previously acquired with ownMutex_Lock.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownMutex_Unlock(OwnMutex *pMutex)
{
#if defined _WIN32
    int status = OWN_STATUS_OK;
    __try
    {
        LeaveCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
    return status;
#else
    return (pthread_mutex_unlock(&pMutex->mutex) == 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#endif
}
208 #endif
209
210 /* /////////////////////////////////////////////////////////////////////////////
211 // OwnVector - C Vector
212 ///////////////////////////////////////////////////////////////////////////// */
ownVector_Reserve(OwnVector * pVector,size_t reserveSize)213 IW_DECL(void) ownVector_Reserve(OwnVector *pVector, size_t reserveSize)
214 {
215 if(reserveSize*pVector->m_elemSize > pVector->m_bufferLen)
216 {
217 if(pVector->m_pBuffer)
218 {
219 size_t newBufferLen = reserveSize*pVector->m_elemSize;
220 Ipp8u *pNewBuffer = (Ipp8u*)OWN_SAFE_MALLOC(newBufferLen);
221
222 OWN_SAFE_COPY(pVector->m_pBuffer, pNewBuffer, pVector->m_bufferLen);
223 OWN_SAFE_RESET(pNewBuffer + pVector->m_bufferLen, (newBufferLen-pVector->m_bufferLen));
224 OWN_SAFE_FREE(pVector->m_pBuffer);
225
226 pVector->m_pBuffer = pNewBuffer;
227 pVector->m_bufferLen = newBufferLen;
228 }
229 else
230 {
231 pVector->m_bufferLen = reserveSize*pVector->m_elemSize;
232 pVector->m_pBuffer = (Ipp8u*)OWN_SAFE_MALLOC(pVector->m_bufferLen);
233 if(!pVector->m_pBuffer)
234 {
235 pVector->m_pBuffer = NULL;
236 pVector->m_bufferLen = 0;
237 return;
238 }
239 OWN_SAFE_RESET(pVector->m_pBuffer, pVector->m_bufferLen);
240 }
241 }
242 }
243
ownVector_Init(OwnVector * pVector,size_t elemSize,size_t reserve)244 IW_DECL(void) ownVector_Init(OwnVector *pVector, size_t elemSize, size_t reserve)
245 {
246 OWN_SAFE_RESET(pVector, sizeof(*pVector));
247
248 pVector->m_elemSize = elemSize;
249 ownVector_Reserve(pVector, reserve);
250 }
251
ownVector_Release(OwnVector * pVector)252 IW_DECL(void) ownVector_Release(OwnVector *pVector)
253 {
254 if(pVector->m_pBuffer)
255 {
256 OWN_SAFE_FREE(pVector->m_pBuffer);
257 pVector->m_pBuffer = 0;
258 }
259 pVector->m_elemSize = pVector->m_size = pVector->m_bufferLen = 0;
260 }
261
ownVector_Resize(OwnVector * pVector,size_t newSize)262 IW_DECL(void) ownVector_Resize(OwnVector *pVector, size_t newSize)
263 {
264 ownVector_Reserve(pVector, newSize);
265 pVector->m_size = newSize;
266 }
267
ownVector_PushBack(OwnVector * pVector,void * pData)268 IW_DECL(void) ownVector_PushBack(OwnVector *pVector, void *pData)
269 {
270 ownVector_Resize(pVector, pVector->m_size + 1);
271 OWN_SAFE_COPY(pData, pVector->m_pBuffer + pVector->m_elemSize*(pVector->m_size-1), pVector->m_elemSize);
272 }
273
ownVector_PopBack(OwnVector * pVector,void * pData)274 IW_DECL(void) ownVector_PopBack(OwnVector *pVector, void *pData)
275 {
276 if(pVector->m_size && pData)
277 {
278 OWN_SAFE_COPY(pVector->m_pBuffer + pVector->m_elemSize*(pVector->m_size-1), pData, pVector->m_elemSize);
279 ownVector_Resize(pVector, pVector->m_size - 1);
280 }
281 }
282
283 /* /////////////////////////////////////////////////////////////////////////////
284 // OwnTlsCore - TLS system abstraction
285 ///////////////////////////////////////////////////////////////////////////// */
286 #if IW_ENABLE_TLS
/* Holds the platform thread-local-storage key used by the ownTlsCore_* functions. */
typedef struct _OwnTlsCore
{
#ifdef _WIN32
    DWORD         tlsKey; /* Index returned by TlsAlloc */
#else
    pthread_key_t tlsKey; /* Key created by pthread_key_create */
#endif
} OwnTlsCore;
295
/* Allocates a platform TLS key (no per-thread destructor is registered).
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownTlsCore_Init(OwnTlsCore *pTls)
{
    int status = OWN_STATUS_OK;

    OWN_SAFE_RESET(pTls, sizeof(*pTls));
#if defined _WIN32
    pTls->tlsKey = TlsAlloc();
    if(pTls->tlsKey == TLS_OUT_OF_INDEXES)
        status = OWN_STATUS_FAIL;
#else
    if(pthread_key_create(&pTls->tlsKey, NULL) != 0)
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
309
/* Frees the platform TLS key.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownTlsCore_Release(OwnTlsCore *pTls)
{
#if defined _WIN32
    return (TlsFree(pTls->tlsKey) != 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#else
    return (pthread_key_delete(pTls->tlsKey) == 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#endif
}
321
/* Returns the pointer the calling thread has stored under this TLS key. */
static void* ownTlsCore_GetData(OwnTlsCore *pTls)
{
    void *pValue;

#if defined _WIN32
    pValue = TlsGetValue(pTls->tlsKey);
#else
    pValue = pthread_getspecific(pTls->tlsKey);
#endif
    return pValue;
}
330
/* Stores pData under this TLS key for the calling thread.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownTlsCore_SetData(OwnTlsCore *pTls, void *pData)
{
#if defined _WIN32
    return (TlsSetValue(pTls->tlsKey, pData) != 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#else
    return (pthread_setspecific(pTls->tlsKey, pData) == 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#endif
}
342 #endif
343
344 /* /////////////////////////////////////////////////////////////////////////////
345 // OwnTlsStorage - TLS data storage interface
346 ///////////////////////////////////////////////////////////////////////////// */
347 #if IW_ENABLE_TLS
348 typedef struct _OwnTlsStorage
349 {
350 OwnVector statesVector;
351 OwnVector threadsVector;
352
353 OwnTlsCore tls;
354 OwnMutex mutex;
355 } OwnTlsStorage;
356
357 typedef struct _OwnTlsStorageTD
358 {
359 OwnVector dataVector; /* Thread data array */
360 size_t index; /* Index of the thread */
361 } OwnTlsStorageTD;
362
/* Prepares a TLS storage: creates the mutex and the TLS key, then sets up
   the slot-state vector (4 ints reserved) and the thread vector (16 pointers
   reserved). Returns OWN_STATUS_OK, or OWN_STATUS_FAIL if the mutex or the
   TLS key could not be created. */
static int ownTlsStorage_Init(OwnTlsStorage *pTls)
{
    OWN_SAFE_RESET(pTls, sizeof(*pTls));

    if(ownMutex_Init(&pTls->mutex) >= 0)
    {
        if(ownTlsCore_Init(&pTls->tls) >= 0)
        {
            ownVector_Init(&pTls->statesVector,  sizeof(int), 4);
            ownVector_Init(&pTls->threadsVector, sizeof(OwnTlsStorageTD*), 16);
            return OWN_STATUS_OK;
        }

        /* TLS key creation failed: undo the mutex */
        ownMutex_Release(&pTls->mutex);
    }
    return OWN_STATUS_FAIL;
}
380
/* Destroys a TLS storage: frees every per-thread record, both vectors,
   the TLS key and the mutex.

   Data pointers stored by threads must have been released externally
   beforehand. If any slot still holds a non-NULL pointer the function
   returns OWN_STATUS_FAIL and leaves the storage completely untouched,
   so the caller may clean up and call it again.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL otherwise. */
static int ownTlsStorage_Release(OwnTlsStorage *pTls)
{
    size_t i, j;
    OwnTlsStorageTD **ppThreads = (OwnTlsStorageTD**)pTls->threadsVector.m_pBuffer;

    /* Pass 1: validate only. Nothing is freed until every record is known
       to be empty, otherwise a failed call would leave dangling record
       pointers behind and a retry would free them twice. */
    for(i = 0; i < pTls->threadsVector.m_size; i++)
    {
        OwnTlsStorageTD *pThread = ppThreads[i];
        if(!pThread)
            continue;

        for(j = 0; j < pThread->dataVector.m_size; j++)
        {
            /* Check that all data is destroyed. Data pointers must be deallocated externally */
            if(((void**)pThread->dataVector.m_pBuffer)[j])
                return OWN_STATUS_FAIL;
        }
    }

    /* Pass 2: release the per-thread records */
    for(i = 0; i < pTls->threadsVector.m_size; i++)
    {
        OwnTlsStorageTD *pThread = ppThreads[i];
        if(pThread)
        {
            ownVector_Release(&pThread->dataVector);
            OWN_SAFE_FREE(pThread);
            ppThreads[i] = NULL;
        }
    }

    ownVector_Release(&pTls->statesVector);
    ownVector_Release(&pTls->threadsVector);

    if(ownTlsCore_Release(&pTls->tls) < 0)
        return OWN_STATUS_FAIL;
    if(ownMutex_Release(&pTls->mutex) < 0)
        return OWN_STATUS_FAIL;

    return OWN_STATUS_OK;
}
413
/* Reserves a data index in the storage: reuses the first slot whose state is
   zero, or appends a new slot when all existing ones are taken.
   Returns the reserved index, or OWN_IDX_INVALID if the mutex could not be locked. */
static size_t ownTlsStorage_ReserveDataIndex(OwnTlsStorage *pTls)
{
    const int *pStates;
    size_t     slot;

    if(ownMutex_Lock(&pTls->mutex) != OWN_STATUS_OK)
        return OWN_IDX_INVALID;

    /* Look for the first unused slot */
    pStates = (const int*)pTls->statesVector.m_pBuffer;
    for(slot = 0; slot < pTls->statesVector.m_size; slot++)
    {
        if(!pStates[slot])
            break;
    }

    /* None free: slot equals the current size, so append one entry */
    if(slot == pTls->statesVector.m_size)
        ownVector_Resize(&pTls->statesVector, pTls->statesVector.m_size+1);

    /* Re-read the buffer pointer: the resize may have moved it */
    ((int*)pTls->statesVector.m_pBuffer)[slot] = 1;

    ownMutex_Unlock(&pTls->mutex);
    return slot;
}
439
/* Visits the data pointer stored under dataIdx by every registered thread.

   pTls        - storage to scan
   dataIdx     - data index to look up; must be below the number of slots
   pDataVector - optional; every non-NULL pointer found is appended to it
   bClear      - when non-zero, the found pointers are reset to NULL and the
                 slot is marked unused so the index can be reserved again
   Returns OWN_STATUS_OK, or OWN_STATUS_FAIL if the mutex could not be locked
   or dataIdx is out of range. */
static int ownTlsStorage_DataVector(OwnTlsStorage *pTls, size_t dataIdx, OwnVector *pDataVector, int bClear)
{
    size_t i;

    if(ownMutex_Lock(&pTls->mutex) != OWN_STATUS_OK)
        return OWN_STATUS_FAIL;

    if(pTls->statesVector.m_size <= dataIdx)
    {
        ownMutex_Unlock(&pTls->mutex);
        return OWN_STATUS_FAIL;
    }

    for(i = 0; i < pTls->threadsVector.m_size; i++)
    {
        OwnTlsStorageTD *pThread = ((OwnTlsStorageTD**)pTls->threadsVector.m_pBuffer)[i];
        void           **ppSlot;
        void            *pThreadData;

        /* A thread that never stored anything at this index has a shorter
           data vector; the bound must be checked before the slot is read. */
        if(!pThread || pThread->dataVector.m_size <= dataIdx)
            continue;

        ppSlot      = &((void**)pThread->dataVector.m_pBuffer)[dataIdx];
        pThreadData = *ppSlot;
        if(pThreadData)
        {
            if(pDataVector)
                ownVector_PushBack(pDataVector, &pThreadData);
            if(bClear)
                *ppSlot = 0;
        }
    }

    if(bClear)
        ((int*)pTls->statesVector.m_pBuffer)[dataIdx] = 0;
    ownMutex_Unlock(&pTls->mutex);

    return OWN_STATUS_OK;
}
472
/* Collects the non-NULL data pointers that threads have stored under dataIdx
   into pDataVector, without clearing them.
   Returns OWN_STATUS_FAIL when pDataVector is NULL, otherwise the status of
   ownTlsStorage_DataVector. */
static int ownTlsStorage_GetDataVector(OwnTlsStorage *pTls, size_t dataIdx, OwnVector *pDataVector)
{
    int status = OWN_STATUS_FAIL;

    if(pDataVector)
        status = ownTlsStorage_DataVector(pTls, dataIdx, pDataVector, 0);

    return status;
}
480
/* Clears the pointers stored under dataIdx in every thread and marks the
   index as unused. The pointed-to data itself is not freed here. */
static int ownTlsStorage_ResetData(OwnTlsStorage *pTls, size_t dataIdx)
{
    const int bClear = 1;

    return ownTlsStorage_DataVector(pTls, dataIdx, 0, bClear);
}
485
/* Returns the pointer the calling thread has stored under dataIdx, or 0 when
   the index is out of range or the thread has not stored anything there. */
static void* ownTlsStorage_GetData(OwnTlsStorage *pTls, size_t dataIdx)
{
    OwnTlsStorageTD *pThreadData;

    if(pTls->statesVector.m_size <= dataIdx)
        return 0;

    pThreadData = (OwnTlsStorageTD*)ownTlsCore_GetData(&pTls->tls);
    if(!pThreadData)
        return 0; /* This thread has no record yet */

    if(pThreadData->dataVector.m_size <= dataIdx)
        return 0; /* The record is shorter than the requested index */

    return ((void**)(pThreadData->dataVector.m_pBuffer))[dataIdx];
}
498
/* Stores pData under dataIdx for the calling thread.

   On the first call from a thread a per-thread record is allocated, bound to
   the TLS key and registered in the storage's thread vector. The thread's
   data vector is extended with NULL entries up to dataIdx when needed.
   Returns OWN_STATUS_OK on success; OWN_STATUS_FAIL if the request is rejected,
   the record cannot be allocated, or it cannot be bound to the TLS key. */
static int ownTlsStorage_SetData(OwnTlsStorage *pTls, size_t dataIdx, void* pData)
{
    OwnTlsStorageTD *pThreadData = 0;

    /* NOTE(review): only the combination "index out of range AND NULL data" is
       rejected; an out-of-range index with non-NULL data is accepted. Confirm
       whether this is intended. */
    if(pTls->statesVector.m_size <= dataIdx && !pData)
        return OWN_STATUS_FAIL;

    pThreadData = (OwnTlsStorageTD*)ownTlsCore_GetData(&pTls->tls);
    if(!pThreadData)
    {
        pThreadData = (OwnTlsStorageTD*)OWN_SAFE_MALLOC(sizeof(OwnTlsStorageTD));
        if(!pThreadData)
            return OWN_STATUS_FAIL;

        ownVector_Init(&pThreadData->dataVector, sizeof(void*), 32);

        /* If the record cannot be bound to the thread it would never be found
           again and every later call would allocate another one, so undo the
           allocation and report the failure instead of registering it. */
        if(ownTlsCore_SetData(&pTls->tls, pThreadData) != OWN_STATUS_OK)
        {
            ownVector_Release(&pThreadData->dataVector);
            OWN_SAFE_FREE(pThreadData);
            return OWN_STATUS_FAIL;
        }

        ownMutex_Lock(&pTls->mutex);
        pThreadData->index = pTls->threadsVector.m_size;
        ownVector_PushBack(&pTls->threadsVector, &pThreadData);
        ownMutex_Unlock(&pTls->mutex);
    }

    if(dataIdx >= pThreadData->dataVector.m_size)
    {
        /* Pad with NULL entries so that dataIdx becomes a valid position */
        void *null = NULL;
        ownMutex_Lock(&pTls->mutex);
        while(dataIdx >= pThreadData->dataVector.m_size)
            ownVector_PushBack(&pThreadData->dataVector, &null);
        ownMutex_Unlock(&pTls->mutex);
    }
    ((void**)(pThreadData->dataVector.m_pBuffer))[dataIdx] = pData;

    return OWN_STATUS_OK;
}
534
ownGlobalTlsStorage(int bRelease)535 static OwnTlsStorage* ownGlobalTlsStorage(int bRelease)
536 {
537 static OwnTlsStorage *pStorage = NULL;
538 if(!pStorage && ownGlobalGetInitState() == OWN_STATE_INITIALIZATION)
539 {
540 pStorage = (OwnTlsStorage*)OWN_SAFE_MALLOC(sizeof(OwnTlsStorage));
541 if(!pStorage)
542 return NULL;
543
544 ownTlsStorage_Init(pStorage);
545 }
546 else if(bRelease && pStorage)
547 {
548 ownTlsStorage_Release(pStorage);
549 OWN_SAFE_FREE(pStorage);
550 pStorage = 0;
551 }
552 return pStorage;
553 }
554
ownGlobalGetTlsStorage(void)555 static OwnTlsStorage* ownGlobalGetTlsStorage(void)
556 {
557 return ownGlobalTlsStorage(0);
558 }
559 #endif
560
561 /* /////////////////////////////////////////////////////////////////////////////
562 // IwTls - TLS data storage interface
563 ///////////////////////////////////////////////////////////////////////////// */
iwTls_Init(IwTls * pTls,IwTlsDestructor destructor)564 IW_DECL(IppStatus) iwTls_Init(IwTls *pTls, IwTlsDestructor destructor)
565 {
566 #if IW_ENABLE_TLS
567 OwnTlsStorage *pStorage;
568
569 if(!pTls || !destructor)
570 return ippStsNullPtrErr;
571
572 pTls->m_idx = OWN_IDX_INVALID;
573 pTls->m_desctuctor = destructor;
574 pTls->m_pTlsStorage = NULL;
575
576 pStorage = ownGlobalGetTlsStorage();
577 if(!pStorage) // No global storage available, create new storage
578 {
579 pStorage = (OwnTlsStorage*)OWN_SAFE_MALLOC(sizeof(OwnTlsStorage));
580 if(!pStorage)
581 return ippStsMemAllocErr;
582
583 if(ownTlsStorage_Init(pStorage) < OWN_STATUS_OK)
584 {
585 OWN_SAFE_FREE(pStorage);
586 return ippStsErr;
587 }
588
589 pTls->m_idx = ownTlsStorage_ReserveDataIndex(pStorage);
590 if(pTls->m_idx == OWN_IDX_INVALID)
591 {
592 ownTlsStorage_Release(pStorage);
593 OWN_SAFE_FREE(pStorage);
594 return ippStsErr;
595 }
596 pTls->m_pTlsStorage = pStorage;
597 }
598 else
599 {
600 pTls->m_idx = ownTlsStorage_ReserveDataIndex(pStorage);
601 if(pTls->m_idx == OWN_IDX_INVALID)
602 return ippStsErr;
603 }
604
605 return ippStsNoErr;
606 #else
607 if(!pTls || !destructor)
608 return ippStsNullPtrErr;
609
610 pTls->m_idx = OWN_IDX_INVALID;
611 pTls->m_desctuctor = destructor;
612 pTls->m_pTlsStorage = NULL;
613
614 return ippStsUnknownFeature;
615 #endif
616 }
617
iwTls_Set(IwTls * pTls,void * pData)618 IW_DECL(IppStatus) iwTls_Set(IwTls *pTls, void *pData)
619 {
620 #if IW_ENABLE_TLS
621 OwnTlsStorage *pStorage;
622 void *pOldData;
623
624 if(!pTls)
625 return ippStsNullPtrErr;
626 if(pTls->m_idx == OWN_IDX_INVALID)
627 return ippStsErr;
628
629 pStorage = ownGlobalGetTlsStorage();
630 if(!pStorage)
631 {
632 pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
633 if(!pStorage)
634 return ippStsErr;
635 }
636
637 pOldData = ownTlsStorage_GetData(pStorage, pTls->m_idx);
638 if(pOldData != pData)
639 {
640 if(pTls->m_desctuctor && pOldData)
641 pTls->m_desctuctor(pOldData);
642
643 if(ownTlsStorage_SetData(pStorage, pTls->m_idx, pData) != OWN_STATUS_OK)
644 return ippStsErr;
645 }
646 return ippStsNoErr;
647 #else
648 if(!pTls)
649 return ippStsNullPtrErr;
650
651 pTls->m_pTlsStorage = pData;
652
653 return ippStsUnknownFeature;
654 #endif
655 }
656
iwTls_Get(const IwTls * pTls)657 IW_DECL(void*) iwTls_Get(const IwTls *pTls)
658 {
659 #if IW_ENABLE_TLS
660 OwnTlsStorage *pStorage;
661
662 if(!pTls)
663 return NULL;
664 if(pTls->m_idx == OWN_IDX_INVALID)
665 return NULL;
666
667 pStorage = ownGlobalGetTlsStorage();
668 if(!pStorage)
669 {
670 pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
671 if(!pStorage)
672 return NULL;
673 }
674
675 return ownTlsStorage_GetData(pStorage, pTls->m_idx);
676 #else
677 if(!pTls)
678 return NULL;
679
680 return pTls->m_pTlsStorage;
681 #endif
682 }
683
iwTls_ReleaseData(IwTls * pTls)684 IW_DECL(IppStatus) iwTls_ReleaseData(IwTls *pTls)
685 {
686 #if IW_ENABLE_TLS
687 OwnTlsStorage *pStorage;
688
689 if(!pTls)
690 return ippStsNullPtrErr;
691
692 pStorage = ownGlobalGetTlsStorage();
693 if(!pStorage)
694 {
695 pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
696 if(!pStorage)
697 return ippStsErr;
698 }
699
700 if(pTls->m_idx != OWN_IDX_INVALID)
701 {
702 if(pTls->m_desctuctor)
703 {
704 size_t i;
705 OwnVector vData;
706 ownVector_Init(&vData, sizeof(void*), 16);
707
708 if(ownTlsStorage_GetDataVector(pStorage, pTls->m_idx, &vData) != OWN_STATUS_OK)
709 {
710 ownVector_Release(&vData);
711 return ippStsErr;
712 }
713
714 for(i = 0; i < vData.m_size; i++)
715 {
716 pTls->m_desctuctor(((void**)vData.m_pBuffer)[i]);
717 }
718 ownVector_Release(&vData);
719 }
720
721 if(ownTlsStorage_ResetData(pStorage, pTls->m_idx) != OWN_STATUS_OK)
722 return ippStsErr;
723 }
724
725 return ippStsNoErr;
726 #else
727 if(!pTls)
728 return ippStsNullPtrErr;
729
730 pTls->m_desctuctor(pTls->m_pTlsStorage);
731
732 return ippStsUnknownFeature;
733 #endif
734 }
735
iwTls_Release(IwTls * pTls)736 IW_DECL(IppStatus) iwTls_Release(IwTls *pTls)
737 {
738 #if IW_ENABLE_TLS
739 OwnTlsStorage *pStorage;
740
741 if(!pTls)
742 return ippStsNullPtrErr;
743
744 pStorage = ownGlobalGetTlsStorage();
745 if(!pStorage)
746 {
747 pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
748 if(!pStorage)
749 return ippStsNoErr;
750 }
751
752 if(pTls->m_idx != OWN_IDX_INVALID)
753 {
754 iwTls_ReleaseData(pTls);
755 pTls->m_idx = OWN_IDX_INVALID;
756 }
757
758 if(pTls->m_pTlsStorage)
759 {
760 ownTlsStorage_Release(pStorage);
761 OWN_SAFE_FREE(pStorage);
762 pTls->m_pTlsStorage = NULL;
763 }
764 return ippStsNoErr;
765 #else
766 if(!pTls)
767 return ippStsNullPtrErr;
768
769 pTls->m_desctuctor(pTls->m_pTlsStorage);
770
771 return ippStsUnknownFeature;
772 #endif
773 }
774
775 /* /////////////////////////////////////////////////////////////////////////////
776 // OwnBufferPool - Memory pool manager
777 ///////////////////////////////////////////////////////////////////////////// */
778 #if OWN_ENABLE_BUFFER_POOL
779
780 #define OWN_BUFFER_POOL_INIT_SIZE 131072 /* 128KB initial chunk size */
781 #define OWN_BUFFER_POOL_MAX_RETAIN_CYCLES 4 /* Maximum number of release cycles before buffer will be actually removed */
782
/* One chunk tracked by the buffer pool. */
typedef struct _OwnBufferPoolEntry
{
    void  *ptr;    /* Chunk memory; NULL marks an empty entry */
    size_t size;   /* Chunk size in bytes */
    int    locked; /* Non-zero while the chunk is handed out */
    int    cycle;  /* Pool cycle counter value recorded at the last soft release */
} OwnBufferPoolEntry;
790
791 typedef struct _OwnBufferPool
792 {
793 OwnVector poolVector;
794 size_t memoryTotal;
795 int chunks;
796 int chunksActive;
797 int allocations;
798 int releases;
799 int cycles;
800 } OwnBufferPool;
801
/* Resets the pool object, zeroes all counters and prepares the chunk table
   with room reserved for 8 entries. */
static void ownBufferPool_Init(OwnBufferPool *pPool)
{
    OWN_SAFE_RESET(pPool, sizeof(*pPool));

    /* Counters */
    pPool->cycles       = 0;
    pPool->releases     = 0;
    pPool->allocations  = 0;
    pPool->chunksActive = 0;
    pPool->chunks       = 0;
    pPool->memoryTotal  = 0;

    /* Chunk table */
    ownVector_Init(&pPool->poolVector, sizeof(OwnBufferPoolEntry), 8);
}
814
/* Frees pool chunks and turns their entries into empty ones.

   pPool  - pool to clean
   bForce - zero: free only chunks that are not handed out;
            non-zero: free every chunk, including locked ones
   Returns OWN_STATUS_OK. */
static int ownBufferPool_CleanUp(OwnBufferPool *pPool, int bForce)
{
    OwnBufferPoolEntry *pEntry;
    size_t i;

    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i];

        if(pEntry->ptr && (!pEntry->locked || bForce))
        {
            /* A forced release of a handed-out chunk must also drop its lock,
               otherwise the entry stays "locked" with no memory behind it and
               the active-chunk counter never goes back down. */
            if(pEntry->locked)
            {
                pPool->chunksActive--;
                pEntry->locked = 0;
            }

            pPool->chunks--;
            pPool->releases++;
            pPool->memoryTotal -= pEntry->size;

            OWN_SAFE_FREE(pEntry->ptr);
            pEntry->size  = 0;
            pEntry->ptr   = 0;
            pEntry->cycle = 0;
        }
    }
    return OWN_STATUS_OK;
}
839
/* Frees all chunks of the pool (locked ones included) and its chunk table.
   The pool object itself is not freed. A NULL pool is ignored. */
static void ownBufferPool_Release(OwnBufferPool *pPool)
{
    if(pPool)
    {
        const int bForce = 1;

        ownBufferPool_CleanUp(pPool, bForce);
        ownVector_Release(&pPool->poolVector);
    }
}
848
/* Hands out a buffer of at least `size` bytes from the pool.

   An unlocked chunk that is large enough is reused when one exists. Otherwise
   a new chunk of max(size, OWN_BUFFER_POOL_INIT_SIZE) bytes is allocated and
   recorded in an empty entry (the table grows when there is none).
   Returns the buffer, or 0 when size is 0 or memory could not be obtained;
   in the failure case the pool bookkeeping is left unchanged. */
static void* ownBufferPool_GetBuffer(OwnBufferPool *pPool, size_t size)
{
    OwnBufferPoolEntry *pEntry;
    void  *pChunk;
    size_t chunkSize;
    size_t i = 0;
    int    idx = -1;

    if(!size)
        return 0;

    /* Reuse an idle chunk that is big enough */
    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i];
        if(pEntry->ptr && pEntry->size >= size && !pEntry->locked)
        {
            pPool->chunksActive++;
            pEntry->locked = 1;
            return pEntry->ptr;
        }
    }

    /* Allocate the chunk before touching any bookkeeping, so that a failed
       allocation cannot leave a locked entry without memory or bump counters. */
    chunkSize = size;
    if(chunkSize < OWN_BUFFER_POOL_INIT_SIZE)
        chunkSize = OWN_BUFFER_POOL_INIT_SIZE;

    pChunk = OWN_SAFE_MALLOC(chunkSize);
    if(!pChunk)
        return 0;

    /* Find empty index */
    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        if(!((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i].ptr)
            idx = (int)i;
    }
    if(idx < 0)
    {
        idx = (int)pPool->poolVector.m_size;
        ownVector_Resize(&pPool->poolVector, pPool->poolVector.m_size+1);
        if(!pPool->poolVector.m_pBuffer)
        {
            /* The entry table could not be allocated: give the chunk back */
            OWN_SAFE_FREE(pChunk);
            return 0;
        }
    }

    /* Record the new chunk */
    pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[idx];
    pEntry->ptr    = pChunk;
    pEntry->size   = chunkSize;
    pEntry->locked = 1;
    pPool->memoryTotal += pEntry->size;
    pPool->allocations++;
    pPool->chunks++;
    pPool->chunksActive++;

    return pEntry->ptr;
}
896
/* Returns a buffer to the pool.

   pPool   - pool that handed the buffer out
   pBuffer - buffer to release
   bHard   - non-zero: free the chunk right away; zero: keep it for reuse and
             stamp it with the current cycle counter

   While searching, idle chunks visited before the match whose stamp is more
   than OWN_BUFFER_POOL_MAX_RETAIN_CYCLES cycles old are freed.
   Returns OWN_STATUS_OK when the buffer was found locked in the pool,
   OWN_STATUS_FAIL when pBuffer is NULL or not found. */
static int ownBufferPool_ReleaseBuffer(OwnBufferPool *pPool, void *pBuffer, int bHard)
{
    size_t pos;

    if(!pBuffer)
        return OWN_STATUS_FAIL;

    for(pos = 0; pos < pPool->poolVector.m_size; pos++)
    {
        OwnBufferPoolEntry *pCur   = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[pos];
        const int           bMatch = (pCur->ptr == pBuffer && pCur->locked);

        if(bMatch)
        {
            pPool->chunksActive--;
            pCur->locked = 0;
        }
        else if(!(pCur->ptr && !pCur->locked && (pPool->cycles - pCur->cycle) > OWN_BUFFER_POOL_MAX_RETAIN_CYCLES))
        {
            continue; /* Neither the requested buffer nor an expired idle chunk */
        }

        if(!bMatch || bHard)
        {
            /* Free the chunk and turn the entry into an empty one */
            pPool->chunks--;
            pPool->releases++;
            pPool->memoryTotal -= pCur->size;

            OWN_SAFE_FREE(pCur->ptr);
            pCur->ptr   = 0;
            pCur->size  = 0;
            pCur->cycle = 0;
        }
        else
        {
            /* Soft release: remember when the chunk became idle */
            pCur->cycle = pPool->cycles;
        }

        if(bMatch)
        {
            pPool->cycles++;
            return OWN_STATUS_OK;
        }
    }
    return OWN_STATUS_FAIL;
}
944
ownGlobalBufferRelease(size_t idx,int full)945 static void ownGlobalBufferRelease(size_t idx, int full)
946 {
947 if(idx == OWN_IDX_INVALID)
948 return;
949
950 {
951 size_t i;
952 OwnVector dataVector;
953 OwnTlsStorage *pTls = ownGlobalGetTlsStorage(); /* Get global TLS state */
954 if(!pTls)
955 return;
956
957 ownVector_Init(&dataVector, sizeof(void*), 32);
958 if(ownTlsStorage_GetDataVector(pTls, idx, &dataVector) != OWN_STATUS_OK)
959 return;
960 for(i = 0; i < dataVector.m_size; i++)
961 {
962 OwnBufferPool *pPool = ((OwnBufferPool**)dataVector.m_pBuffer)[i];
963 if(pPool)
964 {
965 ownBufferPool_CleanUp(pPool, full); /* Release unused buffer pool memory */
966 if(full)
967 {
968 ownVector_Release(&pPool->poolVector); /* Release buffers vector */
969 OWN_SAFE_FREE(pPool); /* Release object itself */
970 }
971 }
972 }
973 if(full)
974 ownTlsStorage_ResetData(pTls, idx);
975 ownVector_Release(&dataVector);
976 }
977 }
978
ownGlobalBufferPoolIdx(int bRelease)979 static size_t ownGlobalBufferPoolIdx(int bRelease)
980 {
981 static size_t idx = OWN_IDX_INVALID;
982 if(idx == OWN_IDX_INVALID && ownGlobalGetInitState() == OWN_STATE_INITIALIZATION)
983 {
984 OwnTlsStorage *pTls = ownGlobalGetTlsStorage(); /* Get global TLS state */
985 if(!pTls)
986 return idx; /* Global TLS was not initialized */
987
988 idx = ownTlsStorage_ReserveDataIndex(pTls);
989 }
990 else if(bRelease && idx != OWN_IDX_INVALID)
991 {
992 ownGlobalBufferRelease(idx, 1);
993 idx = OWN_IDX_INVALID;
994 }
995 return idx;
996 }
997
ownGlobalGetBufferPool(void)998 static OwnBufferPool* ownGlobalGetBufferPool(void)
999 {
1000 size_t idx = ownGlobalBufferPoolIdx(0);
1001 OwnBufferPool *pPool = 0;
1002 OwnTlsStorage *pTls = ownGlobalGetTlsStorage(); /* Get global TLS state */
1003 if(!pTls)
1004 return 0; /* Global TLS was not initialized */
1005
1006 if(idx == OWN_IDX_INVALID)
1007 return 0;
1008
1009 pPool = (OwnBufferPool*)ownTlsStorage_GetData(pTls, idx);
1010 if(!pPool)
1011 {
1012 pPool = (OwnBufferPool*)OWN_SAFE_MALLOC(sizeof(OwnBufferPool));
1013 if(!pPool)
1014 return NULL;
1015
1016 ownBufferPool_Init(pPool);
1017 ownTlsStorage_SetData(pTls, idx, pPool);
1018 }
1019 return pPool;
1020 }
1021 #endif
1022
1023 /* /////////////////////////////////////////////////////////////////////////////
1024 // OwnBufferPool - External memory interface
1025 ///////////////////////////////////////////////////////////////////////////// */
ownSharedMalloc(IwSize size)1026 void* IPP_STDCALL ownSharedMalloc(IwSize size)
1027 {
1028 #if OWN_ENABLE_BUFFER_POOL
1029 OwnBufferPool *pPool = ownGlobalGetBufferPool(); /* Get global buffer pool state */
1030 if(pPool)
1031 {
1032 void *pBuffer = ownBufferPool_GetBuffer(pPool, size);
1033 if(pBuffer)
1034 return pBuffer;
1035 }
1036 #endif
1037 /* Uninitialized or error in buffer pool, use simple malloc */
1038 return ippMalloc_L(size);
1039 }
1040
ownSharedFree(void * pBuffer)1041 void IPP_STDCALL ownSharedFree(void* pBuffer)
1042 {
1043 #if OWN_ENABLE_BUFFER_POOL
1044 OwnBufferPool *pPool = ownGlobalGetBufferPool(); /* Get global buffer pool state */
1045 if(pPool)
1046 {
1047 if(ownBufferPool_ReleaseBuffer(pPool, pBuffer, 0) >= 0)
1048 return;
1049 }
1050 #endif
1051
1052 /* Uninitialized or error in buffer pool, use simple free */
1053 ippFree(pBuffer);
1054 }
1055
1056 /* /////////////////////////////////////////////////////////////////////////////
1057 // IW library-scope objects initialization
1058 ///////////////////////////////////////////////////////////////////////////// */
1059 #if OWN_ENABLE_BUFFER_POOL
iwInit()1060 IW_DECL(void) iwInit()
1061 {
1062 int state = ownGlobalGetInitState();
1063 if(state == OWN_STATE_NOT_INITIALIZED)
1064 {
1065 ownGlobalSetInitState(OWN_STATE_INITIALIZATION);
1066
1067 ownGlobalGetTlsStorage(); /* Initialize tls static object */
1068 ownGlobalGetBufferPool(); /* initialize global buffer pool */
1069
1070 ownGlobalSetInitState(OWN_STATE_INITIALIZED);
1071 }
1072 }
iwCleanup()1073 IW_DECL(void) iwCleanup()
1074 {
1075 ownGlobalBufferRelease(ownGlobalBufferPoolIdx(0), 0);
1076 }
iwRelease()1077 IW_DECL(void) iwRelease()
1078 {
1079 int state = ownGlobalGetInitState();
1080 if(state == OWN_STATE_INITIALIZED)
1081 {
1082 ownGlobalBufferPoolIdx(1);
1083 ownGlobalTlsStorage(1);
1084
1085 ownGlobalSetInitState(OWN_STATE_NOT_INITIALIZED);
1086 }
1087 }
1088 #endif
1089
iwSetThreadsNum(int threads)1090 IW_DECL(void) iwSetThreadsNum(int threads)
1091 {
1092 #if IW_ENABLE_THREADING_LAYER
1093 ippSetNumThreads_LT(threads);
1094 #else
1095 (void)threads;
1096 #endif
1097 }
iwGetThreadsNum()1098 IW_DECL(int) iwGetThreadsNum()
1099 {
1100 #if IW_ENABLE_THREADING_LAYER
1101 int threads;
1102 ippGetNumThreads_LT(&threads);
1103 return threads;
1104 #else
1105 return 0;
1106 #endif
1107 }
iwGetThreadsNumDefault()1108 IW_DECL(int) iwGetThreadsNumDefault()
1109 {
1110 #if IW_ENABLE_THREADING_LAYER
1111 return IPP_MIN(omp_get_num_procs(), omp_get_max_threads());
1112 #else
1113 return 0;
1114 #endif
1115 }
1116
1117 #if OWN_ENABLE_BUFFER_POOL
iwGetDebugInfo(IwStateDebugInfo * pInfo)1118 IW_DECL(void) iwGetDebugInfo(IwStateDebugInfo *pInfo)
1119 {
1120 OwnTlsStorage *pTls = NULL;
1121 OwnBufferPool *pPool = NULL;
1122
1123 pTls = ownGlobalGetTlsStorage();
1124 if(pTls)
1125 {
1126 pInfo->m_tlsInitialized = 1;
1127 pInfo->m_tlsDataIndexesMax = pTls->statesVector.m_size;
1128 pInfo->m_tlsThreadsMax = pTls->threadsVector.m_size;
1129 }
1130 else
1131 pInfo->m_tlsInitialized = 0;
1132
1133 pPool = ownGlobalGetBufferPool(); /* Get global buffer pool state */
1134 if(pPool)
1135 {
1136 size_t i;
1137 OwnBufferPoolEntry *pEntry;
1138
1139 pInfo->m_poolInitialized = 1;
1140 pInfo->m_poolMemoryTotal = pPool->memoryTotal;
1141 pInfo->m_poolChunks = pPool->chunks;
1142 pInfo->m_poolChunksLocked = pPool->chunksActive;
1143 pInfo->m_poolAllocations = pPool->allocations;
1144 pInfo->m_poolReleases = pPool->releases;
1145 pInfo->m_poolEntries = pPool->poolVector.m_size;
1146 for(i = 0; i < IPP_MIN(pPool->poolVector.m_size, 16); i++)
1147 {
1148 pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i];
1149 pInfo->m_poolEntrySizes[i] = pEntry->size;
1150 }
1151 }
1152 else
1153 pInfo->m_poolInitialized = 0;
1154 }
1155 #endif
1156