1 /*******************************************************************************
2 * Copyright 2016-2019 Intel Corporation.
3 *
4 * This software and the related documents are Intel copyrighted  materials,  and
5 * your use of  them is  governed by the  express license  under which  they were
6 * provided to you (License).  Unless the License provides otherwise, you may not
7 * use, modify, copy, publish, distribute,  disclose or transmit this software or
8 * the related documents without Intel's prior written permission.
9 *
10 * This software and the related documents  are provided as  is,  with no express
11 * or implied  warranties,  other  than those  that are  expressly stated  in the
12 * License.
13 *******************************************************************************/
14 
15 #include "iw_own.h"
16 
17 #if defined _WIN32
18 #include <windows.h>
19 #include <intrin.h>
20 #else
21 #if IW_ENABLE_TLS
22 #ifndef _GNU_SOURCE
23 #define _GNU_SOURCE 1 /* for PTHREAD_MUTEX_RECURSIVE */
24 #endif
25 #ifndef __USE_UNIX98
26 #define __USE_UNIX98 1 /* for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
27 #endif
28 #include <pthread.h>
29 #endif
30 #include <stdlib.h>
31 #ifndef __APPLE__
32 #include <malloc.h>
33 #endif
34 #endif
35 
36 #if IW_ENABLE_THREADING_LAYER
37 #include "omp.h"
38 #endif
39 
40 #define OWN_ENABLE_BUFFER_POOL 0
41 
42 /* /////////////////////////////////////////////////////////////////////////////
43 //                   Global initialization state
44 ///////////////////////////////////////////////////////////////////////////// */
45 #define OWN_STATE_NOT_INITIALIZED 0
46 #define OWN_STATE_INITIALIZATION  1
47 #define OWN_STATE_INITIALIZED     2
48 
/* Gives access to the library-wide initialization state.
   The state is kept in a function-local static that starts at 0 (OWN_STATE_NOT_INITIALIZED).
   Returns a pointer to that variable, usable for both reading and writing. */
static int* ownGlobalInitState(void)
{
    static int s_initState = 0;
    int *pState = &s_initState;

    return pState;
}
/* Reads the current value of the global initialization state
   (callers compare it against the OWN_STATE_* constants). */
static int ownGlobalGetInitState(void)
{
    const int *pState = ownGlobalInitState();

    return *pState;
}
/* Overwrites the global initialization state with the given value. */
static void ownGlobalSetInitState(int state)
{
    int *pState = ownGlobalInitState();

    *pState = state;
}
62 
63 /* /////////////////////////////////////////////////////////////////////////////
64 //                   ownAlignedMalloc
65 ///////////////////////////////////////////////////////////////////////////// */
ownAlignedMalloc(size_t iSize,size_t iAlign)66 void* IPP_STDCALL ownAlignedMalloc(size_t iSize, size_t iAlign)
67 {
68 #if defined _WIN32
69     return _aligned_malloc(iSize, iAlign);
70 #elif defined __APPLE__
71     if(iAlign <= 1)
72         return malloc(iSize);
73     else
74     {
75         void *pBuffer  = malloc(iSize + (iAlign - 1) + sizeof(void*));
76         char *pABuffer = ((char*)pBuffer) + sizeof(void*);
77 
78         pABuffer += (iAlign - (((size_t)pABuffer) & (iAlign - 1)));
79 
80         ((void**)pABuffer)[-1] = pBuffer;
81         return pABuffer;
82     }
83 #else
84     return memalign(iAlign, iSize);
85 #endif
86 }
ownAlignedFree(void * pBuffer)87 void IPP_STDCALL ownAlignedFree(void* pBuffer)
88 {
89 #if defined _WIN32
90     _aligned_free(pBuffer);
91 #elif defined __APPLE__
92     free(((void**)pBuffer)[-1]);
93 #else
94     free(pBuffer);
95 #endif
96 }
97 
98 /* /////////////////////////////////////////////////////////////////////////////
99 //                   OwnMutex
100 ///////////////////////////////////////////////////////////////////////////// */
101 #if IW_ENABLE_TLS
/* Thin wrapper around the native mutex object of the platform */
typedef struct _OwnMutex
{
#ifdef _WIN32
    CRITICAL_SECTION mutex; /* Windows critical section */
#else
    pthread_mutex_t  mutex; /* POSIX threads mutex */
#endif
} OwnMutex;
110 
/* Initializes the mutex object.
   Windows: creates a critical section (InitializeCriticalSectionEx when _WIN32_WINNT >= 0x0600,
            otherwise InitializeCriticalSection guarded by a structured exception handler).
   Others:  creates a pthread mutex of type PTHREAD_MUTEX_RECURSIVE.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if any step fails. */
static int ownMutex_Init(OwnMutex *pMutex)
{
#if defined _WIN32
#if _WIN32_WINNT >= 0x0600
    return InitializeCriticalSectionEx(&pMutex->mutex, 0, 0) ? OWN_STATUS_OK : OWN_STATUS_FAIL;
#else
    int status = OWN_STATUS_OK;
    __try
    {
        InitializeCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
    return status;
#endif
#else
    pthread_mutexattr_t attr;

    if(pthread_mutexattr_init(&attr))
        return OWN_STATUS_FAIL;

    /* Both failures below need the same clean-up: drop the attribute object */
    if(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
       pthread_mutex_init(&pMutex->mutex, &attr))
    {
        pthread_mutexattr_destroy(&attr);
        return OWN_STATUS_FAIL;
    }

    /* The mutex exists at this point, so it has to be destroyed if the attribute clean-up fails */
    if(pthread_mutexattr_destroy(&attr))
    {
        pthread_mutex_destroy(&pMutex->mutex);
        return OWN_STATUS_FAIL;
    }
    return OWN_STATUS_OK;
#endif
}
151 
/* Destroys the native mutex object.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if the platform call reports an error
   (on Windows: raises a structured exception). */
static int ownMutex_Release(OwnMutex *pMutex)
{
    int status = OWN_STATUS_OK;
#if defined _WIN32
    __try
    {
        DeleteCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
#else
    if(pthread_mutex_destroy(&pMutex->mutex))
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
170 
/* Acquires the mutex (EnterCriticalSection on Windows, pthread_mutex_lock elsewhere).
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if the platform call reports an error
   (on Windows: raises a structured exception). */
static int ownMutex_Lock(OwnMutex *pMutex)
{
    int status = OWN_STATUS_OK;
#if defined _WIN32
    __try
    {
        EnterCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
#else
    if(pthread_mutex_lock(&pMutex->mutex))
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
189 
/* Releases the mutex (LeaveCriticalSection on Windows, pthread_mutex_unlock elsewhere).
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if the platform call reports an error
   (on Windows: raises a structured exception). */
static int ownMutex_Unlock(OwnMutex *pMutex)
{
    int status = OWN_STATUS_OK;
#if defined _WIN32
    __try
    {
        LeaveCriticalSection(&pMutex->mutex);
    }
    __except(EXCEPTION_EXECUTE_HANDLER)
    {
        status = OWN_STATUS_FAIL;
    }
#else
    if(pthread_mutex_unlock(&pMutex->mutex))
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
208 #endif
209 
210 /* /////////////////////////////////////////////////////////////////////////////
211 //                   OwnVector - C Vector
212 ///////////////////////////////////////////////////////////////////////////// */
ownVector_Reserve(OwnVector * pVector,size_t reserveSize)213 IW_DECL(void) ownVector_Reserve(OwnVector *pVector, size_t reserveSize)
214 {
215     if(reserveSize*pVector->m_elemSize > pVector->m_bufferLen)
216     {
217         if(pVector->m_pBuffer)
218         {
219             size_t newBufferLen = reserveSize*pVector->m_elemSize;
220             Ipp8u *pNewBuffer   = (Ipp8u*)OWN_SAFE_MALLOC(newBufferLen);
221 
222             OWN_SAFE_COPY(pVector->m_pBuffer, pNewBuffer, pVector->m_bufferLen);
223             OWN_SAFE_RESET(pNewBuffer + pVector->m_bufferLen, (newBufferLen-pVector->m_bufferLen));
224             OWN_SAFE_FREE(pVector->m_pBuffer);
225 
226             pVector->m_pBuffer   = pNewBuffer;
227             pVector->m_bufferLen = newBufferLen;
228         }
229         else
230         {
231             pVector->m_bufferLen = reserveSize*pVector->m_elemSize;
232             pVector->m_pBuffer   = (Ipp8u*)OWN_SAFE_MALLOC(pVector->m_bufferLen);
233             if(!pVector->m_pBuffer)
234             {
235                 pVector->m_pBuffer   = NULL;
236                 pVector->m_bufferLen = 0;
237                 return;
238             }
239             OWN_SAFE_RESET(pVector->m_pBuffer, pVector->m_bufferLen);
240         }
241     }
242 }
243 
ownVector_Init(OwnVector * pVector,size_t elemSize,size_t reserve)244 IW_DECL(void) ownVector_Init(OwnVector *pVector, size_t elemSize, size_t reserve)
245 {
246     OWN_SAFE_RESET(pVector, sizeof(*pVector));
247 
248     pVector->m_elemSize = elemSize;
249     ownVector_Reserve(pVector, reserve);
250 }
251 
ownVector_Release(OwnVector * pVector)252 IW_DECL(void) ownVector_Release(OwnVector *pVector)
253 {
254     if(pVector->m_pBuffer)
255     {
256         OWN_SAFE_FREE(pVector->m_pBuffer);
257         pVector->m_pBuffer = 0;
258     }
259     pVector->m_elemSize = pVector->m_size = pVector->m_bufferLen = 0;
260 }
261 
ownVector_Resize(OwnVector * pVector,size_t newSize)262 IW_DECL(void) ownVector_Resize(OwnVector *pVector, size_t newSize)
263 {
264     ownVector_Reserve(pVector, newSize);
265     pVector->m_size = newSize;
266 }
267 
ownVector_PushBack(OwnVector * pVector,void * pData)268 IW_DECL(void) ownVector_PushBack(OwnVector *pVector, void *pData)
269 {
270     ownVector_Resize(pVector, pVector->m_size + 1);
271     OWN_SAFE_COPY(pData, pVector->m_pBuffer + pVector->m_elemSize*(pVector->m_size-1), pVector->m_elemSize);
272 }
273 
ownVector_PopBack(OwnVector * pVector,void * pData)274 IW_DECL(void) ownVector_PopBack(OwnVector *pVector, void *pData)
275 {
276     if(pVector->m_size && pData)
277     {
278         OWN_SAFE_COPY(pVector->m_pBuffer + pVector->m_elemSize*(pVector->m_size-1), pData, pVector->m_elemSize);
279         ownVector_Resize(pVector, pVector->m_size - 1);
280     }
281 }
282 
283 /* /////////////////////////////////////////////////////////////////////////////
284 //                   OwnTlsCore - TLS system abstraction
285 ///////////////////////////////////////////////////////////////////////////// */
286 #if IW_ENABLE_TLS
/* Thin wrapper around the native thread-local-storage key of the platform */
typedef struct _OwnTlsCore
{
#ifdef _WIN32
    DWORD tlsKey;         /* Index returned by TlsAlloc */
#else
    pthread_key_t tlsKey; /* Key created by pthread_key_create */
#endif
} OwnTlsCore;
295 
/* Allocates a native TLS key (TlsAlloc on Windows, pthread_key_create without a destructor elsewhere).
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if no key could be allocated. */
static int ownTlsCore_Init(OwnTlsCore *pTls)
{
    int status = OWN_STATUS_OK;

    OWN_SAFE_RESET(pTls, sizeof(*pTls));
#if defined _WIN32
    pTls->tlsKey = TlsAlloc();
    if(TLS_OUT_OF_INDEXES == pTls->tlsKey)
        status = OWN_STATUS_FAIL;
#else
    if(pthread_key_create(&pTls->tlsKey, NULL))
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
309 
/* Releases the native TLS key (TlsFree on Windows, pthread_key_delete elsewhere).
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if the platform call reports an error. */
static int ownTlsCore_Release(OwnTlsCore *pTls)
{
    int status = OWN_STATUS_OK;
#if defined _WIN32
    if(!TlsFree(pTls->tlsKey))
        status = OWN_STATUS_FAIL;
#else
    if(pthread_key_delete(pTls->tlsKey))
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
321 
/* Returns the pointer the calling thread has associated with the TLS key
   (the raw result of TlsGetValue / pthread_getspecific). */
static void* ownTlsCore_GetData(OwnTlsCore *pTls)
{
    void *pData;
#if defined _WIN32
    pData = TlsGetValue(pTls->tlsKey);
#else
    pData = pthread_getspecific(pTls->tlsKey);
#endif
    return pData;
}
330 
/* Associates pData with the TLS key for the calling thread
   (TlsSetValue on Windows, pthread_setspecific elsewhere).
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if the platform call reports an error. */
static int ownTlsCore_SetData(OwnTlsCore *pTls, void *pData)
{
    int status = OWN_STATUS_OK;
#if defined _WIN32
    if(!TlsSetValue(pTls->tlsKey, pData))
        status = OWN_STATUS_FAIL;
#else
    if(pthread_setspecific(pTls->tlsKey, pData))
        status = OWN_STATUS_FAIL;
#endif
    return status;
}
342 #endif
343 
344 /* /////////////////////////////////////////////////////////////////////////////
345 //                   OwnTlsStorage - TLS data storage interface
346 ///////////////////////////////////////////////////////////////////////////// */
347 #if IW_ENABLE_TLS
348 typedef struct _OwnTlsStorage
349 {
350     OwnVector    statesVector;
351     OwnVector    threadsVector;
352 
353     OwnTlsCore   tls;
354     OwnMutex     mutex;
355 } OwnTlsStorage;
356 
357 typedef struct _OwnTlsStorageTD
358 {
359     OwnVector  dataVector;  /* Thread data array */
360     size_t    index;       /* Index of the thread */
361 } OwnTlsStorageTD;
362 
/* Initializes a TLS storage object: resets it, creates the mutex and the native TLS key,
   then sets up the states vector (int elements, 4 reserved) and the threads vector
   (OwnTlsStorageTD* elements, 16 reserved).
   Returns OWN_STATUS_OK on success. If the mutex or the TLS key cannot be created,
   OWN_STATUS_FAIL is returned and an already created mutex is released again. */
static int ownTlsStorage_Init(OwnTlsStorage *pTls)
{
    OWN_SAFE_RESET(pTls, sizeof(*pTls));

    if(ownMutex_Init(&pTls->mutex) < 0)
        return OWN_STATUS_FAIL;

    if(ownTlsCore_Init(&pTls->tls) >= 0)
    {
        ownVector_Init(&pTls->statesVector, sizeof(int), 4);
        ownVector_Init(&pTls->threadsVector, sizeof(OwnTlsStorageTD*), 16);
        return OWN_STATUS_OK;
    }

    /* TLS key creation failed: undo the mutex */
    ownMutex_Release(&pTls->mutex);
    return OWN_STATUS_FAIL;
}
380 
/* Releases everything owned by the storage: all per-thread records, both vectors,
   the native TLS key and the mutex. The storage object itself is not freed.

   Data pointers stored by the threads are owned by the user and must have been destroyed
   (reset to NULL) before this call; they are never freed here.

   All internal resources are released even when a problem is detected, so the storage
   must not be used afterwards regardless of the result.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if some thread still had a non-NULL
   data pointer or if releasing the TLS key or the mutex failed. */
static int ownTlsStorage_Release(OwnTlsStorage *pTls)
{
    int    status = OWN_STATUS_OK;
    size_t i, j;

    for(i = 0; i < pTls->threadsVector.m_size; i++)
    {
        OwnTlsStorageTD **ppThreads = (OwnTlsStorageTD**)pTls->threadsVector.m_pBuffer;
        OwnTlsStorageTD  *pThread   = ppThreads[i];
        if(!pThread)
            continue;

        /* Report data which was not destroyed externally, but keep cleaning up */
        for(j = 0; j < pThread->dataVector.m_size; j++)
        {
            if(((void**)pThread->dataVector.m_pBuffer)[j])
            {
                status = OWN_STATUS_FAIL;
                break;
            }
        }
        ownVector_Release(&pThread->dataVector);
        OWN_SAFE_FREE(pThread);
        ppThreads[i] = NULL; /* Do not leave a pointer to freed memory behind */
    }

    ownVector_Release(&pTls->statesVector);
    ownVector_Release(&pTls->threadsVector);

    if(ownTlsCore_Release(&pTls->tls) < 0)
        status = OWN_STATUS_FAIL;
    if(ownMutex_Release(&pTls->mutex) < 0)
        status = OWN_STATUS_FAIL;

    return status;
}
413 
/* Reserves a data index in the storage.
   The first index whose state is 0 is reused; if all indexes are taken the states vector
   is extended by one element. The reserved index is marked with state 1.
   The whole operation runs under the storage mutex.
   Returns the reserved index, or OWN_IDX_INVALID if the mutex cannot be locked or
   the states vector cannot be extended. */
static size_t ownTlsStorage_ReserveDataIndex(OwnTlsStorage *pTls)
{
    OwnVector *pStates = &pTls->statesVector;
    size_t     slot;

    if(ownMutex_Lock(&pTls->mutex) != OWN_STATUS_OK)
        return OWN_IDX_INVALID;

    /* Find unused slot */
    for(slot = 0; slot < pStates->m_size; slot++)
    {
        if(!((int*)pStates->m_pBuffer)[slot])
            break;
    }

    /* No free slot: create a new one at the end */
    if(slot == pStates->m_size)
    {
        ownVector_Resize(pStates, slot + 1);

        /* Resize cannot report errors: make sure the new element really has storage */
        if(!pStates->m_pBuffer || pStates->m_bufferLen < (slot + 1)*sizeof(int))
        {
            pStates->m_size = slot;
            slot = OWN_IDX_INVALID;
        }
    }

    if(slot != OWN_IDX_INVALID)
        ((int*)pStates->m_pBuffer)[slot] = 1;

    ownMutex_Unlock(&pTls->mutex);
    return slot;
}
439 
/* Visits the data pointer stored at index dataIdx by every registered thread.
   pDataVector - optional output vector of void*; every non-NULL data pointer found is appended to it
   bClear      - when non-zero, the visited data pointers are reset to NULL and the index is
                 marked as free in the states vector
   The whole operation runs under the storage mutex.
   Returns OWN_STATUS_OK on success, OWN_STATUS_FAIL if the mutex cannot be locked or
   dataIdx is outside the states vector. */
static int ownTlsStorage_DataVector(OwnTlsStorage *pTls, size_t dataIdx, OwnVector *pDataVector, int bClear)
{
    size_t i;

    if(ownMutex_Lock(&pTls->mutex) != OWN_STATUS_OK)
        return OWN_STATUS_FAIL;

    if(pTls->statesVector.m_size <= dataIdx)
    {
        ownMutex_Unlock(&pTls->mutex);
        return OWN_STATUS_FAIL;
    }

    for(i = 0; i < pTls->threadsVector.m_size; i++)
    {
        OwnTlsStorageTD *pThread = ((OwnTlsStorageTD**)pTls->threadsVector.m_pBuffer)[i];
        void           **ppSlot;
        void            *pThreadData;

        /* The bounds check must precede the read: threads which never used this index
           have a shorter data vector */
        if(!pThread || pThread->dataVector.m_size <= dataIdx)
            continue;

        ppSlot      = ((void**)pThread->dataVector.m_pBuffer) + dataIdx;
        pThreadData = *ppSlot;
        if(!pThreadData)
            continue;

        if(pDataVector)
            ownVector_PushBack(pDataVector, &pThreadData);
        if(bClear)
            *ppSlot = 0;
    }

    if(bClear)
        ((int*)pTls->statesVector.m_pBuffer)[dataIdx] = 0;
    ownMutex_Unlock(&pTls->mutex);

    return OWN_STATUS_OK;
}
472 
/* Collects the non-NULL data pointers stored at dataIdx by all threads into pDataVector
   without modifying the storage.
   Returns OWN_STATUS_FAIL if pDataVector is NULL, otherwise the result of ownTlsStorage_DataVector. */
static int ownTlsStorage_GetDataVector(OwnTlsStorage *pTls, size_t dataIdx, OwnVector *pDataVector)
{
    return pDataVector ? ownTlsStorage_DataVector(pTls, dataIdx, pDataVector, 0)
                       : OWN_STATUS_FAIL;
}
480 
/* Clears the data pointers stored at dataIdx in all threads and marks the index as free.
   The data itself is not destroyed. Returns the result of ownTlsStorage_DataVector. */
static int ownTlsStorage_ResetData(OwnTlsStorage *pTls, size_t dataIdx)
{
    OwnVector *pNoOutput = 0;
    const int  bClear    = 1;

    return ownTlsStorage_DataVector(pTls, dataIdx, pNoOutput, bClear);
}
485 
/* Returns the data pointer the calling thread has stored at index dataIdx.
   Returns 0 if the index is outside the states vector, if the calling thread has no
   record in this storage yet, or if its data vector does not reach dataIdx. */
static void* ownTlsStorage_GetData(OwnTlsStorage *pTls, size_t dataIdx)
{
    OwnTlsStorageTD *pThreadData;

    if(dataIdx >= pTls->statesVector.m_size)
        return 0;

    pThreadData = (OwnTlsStorageTD*)ownTlsCore_GetData(&pTls->tls);
    if(!pThreadData || dataIdx >= pThreadData->dataVector.m_size)
        return 0;

    return ((void**)(pThreadData->dataVector.m_pBuffer))[dataIdx];
}
498 
/* Stores pData at index dataIdx for the calling thread.
   On the first call from a thread its per-thread record is allocated, bound to the
   native TLS key and registered in the threads vector of the storage. The thread data
   vector is extended with NULL entries until it covers dataIdx.

   Returns OWN_STATUS_OK on success. OWN_STATUS_FAIL is returned if dataIdx is outside the
   states vector (data stored there could never be read back by ownTlsStorage_GetData),
   or if the per-thread record cannot be allocated, bound, registered or extended. */
static int ownTlsStorage_SetData(OwnTlsStorage *pTls, size_t dataIdx, void* pData)
{
    OwnTlsStorageTD *pThreadData = 0;

    if(pTls->statesVector.m_size <= dataIdx)
        return OWN_STATUS_FAIL;

    pThreadData = (OwnTlsStorageTD*)ownTlsCore_GetData(&pTls->tls);
    if(!pThreadData)
    {
        int bRegistered;

        pThreadData = (OwnTlsStorageTD*)OWN_SAFE_MALLOC(sizeof(OwnTlsStorageTD));
        if(!pThreadData)
            return OWN_STATUS_FAIL;

        ownVector_Init(&pThreadData->dataVector, sizeof(void*), 32);

        /* Without the TLS binding the record would be re-created on every call */
        if(ownTlsCore_SetData(&pTls->tls, pThreadData) != OWN_STATUS_OK)
        {
            ownVector_Release(&pThreadData->dataVector);
            OWN_SAFE_FREE(pThreadData);
            return OWN_STATUS_FAIL;
        }

        ownMutex_Lock(&pTls->mutex);
        pThreadData->index = pTls->threadsVector.m_size;
        ownVector_PushBack(&pTls->threadsVector, &pThreadData);
        bRegistered = (pTls->threadsVector.m_size > pThreadData->index);
        ownMutex_Unlock(&pTls->mutex);

        /* An unregistered record would be invisible to the storage and never released */
        if(!bRegistered)
        {
            ownTlsCore_SetData(&pTls->tls, NULL);
            ownVector_Release(&pThreadData->dataVector);
            OWN_SAFE_FREE(pThreadData);
            return OWN_STATUS_FAIL;
        }
    }

    if(dataIdx >= pThreadData->dataVector.m_size)
    {
        void *null   = NULL;
        int   bGrown = 1;

        ownMutex_Lock(&pTls->mutex);
        while(dataIdx >= pThreadData->dataVector.m_size)
        {
            const size_t sizeBefore = pThreadData->dataVector.m_size;

            ownVector_PushBack(&pThreadData->dataVector, &null);
            if(pThreadData->dataVector.m_size <= sizeBefore) /* No progress: stop instead of spinning */
            {
                bGrown = 0;
                break;
            }
        }
        ownMutex_Unlock(&pTls->mutex);

        if(!bGrown)
            return OWN_STATUS_FAIL;
    }

    /* Final guard: the slot must be backed by real storage before it is written */
    if(!pThreadData->dataVector.m_pBuffer ||
       pThreadData->dataVector.m_bufferLen < (dataIdx + 1)*sizeof(void*))
        return OWN_STATUS_FAIL;

    ((void**)(pThreadData->dataVector.m_pBuffer))[dataIdx] = pData;

    return OWN_STATUS_OK;
}
534 
ownGlobalTlsStorage(int bRelease)535 static OwnTlsStorage* ownGlobalTlsStorage(int bRelease)
536 {
537     static OwnTlsStorage *pStorage = NULL;
538     if(!pStorage && ownGlobalGetInitState() == OWN_STATE_INITIALIZATION)
539     {
540         pStorage = (OwnTlsStorage*)OWN_SAFE_MALLOC(sizeof(OwnTlsStorage));
541         if(!pStorage)
542             return NULL;
543 
544         ownTlsStorage_Init(pStorage);
545     }
546     else if(bRelease && pStorage)
547     {
548         ownTlsStorage_Release(pStorage);
549         OWN_SAFE_FREE(pStorage);
550         pStorage = 0;
551     }
552     return pStorage;
553 }
554 
ownGlobalGetTlsStorage(void)555 static OwnTlsStorage* ownGlobalGetTlsStorage(void)
556 {
557     return ownGlobalTlsStorage(0);
558 }
559 #endif
560 
561 /* /////////////////////////////////////////////////////////////////////////////
562 //                   IwTls - TLS data storage interface
563 ///////////////////////////////////////////////////////////////////////////// */
iwTls_Init(IwTls * pTls,IwTlsDestructor destructor)564 IW_DECL(IppStatus) iwTls_Init(IwTls *pTls, IwTlsDestructor destructor)
565 {
566 #if IW_ENABLE_TLS
567     OwnTlsStorage *pStorage;
568 
569     if(!pTls || !destructor)
570         return ippStsNullPtrErr;
571 
572     pTls->m_idx         = OWN_IDX_INVALID;
573     pTls->m_desctuctor  = destructor;
574     pTls->m_pTlsStorage = NULL;
575 
576     pStorage = ownGlobalGetTlsStorage();
577     if(!pStorage) // No global storage available, create new storage
578     {
579         pStorage = (OwnTlsStorage*)OWN_SAFE_MALLOC(sizeof(OwnTlsStorage));
580         if(!pStorage)
581             return ippStsMemAllocErr;
582 
583         if(ownTlsStorage_Init(pStorage) < OWN_STATUS_OK)
584         {
585             OWN_SAFE_FREE(pStorage);
586             return ippStsErr;
587         }
588 
589         pTls->m_idx = ownTlsStorage_ReserveDataIndex(pStorage);
590         if(pTls->m_idx == OWN_IDX_INVALID)
591         {
592             ownTlsStorage_Release(pStorage);
593             OWN_SAFE_FREE(pStorage);
594             return ippStsErr;
595         }
596         pTls->m_pTlsStorage = pStorage;
597     }
598     else
599     {
600         pTls->m_idx = ownTlsStorage_ReserveDataIndex(pStorage);
601         if(pTls->m_idx == OWN_IDX_INVALID)
602             return ippStsErr;
603     }
604 
605     return ippStsNoErr;
606 #else
607     if(!pTls || !destructor)
608         return ippStsNullPtrErr;
609 
610     pTls->m_idx         = OWN_IDX_INVALID;
611     pTls->m_desctuctor  = destructor;
612     pTls->m_pTlsStorage = NULL;
613 
614     return ippStsUnknownFeature;
615 #endif
616 }
617 
iwTls_Set(IwTls * pTls,void * pData)618 IW_DECL(IppStatus) iwTls_Set(IwTls *pTls, void *pData)
619 {
620 #if IW_ENABLE_TLS
621     OwnTlsStorage *pStorage;
622     void          *pOldData;
623 
624     if(!pTls)
625         return ippStsNullPtrErr;
626     if(pTls->m_idx == OWN_IDX_INVALID)
627         return ippStsErr;
628 
629     pStorage = ownGlobalGetTlsStorage();
630     if(!pStorage)
631     {
632         pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
633         if(!pStorage)
634             return ippStsErr;
635     }
636 
637     pOldData = ownTlsStorage_GetData(pStorage, pTls->m_idx);
638     if(pOldData != pData)
639     {
640         if(pTls->m_desctuctor && pOldData)
641             pTls->m_desctuctor(pOldData);
642 
643         if(ownTlsStorage_SetData(pStorage, pTls->m_idx, pData) != OWN_STATUS_OK)
644             return ippStsErr;
645     }
646     return ippStsNoErr;
647 #else
648     if(!pTls)
649         return ippStsNullPtrErr;
650 
651     pTls->m_pTlsStorage = pData;
652 
653     return ippStsUnknownFeature;
654 #endif
655 }
656 
iwTls_Get(const IwTls * pTls)657 IW_DECL(void*) iwTls_Get(const IwTls *pTls)
658 {
659 #if IW_ENABLE_TLS
660     OwnTlsStorage *pStorage;
661 
662     if(!pTls)
663         return NULL;
664     if(pTls->m_idx == OWN_IDX_INVALID)
665         return NULL;
666 
667     pStorage = ownGlobalGetTlsStorage();
668     if(!pStorage)
669     {
670         pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
671         if(!pStorage)
672             return NULL;
673     }
674 
675     return ownTlsStorage_GetData(pStorage, pTls->m_idx);
676 #else
677     if(!pTls)
678         return NULL;
679 
680     return pTls->m_pTlsStorage;
681 #endif
682 }
683 
iwTls_ReleaseData(IwTls * pTls)684 IW_DECL(IppStatus) iwTls_ReleaseData(IwTls *pTls)
685 {
686 #if IW_ENABLE_TLS
687     OwnTlsStorage *pStorage;
688 
689     if(!pTls)
690         return ippStsNullPtrErr;
691 
692     pStorage = ownGlobalGetTlsStorage();
693     if(!pStorage)
694     {
695         pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
696         if(!pStorage)
697             return ippStsErr;
698     }
699 
700     if(pTls->m_idx != OWN_IDX_INVALID)
701     {
702         if(pTls->m_desctuctor)
703         {
704             size_t   i;
705             OwnVector vData;
706             ownVector_Init(&vData, sizeof(void*), 16);
707 
708             if(ownTlsStorage_GetDataVector(pStorage, pTls->m_idx, &vData) != OWN_STATUS_OK)
709             {
710                 ownVector_Release(&vData);
711                 return ippStsErr;
712             }
713 
714             for(i = 0; i < vData.m_size; i++)
715             {
716                 pTls->m_desctuctor(((void**)vData.m_pBuffer)[i]);
717             }
718             ownVector_Release(&vData);
719         }
720 
721         if(ownTlsStorage_ResetData(pStorage, pTls->m_idx) != OWN_STATUS_OK)
722             return ippStsErr;
723     }
724 
725     return ippStsNoErr;
726 #else
727     if(!pTls)
728         return ippStsNullPtrErr;
729 
730     pTls->m_desctuctor(pTls->m_pTlsStorage);
731 
732     return ippStsUnknownFeature;
733 #endif
734 }
735 
iwTls_Release(IwTls * pTls)736 IW_DECL(IppStatus) iwTls_Release(IwTls *pTls)
737 {
738 #if IW_ENABLE_TLS
739     OwnTlsStorage *pStorage;
740 
741     if(!pTls)
742         return ippStsNullPtrErr;
743 
744     pStorage = ownGlobalGetTlsStorage();
745     if(!pStorage)
746     {
747         pStorage = (OwnTlsStorage*)pTls->m_pTlsStorage;
748         if(!pStorage)
749             return ippStsNoErr;
750     }
751 
752     if(pTls->m_idx != OWN_IDX_INVALID)
753     {
754         iwTls_ReleaseData(pTls);
755         pTls->m_idx = OWN_IDX_INVALID;
756     }
757 
758     if(pTls->m_pTlsStorage)
759     {
760         ownTlsStorage_Release(pStorage);
761         OWN_SAFE_FREE(pStorage);
762         pTls->m_pTlsStorage = NULL;
763     }
764     return ippStsNoErr;
765 #else
766     if(!pTls)
767         return ippStsNullPtrErr;
768 
769     pTls->m_desctuctor(pTls->m_pTlsStorage);
770 
771     return ippStsUnknownFeature;
772 #endif
773 }
774 
775 /* /////////////////////////////////////////////////////////////////////////////
776 //                   OwnBufferPool - Memory pool manager
777 ///////////////////////////////////////////////////////////////////////////// */
778 #if OWN_ENABLE_BUFFER_POOL
779 
780 #define OWN_BUFFER_POOL_INIT_SIZE         131072  /* 128KB initial chunk size */
781 #define OWN_BUFFER_POOL_MAX_RETAIN_CYCLES 4       /* Maximum number of release cycles before buffer will be actually removed */
782 
/* Single buffer tracked by the pool */
typedef struct _OwnBufferPoolEntry
{
    void   *ptr;    /* Buffer memory; 0 marks an empty entry */
    size_t  size;   /* Allocated size of the buffer in bytes */
    int     locked; /* Non-zero while the buffer is handed out to a user */
    int     cycle;  /* Value of the pool cycle counter at the time of the last soft release */
} OwnBufferPoolEntry;
790 
791 typedef struct _OwnBufferPool
792 {
793     OwnVector poolVector;
794     size_t   memoryTotal;
795     int      chunks;
796     int      chunksActive;
797     int      allocations;
798     int      releases;
799     int      cycles;
800 } OwnBufferPool;
801 
/* Initializes the pool: resets the object, zeroes all statistics and prepares the
   entries vector with room reserved for 8 entries. */
static void ownBufferPool_Init(OwnBufferPool *pPool)
{
    OWN_SAFE_RESET(pPool, sizeof(*pPool));

    /* Counters are set explicitly in addition to the reset above */
    pPool->cycles       = 0;
    pPool->releases     = 0;
    pPool->allocations  = 0;
    pPool->chunksActive = 0;
    pPool->chunks       = 0;
    pPool->memoryTotal  = 0;

    ownVector_Init(&pPool->poolVector, sizeof(OwnBufferPoolEntry), 8);
}
814 
/* Frees buffers held by the pool and updates the statistics.
   bForce - when zero only buffers which are not handed out are freed;
            when non-zero every buffer is freed, including the ones still in use
            (those are also unlocked and removed from the active count).
   The entries themselves stay in the vector as empty entries.
   Returns OWN_STATUS_OK; the operation has no failure path. */
static int ownBufferPool_CleanUp(OwnBufferPool *pPool, int bForce)
{
    OwnBufferPoolEntry *pEntries = (OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer;
    size_t i;

    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        OwnBufferPoolEntry *pEntry = &pEntries[i];

        if(!pEntry->ptr || (pEntry->locked && !bForce))
            continue;

        /* A forcibly freed buffer is no longer active; an empty entry must not stay locked */
        if(pEntry->locked)
        {
            pPool->chunksActive--;
            pEntry->locked = 0;
        }

        pPool->chunks--;
        pPool->releases++;
        pPool->memoryTotal -= pEntry->size;

        OWN_SAFE_FREE(pEntry->ptr);
        pEntry->size  = 0;
        pEntry->ptr   = 0;
        pEntry->cycle = 0;
    }
    return OWN_STATUS_OK;
}
839 
/* Releases all pool resources: every buffer is freed (forced clean-up), then the entries
   vector itself. The pool object is not freed. A NULL pool is ignored. */
static void ownBufferPool_Release(OwnBufferPool *pPool)
{
    if(pPool)
    {
        const int bForce = 1;

        ownBufferPool_CleanUp(pPool, bForce);
        ownVector_Release(&pPool->poolVector);
    }
}
848 
/* Hands out a buffer of at least `size` bytes.
   An existing unlocked buffer which is large enough is reused; otherwise a new buffer
   of max(size, OWN_BUFFER_POOL_INIT_SIZE) bytes is allocated in an empty entry
   (the entries vector is extended if there is none). The returned buffer is locked
   until it is passed to ownBufferPool_ReleaseBuffer.
   Returns 0 if size is 0, or if the entries vector or the buffer cannot be allocated;
   in that case the pool statistics are left untouched. */
static void* ownBufferPool_GetBuffer(OwnBufferPool *pPool, size_t size)
{
    OwnBufferPoolEntry *pEntry;
    void  *pNewBuffer;
    size_t newSize;
    size_t i = 0;
    int idx = -1;

    if(!size)
        return 0;

    /* Reuse free buffer of sufficient size */
    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i];
        if(pEntry->ptr && pEntry->size >= size && !pEntry->locked)
        {
            pPool->chunksActive++;
            pEntry->locked = 1;
            return pEntry->ptr;
        }
    }

    /* Find empty index */
    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        if(!((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i].ptr)
            idx = (int)i;
    }
    if(idx < 0)
    {
        const size_t oldSize = pPool->poolVector.m_size;

        ownVector_Resize(&pPool->poolVector, oldSize + 1);

        /* Resize cannot report errors: make sure the new entry really has storage */
        if(!pPool->poolVector.m_pBuffer ||
           pPool->poolVector.m_bufferLen < (oldSize + 1)*sizeof(OwnBufferPoolEntry))
        {
            pPool->poolVector.m_size = oldSize;
            return 0;
        }
        idx = (int)oldSize;
    }

    /* Create new buffer */
    pEntry  = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[idx];
    newSize = (size < OWN_BUFFER_POOL_INIT_SIZE) ? OWN_BUFFER_POOL_INIT_SIZE : size;

    pNewBuffer = OWN_SAFE_MALLOC(newSize);
    if(!pNewBuffer)
    {
        /* Keep the entry empty and the statistics consistent */
        pEntry->ptr    = 0;
        pEntry->size   = 0;
        pEntry->locked = 0;
        return 0;
    }

    pEntry->ptr    = pNewBuffer;
    pEntry->size   = newSize;
    pEntry->locked = 1;
    pPool->memoryTotal += pEntry->size;
    pPool->allocations++;
    pPool->chunks++;
    pPool->chunksActive++;

    return pEntry->ptr;
}
896 
/* Returns a buffer obtained from ownBufferPool_GetBuffer to the pool.
   bHard - when non-zero the buffer memory is freed immediately; otherwise the buffer is
           kept for reuse and stamped with the current pool cycle.
   Every successful release advances the pool cycle counter. While searching for pBuffer,
   unlocked buffers visited before it whose stamp is more than
   OWN_BUFFER_POOL_MAX_RETAIN_CYCLES cycles old are freed as well.
   Returns OWN_STATUS_OK if the buffer was found and released, OWN_STATUS_FAIL if pBuffer
   is NULL or is not a locked buffer of this pool. */
static int ownBufferPool_ReleaseBuffer(OwnBufferPool *pPool, void *pBuffer, int bHard)
{
    size_t i;

    if(!pBuffer)
        return OWN_STATUS_FAIL;

    for(i = 0; i < pPool->poolVector.m_size; i++)
    {
        OwnBufferPoolEntry *pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i];
        const int bMatch = (pEntry->ptr == pBuffer && pEntry->locked);
        int       bFree  = 0;

        if(bMatch)
        {
            pPool->chunksActive--;
            pEntry->locked = 0;
            if(bHard)
                bFree = 1;
            else
                pEntry->cycle = pPool->cycles;
            pPool->cycles++;
        }
        else if(pEntry->ptr && !pEntry->locked && (pPool->cycles - pEntry->cycle) > OWN_BUFFER_POOL_MAX_RETAIN_CYCLES)
        {
            bFree = 1; /* Unused for too long */
        }

        if(bFree)
        {
            pPool->chunks--;
            pPool->releases++;
            pPool->memoryTotal -= pEntry->size;

            OWN_SAFE_FREE(pEntry->ptr);
            pEntry->ptr   = 0;
            pEntry->size  = 0;
            pEntry->cycle = 0;
        }

        if(bMatch)
            return OWN_STATUS_OK;
    }
    return OWN_STATUS_FAIL;
}
944 
ownGlobalBufferRelease(size_t idx,int full)945 static void ownGlobalBufferRelease(size_t idx, int full)
946 {
947     if(idx == OWN_IDX_INVALID)
948         return;
949 
950     {
951         size_t         i;
952         OwnVector       dataVector;
953         OwnTlsStorage *pTls  = ownGlobalGetTlsStorage(); /* Get global TLS state */
954         if(!pTls)
955             return;
956 
957         ownVector_Init(&dataVector, sizeof(void*), 32);
958         if(ownTlsStorage_GetDataVector(pTls, idx, &dataVector) != OWN_STATUS_OK)
959             return;
960         for(i = 0; i < dataVector.m_size; i++)
961         {
962             OwnBufferPool *pPool = ((OwnBufferPool**)dataVector.m_pBuffer)[i];
963             if(pPool)
964             {
965                 ownBufferPool_CleanUp(pPool, full); /* Release unused buffer pool memory */
966                 if(full)
967                 {
968                     ownVector_Release(&pPool->poolVector);   /* Release buffers vector */
969                     OWN_SAFE_FREE(pPool);                   /* Release object itself */
970                 }
971             }
972         }
973         if(full)
974             ownTlsStorage_ResetData(pTls, idx);
975         ownVector_Release(&dataVector);
976     }
977 }
978 
ownGlobalBufferPoolIdx(int bRelease)979 static size_t ownGlobalBufferPoolIdx(int bRelease)
980 {
981     static size_t idx = OWN_IDX_INVALID;
982     if(idx == OWN_IDX_INVALID && ownGlobalGetInitState() == OWN_STATE_INITIALIZATION)
983     {
984         OwnTlsStorage *pTls  = ownGlobalGetTlsStorage(); /* Get global TLS state */
985         if(!pTls)
986             return idx; /* Global TLS was not initialized */
987 
988         idx = ownTlsStorage_ReserveDataIndex(pTls);
989     }
990     else if(bRelease && idx != OWN_IDX_INVALID)
991     {
992         ownGlobalBufferRelease(idx, 1);
993         idx = OWN_IDX_INVALID;
994     }
995     return idx;
996 }
997 
ownGlobalGetBufferPool(void)998 static OwnBufferPool* ownGlobalGetBufferPool(void)
999 {
1000     size_t         idx   = ownGlobalBufferPoolIdx(0);
1001     OwnBufferPool *pPool = 0;
1002     OwnTlsStorage *pTls  = ownGlobalGetTlsStorage(); /* Get global TLS state */
1003     if(!pTls)
1004         return 0; /* Global TLS was not initialized */
1005 
1006     if(idx == OWN_IDX_INVALID)
1007         return 0;
1008 
1009     pPool = (OwnBufferPool*)ownTlsStorage_GetData(pTls, idx);
1010     if(!pPool)
1011     {
1012         pPool = (OwnBufferPool*)OWN_SAFE_MALLOC(sizeof(OwnBufferPool));
1013         if(!pPool)
1014             return NULL;
1015 
1016         ownBufferPool_Init(pPool);
1017         ownTlsStorage_SetData(pTls, idx, pPool);
1018     }
1019     return pPool;
1020 }
1021 #endif
1022 
1023 /* /////////////////////////////////////////////////////////////////////////////
1024 //                   OwnBufferPool - External memory interface
1025 ///////////////////////////////////////////////////////////////////////////// */
ownSharedMalloc(IwSize size)1026 void* IPP_STDCALL ownSharedMalloc(IwSize size)
1027 {
1028 #if OWN_ENABLE_BUFFER_POOL
1029     OwnBufferPool *pPool = ownGlobalGetBufferPool(); /* Get global buffer pool state */
1030     if(pPool)
1031     {
1032         void *pBuffer = ownBufferPool_GetBuffer(pPool, size);
1033         if(pBuffer)
1034             return pBuffer;
1035     }
1036 #endif
1037     /* Uninitialized or error in buffer pool, use simple malloc */
1038     return ippMalloc_L(size);
1039 }
1040 
ownSharedFree(void * pBuffer)1041 void IPP_STDCALL ownSharedFree(void* pBuffer)
1042 {
1043 #if OWN_ENABLE_BUFFER_POOL
1044     OwnBufferPool *pPool = ownGlobalGetBufferPool(); /* Get global buffer pool state */
1045     if(pPool)
1046     {
1047         if(ownBufferPool_ReleaseBuffer(pPool, pBuffer, 0) >= 0)
1048             return;
1049     }
1050 #endif
1051 
1052     /* Uninitialized or error in buffer pool, use simple free */
1053     ippFree(pBuffer);
1054 }
1055 
1056 /* /////////////////////////////////////////////////////////////////////////////
1057 //                   IW library-scope objects initialization
1058 ///////////////////////////////////////////////////////////////////////////// */
1059 #if OWN_ENABLE_BUFFER_POOL
iwInit()1060 IW_DECL(void) iwInit()
1061 {
1062     int state = ownGlobalGetInitState();
1063     if(state == OWN_STATE_NOT_INITIALIZED)
1064     {
1065         ownGlobalSetInitState(OWN_STATE_INITIALIZATION);
1066 
1067         ownGlobalGetTlsStorage();   /* Initialize tls static object  */
1068         ownGlobalGetBufferPool();   /* initialize global buffer pool */
1069 
1070         ownGlobalSetInitState(OWN_STATE_INITIALIZED);
1071     }
1072 }
iwCleanup()1073 IW_DECL(void) iwCleanup()
1074 {
1075     ownGlobalBufferRelease(ownGlobalBufferPoolIdx(0), 0);
1076 }
iwRelease()1077 IW_DECL(void) iwRelease()
1078 {
1079     int state = ownGlobalGetInitState();
1080     if(state == OWN_STATE_INITIALIZED)
1081     {
1082         ownGlobalBufferPoolIdx(1);
1083         ownGlobalTlsStorage(1);
1084 
1085         ownGlobalSetInitState(OWN_STATE_NOT_INITIALIZED);
1086     }
1087 }
1088 #endif
1089 
iwSetThreadsNum(int threads)1090 IW_DECL(void) iwSetThreadsNum(int threads)
1091 {
1092 #if IW_ENABLE_THREADING_LAYER
1093     ippSetNumThreads_LT(threads);
1094 #else
1095     (void)threads;
1096 #endif
1097 }
iwGetThreadsNum()1098 IW_DECL(int)  iwGetThreadsNum()
1099 {
1100 #if IW_ENABLE_THREADING_LAYER
1101     int threads;
1102     ippGetNumThreads_LT(&threads);
1103     return threads;
1104 #else
1105     return 0;
1106 #endif
1107 }
iwGetThreadsNumDefault()1108 IW_DECL(int)  iwGetThreadsNumDefault()
1109 {
1110 #if IW_ENABLE_THREADING_LAYER
1111     return IPP_MIN(omp_get_num_procs(), omp_get_max_threads());
1112 #else
1113     return 0;
1114 #endif
1115 }
1116 
1117 #if OWN_ENABLE_BUFFER_POOL
iwGetDebugInfo(IwStateDebugInfo * pInfo)1118 IW_DECL(void) iwGetDebugInfo(IwStateDebugInfo *pInfo)
1119 {
1120     OwnTlsStorage *pTls  = NULL;
1121     OwnBufferPool *pPool = NULL;
1122 
1123     pTls  = ownGlobalGetTlsStorage();
1124     if(pTls)
1125     {
1126         pInfo->m_tlsInitialized    = 1;
1127         pInfo->m_tlsDataIndexesMax = pTls->statesVector.m_size;
1128         pInfo->m_tlsThreadsMax     = pTls->threadsVector.m_size;
1129     }
1130     else
1131         pInfo->m_tlsInitialized = 0;
1132 
1133     pPool = ownGlobalGetBufferPool(); /* Get global buffer pool state */
1134     if(pPool)
1135     {
1136         size_t i;
1137         OwnBufferPoolEntry *pEntry;
1138 
1139         pInfo->m_poolInitialized  = 1;
1140         pInfo->m_poolMemoryTotal  = pPool->memoryTotal;
1141         pInfo->m_poolChunks       = pPool->chunks;
1142         pInfo->m_poolChunksLocked = pPool->chunksActive;
1143         pInfo->m_poolAllocations  = pPool->allocations;
1144         pInfo->m_poolReleases     = pPool->releases;
1145         pInfo->m_poolEntries      = pPool->poolVector.m_size;
1146         for(i = 0; i < IPP_MIN(pPool->poolVector.m_size, 16); i++)
1147         {
1148             pEntry = &((OwnBufferPoolEntry*)pPool->poolVector.m_pBuffer)[i];
1149             pInfo->m_poolEntrySizes[i] = pEntry->size;
1150         }
1151     }
1152     else
1153         pInfo->m_poolInitialized = 0;
1154 }
1155 #endif
1156