1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file fifo.hpp
24  *
25  * @brief Definitions for our fifos used for thread communication.
26  *
27  ******************************************************************************/
28 #pragma once
29 
30 #include "common/os.h"
31 #include "arena.h"
32 
33 #include <vector>
34 #include <cassert>
35 
36 template <class T>
37 struct QUEUE
38 {
OSALIGNLINEQUEUE39     OSALIGNLINE(volatile uint32_t) mLock{0};
OSALIGNLINEQUEUE40     OSALIGNLINE(volatile uint32_t) mNumEntries{0};
41     std::vector<T*> mBlocks;
42     T*              mCurBlock{nullptr};
43     uint32_t        mHead{0};
44     uint32_t        mTail{0};
45     uint32_t        mCurBlockIdx{0};
46 
47     // power of 2
48     static const uint32_t mBlockSizeShift = 6;
49     static const uint32_t mBlockSize      = 1 << mBlockSizeShift;
50 
51     template <typename ArenaT>
clearQUEUE52     void clear(ArenaT& arena)
53     {
54         mHead = 0;
55         mTail = 0;
56         mBlocks.clear();
57         T* pNewBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
58         mBlocks.push_back(pNewBlock);
59         mCurBlock    = pNewBlock;
60         mCurBlockIdx = 0;
61         mNumEntries  = 0;
62         mLock        = 0;
63     }
64 
getNumQueuedQUEUE65     uint32_t getNumQueued() { return mNumEntries; }
66 
tryLockQUEUE67     bool tryLock()
68     {
69         if (mLock)
70         {
71             return false;
72         }
73 
74         // try to lock the FIFO
75         long initial = InterlockedCompareExchange(&mLock, 1, 0);
76         return (initial == 0);
77     }
78 
unlockQUEUE79     void unlock() { mLock = 0; }
80 
peekQUEUE81     T* peek()
82     {
83         if (mNumEntries == 0)
84         {
85             return nullptr;
86         }
87         uint32_t block = mHead >> mBlockSizeShift;
88         return &mBlocks[block][mHead & (mBlockSize - 1)];
89     }
90 
dequeue_noincQUEUE91     void dequeue_noinc()
92     {
93         mHead++;
94         mNumEntries--;
95     }
96 
97     template <typename ArenaT>
enqueue_try_nosyncQUEUE98     bool enqueue_try_nosync(ArenaT& arena, const T* entry)
99     {
100         const float* pSrc = (const float*)entry;
101         float*       pDst = (float*)&mCurBlock[mTail];
102 
103         auto lambda = [&](int32_t i) {
104             __m256 vSrc = _mm256_load_ps(pSrc + i * KNOB_SIMD_WIDTH);
105             _mm256_stream_ps(pDst + i * KNOB_SIMD_WIDTH, vSrc);
106         };
107 
108         const uint32_t numSimdLines = sizeof(T) / (KNOB_SIMD_WIDTH * 4);
109         static_assert(numSimdLines * KNOB_SIMD_WIDTH * 4 == sizeof(T),
110                       "FIFO element size should be multiple of SIMD width.");
111 
112         UnrollerL<0, numSimdLines, 1>::step(lambda);
113 
114         mTail++;
115         if (mTail == mBlockSize)
116         {
117             if (++mCurBlockIdx < mBlocks.size())
118             {
119                 mCurBlock = mBlocks[mCurBlockIdx];
120             }
121             else
122             {
123                 T* newBlock = (T*)arena.AllocAligned(sizeof(T) * mBlockSize, KNOB_SIMD_WIDTH * 4);
124                 SWR_ASSERT(newBlock);
125 
126                 mBlocks.push_back(newBlock);
127                 mCurBlock = newBlock;
128             }
129 
130             mTail = 0;
131         }
132 
133         mNumEntries++;
134         return true;
135     }
136 
destroyQUEUE137     void destroy() {}
138 };
139