1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file format_conversion.h
24  *
 * @brief Helpers that load, clamp, normalize and store SIMD pixel data in SOA form
26  *
27  ******************************************************************************/
28 #include "format_types.h"
29 #include "format_traits.h"
30 
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Load SIMD packed pixels in SOA format and converts to
33 ///        SOA RGBA32_FLOAT format.
34 /// @param pSrc - source data in SOA form
35 /// @param dst - output data in SOA form
36 template <typename SIMD_T, SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,Vec4<SIMD_T> & dst)37 INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, Vec4<SIMD_T>& dst)
38 {
39     // fast path for float32
40     if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
41         (FormatTraits<SrcFormat>::GetBPC(0) == 32))
42     {
43         auto lambda = [&](int comp)
44         {
45             Float<SIMD_T> vComp =
46                 SIMD_T::load_ps(reinterpret_cast<const float*>(pSrc + comp * sizeof(Float<SIMD_T>)));
47 
48             dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
49         };
50 
51         UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
52         return;
53     }
54 
55     auto lambda = [&](int comp)
56     {
57         // load SIMD components
58         Float<SIMD_T> vComp;
59         FormatTraits<SrcFormat>::loadSOA(comp, pSrc, vComp);
60 
61         // unpack
62         vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
63 
64         // convert
65         if (FormatTraits<SrcFormat>::isNormalized(comp))
66         {
67             vComp = SIMD_T::cvtepi32_ps(SIMD_T::castps_si(vComp));
68             vComp = SIMD_T::mul_ps(vComp, SIMD_T::set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
69         }
70 
71         dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
72 
73         // is there a better way to get this from the SIMD traits?
74         const uint32_t SIMD_WIDTH = sizeof(typename SIMD_T::Float) / sizeof(float);
75 
76         pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * SIMD_WIDTH) / 8;
77     };
78 
79     UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
80 }
81 
82 template <SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,simdvector & dst)83 INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simdvector& dst)
84 {
85     LoadSOA<SIMD256, SrcFormat>(pSrc, dst);
86 }
87 
88 template <SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,simd16vector & dst)89 INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simd16vector& dst)
90 {
91     LoadSOA<SIMD512, SrcFormat>(pSrc, dst);
92 }
93 
94 //////////////////////////////////////////////////////////////////////////
95 /// @brief Clamps the given component based on the requirements on the
96 ///        Format template arg
97 /// @param vComp - SIMD vector of floats
98 /// @param Component - component
99 template <typename SIMD_T, SWR_FORMAT Format>
Clamp(Float<SIMD_T> const & v,uint32_t Component)100 INLINE Float<SIMD_T> SIMDCALL Clamp(Float<SIMD_T> const& v, uint32_t Component)
101 {
102     Float<SIMD_T> vComp = v;
103     if (Component >= 4 || Component < 0)
104     {
105 	// Component shouldn't out of <0;3> range
106 	assert(false);
107 	return vComp;
108     }
109     if (FormatTraits<Format>::isNormalized(Component))
110     {
111         if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
112         {
113             vComp = SIMD_T::max_ps(vComp, SIMD_T::setzero_ps());
114         }
115 
116         if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
117         {
118             vComp = SIMD_T::max_ps(vComp, SIMD_T::set1_ps(-1.0f));
119         }
120         vComp = SIMD_T::min_ps(vComp, SIMD_T::set1_ps(1.0f));
121     }
122     else if (FormatTraits<Format>::GetBPC(Component) < 32)
123     {
124         if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
125         {
126             int           iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
127             int           iMin = 0;
128             Integer<SIMD_T> vCompi = SIMD_T::castps_si(vComp);
129             vCompi = SIMD_T::max_epu32(vCompi, SIMD_T::set1_epi32(iMin));
130             vCompi = SIMD_T::min_epu32(vCompi, SIMD_T::set1_epi32(iMax));
131             vComp = SIMD_T::castsi_ps(vCompi);
132         }
133         else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
134         {
135             int           iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
136             int           iMin = -1 - iMax;
137             Integer<SIMD_T> vCompi = SIMD_T::castps_si(vComp);
138             vCompi = SIMD_T::max_epi32(vCompi, SIMD_T::set1_epi32(iMin));
139             vCompi = SIMD_T::min_epi32(vCompi, SIMD_T::set1_epi32(iMax));
140             vComp = SIMD_T::castsi_ps(vCompi);
141         }
142     }
143 
144     return vComp;
145 }
146 
147 template <SWR_FORMAT Format>
Clamp(simdscalar const & v,uint32_t Component)148 INLINE simdscalar SIMDCALL Clamp(simdscalar const& v, uint32_t Component)
149 {
150     return Clamp<SIMD256, Format>(v, Component);
151 }
152 
153 template <SWR_FORMAT Format>
Clamp(simd16scalar const & v,uint32_t Component)154 INLINE simd16scalar SIMDCALL Clamp(simd16scalar const& v, uint32_t Component)
155 {
156     return Clamp<SIMD512, Format>(v, Component);
157 }
158 
159 //////////////////////////////////////////////////////////////////////////
160 /// @brief Normalize the given component based on the requirements on the
161 ///        Format template arg
162 /// @param vComp - SIMD vector of floats
163 /// @param Component - component
164 template <typename SIMD_T, SWR_FORMAT Format>
Normalize(Float<SIMD_T> const & vComp,uint32_t Component)165 INLINE Float<SIMD_T> SIMDCALL Normalize(Float<SIMD_T> const& vComp, uint32_t Component)
166 {
167     Float<SIMD_T> r = vComp;
168     if (FormatTraits<Format>::isNormalized(Component))
169     {
170         r = SIMD_T::mul_ps(r, SIMD_T::set1_ps(FormatTraits<Format>::fromFloat(Component)));
171         r = SIMD_T::castsi_ps(SIMD_T::cvtps_epi32(r));
172     }
173     return r;
174 }
175 
176 template <SWR_FORMAT Format>
Normalize(simdscalar const & vComp,uint32_t Component)177 INLINE simdscalar SIMDCALL Normalize(simdscalar const& vComp, uint32_t Component)
178 {
179     return Normalize<SIMD256, Format>(vComp, Component);
180 }
181 
182 template <SWR_FORMAT Format>
Normalize(simd16scalar const & vComp,uint32_t Component)183 INLINE simd16scalar SIMDCALL Normalize(simd16scalar const& vComp, uint32_t Component)
184 {
185     return Normalize<SIMD512, Format>(vComp, Component);
186 }
187 
188 //////////////////////////////////////////////////////////////////////////
189 /// @brief Convert and store simdvector of pixels in SOA
190 ///        RGBA32_FLOAT to SOA format
191 /// @param src - source data in SOA form
192 /// @param dst - output data in SOA form
193 template <typename SIMD_T, SWR_FORMAT DstFormat>
StoreSOA(const Vec4<SIMD_T> & src,uint8_t * pDst)194 INLINE void SIMDCALL StoreSOA(const Vec4<SIMD_T>& src, uint8_t* pDst)
195 {
196     // fast path for float32
197     if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
198         (FormatTraits<DstFormat>::GetBPC(0) == 32))
199     {
200         for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
201         {
202             Float<SIMD_T> vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
203 
204             // Gamma-correct
205             if (FormatTraits<DstFormat>::isSRGB)
206             {
207                 if (comp < 3) // Input format is always RGBA32_FLOAT.
208                 {
209                     vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
210                 }
211             }
212 
213             SIMD_T::store_ps(reinterpret_cast<float*>(pDst + comp * sizeof(simd16scalar)), vComp);
214         }
215         return;
216     }
217 
218     auto lambda = [&](int comp) {
219         Float<SIMD_T> vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
220 
221         // Gamma-correct
222         if (FormatTraits<DstFormat>::isSRGB)
223         {
224             if (comp < 3) // Input format is always RGBA32_FLOAT.
225             {
226                 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
227             }
228         }
229 
230         // clamp
231         vComp = Clamp<SIMD_T, DstFormat>(vComp, comp);
232 
233         // normalize
234         vComp = Normalize<SIMD_T, DstFormat>(vComp, comp);
235 
236         // pack
237         vComp = FormatTraits<DstFormat>::pack(comp, vComp);
238 
239         // store
240         FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
241 
242         // is there a better way to get this from the SIMD traits?
243         const uint32_t SIMD_WIDTH = sizeof(typename SIMD_T::Float) / sizeof(float);
244 
245         pDst += (FormatTraits<DstFormat>::GetBPC(comp) * SIMD_WIDTH) / 8;
246     };
247 
248     UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
249 }
250 
251 template <SWR_FORMAT DstFormat>
StoreSOA(const simdvector & src,uint8_t * pDst)252 INLINE void SIMDCALL StoreSOA(const simdvector& src, uint8_t* pDst)
253 {
254     StoreSOA<SIMD256, DstFormat>(src, pDst);
255 }
256 
257 template <SWR_FORMAT DstFormat>
StoreSOA(const simd16vector & src,uint8_t * pDst)258 INLINE void SIMDCALL StoreSOA(const simd16vector& src, uint8_t* pDst)
259 {
260     StoreSOA<SIMD512, DstFormat>(src, pDst);
261 }
262 
263