1 /*
2 PTEX SOFTWARE
3 Copyright 2014 Disney Enterprises, Inc.  All rights reserved
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9   * Redistributions of source code must retain the above copyright
10     notice, this list of conditions and the following disclaimer.
11 
12   * Redistributions in binary form must reproduce the above copyright
13     notice, this list of conditions and the following disclaimer in
14     the documentation and/or other materials provided with the
15     distribution.
16 
17   * The names "Disney", "Walt Disney Pictures", "Walt Disney Animation
18     Studios" or the names of its contributors may NOT be used to
19     endorse or promote products derived from this software without
20     specific prior written permission from Walt Disney Pictures.
21 
22 Disclaimer: THIS SOFTWARE IS PROVIDED BY WALT DISNEY PICTURES AND
23 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
24 BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS
25 FOR A PARTICULAR PURPOSE, NONINFRINGEMENT AND TITLE ARE DISCLAIMED.
26 IN NO EVENT SHALL WALT DISNEY PICTURES, THE COPYRIGHT HOLDER OR
27 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
29 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND BASED ON ANY
31 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
34 */
35 #include "PtexPlatform.h"
36 #include "PtexUtils.h"
37 #include "PtexHalf.h"
38 #include "PtexSeparableKernel.h"
39 
40 PTEX_NAMESPACE_BEGIN
41 
42 namespace {
43     // apply to 1..4 channels (unrolled channel loop) of packed data (nTxChan==nChan)
44     template<class T, int nChan>
Apply(PtexSeparableKernel & k,float * result,void * data,int,int)45     void Apply(PtexSeparableKernel& k, float* result, void* data, int /*nChan*/, int /*nTxChan*/)
46     {
47         float* rowResult = (float*) alloca(nChan*sizeof(float));
48         int rowlen = k.res.u() * nChan;
49         int datalen = k.uw * nChan;
50         int rowskip = rowlen - datalen;
51         float* kvp = k.kv;
52         T* p = static_cast<T*>(data) + (k.v * k.res.u() + k.u) * nChan;
53         T* pEnd = p + k.vw * rowlen;
54         while (p != pEnd)
55         {
56             float* kup = k.ku;
57             T* pRowEnd = p + datalen;
58             // just mult and copy first element
59             PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
60             p += nChan;
61             // accumulate remaining elements
62             while (p != pRowEnd) {
63                 // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
64                 PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
65                 p += nChan;
66             }
67             // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
68             PtexUtils::VecAccum<float,nChan>()(result, rowResult, *kvp++);
69             p += rowskip;
70         }
71     }
72 
73     // apply to 1..4 channels (unrolled channel loop) w/ pixel stride
74     template<class T, int nChan>
ApplyS(PtexSeparableKernel & k,float * result,void * data,int,int nTxChan)75     void ApplyS(PtexSeparableKernel& k, float* result, void* data, int /*nChan*/, int nTxChan)
76     {
77         float* rowResult = (float*) alloca(nChan*sizeof(float));
78         int rowlen = k.res.u() * nTxChan;
79         int datalen = k.uw * nTxChan;
80         int rowskip = rowlen - datalen;
81         float* kvp = k.kv;
82         T* p = static_cast<T*>(data) + (k.v * k.res.u() + k.u) * nTxChan;
83         T* pEnd = p + k.vw * rowlen;
84         while (p != pEnd)
85         {
86             float* kup = k.ku;
87             T* pRowEnd = p + datalen;
88             // just mult and copy first element
89             PtexUtils::VecMult<T,nChan>()(rowResult, p, *kup++);
90             p += nTxChan;
91             // accumulate remaining elements
92             while (p != pRowEnd) {
93                 // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
94                 PtexUtils::VecAccum<T,nChan>()(rowResult, p, *kup++);
95                 p += nTxChan;
96             }
97             // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
98             PtexUtils::VecAccum<float,nChan>()(result, rowResult, *kvp++);
99             p += rowskip;
100         }
101     }
102 
103     // apply to N channels (general case)
104     template<class T>
ApplyN(PtexSeparableKernel & k,float * result,void * data,int nChan,int nTxChan)105     void ApplyN(PtexSeparableKernel& k, float* result, void* data, int nChan, int nTxChan)
106     {
107         float* rowResult = (float*) alloca(nChan*sizeof(float));
108         int rowlen = k.res.u() * nTxChan;
109         int datalen = k.uw * nTxChan;
110         int rowskip = rowlen - datalen;
111         float* kvp = k.kv;
112         T* p = static_cast<T*>(data) + (k.v * k.res.u() + k.u) * nTxChan;
113         T* pEnd = p + k.vw * rowlen;
114         while (p != pEnd)
115         {
116             float* kup = k.ku;
117             T* pRowEnd = p + datalen;
118             // just mult and copy first element
119             PtexUtils::VecMultN<T>()(rowResult, p, nChan, *kup++);
120             p += nTxChan;
121             // accumulate remaining elements
122             while (p != pRowEnd) {
123                 // rowResult[i] = p[i] * ku[u] for i in {0..n-1}
124                 PtexUtils::VecAccumN<T>()(rowResult, p, nChan, *kup++);
125                 p += nTxChan;
126             }
127             // result[i] += rowResult[i] * kv[v] for i in {0..n-1}
128             PtexUtils::VecAccumN<float>()(result, rowResult, nChan, *kvp++);
129             p += rowskip;
130         }
131     }
132 }
133 
134 
135 
// Dispatch table of kernel-apply functions: 40 entries laid out as
// 2 groups x 5 rows x 4 columns.
//
//   group  : first 20 entries use the packed Apply variants, the second 20
//            use the strided ApplyS variants
//   row    : row 0 of each group is the general run-time-channel-count
//            ApplyN; rows 1..4 are the versions unrolled for 1..4 channels
//   column : texel type, in the order uint8_t, uint16_t, PtexHalf, float
//
// ApplyN appears in both groups because it always steps by nTxChan and so
// handles either layout.
//
// NOTE(review): the expression that indexes this table is not in this file;
// the layout suggests index = group*20 + row*4 + dataType -- confirm against
// the caller before reordering any entry.
PtexSeparableKernel::ApplyFn
PtexSeparableKernel::applyFunctions[] = {
    // group 0: nChan == nTxChan (packed texels)
    ApplyN<uint8_t>,  ApplyN<uint16_t>,  ApplyN<PtexHalf>,  ApplyN<float>,
    Apply<uint8_t,1>, Apply<uint16_t,1>, Apply<PtexHalf,1>, Apply<float,1>,
    Apply<uint8_t,2>, Apply<uint16_t,2>, Apply<PtexHalf,2>, Apply<float,2>,
    Apply<uint8_t,3>, Apply<uint16_t,3>, Apply<PtexHalf,3>, Apply<float,3>,
    Apply<uint8_t,4>, Apply<uint16_t,4>, Apply<PtexHalf,4>, Apply<float,4>,

    // group 1: nChan != nTxChan (need pixel stride)
    ApplyN<uint8_t>,   ApplyN<uint16_t>,   ApplyN<PtexHalf>,   ApplyN<float>,
    ApplyS<uint8_t,1>, ApplyS<uint16_t,1>, ApplyS<PtexHalf,1>, ApplyS<float,1>,
    ApplyS<uint8_t,2>, ApplyS<uint16_t,2>, ApplyS<PtexHalf,2>, ApplyS<float,2>,
    ApplyS<uint8_t,3>, ApplyS<uint16_t,3>, ApplyS<PtexHalf,3>, ApplyS<float,3>,
    ApplyS<uint8_t,4>, ApplyS<uint16_t,4>, ApplyS<PtexHalf,4>, ApplyS<float,4>,
};
152 
153 PTEX_NAMESPACE_END
154