1 /*
2 Copyright 2007 nVidia, Inc.
3 Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
4 
5 You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
6 
7 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
8 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 
10 See the License for the specific language governing permissions and limitations under the License.
11 */
12 
13 // Utility and common routines
14 
15 #include "zoh_utils.h"
16 #include "nvmath/vector.inl"
17 #include <math.h>
18 
19 using namespace nv;
20 using namespace ZOH;
21 
// Interpolation weights in 1/64 units (fixed point, 6 fractional bits).
// Entry i is the weight for step i; entry (N - i) is its complement, and each pair sums to 64.

// 8 entries: steps 0..7
static const int denom7_weights_64[] =
{
	 0,  9, 18, 27,
	37, 46, 55, 64
};

// 16 entries: steps 0..15
static const int denom15_weights_64[] =
{
	 0,  4,  9, 13,
	17, 21, 26, 30,
	34, 38, 43, 47,
	51, 55, 60, 64
};
24 
// Storage for the static member Utils::FORMAT. The format-dependent routines in this
// file read it to choose between the UNSIGNED_F16 and SIGNED_F16 code paths.
/*static*/ Format Utils::FORMAT;
26 
lerp(int a,int b,int i,int denom)27 int Utils::lerp(int a, int b, int i, int denom)
28 {
29 	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
30 	nvDebugCheck (i >= 0 && i <= denom);
31 
32 	int round = 32, shift = 6;
33 	const int *weights;
34 
35 	switch(denom)
36 	{
37 	case 3:		denom *= 5; i *= 5;	// fall through to case 15
38 	case 15:	weights = denom15_weights_64; break;
39 	case 7:		weights = denom7_weights_64; break;
40 	default:	nvDebugCheck(0);
41 	}
42 
43 	return (a*weights[denom-i] +b*weights[i] + round) >> shift;
44 }
45 
// Vector form of lerp: blend a and b with the same 1/64 weight tables as the integer version.
//   i      interpolation step, 0..denom (i == 0 yields a, i == denom yields b)
//   denom  number of steps: 3, 7 or 15
// The result is computed in floating point and is not rounded.
Vector3 Utils::lerp(const Vector3& a, const Vector3 &b, int i, int denom)
{
	nvDebugCheck (denom == 3 || denom == 7 || denom == 15);
	nvDebugCheck (i >= 0 && i <= denom);

	const int *table;

	switch (denom)
	{
	case 7:
		table = denom7_weights_64;
		break;

	case 3:
		// three-step interpolation uses every fifth entry of the 15-step table
		i *= 5;
		denom *= 5;
		// fall through to case 15
	case 15:
		table = denom15_weights_64;
		break;

	default:
		nvUnreachable();
	}

	const float weight_a = float(table[denom - i]);
	const float weight_b = float(table[i]);

	// the weights are in 1/64 units; dividing by 64 is exact, so no rounding is needed
	return (a * weight_a + b * weight_b) / float(1 << 6);
}
65 
66 
67 /*
68 	For unsigned f16, clamp the input to [0,F16MAX]. Thus u15.
69 	For signed f16, clamp the input to [-F16MAX,F16MAX]. Thus s16.
70 
71 	The conversions proceed as follows:
72 
73 	unsigned f16: get bits. if high bit set, clamp to 0, else clamp to F16MAX.
74 	signed f16: get bits. extract exp+mantissa and clamp to F16MAX. return -value if sign bit was set, else value
75 	unsigned int: get bits. return as a positive value.
	signed int: get bits. return as a value in -32768..32767.
77 
78 	The inverse conversions are just the inverse of the above.
79 */
80 
81 // clamp the 3 channels of the input vector to the allowable range based on FORMAT
82 // note that each channel is a float storing the allowable range as a bit pattern converted to float
83 // that is, for unsigned f16 say, we would clamp each channel to the range [0, F16MAX]
84 
clamp(Vector3 & v)85 void Utils::clamp(Vector3 &v)
86 {
87 	for (int i=0; i<3; ++i)
88 	{
89 		switch(Utils::FORMAT)
90 		{
91 		case UNSIGNED_F16:
92 			if (v.component[i] < 0.0) v.component[i] = 0;
93 			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
94 			break;
95 
96 		case SIGNED_F16:
97 			if (v.component[i] < -F16MAX) v.component[i] = -F16MAX;
98 			else if (v.component[i] > F16MAX) v.component[i] = F16MAX;
99 			break;
100 
101 		default:
102 			nvUnreachable();
103 		}
104 	}
105 }
106 
107 // convert a u16 value to s17 (represented as an int) based on the format expected
ushort_to_format(unsigned short input)108 int Utils::ushort_to_format(unsigned short input)
109 {
110 	int out, s;
111 
112 	// clamp to the valid range we are expecting
113 	switch (Utils::FORMAT)
114 	{
115 	case UNSIGNED_F16:
116 		if (input & F16S_MASK) out = 0;
117 		else if (input > F16MAX) out = F16MAX;
118 		else out = input;
119 		break;
120 
121 	case SIGNED_F16:
122 		s = input & F16S_MASK;
123 		input &= F16EM_MASK;
124 		if (input > F16MAX) out = F16MAX;
125 		else out = input;
126 		out = s ? -out : out;
127 		break;
128 	}
129 	return out;
130 }
131 
132 // convert a s17 value to u16 based on the format expected
format_to_ushort(int input)133 unsigned short Utils::format_to_ushort(int input)
134 {
135 	unsigned short out;
136 
137 	// clamp to the valid range we are expecting
138 	switch (Utils::FORMAT)
139 	{
140 	case UNSIGNED_F16:
141 		nvDebugCheck (input >= 0 && input <= F16MAX);
142 		out = input;
143 		break;
144 
145 	case SIGNED_F16:
146 		nvDebugCheck (input >= -F16MAX && input <= F16MAX);
147 		// convert to sign-magnitude
148 		int s;
149 		if (input < 0) { s = F16S_MASK; input = -input; }
150 		else           { s = 0; }
151 		out = s | input;
152 		break;
153 	}
154 	return out;
155 }
156 
157 // quantize the input range into equal-sized bins
quantize(float value,int prec)158 int Utils::quantize(float value, int prec)
159 {
160 	int q, ivalue, s;
161 
162 	nvDebugCheck (prec > 1);	// didn't bother to make it work for 1
163 
164 	value = (float)floor(value + 0.5);
165 
166 	int bias = (prec > 10) ? ((1<<(prec-1))-1) : 0;	// bias precisions 11..16 to get a more accurate quantization
167 
168 	switch (Utils::FORMAT)
169 	{
170 	case UNSIGNED_F16:
171 		nvDebugCheck (value >= 0 && value <= F16MAX);
172 		ivalue = (int)value;
173 		q = ((ivalue << prec) + bias) / (F16MAX+1);
174 		nvDebugCheck (q >= 0 && q < (1 << prec));
175 		break;
176 
177 	case SIGNED_F16:
178 		nvDebugCheck (value >= -F16MAX && value <= F16MAX);
179 		// convert to sign-magnitude
180 		ivalue = (int)value;
181 		if (ivalue < 0) { s = 1; ivalue = -ivalue; } else s = 0;
182 
183 		q = ((ivalue << (prec-1)) + bias) / (F16MAX+1);
184 		if (s)
185 			q = -q;
186 		nvDebugCheck (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
187 		break;
188 	}
189 
190 	return q;
191 }
192 
finish_unquantize(int q,int prec)193 int Utils::finish_unquantize(int q, int prec)
194 {
195 	if (Utils::FORMAT == UNSIGNED_F16)
196 		return (q * 31) >> 6;										// scale the magnitude by 31/64
197 	else if (Utils::FORMAT == SIGNED_F16)
198 		return (q < 0) ? -(((-q) * 31) >> 5) : (q * 31) >> 5;		// scale the magnitude by 31/32
199 	else
200 		return q;
201 }
202 
203 // unquantize each bin to midpoint of original bin range, except
204 // for the end bins which we push to an endpoint of the bin range.
205 // we do this to ensure we can represent all possible original values.
206 // the asymmetric end bins do not affect PSNR for the test images.
207 //
208 // code this function assuming an arbitrary bit pattern as the encoded block
unquantize(int q,int prec)209 int Utils::unquantize(int q, int prec)
210 {
211 	int unq, s;
212 
213 	nvDebugCheck (prec > 1);	// not implemented for prec 1
214 
215 	switch (Utils::FORMAT)
216 	{
217 	// modify this case to move the multiplication by 31 after interpolation.
218 	// Need to use finish_unquantize.
219 
220 	// since we have 16 bits available, let's unquantize this to 16 bits unsigned
221 	// thus the scale factor is [0-7c00)/[0-10000) = 31/64
222 	case UNSIGNED_F16:
223 		if (prec >= 15)
224 			unq = q;
225 		else if (q == 0)
226 			unq = 0;
227 		else if (q == ((1<<prec)-1))
228 			unq = U16MAX;
229 		else
230 			unq = (q * (U16MAX+1) + (U16MAX+1)/2) >> prec;
231 		break;
232 
233 	// here, let's stick with S16 (no apparent quality benefit from going to S17)
234 	// range is (-7c00..7c00)/(-8000..8000) = 31/32
235 	case SIGNED_F16:
236 		// don't remove this test even though it appears equivalent to the code below
237 		// as it isn't -- the code below can overflow for prec = 16
238 		if (prec >= 16)
239 			unq = q;
240 		else
241 		{
242 			if (q < 0) { s = 1; q = -q; } else s = 0;
243 
244 			if (q == 0)
245 				unq = 0;
246 			else if (q >= ((1<<(prec-1))-1))
247 				unq = s ? -S16MAX : S16MAX;
248 			else
249 			{
250 				unq = (q * (S16MAX+1) + (S16MAX+1)/2) >> (prec-1);
251 				if (s)
252 					unq = -unq;
253 			}
254 		}
255 		break;
256 	}
257 	return unq;
258 }
259 
260 
261 
262 // pick a norm!
263 #define	NORM_EUCLIDEAN 1
264 
norm(const Vector3 & a,const Vector3 & b)265 float Utils::norm(const Vector3 &a, const Vector3 &b)
266 {
267 #ifdef	NORM_EUCLIDEAN
268 	return lengthSquared(a - b);
269 #endif
270 #ifdef	NORM_ABS
271 	Vector3 err = a - b;
272 	return fabs(err.x) + fabs(err.y) + fabs(err.z);
273 #endif
274 }
275 
276 // parse <name>[<start>{:<end>}]{,}
277 // the pointer starts here         ^
278 // name is 1 or 2 chars and matches field names. start and end are decimal numbers
parse(const char * encoding,int & ptr,Field & field,int & endbit,int & len)279 void Utils::parse(const char *encoding, int &ptr, Field &field, int &endbit, int &len)
280 {
281 	if (ptr <= 0) return;
282 	--ptr;
283 	if (encoding[ptr] == ',') --ptr;
284 	nvDebugCheck (encoding[ptr] == ']');
285 	--ptr;
286 	endbit = 0;
287 	int scale = 1;
288 	while (encoding[ptr] != ':' && encoding[ptr] != '[')
289 	{
290 		nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
291 		endbit += (encoding[ptr--] - '0') * scale;
292 		scale *= 10;
293 	}
294 	int startbit = 0; scale = 1;
295 	if (encoding[ptr] == '[')
296 		startbit = endbit;
297 	else
298 	{
299 		ptr--;
300 		while (encoding[ptr] != '[')
301 		{
302 			nvDebugCheck(encoding[ptr] >= '0' && encoding[ptr] <= '9');
303 			startbit += (encoding[ptr--] - '0') * scale;
304 			scale *= 10;
305 		}
306 	}
307 	len = startbit - endbit + 1;	// startbit>=endbit note
308 	--ptr;
309 	if (encoding[ptr] == 'm')		field = FIELD_M;
310 	else if (encoding[ptr] == 'd')	field = FIELD_D;
311 	else {
312 		// it's wxyz
313 		nvDebugCheck (encoding[ptr] >= 'w' && encoding[ptr] <= 'z');
314 		int foo = encoding[ptr--] - 'w';
315 		// now it is r g or b
316 		if (encoding[ptr] == 'r')		foo += 10;
317 		else if (encoding[ptr] == 'g')	foo += 20;
318 		else if (encoding[ptr] == 'b')	foo += 30;
319 		else nvDebugCheck(0);
320 		field = (Field) foo;
321 	}
322 }
323 
324 
325