1 //
2 //  Test_maxdot.cpp
3 //  BulletTest
4 //
5 //  Copyright (c) 2011 Apple Inc.
6 //
7 
8 #include "LinearMath/btScalar.h"
9 #if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
10 
11 #include "Test_maxdot.h"
12 #include "vector.h"
13 #include "Utils.h"
14 #include "main.h"
15 #include <math.h>
16 #include <string.h>
17 
18 #include <LinearMath/btVector3.h>
19 
// reference code for testing purposes
// Scalar implementation of the "index of the largest dot product" search. Test_maxdot
// compares the result of btVector3::maxDot against it; the definition is at the end of this file.
static long maxdot_ref(const btSimdFloat4 *vertices,
					   float *vec,
					   size_t count,
					   float *dotResult);

// MAX_LOG2_SIZE is log2 of the largest vertex count exercised by the test.
// NOTE(review): the smaller value on __arm__ presumably keeps the run time down on
// slower devices -- confirm.
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 10
#endif
// Largest vertex count tested; also the number of 4-float vertices in the test buffer.
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Number of back-to-back calls made inside each timed sample.
#define LOOPCOUNT 10
33 
Test_maxdot(void)34 int Test_maxdot(void)
35 {
36 	// Init an array flanked by guard pages
37 	btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL);
38 	float *fp = (float *)data;
39 	long correct, test;
40 	btVector3 localScaling(0.1f, 0.2f, 0.3f);
41 	size_t size;
42 
43 	// Init the data
44 	size_t i;
45 	for (i = 0; i < MAX_SIZE; i++)
46 	{
47 		fp[4 * i] = (int32_t)RANDF_16;
48 		fp[4 * i + 1] = (int32_t)RANDF_16;
49 		fp[4 * i + 2] = (int32_t)RANDF_16;
50 		fp[4 * i + 3] = BT_NAN;  // w channel NaN
51 	}
52 
53 	float correctDot, testDot;
54 	fp = (float *)localScaling;
55 	float maxRelativeError = 0.f;
56 
57 	for (size = 1; size <= MAX_SIZE; size++)
58 	{
59 		float *in = (float *)(data + MAX_SIZE - size);
60 		size_t position;
61 
62 		for (position = 0; position < size; position++)
63 		{
64 			float *biggest = in + position * 4;
65 			float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]};
66 			biggest[0] += LARGE_FLOAT17;
67 			biggest[1] += LARGE_FLOAT17;
68 			biggest[2] += LARGE_FLOAT17;
69 			biggest[3] += LARGE_FLOAT17;
70 
71 			correctDot = BT_NAN;
72 			testDot = BT_NAN;
73 			correct = maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
74 			test = localScaling.maxDot((btVector3 *)in, size, testDot);
75 			if (test < 0 || test >= size)
76 			{
77 				vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
78 				continue;
79 			}
80 			if (correct != test)
81 			{
82 				vlog("Error @ %ld: index misreported! *%ld vs %ld  (*%f, %f)\n", size, correct, test,
83 					 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
84 					 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
85 				return 1;
86 			}
87 			if (test != position)
88 			{
89 				vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
90 					 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2],
91 					 fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]);
92 				return 1;
93 			}
94 
95 			if (correctDot != testDot)
96 			{
97 				float relativeError = btFabs((testDot - correctDot) / correctDot);
98 				if (relativeError > 1e-6)
99 				{
100 					vlog("Error @ %ld: dotpr misreported! *%f vs %f    (*%f, %f)\n", size, correctDot, testDot,
101 						 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
102 						 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
103 					return 1;
104 				}
105 				else
106 				{
107 					if (maxRelativeError < relativeError)
108 					{
109 						maxRelativeError = relativeError;
110 #ifdef VERBOSE_WARNING
111 						sprintf(errStr, "Warning @ %ld: dotpr misreported! *%f vs %f    (*%f, %f)\n", size, correctDot, testDot,
112 								fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
113 								fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
114 #endif  //VERBOSE_WARNING
115 					}
116 				}
117 			}
118 
119 			memcpy(biggest, old, 16);
120 		}
121 	}
122 
123 	if (maxRelativeError)
124 	{
125 		printf("Warning: relative error = %e\n", maxRelativeError);
126 #ifdef VERBOSE_WARNING
127 		vlog(errStr);
128 #endif
129 	}
130 
131 	uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)];
132 	uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)];
133 	size_t j, k;
134 	float *in = (float *)data;
135 	for (size = 1; size <= 32; size++)
136 	{
137 		uint64_t startTime, bestTime, currentTime;
138 
139 		bestTime = -1LL;
140 		scalarTimes[size] = 0;
141 		for (j = 0; j < 100; j++)
142 		{
143 			startTime = ReadTicks();
144 			for (k = 0; k < LOOPCOUNT; k++)
145 				correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
146 			currentTime = ReadTicks() - startTime;
147 			scalarTimes[size] += currentTime;
148 			if (currentTime < bestTime)
149 				bestTime = currentTime;
150 		}
151 		if (0 == gReportAverageTimes)
152 			scalarTimes[size] = bestTime;
153 		else
154 			scalarTimes[size] /= 100;
155 	}
156 
157 	uint64_t *timep = &scalarTimes[33];
158 	for (size = 64; size <= MAX_SIZE; size *= 2)
159 	{
160 		uint64_t startTime, bestTime, currentTime;
161 
162 		bestTime = -1LL;
163 		timep[0] = 0;
164 		for (j = 0; j < 100; j++)
165 		{
166 			startTime = ReadTicks();
167 			for (k = 0; k < LOOPCOUNT; k++)
168 				correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
169 			currentTime = ReadTicks() - startTime;
170 			timep[0] += currentTime;
171 			if (currentTime < bestTime)
172 				bestTime = currentTime;
173 		}
174 		if (0 == gReportAverageTimes)
175 			timep[0] = bestTime;
176 		else
177 			timep[0] /= 100;
178 
179 		timep++;
180 	}
181 
182 	for (size = 1; size <= 32; size++)
183 	{
184 		uint64_t startTime, bestTime, currentTime;
185 
186 		bestTime = -1LL;
187 		vectorTimes[size] = 0;
188 		for (j = 0; j < 100; j++)
189 		{
190 			startTime = ReadTicks();
191 			for (k = 0; k < LOOPCOUNT; k++)
192 				test += localScaling.maxDot((btVector3 *)in, size, testDot);
193 			currentTime = ReadTicks() - startTime;
194 			vectorTimes[size] += currentTime;
195 			if (currentTime < bestTime)
196 				bestTime = currentTime;
197 		}
198 		if (0 == gReportAverageTimes)
199 			vectorTimes[size] = bestTime;
200 		else
201 			vectorTimes[size] /= 100;
202 	}
203 
204 	timep = &vectorTimes[33];
205 	for (size = 64; size <= MAX_SIZE; size *= 2)
206 	{
207 		uint64_t startTime, bestTime, currentTime;
208 
209 		bestTime = -1LL;
210 		timep[0] = 0;
211 		for (j = 0; j < 100; j++)
212 		{
213 			startTime = ReadTicks();
214 			for (k = 0; k < LOOPCOUNT; k++)
215 				test += localScaling.maxDot((btVector3 *)in, size, testDot);
216 			currentTime = ReadTicks() - startTime;
217 			timep[0] += currentTime;
218 			if (currentTime < bestTime)
219 				bestTime = currentTime;
220 		}
221 		if (0 == gReportAverageTimes)
222 			timep[0] = bestTime;
223 		else
224 			timep[0] /= 100;
225 
226 		timep++;
227 	}
228 
229 	vlog("Timing:\n");
230 	vlog(" size\t    scalar\t    vector\n");
231 	for (size = 1; size <= 32; size++)
232 		vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT);
233 	size_t index = 33;
234 	for (size = 64; size <= MAX_SIZE; size *= 2)
235 	{
236 		vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT);
237 		index++;
238 	}
239 
240 	// Useless check to make sure that the timing loops are not optimized away
241 	if (test != correct)
242 		vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test);
243 
244 	GuardFree(data);
245 
246 	return 0;
247 }
248 
maxdot_ref(const btSimdFloat4 * vertices,float * vec,size_t count,float * dotResult)249 static long maxdot_ref(const btSimdFloat4 *vertices,
250 					   float *vec,
251 					   size_t count,
252 					   float *dotResult)
253 {
254 	const float *dp = (const float *)vertices;
255 	float maxDot = -BT_INFINITY;
256 	long i = 0;
257 	long ptIndex = -1;
258 
259 	for (i = 0; i < count; i++)
260 	{
261 		float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2];
262 		dp += 4;
263 
264 		if (dot > maxDot)
265 		{
266 			maxDot = dot;
267 			ptIndex = i;
268 		}
269 	}
270 
271 	*dotResult = maxDot;
272 
273 	return ptIndex;
274 }
275 
#endif  // BT_USE_SSE_IN_API || BT_USE_NEON
277