1 //
2 // Test_maxdot.cpp
3 // BulletTest
4 //
5 // Copyright (c) 2011 Apple Inc.
6 //
7
8 #include "LinearMath/btScalar.h"
9 #if defined(BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
10
11 #include "Test_maxdot.h"
12 #include "vector.h"
13 #include "Utils.h"
14 #include "main.h"
15 #include <math.h>
16 #include <string.h>
17
18 #include <LinearMath/btVector3.h>
19
// Reference code for testing purposes: a scalar implementation of maxDot whose
// result is compared against btVector3::maxDot(). It is defined at the bottom
// of this file.
static long maxdot_ref(const btSimdFloat4 *vertices,
					   float *vec,
					   size_t count,
					   float *dotResult);

// MAX_LOG2_SIZE is log2 of the largest vertex count the test exercises.
// A smaller maximum is used when __arm__ is defined (the reason is not stated
// here; presumably to bound the run time on those targets -- confirm).
#ifdef __arm__
#define MAX_LOG2_SIZE 9
#else
#define MAX_LOG2_SIZE 10
#endif
// Largest vertex count tested; also the element count of the test array.
#define MAX_SIZE (1U << MAX_LOG2_SIZE)
// Number of back-to-back calls made inside each timed sample.
#define LOOPCOUNT 10
33
Test_maxdot(void)34 int Test_maxdot(void)
35 {
36 // Init an array flanked by guard pages
37 btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL);
38 float *fp = (float *)data;
39 long correct, test;
40 btVector3 localScaling(0.1f, 0.2f, 0.3f);
41 size_t size;
42
43 // Init the data
44 size_t i;
45 for (i = 0; i < MAX_SIZE; i++)
46 {
47 fp[4 * i] = (int32_t)RANDF_16;
48 fp[4 * i + 1] = (int32_t)RANDF_16;
49 fp[4 * i + 2] = (int32_t)RANDF_16;
50 fp[4 * i + 3] = BT_NAN; // w channel NaN
51 }
52
53 float correctDot, testDot;
54 fp = (float *)localScaling;
55 float maxRelativeError = 0.f;
56
57 for (size = 1; size <= MAX_SIZE; size++)
58 {
59 float *in = (float *)(data + MAX_SIZE - size);
60 size_t position;
61
62 for (position = 0; position < size; position++)
63 {
64 float *biggest = in + position * 4;
65 float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]};
66 biggest[0] += LARGE_FLOAT17;
67 biggest[1] += LARGE_FLOAT17;
68 biggest[2] += LARGE_FLOAT17;
69 biggest[3] += LARGE_FLOAT17;
70
71 correctDot = BT_NAN;
72 testDot = BT_NAN;
73 correct = maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
74 test = localScaling.maxDot((btVector3 *)in, size, testDot);
75 if (test < 0 || test >= size)
76 {
77 vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
78 continue;
79 }
80 if (correct != test)
81 {
82 vlog("Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test,
83 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
84 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
85 return 1;
86 }
87 if (test != position)
88 {
89 vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
90 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2],
91 fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]);
92 return 1;
93 }
94
95 if (correctDot != testDot)
96 {
97 float relativeError = btFabs((testDot - correctDot) / correctDot);
98 if (relativeError > 1e-6)
99 {
100 vlog("Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
101 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
102 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
103 return 1;
104 }
105 else
106 {
107 if (maxRelativeError < relativeError)
108 {
109 maxRelativeError = relativeError;
110 #ifdef VERBOSE_WARNING
111 sprintf(errStr, "Warning @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot,
112 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
113 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
114 #endif //VERBOSE_WARNING
115 }
116 }
117 }
118
119 memcpy(biggest, old, 16);
120 }
121 }
122
123 if (maxRelativeError)
124 {
125 printf("Warning: relative error = %e\n", maxRelativeError);
126 #ifdef VERBOSE_WARNING
127 vlog(errStr);
128 #endif
129 }
130
131 uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)];
132 uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)];
133 size_t j, k;
134 float *in = (float *)data;
135 for (size = 1; size <= 32; size++)
136 {
137 uint64_t startTime, bestTime, currentTime;
138
139 bestTime = -1LL;
140 scalarTimes[size] = 0;
141 for (j = 0; j < 100; j++)
142 {
143 startTime = ReadTicks();
144 for (k = 0; k < LOOPCOUNT; k++)
145 correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
146 currentTime = ReadTicks() - startTime;
147 scalarTimes[size] += currentTime;
148 if (currentTime < bestTime)
149 bestTime = currentTime;
150 }
151 if (0 == gReportAverageTimes)
152 scalarTimes[size] = bestTime;
153 else
154 scalarTimes[size] /= 100;
155 }
156
157 uint64_t *timep = &scalarTimes[33];
158 for (size = 64; size <= MAX_SIZE; size *= 2)
159 {
160 uint64_t startTime, bestTime, currentTime;
161
162 bestTime = -1LL;
163 timep[0] = 0;
164 for (j = 0; j < 100; j++)
165 {
166 startTime = ReadTicks();
167 for (k = 0; k < LOOPCOUNT; k++)
168 correct += maxdot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
169 currentTime = ReadTicks() - startTime;
170 timep[0] += currentTime;
171 if (currentTime < bestTime)
172 bestTime = currentTime;
173 }
174 if (0 == gReportAverageTimes)
175 timep[0] = bestTime;
176 else
177 timep[0] /= 100;
178
179 timep++;
180 }
181
182 for (size = 1; size <= 32; size++)
183 {
184 uint64_t startTime, bestTime, currentTime;
185
186 bestTime = -1LL;
187 vectorTimes[size] = 0;
188 for (j = 0; j < 100; j++)
189 {
190 startTime = ReadTicks();
191 for (k = 0; k < LOOPCOUNT; k++)
192 test += localScaling.maxDot((btVector3 *)in, size, testDot);
193 currentTime = ReadTicks() - startTime;
194 vectorTimes[size] += currentTime;
195 if (currentTime < bestTime)
196 bestTime = currentTime;
197 }
198 if (0 == gReportAverageTimes)
199 vectorTimes[size] = bestTime;
200 else
201 vectorTimes[size] /= 100;
202 }
203
204 timep = &vectorTimes[33];
205 for (size = 64; size <= MAX_SIZE; size *= 2)
206 {
207 uint64_t startTime, bestTime, currentTime;
208
209 bestTime = -1LL;
210 timep[0] = 0;
211 for (j = 0; j < 100; j++)
212 {
213 startTime = ReadTicks();
214 for (k = 0; k < LOOPCOUNT; k++)
215 test += localScaling.maxDot((btVector3 *)in, size, testDot);
216 currentTime = ReadTicks() - startTime;
217 timep[0] += currentTime;
218 if (currentTime < bestTime)
219 bestTime = currentTime;
220 }
221 if (0 == gReportAverageTimes)
222 timep[0] = bestTime;
223 else
224 timep[0] /= 100;
225
226 timep++;
227 }
228
229 vlog("Timing:\n");
230 vlog(" size\t scalar\t vector\n");
231 for (size = 1; size <= 32; size++)
232 vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT);
233 size_t index = 33;
234 for (size = 64; size <= MAX_SIZE; size *= 2)
235 {
236 vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT);
237 index++;
238 }
239
240 // Useless check to make sure that the timing loops are not optimized away
241 if (test != correct)
242 vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test);
243
244 GuardFree(data);
245
246 return 0;
247 }
248
maxdot_ref(const btSimdFloat4 * vertices,float * vec,size_t count,float * dotResult)249 static long maxdot_ref(const btSimdFloat4 *vertices,
250 float *vec,
251 size_t count,
252 float *dotResult)
253 {
254 const float *dp = (const float *)vertices;
255 float maxDot = -BT_INFINITY;
256 long i = 0;
257 long ptIndex = -1;
258
259 for (i = 0; i < count; i++)
260 {
261 float dot = vec[0] * dp[0] + vec[1] * dp[1] + vec[2] * dp[2];
262 dp += 4;
263
264 if (dot > maxDot)
265 {
266 maxDot = dot;
267 ptIndex = i;
268 }
269 }
270
271 *dotResult = maxDot;
272
273 return ptIndex;
274 }
275
276 #endif //BT_USE_SSE
277