1 /* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set expandtab shiftwidth=4 tabstop=4: */
3
4 #include "modp_ascii.h"
5 #include <stdio.h>
6 #include <time.h>
7 #include <stdint.h>
8 #include "modp_ascii_data.h"
9 #include <ctype.h>
10
11 //extern const char gsToUpperMap[256];
12 /**
13 * This is standard clib implementation of uppercasing a string.
14 * It has an unfair advantage since it's inside the test file
15 * so the optimizer can inline it.
16 */
toupper_copy1(char * dest,const char * str,int len)17 void toupper_copy1(char* dest, const char* str, int len)
18 {
19 int i;
20 for (i = 0; i < len; ++i) {
21 // toupper is defined in <ctype.h>
22 *dest++ = toupper(str[i]);
23 }
24 *dest = 0;
25 }
26
27
28 /**
29 * Skipping ctype, and doing the compare directly
30 *
31 */
toupper_copy2(char * dest,const char * str,int len)32 void toupper_copy2(char* dest, const char* str, int len)
33 {
34 int i;
35 char c;
36 for (i = 0; i < len; ++i) {
37 c = str[i];
38 *dest++ = (c >= 'a' && c <= 'z') ? c : (c -32);
39 }
40 *dest = 0;
41 }
42
43 /**
44 * Sequential table lookup
45 */
toupper_copy3(char * dest,const char * str,int len)46 void toupper_copy3(char* dest, const char* str, int len)
47 {
48 int i;
49 unsigned char c;
50 for (i = 0; i < len; ++i) {
51 c = str[i];
52 *dest++ = gsToUpperMap[c];
53 }
54 *dest = 0;
55 }
56
57 /** \brief toupper Version 4 -- parallel table lookup
58 *
59 *
60 */
toupper_copy4(char * dest,const char * str,int len)61 void toupper_copy4(char* dest, const char* str, int len)
62 {
63 /*
64 * int i;
65 * for (i = 0; i < len; ++i) {
66 * char c = str[i];
67 * *dest++ = (c >= 'a' && c <= 'z') ? c - 32 : c;
68 * }
69 */
70
71 int i;
72 uint8_t c1,c2,c3,c4;
73
74 const int leftover = len % 4;
75 const int imax = len - leftover;
76 const uint8_t* s = (const uint8_t*) str;
77 for (i = 0; i != imax ; i+=4) {
78 /*
79 * it's important to make these variables
80 * it helps the optimizer to figure out what to do
81 */
82 c1 = s[i], c2=s[i+1], c3=s[i+2], c4=s[i+3];
83 dest[0] = gsToUpperMap[c1];
84 dest[1] = gsToUpperMap[c2];
85 dest[2] = gsToUpperMap[c3];
86 dest[3] = gsToUpperMap[c4];
87 dest += 4;
88 }
89
90 switch (leftover) {
91 case 3: *dest++ = gsToUpperMap[s[i++]];
92 case 2: *dest++ = gsToUpperMap[s[i++]];
93 case 1: *dest++ = gsToUpperMap[s[i]];
94 case 0: *dest = '\0';
95 }
96 }
97
98 /** \brief toupper Versions 5 -- hsieh alternate
99 * Based code from Paul Hsieh
100 * http://www.azillionmonkeys.com/qed/asmexample.html
101 *
102 * This was his "improved" version, but it appears to either run just
103 * as fast, or a bit slower than his original version
104 */
toupper_copy5(char * dest,const char * str,int len)105 void toupper_copy5(char* dest, const char* str, int len)
106 {
107 int i;
108 uint32_t eax,ebx,ecx,edx;
109 const uint8_t* ustr = (const uint8_t*) str;
110 const int leftover = len % 4;
111 const int imax = len / 4;
112 const uint32_t* s = (const uint32_t*) str;
113 uint32_t* d = (uint32_t*) dest;
114 for (i = 0; i != imax; ++i) {
115 eax = s[i];
116 ebx = 0x80808080ul | eax;
117 ecx = ebx - 0x61616161ul;
118 edx = ~(ebx - 0x7b7b7b7bul);
119 ebx = (ecx & edx) & (~eax & 0x80808080ul);
120 *d++ = eax - (ebx >> 2);
121 }
122
123 i = imax*4;
124 dest = (char*) d;
125 switch (leftover) {
126 case 3: *dest++ = gsToUpperMap[ustr[i++]];
127 case 2: *dest++ = gsToUpperMap[ustr[i++]];
128 case 1: *dest++ = gsToUpperMap[ustr[i]];
129 case 0: *dest = '\0';
130 }
131 }
132
133 /** \brief ToUpper Version 6 -- Hsieh original, ASM style
134 * Based code from Paul Hsieh
135 * http://www.azillionmonkeys.com/qed/asmexample.html
136 *
137 * This is almost a direct port of the original ASM code, on some
138 * platforms/compilers it does run faster then the "de-asm'ed" version
139 * used in the modp library.
140 *
141 */
toupper_copy6(char * dest,const char * str,int len)142 void toupper_copy6(char* dest, const char* str, int len)
143 {
144 int i=0;
145 uint32_t eax,ebx,ecx,edx;
146 const uint8_t* ustr = (const uint8_t*) str;
147 const int leftover = len % 4;
148 const int imax = len / 4;
149 const uint32_t* s = (const uint32_t*) str;
150 uint32_t* d = (uint32_t*) dest;
151 for (i = 0; i != imax; ++i) {
152 #if 1
153 /*
154 * as close to original asm code as possible
155 */
156 eax = s[i];
157 ebx = 0x7f7f7f7f;
158 edx = 0x7f7f7f7f;
159 ebx = ebx & eax;
160 ebx = ebx + 0x05050505;
161 ecx = eax;
162 ecx = ~ecx;
163 ebx = ebx & edx;
164 ebx = ebx + 0x1a1a1a1a;
165 ebx = ebx & ecx;
166 ebx = ebx >> 2;
167 ebx = ebx & 0x20202020;
168 eax = eax - ebx;
169 *d++ = eax;
170 #else
171 /*
172 * "de-asm'ed" version, this is what is used in the modp library
173 */
174 eax = s[i];
175 ebx = (0x7f7f7f7ful & eax) + 0x05050505ul;
176 ebx = (0x7f7f7f7ful & ebx) + 0x1a1a1a1aul;
177 ebx = ((ebx & ~eax) >> 2 ) & 0x20202020ul;
178 *d++ = eax - ebx;
179 #endif
180 }
181
182 i = imax*4;
183 dest = (char*) d;
184 switch (leftover) {
185 case 3: *dest++ = gsToUpperMap[ustr[i++]];
186 case 2: *dest++ = gsToUpperMap[ustr[i++]];
187 case 1: *dest++ = gsToUpperMap[ustr[i]];
188 case 0: *dest = '\0';
189 }
190 }
191
modp_toupper_copy_a2(char * dest,const char * str,int len)192 void modp_toupper_copy_a2(char* dest, const char* str, int len)
193 {
194 int i=0;
195 uint32_t eax, ebx;
196 const uint8_t* ustr = (const uint8_t*) str;
197 const int leftover = len % 4;
198 const int imax = len / 4;
199 const uint32_t* s = (const uint32_t*) str;
200 uint32_t* d = (uint32_t*) dest;
201 for (i = 0; i != imax; ++i) {
202 eax = s[i];
203
204 ebx = (0x7f7f7f7ful & eax) + 0x05050505ul;
205 ebx = (0x7f7f7f7ful & ebx) + 0x1a1a1a1aul;
206 ebx = ((ebx & ~eax) >> 2 ) & 0x20202020ul;
207 *d++ = eax - ebx;
208 }
209
210 i = imax*4;
211 dest = (char*) d;
212 switch (leftover) {
213 case 3: *dest++ = gsToUpperMap[ustr[i++]];
214 case 2: *dest++ = gsToUpperMap[ustr[i++]];
215 case 1: *dest++ = gsToUpperMap[ustr[i]];
216 case 0: *dest = '\0';
217 }
218 }
219
main()220 int main()
221 {
222 double last = 0.0;
223 size_t i = 0;
224 char buf[256];
225 char obuf[300];
226
227 for (i = 0; i < 256; ++i) {
228 buf[i] = (char)i;
229 }
230
231 uint32_t max = 1000000;
232 clock_t t0, t1;
233 printf("%s", "type\tclib\tdirect\tmap\tpara\thsieh1\thsieh2\tFinal\timprovement\n");
234
235 printf("toupper\t");
236 fflush(stdout);
237
238 /**
239 ** V1
240 **/
241 t0 = clock();
242 for (i = 0; i < max; ++i) {
243 toupper_copy1(obuf, buf, sizeof(buf));
244 }
245 t1 = clock();
246 last = t1 -t0;
247 printf("%lu\t", (unsigned long)(t1-t0));
248 fflush(stdout);
249
250 /**
251 ** V2
252 **/
253 t0 = clock();
254 for (i = 0; i < max; ++i) {
255 toupper_copy2(obuf, buf, sizeof(buf));
256 }
257 t1 = clock();
258 printf("%lu\t", (unsigned long)(t1-t0));
259 fflush(stdout);
260
261 /**
262 ** V3
263 **/
264 t0 = clock();
265 for (i = 0; i < max; ++i) {
266 toupper_copy3(obuf, buf, sizeof(buf));
267 }
268 t1 = clock();
269 printf("%lu\t", (unsigned long)(t1-t0));
270 fflush(stdout);
271
272 /**
273 ** V4 -- Parallel Table Lookup
274 **/
275 t0 = clock();
276 for (i = 0; i < max; ++i) {
277 toupper_copy4(obuf, buf, sizeof(buf));
278 }
279 t1 = clock();
280 printf("%lu\t", (unsigned long)(t1-t0));
281 fflush(stdout);
282
283
284 /**
285 ** V5 -- Hsieh Alternate
286 **/
287 t0 = clock();
288 for (i = 0; i < max; ++i) {
289 toupper_copy5(obuf, buf, sizeof(buf));
290 }
291 t1 = clock();
292 printf("%lu\t", (unsigned long)(t1-t0));
293 fflush(stdout);
294
295 /**
296 ** HSEIH -- asm style
297 **/
298 t0 = clock();
299 for (i = 0; i < max; ++i) {
300 toupper_copy6(obuf, buf, sizeof(buf));
301 }
302 t1 = clock();
303 printf("%lu\t", (unsigned long)(t1-t0));
304 fflush(stdout);
305
306 /**
307 ** MODP FINAL
308 **/
309 t0 = clock();
310 for (i = 0; i < max; ++i) {
311 modp_toupper_copy(obuf, buf, sizeof(buf));
312 }
313 t1 = clock();
314
315 printf("%lu\t", (unsigned long)(t1-t0));
316 fflush(stdout);
317
318 printf("%.1fx\n", last/(t1-t0));
319 fflush(stdout);
320
321 return 0;
322 }
323