1 /* inline-strcmp-dword.c */
2 
3 #include "ufdb.h"
4 
5 #include <emmintrin.h>
6 
7 #ifdef __cplusplus
8 extern "C" {
9 #endif
10 
11 
/*
 * Compare two NUL-terminated strings with strcmp() semantics.
 *
 * cs, ct: the strings to compare; both must be non-NULL and NUL-terminated.
 *
 * Returns 0 when the strings are equal.  Otherwise returns the difference
 * between the first pair of differing bytes, each taken as unsigned char:
 * negative when cs sorts before ct, positive when cs sorts after ct.
 *
 * This replaces an inline-assembly body that could not work: it named an
 * XMM output variable that was never declared, it wrote to operands that
 * were declared as inputs only, and it branched on the flags of a pointer
 * "add" because no compare instruction followed the 16-byte load.
 * The loop below reads one byte at a time, so it never touches memory
 * beyond either terminator and does not depend on the target CPU.
 */
static __inline__ int __ufdb_strcmp_dword(
   const char * cs,
   const char * ct )
{
   /* compare as unsigned bytes so that values >= 0x80 order after ASCII */
   const unsigned char * s = (const unsigned char *) cs;
   const unsigned char * t = (const unsigned char *) ct;

   /* advance while the bytes agree and the terminator has not been reached */
   while (*s != '\0'  &&  *s == *t)
   {
      s++;
      t++;
   }

   /* either the first mismatch or both terminators: the difference decides */
   return (int) *s - (int) *t;
}
63 
64 
65 #ifdef __cplusplus
66 }
67 #endif
68