1 /* inline-strcmp-dword.c */
2
3 #include "ufdb.h"
4
5 #include <emmintrin.h>
6
7 #ifdef __cplusplus
8 extern "C" {
9 #endif
10
11
/*
 * __ufdb_strcmp_dword - compare two NUL-terminated strings.
 *
 * cs, ct: the strings to compare; both must be valid NUL-terminated
 *         strings (NULL is not accepted).
 *
 * Returns 0 when the strings are equal.  Otherwise returns the difference
 * between the first pair of differing bytes, each taken as unsigned char:
 * negative when cs sorts before ct, positive when it sorts after.
 *
 * This replaces a hand-written inline-assembly loop that could not be
 * built or trusted: it referenced an undeclared XMM operand variable,
 * branched on flags without any preceding compare instruction, modified
 * an input-only asm operand, and loaded 16 bytes at a time regardless of
 * where the strings ended.  The portable loop below never reads beyond
 * the terminating NUL of either string.
 */
static __inline__ int __ufdb_strcmp_dword(
   const char * cs,
   const char * ct )
{
   /* Compare as unsigned bytes so that characters >= 0x80 order after ASCII. */
   const unsigned char * a = (const unsigned char *) cs;
   const unsigned char * b = (const unsigned char *) ct;

   /*
    * Advance while the bytes match and cs has not ended.  If ct ends first,
    * *a != *b and the loop stops, so no separate test of *b is needed.
    */
   while (*a != '\0'  &&  *a == *b)
   {
      a++;
      b++;
   }

   return (int) *a - (int) *b;
}
63
64
65 #ifdef __cplusplus
66 }
67 #endif
68