1 /*
2 Copyright (c) 1990-2007 Info-ZIP. All rights reserved.
3
4 See the accompanying file LICENSE, version 2000-Apr-09 or later
5 (the contents of which are also included in zip.h) for terms of use.
6 If, for some reason, all these files are missing, the Info-ZIP license
7 also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html
8 */
9 /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
10 * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
11 * Last revised: 07-Jan-2007
12 *
13 * Original coded (in crc_i386.asm) and put into the public domain
14 * by Paul Kienitz and Christian Spieler.
15 *
16 * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
17 * fixed to assemble with masm by not using .model directive which makes
18 * assumptions about segment alignment. Also,
19 * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather
20 * than lodsb, and other misc. changes resulting in the following performance
21 * increases:
22 *
23 * unrolled loops NO_UNROLLED_LOOPS
24 * *8 >8 <8 *8 >8 <8
25 *
26 * +54% +42% +35% +82% +52% +25%
27 *
28 * first item in each table is input buffer length, even multiple of 8
29 * second item in each table is input buffer length, > 8
30 * third item in each table is input buffer length, < 8
31 *
32 * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
33 * Incorporated Rodney Brown's 32-bit-reads optimization as found in the
34 * UNIX AS source crc_i386.S. This new code can be disabled by defining
35 * the macro symbol NO_32_BIT_LOADS.
36 *
37 * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
38 * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
39 * (like the Pentium Pro, Pentium II, and probably some Pentium clones).
40 * This optimization is controlled by the macro symbol __686 and is disabled
41 * by default. (This default is based on the assumption that most users
42 * do not yet work on a Pentium Pro or Pentium II machine ...)
43 *
44 * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++
45 * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic,
46 * confirmed correct working with MS VC++ (32-bit).
47 *
48 * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke
49 * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its
50 * own __asm {...} construct. For MSVC, a "#pragma warning" was added to
51 * shut up the "no return value" warning message.
52 *
53 * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file.
54 *
55 * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier
56 * switching between ASM vs. non-ASM builds, when handling makefiles.
57 * Also enabled the 686 build by default, because there are hardly any
58 * pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.)
59 *
60 * Revised 03-Jan-2006, Chr. Spieler
61 * Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
62 * data buffer in loop body (adjust pointer only once in loop body and use
63 * offsets to access each item); added additional support for the "unfolded
64 * tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
65 *
66 * Revised 07-Jan-2007, Chr. Spieler
67 * Recognize additional conditional flag CRC_TABLE_ONLY that prevents
68 * compilation of the crc32() function.
69 *
70 * FLAT memory model assumed.
71 *
72 * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
73 * This results in shorter code at the expense of reduced performance.
74 *
75 */
76
77 #include "../zip.h"
78 #include "../crc32.h"
79
80 #if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
81
/* The 686-optimized code path is the default; define PRE_686 to opt out. */
#if !defined(PRE_686) && !defined(__686)
#  define __686
#endif

/* Fallback in case the included headers did not provide ZCONST. */
#ifndef ZCONST
#  define ZCONST const
#endif

/* Select whether the following inline-assembler code is supported. */
#if (defined(_MSC_VER) && _MSC_VER >= 700)
#if (defined(_M_IX86) && _M_IX86 >= 300)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
   /* Disable warning for no return value, typical of asm functions */
#  pragma warning( disable : 4035 )
#endif
#endif

#if (defined(__BORLANDC__) && __BORLANDC__ >= 452)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
#endif
102
103 #ifdef MSC_INLINE_ASM_32BIT_SUPPORT
104 /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */
105
/*
 * These two (three) macros make up the loop body of the CRC32 cruncher.
 *  registers modified:
 *    eax  : crc value "c"
 *    esi  : pointer to next data byte (or dword) "buf++"
 *  registers read:
 *    edi  : pointer to base of crc_table array
 *  scratch registers:
 *    ebx  : index into crc_table array
 *           (requires upper three bytes = 0 when __686 is undefined)
 *
 * Do_CRC folds the low byte of eax through the table:
 *    eax = crc_table[eax & 0xff] ^ (eax >> 8)
 * Without __686 only bl is written ("mov bl, al"), which is why the upper
 * three bytes of ebx have to be zero already.  With __686 the index is
 * zero-extended by "movzx", so no precondition on ebx exists.
 *
 * Each instruction sits in its own __asm {...} construct, as described in
 * the 22-May-98 revision note at the top of this file.
 */
#ifndef __686
#define Do_CRC { \
  __asm { mov   bl, al }; \
  __asm { shr   eax, 8 }; \
  __asm { xor   eax, [edi+ebx*4] }; }
#else /* __686 */
#define Do_CRC { \
  __asm { movzx ebx, al }; \
  __asm { shr   eax, 8 }; \
  __asm { xor   eax, [edi+ebx*4] }; }
#endif /* ?__686 */
128
/*
 * Do_CRC_byte: xor the byte at [esi] into the low byte of the crc (al),
 * advance esi by one, then run one Do_CRC table step.
 */
#define Do_CRC_byte { \
  __asm { xor   al, byte ptr [esi] }; \
  __asm { inc   esi }; \
  Do_CRC; }

/*
 * Do_CRC_byteof(ofs): same as Do_CRC_byte, but reads the byte at [esi+ofs]
 * and leaves esi untouched; the caller adjusts esi once after a whole
 * group of bytes has been processed.
 */
#define Do_CRC_byteof(ofs) { \
  __asm { xor   al, byte ptr [esi+(ofs)] }; \
  Do_CRC; }
137
/*
 * Loop bodies that consume the input one dword (4 bytes) at a time.
 *
 *  SavLen                : place where crc32() parks the remaining length
 *                          while ecx is used as loop counter.
 *  UpdCRC_dword          : push the 4 data bytes already xor-ed into eax
 *                          through the crc table(s); esi is not changed.
 *  UpdCRC_dword_sh(n)    : same, and additionally advances esi by n dwords
 *                          (4*n bytes) in the middle of the sequence.
 *  Do_CRC_dword          : xor one dword from [esi] into eax, update the
 *                          crc and advance esi by 4.
 *  Do_CRC_4dword         : process the 16 bytes at [esi]..[esi+15] and
 *                          advance esi by 16 once at the end.
 *
 * With IZ_CRCOPTIM_UNFOLDTBL the update uses four table lookups per dword
 * at byte offsets 0, 1024, 2048 and 3072 from edi (presumably four
 * consecutive 256-entry dword tables -- confirm against the table
 * generator) and accumulates the result in edx.  Because edx is busy in
 * that variant, SavLen is the "len" argument of crc32() instead of edx.
 *
 * When NO_32_BIT_LOADS is defined none of the dword macros are needed, but
 * the unrolled byte loop in crc32() still uses SavLen, so it is defined
 * for that configuration as well.
 */
#ifndef NO_32_BIT_LOADS
#ifdef IZ_CRCOPTIM_UNFOLDTBL
#  define SavLen  len            /* the edx register is needed elsewhere */
#  define UpdCRC_dword { \
  __asm { movzx   ebx,al }; \
  __asm { mov     edx,[edi+ebx*4+3072] }; \
  __asm { movzx   ebx,ah }; \
  __asm { shr     eax,16 }; \
  __asm { xor     edx,[edi+ebx*4+2048] }; \
  __asm { movzx   ebx,al }; \
  __asm { shr     eax,8 }; \
  __asm { xor     edx,[edi+ebx*4+1024] }; \
  __asm { mov     eax,[edi+eax*4] }; \
  __asm { xor     eax,edx }; }
#  define UpdCRC_dword_sh(dwPtrIncr) { \
  __asm { movzx   ebx,al }; \
  __asm { mov     edx,[edi+ebx*4+3072] }; \
  __asm { movzx   ebx,ah }; \
  __asm { xor     edx,[edi+ebx*4+2048] }; \
  __asm { shr     eax,16 }; \
  __asm { movzx   ebx,al }; \
  __asm { add     esi, 4*(dwPtrIncr) }; \
  __asm { shr     eax,8 }; \
  __asm { xor     edx,[edi+ebx*4+1024] }; \
  __asm { mov     eax,[edi+eax*4] }; \
  __asm { xor     eax,edx }; }
#else /* !IZ_CRCOPTIM_UNFOLDTBL */
#  define SavLen  edx            /* the edx register is free for use here */
#  define UpdCRC_dword { \
    Do_CRC; \
    Do_CRC; \
    Do_CRC; \
    Do_CRC; }
#  define UpdCRC_dword_sh(dwPtrIncr) { \
    Do_CRC; \
    Do_CRC; \
    __asm { add   esi, 4*(dwPtrIncr) }; \
    Do_CRC; \
    Do_CRC; }
#endif /* ?IZ_CRCOPTIM_UNFOLDTBL */

#define Do_CRC_dword { \
  __asm { xor   eax, dword ptr [esi] }; \
  UpdCRC_dword_sh(1); }

#define Do_CRC_4dword { \
  __asm { xor   eax, dword ptr [esi] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+4] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+8] }; \
  UpdCRC_dword; \
  __asm { xor   eax, dword ptr [esi+12] }; \
  UpdCRC_dword_sh(4); }
#else /* NO_32_BIT_LOADS */
#  define SavLen  edx            /* the edx register is free for use here */
#endif /* ?NO_32_BIT_LOADS */
193
194 /* ========================================================================= */
crc32(crc,buf,len)195 ulg crc32(crc, buf, len)
196 ulg crc; /* crc shift register */
197 ZCONST uch *buf; /* pointer to bytes to pump through */
198 extent len; /* number of bytes in buf[] */
199 /* Run a set of bytes through the crc shift register. If buf is a NULL
200 pointer, then initialize the crc shift register contents instead.
201 Return the current crc in either case. */
202 {
203 __asm {
204 push edx
205 push ecx
206
207 mov esi,buf ;/* 2nd arg: uch *buf */
208 sub eax,eax ;/*> if (!buf) */
209 test esi,esi ;/*> return 0; */
210 jz fine ;/*> else { */
211
212 call get_crc_table
213 mov edi,eax
214 mov eax,crc ;/* 1st arg: ulg crc */
215 #ifndef __686
216 sub ebx,ebx ;/* ebx=0; => bl usable as a dword */
217 #endif
218 mov ecx,len ;/* 3rd arg: extent len */
219 not eax ;/*> c = ~crc; */
220
221 test ecx,ecx
222 #ifndef NO_UNROLLED_LOOPS
223 jz bail
224 # ifndef NO_32_BIT_LOADS
225 align_loop:
226 test esi,3 ;/* align buf pointer on next */
227 jz aligned_now ;/* dword boundary */
228 }
229 Do_CRC_byte ;
230 __asm {
231 dec ecx
232 jnz align_loop
233 aligned_now:
234 # endif /* !NO_32_BIT_LOADS */
235 mov SavLen,ecx ;/* save current len for later */
236 shr ecx,4 ;/* ecx = len / 16 */
237 jz No_Sixteens
238 ; align loop head at start of 486 internal cache line !!
239 align 16
240 Next_Sixteen:
241 }
242 # ifndef NO_32_BIT_LOADS
243 Do_CRC_4dword ;
244 # else /* NO_32_BIT_LOADS */
245 Do_CRC_byteof(0) ;
246 Do_CRC_byteof(1) ;
247 Do_CRC_byteof(2) ;
248 Do_CRC_byteof(3) ;
249 Do_CRC_byteof(4) ;
250 Do_CRC_byteof(5) ;
251 Do_CRC_byteof(6) ;
252 Do_CRC_byteof(7) ;
253 Do_CRC_byteof(8) ;
254 Do_CRC_byteof(9) ;
255 Do_CRC_byteof(10) ;
256 Do_CRC_byteof(11) ;
257 Do_CRC_byteof(12) ;
258 Do_CRC_byteof(13) ;
259 Do_CRC_byteof(14) ;
260 Do_CRC_byteof(15) ;
261 __asm { add esi,16 };
262 # endif /* ?NO_32_BIT_LOADS */
263 __asm {
264 dec ecx
265 jnz Next_Sixteen
266 No_Sixteens:
267 mov ecx,SavLen
268 and ecx,00000000FH ;/* ecx = len % 16 */
269 # ifndef NO_32_BIT_LOADS
270 shr ecx,2
271 jz No_Fours
272 Next_Four:
273 }
274 Do_CRC_dword ;
275 __asm {
276 dec ecx
277 jnz Next_Four
278 No_Fours:
279 mov ecx,SavLen
280 and ecx,000000003H ;/* ecx = len % 4 */
281 # endif /* !NO_32_BIT_LOADS */
282 #endif /* !NO_UNROLLED_LOOPS */
283 jz bail ;/*> if (len) */
284 ; align loop head at start of 486 internal cache line !!
285 align 16
286 loupe: ;/*> do { */
287 }
288 Do_CRC_byte ;/* c = CRC32(c,*buf++,crctab);*/
289 __asm {
290 dec ecx ;/*> } while (--len); */
291 jnz loupe
292
293 bail: ;/*> } */
294 not eax ;/*> return ~c; */
295 fine:
296 pop ecx
297 pop edx
298 }
299 #ifdef NEED_RETURN
300 return _EAX;
301 #endif
302 }
303 #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */
#if (defined(_MSC_VER) && _MSC_VER >= 700)
#if (defined(_M_IX86) && _M_IX86 >= 300)
   /* Re-enable the "no return value" warning (4035) that was switched
      off for the inline-assembler crc32() function. */
#  pragma warning( default : 4035 )
#endif
#endif
310 #endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */
311