1 /*
2   Copyright (c) 1990-2007 Info-ZIP.  All rights reserved.
3 
4   See the accompanying file LICENSE, version 2000-Apr-09 or later
5   (the contents of which are also included in zip.h) for terms of use.
6   If, for some reason, all these files are missing, the Info-ZIP license
7   also may be found at:  ftp://ftp.info-zip.org/pub/infozip/license.html
8 */
9 /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm
10  * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler.
11  * Last revised: 07-Jan-2007
12  *
13  * Original coded (in crc_i386.asm) and put into the public domain
14  * by Paul Kienitz and Christian Spieler.
15  *
16  * Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
17  *   fixed to assemble with masm by not using .model directive which makes
18  *   assumptions about segment alignment.  Also,
19  *   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
20  *   than lodsb, and other misc. changes resulting in the following performance
21  *   increases:
22  *
23  *      unrolled loops                NO_UNROLLED_LOOPS
24  *      *8    >8      <8              *8      >8      <8
25  *
26  *      +54%  +42%    +35%            +82%    +52%    +25%
27  *
28  *   first item in each table is input buffer length, even multiple of 8
29  *   second item in each table is input buffer length, > 8
30  *   third item in each table is input buffer length, < 8
31  *
32  * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
33  *   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
34  *   UNIX AS source crc_i386.S. This new code can be disabled by defining
35  *   the macro symbol NO_32_BIT_LOADS.
36  *
37  * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
38  *   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
39  *   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
40  *   This optimization is controlled by the macro symbol __686 and is disabled
41  *   by default. (This default is based on the assumption that most users
42  *   do not yet work on a Pentium Pro or Pentium II machine ...)
43  *
44  * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++
45  *   32-bit, removed unneeded kludge for potentially unknown movzx mnemonic,
46  *   confirmed correct working with MS VC++ (32-bit).
47  *
48  * Revised 22-May-98, Peter Kunath, Chr. Spieler: The 16-Nov-97 revision broke
49  *   MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its
50  *   own __asm {...} construct.  For MSVC, a "#pragma warning" was added to
51  *   shut up the "no return value" warning message.
52  *
53  * Revised 13-Dec-98, Chr. Spieler: Modified path to "zip.h" header file.
54  *
55  * Revised 16-Jan-2005, Cosmin Truta: Added the ASM_CRC guard, for easier
56  *   switching between ASM vs. non-ASM builds, when handling makefiles.
57  *   Also enabled the 686 build by default, because there are hardly any
58  *   pre-686 CPUs in serious use nowadays. (See the 12-Oct-97 note above.)
59  *
60  * Revised 03-Jan-2006, Chr. Spieler
61  *   Enlarged unrolling loops to "do 16 bytes per turn"; optimized access to
62  *   data buffer in loop body (adjust pointer only once in loop body and use
63  *   offsets to access each item); added additional support for the "unfolded
64  *   tables" optimization variant (enabled by IZ_CRCOPTIM_UNFOLDTBL).
65  *
66  * Revised 07-Jan-2007, Chr. Spieler
67  *   Recognize additional conditional flag CRC_TABLE_ONLY that prevents
68  *   compilation of the crc32() function.
69  *
70  * FLAT memory model assumed.
71  *
72  * Loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
73  * This results in shorter code at the expense of reduced performance.
74  *
75  */
76 
77 #include "../zip.h"
78 #include "../crc32.h"
79 
80 #if defined(ASM_CRC) && !defined(USE_ZLIB) && !defined(CRC_TABLE_ONLY)
/* Everything down to the matching #endif at the end of the file is compiled
 * only for ASM_CRC builds that define neither USE_ZLIB nor CRC_TABLE_ONLY. */
81 
/* Default to the __686 code path unless the build defines PRE_686 (or has
 * already defined __686 itself).  __686 selects the movzx form of Do_CRC and
 * removes the up-front clearing of ebx in crc32(). */
82 #if !defined(PRE_686) && !defined(__686)
83 #  define __686
84 #endif
85 
/* Fallback definition, used only when ZCONST is not defined at this point. */
86 #ifndef ZCONST
87 #  define ZCONST const
88 #endif
89 
90 /* Select whether the following inline-assembler code is supported. */
/* MSC_INLINE_ASM_32BIT_SUPPORT gates the macros and the crc32() function
 * below.  Microsoft C: requires _MSC_VER >= 700 and _M_IX86 >= 300. */
91 #if (defined(_MSC_VER) && _MSC_VER >= 700)
92 #if (defined(_M_IX86) && _M_IX86 >= 300)
93 #  define MSC_INLINE_ASM_32BIT_SUPPORT
94    /* Disable warning for no return value, typical of asm functions */
95 #  pragma warning( disable : 4035 )
96 #endif
97 #endif
98 
/* Borland C++: enabled purely on the compiler version.
 * NOTE(review): __BORLANDC__ is conventionally a hex-coded version number;
 * confirm that the decimal literal 452 is what is intended here. */
99 #if (defined(__BORLANDC__) && __BORLANDC__ >= 452)
100 #  define MSC_INLINE_ASM_32BIT_SUPPORT
101 #endif
102 
103 #ifdef MSC_INLINE_ASM_32BIT_SUPPORT
104 /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */
105 
106 /*
107  * These two (three) macros make up the loop body of the CRC32 cruncher.
108  * registers modified:
109  *   eax  : crc value "c"
110  *   esi  : pointer to next data byte (or dword) "buf++"
111  * registers read:
112  *   edi  : pointer to base of crc_table array
113  * scratch registers:
114  *   ebx  : index into crc_table array
115  *          (requires upper three bytes = 0 when __686 is undefined)
 *
 * Inside these macros every instruction is wrapped in its own __asm {...}
 * construct (see the 22-May-98 revision note in the file header).
116  */
/*
 * Do_CRC: one table step driven by the low byte of eax, i.e.
 *     c = crc_table[c & 0xff] ^ (c >> 8)
 * Without __686 the index is formed by "mov bl, al", which leaves the upper
 * three bytes of ebx as they were, so they must already be zero.  With __686
 * "movzx ebx, al" zero-extends the byte and no such precondition exists.
 */
117 #ifndef __686
118 #define Do_CRC { \
119   __asm { mov   bl, al }; \
120   __asm { shr   eax, 8 }; \
121   __asm { xor   eax, [edi+ebx*4] }; }
122 #else /* __686 */
123 #define Do_CRC { \
124   __asm { movzx ebx, al }; \
125   __asm { shr   eax, 8  }; \
126   __asm { xor   eax, [edi+ebx*4] }; }
127 #endif /* ?__686 */
128 
/*
 * Do_CRC_byte: consume one input byte through the pointer in esi:
 *     c = crc_table[(c ^ *buf++) & 0xff] ^ (c >> 8)
 * The byte at [esi] is XORed into al, esi is advanced by one, then Do_CRC
 * performs the table step.
 */
129 #define Do_CRC_byte { \
130   __asm { xor   al, byte ptr [esi] }; \
131   __asm { inc   esi }; \
132   Do_CRC; }
133 
/*
 * Do_CRC_byteof(ofs): same as Do_CRC_byte, but the byte is read from
 * [esi+ofs] and esi is left unchanged; the user of the macro has to advance
 * esi separately.
 */
134 #define Do_CRC_byteof(ofs) { \
135   __asm { xor   al, byte ptr [esi+(ofs)] }; \
136   Do_CRC; }
137 
138 #ifndef NO_32_BIT_LOADS
139 #ifdef IZ_CRCOPTIM_UNFOLDTBL
140 # define SavLen  len            /* the edx register is needed elsewhere */
141 # define UpdCRC_dword { \
142    __asm { movzx   ebx,al }; \
143    __asm { mov     edx,[edi+ebx*4+3072] }; \
144    __asm { movzx   ebx,ah }; \
145    __asm { shr     eax,16 }; \
146    __asm { xor     edx,[edi+ebx*4+2048] }; \
147    __asm { movzx   ebx,al }; \
148    __asm { shr     eax,8 }; \
149    __asm { xor     edx,[edi+ebx*4+1024] }; \
150    __asm { mov     eax,[edi+eax*4] }; \
151    __asm { xor     eax,edx }; }
152 # define UpdCRC_dword_sh(dwPtrIncr) { \
153    __asm { movzx   ebx,al }; \
154    __asm { mov     edx,[edi+ebx*4+3072] }; \
155    __asm { movzx   ebx,ah }; \
156    __asm { xor     edx,[edi+ebx*4+2048] }; \
157    __asm { shr     eax,16 }; \
158    __asm { movzx   ebx,al }; \
159    __asm { add     esi, 4*dwPtrIncr }; \
160    __asm { shr     eax,8 }; \
161    __asm { xor     edx,[edi+ebx*4+1024] }; \
162    __asm { mov     eax,[edi+eax*4] }; \
163    __asm { xor     eax,edx }; }
164 #else /* !IZ_CRCOPTIM_UNFOLDTBL */
165 # define SavLen  edx            /* the edx register is free for use here */
166 # define UpdCRC_dword { \
167     Do_CRC; \
168     Do_CRC; \
169     Do_CRC; \
170     Do_CRC; }
171 # define UpdCRC_dword_sh(dwPtrIncr) { \
172     Do_CRC; \
173     Do_CRC; \
174     __asm { add   esi, 4*(dwPtrIncr) }; \
175     Do_CRC; \
176     Do_CRC; }
177 #endif /* ?IZ_CRCOPTIM_UNFOLDTBL */
178 
179 #define Do_CRC_dword { \
180   __asm { xor   eax, dword ptr [esi] }; \
181   UpdCRC_dword_sh(1); }
182 
183 #define Do_CRC_4dword { \
184   __asm { xor   eax, dword ptr [esi] }; \
185   UpdCRC_dword; \
186   __asm { xor   eax, dword ptr [esi+4] }; \
187   UpdCRC_dword; \
188   __asm { xor   eax, dword ptr [esi+8] }; \
189   UpdCRC_dword; \
190   __asm { xor   eax, dword ptr [esi+12] }; \
191   UpdCRC_dword_sh(4); }
192 #endif /* !NO_32_BIT_LOADS */
193 
194 /* ========================================================================= */
crc32(crc,buf,len)195 ulg crc32(crc, buf, len)
196     ulg crc;                    /* crc shift register */
197     ZCONST uch *buf;            /* pointer to bytes to pump through */
198     extent len;                 /* number of bytes in buf[] */
199 /* Run a set of bytes through the crc shift register.  If buf is a NULL
200    pointer, then initialize the crc shift register contents instead.
201    Return the current crc in either case.

   For a NULL buf the function yields 0 without looking at crc or len.
   The result is left in eax; a C return statement is compiled only when
   NEED_RETURN is defined.

   Register use inside the asm body:
     eax     running crc value "c" (complemented after loading crc and
             complemented again at bail)
     esi     read pointer into buf
     edi     base address of the table obtained from get_crc_table()
     ecx     loop counter
     ebx     table index scratch, see Do_CRC
     SavLen  macro naming the place where the length remaining after the
             alignment loop is kept (edx or the len argument)
   edx and ecx are pushed on entry and popped again before leaving.

   With unrolled loops enabled the input is handled in stages:
     1. single bytes until esi is dword aligned      (32-bit loads only)
     2. blocks of 16 bytes
     3. single dwords                                (32-bit loads only)
     4. the remaining single bytes
   With NO_UNROLLED_LOOPS only the byte loop at "loupe" is compiled. */
202 {
203     __asm {
204                 push    edx
205                 push    ecx
206 
207                 mov     esi,buf         ;/* 2nd arg: uch *buf              */
208                 sub     eax,eax         ;/*> if (!buf)                     */
209                 test    esi,esi         ;/*>   return 0;                   */
210                 jz      fine            ;/*> else {                        */
211 
                /* the table base comes back in eax and is kept in edi */
212                 call    get_crc_table
213                 mov     edi,eax
214                 mov     eax,crc         ;/* 1st arg: ulg crc               */
215 #ifndef __686
216                 sub     ebx,ebx         ;/* ebx=0; => bl usable as a dword */
217 #endif
218                 mov     ecx,len         ;/* 3rd arg: extent len            */
219                 not     eax             ;/*>   c = ~crc;                   */
220 
                /* The zero flag set here is consumed by the first "jz bail"
                   that gets compiled: the one directly below, or the one in
                   front of "loupe" when NO_UNROLLED_LOOPS is defined. */
221                 test    ecx,ecx
222 #ifndef NO_UNROLLED_LOOPS
223                 jz      bail
224 #  ifndef NO_32_BIT_LOADS
                /* Stage 1: one byte at a time until the low two bits of esi
                   are clear. */
225 align_loop:
226                 test    esi,3           ;/* align buf pointer on next      */
227                 jz      aligned_now     ;/*  dword boundary                */
228     }
229                 Do_CRC_byte             ;
230     __asm {
231                 dec     ecx
232                 jnz     align_loop
                /* When the length runs out here, execution falls through
                   with ecx == 0: SavLen becomes 0, every following stage
                   computes a zero count and control ends up at bail. */
233 aligned_now:
234 #  endif /* !NO_32_BIT_LOADS */
                /* Stage 2: 16 bytes per loop turn. */
235                 mov     SavLen,ecx      ;/* save current len for later  */
236                 shr     ecx,4           ;/* ecx = len / 16    */
237                 jz      No_Sixteens
238 ; align loop head at start of 486 internal cache line !!
239                 align   16
240 Next_Sixteen:
241     }
242 #  ifndef NO_32_BIT_LOADS
243                 Do_CRC_4dword ;
244 #  else /* NO_32_BIT_LOADS */
                /* Sixteen byte steps at fixed offsets; esi is advanced once
                   afterwards. */
245                 Do_CRC_byteof(0) ;
246                 Do_CRC_byteof(1) ;
247                 Do_CRC_byteof(2) ;
248                 Do_CRC_byteof(3) ;
249                 Do_CRC_byteof(4) ;
250                 Do_CRC_byteof(5) ;
251                 Do_CRC_byteof(6) ;
252                 Do_CRC_byteof(7) ;
253                 Do_CRC_byteof(8) ;
254                 Do_CRC_byteof(9) ;
255                 Do_CRC_byteof(10) ;
256                 Do_CRC_byteof(11) ;
257                 Do_CRC_byteof(12) ;
258                 Do_CRC_byteof(13) ;
259                 Do_CRC_byteof(14) ;
260                 Do_CRC_byteof(15) ;
261     __asm {     add     esi,16 };
262 #  endif /* ?NO_32_BIT_LOADS */
263     __asm {
264                 dec     ecx
265                 jnz     Next_Sixteen
266 No_Sixteens:
                /* Reload the saved length and keep what the 16-byte loop
                   left over. */
267                 mov     ecx,SavLen
268                 and     ecx,00000000FH  ;/* ecx = len % 16    */
269 #  ifndef NO_32_BIT_LOADS
                /* Stage 3: whole dwords among the remaining 0..15 bytes. */
270                 shr     ecx,2
271                 jz      No_Fours
272 Next_Four:
273     }
274                 Do_CRC_dword ;
275     __asm {
276                 dec     ecx
277                 jnz     Next_Four
278 No_Fours:
279                 mov     ecx,SavLen
280                 and     ecx,000000003H  ;/* ecx = len % 4    */
281 #  endif /* !NO_32_BIT_LOADS */
282 #endif /* !NO_UNROLLED_LOOPS */
                /* Stage 4: trailing bytes.  The flags tested by this jump
                   come from the last "and ecx,..." above or, with
                   NO_UNROLLED_LOOPS, from "test ecx,ecx". */
283                 jz      bail            ;/*>  if (len)                       */
284 ; align loop head at start of 486 internal cache line !!
285                 align   16
286 loupe:                                  ;/*>    do {                         */
287     }
288                 Do_CRC_byte             ;/*       c = CRC32(c,*buf++,crctab);*/
289     __asm {
290                 dec     ecx             ;/*>    } while (--len);             */
291                 jnz     loupe
292 
293 bail:                                   ;/*> }                               */
294                 not     eax             ;/*> return ~c;                      */
295 fine:
                /* Common exit; eax holds the result (0 when buf was NULL). */
296                 pop     ecx
297                 pop     edx
298     }
/* Builds that define NEED_RETURN hand the eax contents back explicitly. */
299 #ifdef NEED_RETURN
300     return _EAX;
301 #endif
302 }
303 #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */
/* Same compiler/target condition as the one that disabled warning 4035 near
 * the top of this file; the pragma below sets the warning back to its
 * default behaviour. */
304 #if (defined(_MSC_VER) && _MSC_VER >= 700)
305 #if (defined(_M_IX86) && _M_IX86 >= 300)
306    /* Reenable missing return value warning */
307 #  pragma warning( default : 4035 )
308 #endif
309 #endif
310 #endif /* ASM_CRC && !USE_ZLIB && !CRC_TABLE_ONLY */
311