1; vim:filetype=nasm ts=8 2 3; libFLAC - Free Lossless Audio Codec library 4; Copyright (C) 2001-2009 Josh Coalson 5; Copyright (C) 2011-2013 Xiph.Org Foundation 6; 7; Redistribution and use in source and binary forms, with or without 8; modification, are permitted provided that the following conditions 9; are met: 10; 11; - Redistributions of source code must retain the above copyright 12; notice, this list of conditions and the following disclaimer. 13; 14; - Redistributions in binary form must reproduce the above copyright 15; notice, this list of conditions and the following disclaimer in the 16; documentation and/or other materials provided with the distribution. 17; 18; - Neither the name of the Xiph.org Foundation nor the names of its 19; contributors may be used to endorse or promote products derived from 20; this software without specific prior written permission. 21; 22; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR 26; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 27; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 28; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 29; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 30; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 31; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 32; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 34%include "nasm.h" 35 36 data_section 37 38cextern FLAC__crc16_table ; unsigned FLAC__crc16_table[256]; 39cextern bitreader_read_from_client_ ; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br); 40 41cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap 42 43 code_section 44 45 46; ********************************************************************** 47; 48; void FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter) 49; 50; Some details like assertions and other checking is performed by the caller. 51 ALIGN 16 52cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap 53 54 ;ASSERT(0 != br); 55 ;ASSERT(0 != br->buffer); 56 ; WATCHOUT: code only works if sizeof(brword)==32; we can make things much faster with this assertion 57 ;ASSERT(FLAC__BITS_PER_WORD == 32); 58 ;ASSERT(parameter < 32); 59 ; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it 60 61 ;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time 62 ;; [esp + 16] unsigned parameter 63 ;; [esp + 12] unsigned nvals 64 ;; [esp + 8] int vals[] 65 ;; [esp + 4] FLAC__BitReader *br 66 mov eax, [esp + 12] ; if(nvals == 0) 67 test eax, eax 68 ja .nvals_gt_0 69 mov eax, 1 ; return true; 70 ret 71 72.nvals_gt_0: 73 push ebp 74 push ebx 75 push esi 76 push edi 77 sub esp, 4 78 ;; [esp + 36] unsigned parameter 79 ;; [esp + 32] unsigned nvals 80 ;; [esp + 28] int vals[] 81 ;; [esp + 24] FLAC__BitReader *br 82 ;; [esp] ucbits 83 mov ebp, [esp + 24] ; ebp <- br == br->buffer 84 mov esi, [ebp + 16] ; esi <- br->consumed_words (aka 'cwords' in the C version) 85 mov ecx, [ebp + 20] ; ecx <- br->consumed_bits (aka 'cbits' in the C version) 86 xor edi, edi ; edi <- 0 'uval' 87 ;; ecx cbits 88 ;; esi cwords 89 ;; edi uval 90 ;; ebp br 91 ;; [ebp] br->buffer 92 ;; [ebp + 8] br->words 93 ;; [ebp + 12] br->bytes 94 ;; [ebp + 16] br->consumed_words 95 ;; [ebp + 20] br->consumed_bits 96 ;; [ebp + 24] br->read_crc 97 ;; [ebp + 28] br->crc16_align 98 99 ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits; 100 mov eax, [ebp + 8] ; eax <- br->words 101 sub eax, esi ; eax <- br->words-cwords 102 shl eax, 2 ; eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD 103 add eax, [ebp + 12] ; eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes 104 shl eax, 3 ; eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 105 sub eax, ecx ; eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits 106 mov [esp], eax ; ucbits <- eax 107 108 ALIGN 16 109.val_loop: ; while(1) { 110 111 ; 112 ; read unary part 113 ; 114.unary_loop: ; while(1) { 115 ;; ecx cbits 116 ;; esi cwords 117 ;; edi uval 118 ;; ebp br 119 cmp esi, [ebp + 8] ; while(cwords < br->words) /* if we've not consumed up to a partial tail word... */ 120 jae near .c1_next1 121.c1_loop: ; { 122 mov ebx, [ebp] 123 mov eax, [ebx + 4*esi] ; b = br->buffer[cwords] 124 mov edx, eax ; edx = br->buffer[cwords] (saved for later use) 125 shl eax, cl ; b = br->buffer[cwords] << cbits 126 test eax, eax ; (still have to test since cbits may be 0, thus ZF not updated for shl eax,0) 127 jz near .c1_next2 ; if(b) { 128 bsr ebx, eax 129 not ebx 130 and ebx, 31 ; ebx = 'i' = # of leading 0 bits in 'b' (eax) 131 add ecx, ebx ; cbits += i; 132 add edi, ebx ; uval += i; 133 add ecx, byte 1 ; cbits++; /* skip over stop bit */ 134 test ecx, ~31 135 jz near .break1 ; if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */ 136 ; crc16_update_word_(br, br->buffer[cwords]); 137 push edi ; [need more registers] 138 bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier 139 mov ecx, [ebp + 28] ; ecx <- br->crc16_align 140 mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc) 141%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE 142 mov edi, _FLAC__crc16_table 143%else 144%ifdef OBJ_FORMAT_elf 145 mov edi, [esp + 16] ; saved ebx (GOT base) 146 lea edi, [edi + FLAC__crc16_table wrt ..gotoff] 147%else 148 mov edi, FLAC__crc16_table 149%endif 150%endif 151 ;; eax (ax) crc a.k.a. br->read_crc 152 ;; ebx (bl) intermediate result index into FLAC__crc16_table[] 153 ;; ecx br->crc16_align 154 ;; edx byteswapped brword to CRC 155 ;; esi cwords 156 ;; edi unsigned FLAC__crc16_table[] 157 ;; ebp br 158 test ecx, ecx ; switch(br->crc16_align) ... 159 jnz .c0b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case] 160.c0b0: xor dl, ah ; dl <- (crc>>8)^(word>>24) 161 movzx ebx, dl 162 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)] 163 shl eax, 8 ; ax <- (crc<<8) 164 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)] 165.c0b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff)) 166 movzx ebx, dh 167 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))] 168 shl eax, 8 ; ax <- (crc<<8) 169 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))] 170 shr edx, 16 171.c0b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff)) 172 movzx ebx, dl 173 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))] 174 shl eax, 8 ; ax <- (crc<<8) 175 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))] 176.c0b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff) 177 movzx ebx, dh 178 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)] 179 shl eax, 8 ; ax <- (crc<<8) 180 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)] 181 movzx eax, ax 182 mov [ebp + 24], eax ; br->read_crc <- crc 183 pop edi 184 185 add esi, byte 1 ; cwords++; 186 xor ecx, ecx ; cbits = 0; 187 ; } 188 jmp near .break1 ; goto break1; 189 ;; this section relocated out of the way for performance 190.c0b4: 191 mov [ebp + 28], dword 0 ; br->crc16_align <- 0 192 cmp ecx, 8 193 je .c0b1 194 shr edx, 16 195 cmp ecx, 16 196 je .c0b2 197 jmp .c0b3 198 199 ;; this section relocated out of the way for performance 200.c1b4: 201 mov [ebp + 28], dword 0 ; br->crc16_align <- 0 202 cmp ecx, 8 203 je .c1b1 204 shr edx, 16 205 cmp ecx, 16 206 je .c1b2 207 jmp .c1b3 208 209.c1_next2: ; } else { 210 ;; ecx cbits 211 ;; edx current brword 'b' 212 ;; esi cwords 213 ;; edi uval 214 ;; ebp br 215 add edi, 32 216 sub edi, ecx ; uval += FLAC__BITS_PER_WORD - cbits; 217 ; crc16_update_word_(br, br->buffer[cwords]); 218 push edi ; [need more registers] 219 bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier 220 mov ecx, [ebp + 28] ; ecx <- br->crc16_align 221 mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc) 222%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE 223 mov edi, _FLAC__crc16_table 224%else 225%ifdef OBJ_FORMAT_elf 226 mov edi, [esp + 16] ; saved ebx (GOT base) 227 lea edi, [edi + FLAC__crc16_table wrt ..gotoff] 228%else 229 mov edi, FLAC__crc16_table 230%endif 231%endif 232 ;; eax (ax) crc a.k.a. br->read_crc 233 ;; ebx (bl) intermediate result index into FLAC__crc16_table[] 234 ;; ecx br->crc16_align 235 ;; edx byteswapped brword to CRC 236 ;; esi cwords 237 ;; edi unsigned FLAC__crc16_table[] 238 ;; ebp br 239 test ecx, ecx ; switch(br->crc16_align) ... 240 jnz .c1b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case] 241.c1b0: xor dl, ah ; dl <- (crc>>8)^(word>>24) 242 movzx ebx, dl 243 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)] 244 shl eax, 8 ; ax <- (crc<<8) 245 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)] 246.c1b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff)) 247 movzx ebx, dh 248 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))] 249 shl eax, 8 ; ax <- (crc<<8) 250 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))] 251 shr edx, 16 252.c1b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff)) 253 movzx ebx, dl 254 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))] 255 shl eax, 8 ; ax <- (crc<<8) 256 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))] 257.c1b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff) 258 movzx ebx, dh 259 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)] 260 shl eax, 8 ; ax <- (crc<<8) 261 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)] 262 movzx eax, ax 263 mov [ebp + 24], eax ; br->read_crc <- crc 264 pop edi 265 266 add esi, byte 1 ; cwords++; 267 xor ecx, ecx ; cbits = 0; 268 ; /* didn't find stop bit yet, have to keep going... */ 269 ; } 270 271 cmp esi, [ebp + 8] ; } while(cwords < br->words) /* if we've not consumed up to a partial tail word... */ 272 jb near .c1_loop 273 274.c1_next1: 275 ; at this point we've eaten up all the whole words; have to try 276 ; reading through any tail bytes before calling the read callback. 277 ; this is a repeat of the above logic adjusted for the fact we 278 ; don't have a whole word. note though if the client is feeding 279 ; us data a byte at a time (unlikely), br->consumed_bits may not 280 ; be zero. 281 ;; ecx cbits 282 ;; esi cwords 283 ;; edi uval 284 ;; ebp br 285 mov edx, [ebp + 12] ; edx <- br->bytes 286 shl edx, 3 ; edx <- br->bytes*8 287 cmp edx, ecx 288 jbe .read1 ; if(br->bytes*8 > cbits) { [NOTE: this case is rare so it doesn't have to be all that fast ] 289 mov ebx, [ebp] 290 ; edx <- const unsigned end = br->bytes * 8; 291 mov eax, [ebx + 4*esi] ; b = br->buffer[cwords] 292 xchg edx, ecx ; [edx <- cbits , ecx <- end] 293 mov ebx, 0xffffffff ; ebx <- FLAC__WORD_ALL_ONES 294 shr ebx, cl ; ebx <- FLAC__WORD_ALL_ONES >> end 295 not ebx ; ebx <- ~(FLAC__WORD_ALL_ONES >> end) 296 xchg edx, ecx ; [edx <- end , ecx <- cbits] 297 and eax, ebx ; b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)); 298 shl eax, cl ; b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits; 299 test eax, eax ; (still have to test since cbits may be 0, thus ZF not updated for shl eax,0) 300 jz .c1_next3 ; if(b) { 301 bsr ebx, eax 302 not ebx 303 and ebx, 31 ; ebx = 'i' = # of leading 0 bits in 'b' (eax) 304 add ecx, ebx ; cbits += i; 305 add edi, ebx ; uval += i; 306 add ecx, byte 1 ; cbits++; /* skip over stop bit */ 307 jmp short .break1 ; goto break1; 308.c1_next3: ; } else { 309 sub edi, ecx 310 add edi, edx ; uval += end - cbits; 311 mov ecx, edx ; cbits = end 312 ; /* didn't find stop bit yet, have to keep going... */ 313 ; } 314 ; } 315.read1: 316 ; flush registers and read; bitreader_read_from_client_() does 317 ; not touch br->consumed_bits at all but we still need to set 318 ; it in case it fails and we have to return false. 319 ;; ecx cbits 320 ;; esi cwords 321 ;; edi uval 322 ;; ebp br 323 mov [ebp + 16], esi ; br->consumed_words = cwords; 324 mov [ebp + 20], ecx ; br->consumed_bits = cbits; 325 push ecx ; /* save */ 326 push ebp ; /* push br argument */ 327%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE 328 call _bitreader_read_from_client_ 329%else 330%ifdef OBJ_FORMAT_elf 331 mov ebx, [esp + 20] ; saved ebx (GOT base) 332 call bitreader_read_from_client_ wrt ..plt 333%else 334 call bitreader_read_from_client_ 335%endif 336%endif 337 pop edx ; /* discard, unused */ 338 pop ecx ; /* restore */ 339 mov esi, [ebp + 16] ; cwords = br->consumed_words; 340 ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits; 341 mov ebx, [ebp + 8] ; ebx <- br->words 342 sub ebx, esi ; ebx <- br->words-cwords 343 shl ebx, 2 ; ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD 344 add ebx, [ebp + 12] ; ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes 345 shl ebx, 3 ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 346 sub ebx, ecx ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits 347 add ebx, edi ; ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval 348 ; + uval to offset our count by the # of unary bits already 349 ; consumed before the read, because we will add these back 350 ; in all at once at break1 351 mov [esp], ebx ; ucbits <- ebx 352 test eax, eax ; if(!bitreader_read_from_client_(br)) 353 jnz near .unary_loop 354 jmp .end ; return false; /* eax (the return value) is already 0 */ 355 ; } /* end while(1) unary part */ 356 357 ALIGN 16 358.break1: 359 ;; ecx cbits 360 ;; esi cwords 361 ;; edi uval 362 ;; ebp br 363 ;; [esp] ucbits 364 sub [esp], edi ; ucbits -= uval; 365 sub dword [esp], byte 1 ; ucbits--; /* account for stop bit */ 366 367 ; 368 ; read binary part 369 ; 370 mov ebx, [esp + 36] ; ebx <- parameter 371 test ebx, ebx ; if(parameter) { 372 jz near .break2 373.read2: 374 cmp [esp], ebx ; while(ucbits < parameter) { 375 jae .c2_next1 376 ; flush registers and read; bitreader_read_from_client_() does 377 ; not touch br->consumed_bits at all but we still need to set 378 ; it in case it fails and we have to return false. 379 mov [ebp + 16], esi ; br->consumed_words = cwords; 380 mov [ebp + 20], ecx ; br->consumed_bits = cbits; 381 push ecx ; /* save */ 382 push ebx ; /* save */ 383 push ebp ; /* push br argument */ 384%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE 385 call _bitreader_read_from_client_ 386%else 387%ifdef OBJ_FORMAT_elf 388 mov ebx, [esp + 24] ; saved ebx (GOT base) 389 call bitreader_read_from_client_ wrt ..plt 390%else 391 call bitreader_read_from_client_ 392%endif 393%endif 394 pop edx ; /* discard, unused */ 395 pop ebx ; /* restore */ 396 pop ecx ; /* restore */ 397 mov esi, [ebp + 16] ; cwords = br->consumed_words; 398 ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits; 399 mov edx, [ebp + 8] ; edx <- br->words 400 sub edx, esi ; edx <- br->words-cwords 401 shl edx, 2 ; edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD 402 add edx, [ebp + 12] ; edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes 403 shl edx, 3 ; edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 404 sub edx, ecx ; edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits 405 mov [esp], edx ; ucbits <- edx 406 test eax, eax ; if(!bitreader_read_from_client_(br)) 407 jnz .read2 408 jmp .end ; return false; /* eax (the return value) is already 0 */ 409 ; } 410.c2_next1: 411 ;; ebx parameter 412 ;; ecx cbits 413 ;; esi cwords 414 ;; edi uval 415 ;; ebp br 416 ;; [esp] ucbits 417 cmp esi, [ebp + 8] ; if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */ 418 jae near .c2_next2 419 test ecx, ecx ; if(cbits) { 420 jz near .c2_next3 ; /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */ 421 mov eax, 32 422 mov edx, [ebp] 423 sub eax, ecx ; const unsigned n = FLAC__BITS_PER_WORD - cbits; 424 mov edx, [edx + 4*esi] ; const brword word = br->buffer[cwords]; 425 cmp ebx, eax ; if(parameter < n) { 426 jae .c2_next4 427 ; uval <<= parameter; 428 ; uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter); 429 shl edx, cl 430 xchg ebx, ecx 431 shld edi, edx, cl 432 add ebx, ecx ; cbits += parameter; 433 xchg ebx, ecx ; ebx <- parameter, ecx <- cbits 434 jmp .break2 ; goto break2; 435 ; } 436.c2_next4: 437 ; uval <<= n; 438 ; uval |= word & (FLAC__WORD_ALL_ONES >> cbits); 439%if 1 440 rol edx, cl ; @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing 441 ; @@@@@@OPT: or put parameter in ch instead and free up ebx completely again 442%else 443 shl edx, cl 444%endif 445 xchg eax, ecx 446 shld edi, edx, cl 447 xchg eax, ecx 448%if 1 449 ror edx, cl ; restored. 450%else 451 mov edx, [ebp] 452 mov edx, [edx + 4*esi] 453%endif 454 ; crc16_update_word_(br, br->buffer[cwords]); 455 push edi ; [need more registers] 456 push ebx ; [need more registers] 457 push eax ; [need more registers] 458 bswap edx ; edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier 459 mov ecx, [ebp + 28] ; ecx <- br->crc16_align 460 mov eax, [ebp + 24] ; ax <- br->read_crc (a.k.a. crc) 461%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE 462 mov edi, _FLAC__crc16_table 463%else 464%ifdef OBJ_FORMAT_elf 465 mov edi, [esp + 24] ; saved ebx (GOT base) 466 lea edi, [edi + FLAC__crc16_table wrt ..gotoff] 467%else 468 mov edi, FLAC__crc16_table 469%endif 470%endif 471 ;; eax (ax) crc a.k.a. br->read_crc 472 ;; ebx (bl) intermediate result index into FLAC__crc16_table[] 473 ;; ecx br->crc16_align 474 ;; edx byteswapped brword to CRC 475 ;; esi cwords 476 ;; edi unsigned FLAC__crc16_table[] 477 ;; ebp br 478 test ecx, ecx ; switch(br->crc16_align) ... 479 jnz .c2b4 ; [br->crc16_align is 0 the vast majority of the time so we optimize the common case] 480.c2b0: xor dl, ah ; dl <- (crc>>8)^(word>>24) 481 movzx ebx, dl 482 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word>>24)] 483 shl eax, 8 ; ax <- (crc<<8) 484 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)] 485.c2b1: xor dh, ah ; dh <- (crc>>8)^((word>>16)&0xff)) 486 movzx ebx, dh 487 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))] 488 shl eax, 8 ; ax <- (crc<<8) 489 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))] 490 shr edx, 16 491.c2b2: xor dl, ah ; dl <- (crc>>8)^((word>>8)&0xff)) 492 movzx ebx, dl 493 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))] 494 shl eax, 8 ; ax <- (crc<<8) 495 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))] 496.c2b3: xor dh, ah ; dh <- (crc>>8)^(word&0xff) 497 movzx ebx, dh 498 mov ecx, [ebx*4 + edi] ; cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)] 499 shl eax, 8 ; ax <- (crc<<8) 500 xor eax, ecx ; crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)] 501 movzx eax, ax 502 mov [ebp + 24], eax ; br->read_crc <- crc 503 pop eax 504 pop ebx 505 pop edi 506 add esi, byte 1 ; cwords++; 507 mov ecx, ebx 508 sub ecx, eax ; cbits = parameter - n; 509 jz .break2 ; if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */ 510 ; uval <<= cbits; 511 ; uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits)); 512 mov eax, [ebp] 513 mov eax, [eax + 4*esi] 514 shld edi, eax, cl 515 ; } 516 jmp .break2 ; goto break2; 517 518 ;; this section relocated out of the way for performance 519.c2b4: 520 mov [ebp + 28], dword 0 ; br->crc16_align <- 0 521 cmp ecx, 8 522 je .c2b1 523 shr edx, 16 524 cmp ecx, 16 525 je .c2b2 526 jmp .c2b3 527 528.c2_next3: ; } else { 529 mov ecx, ebx ; cbits = parameter; 530 ; uval <<= cbits; 531 ; uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits)); 532 mov eax, [ebp] 533 mov eax, [eax + 4*esi] 534 shld edi, eax, cl 535 jmp .break2 ; goto break2; 536 ; } 537.c2_next2: ; } else { 538 ; in this case we're starting our read at a partial tail word; 539 ; the reader has guaranteed that we have at least 'parameter' 540 ; bits available to read, which makes this case simpler. 541 ; uval <<= parameter; 542 ; if(cbits) { 543 ; /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */ 544 ; uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter); 545 ; cbits += parameter; 546 ; goto break2; 547 ; } else { 548 ; cbits = parameter; 549 ; uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits); 550 ; goto break2; 551 ; } 552 ; the above is much shorter in assembly: 553 mov eax, [ebp] 554 mov eax, [eax + 4*esi] ; eax <- br->buffer[cwords] 555 shl eax, cl ; eax <- br->buffer[cwords] << cbits 556 add ecx, ebx ; cbits += parameter 557 xchg ebx, ecx ; ebx <- cbits, ecx <- parameter 558 shld edi, eax, cl ; uval <<= parameter <<< 'parameter' bits of tail word 559 xchg ebx, ecx ; ebx <- parameter, ecx <- cbits 560 ; } 561 ; } 562.break2: 563 sub [esp], ebx ; ucbits -= parameter; 564 565 ; 566 ; compose the value 567 ; 568 mov ebx, [esp + 28] ; ebx <- vals 569 mov edx, edi ; edx <- uval 570 and edi, 1 ; edi <- uval & 1 571 shr edx, 1 ; edx <- uval >> 1 572 neg edi ; edi <- -(int)(uval & 1) 573 xor edx, edi ; edx <- (uval >> 1 ^ -(int)(uval & 1)) 574 mov [ebx], edx ; *vals <- edx 575 sub dword [esp + 32], byte 1 ; --nvals; 576 jz .finished ; if(nvals == 0) /* jump to finish */ 577 xor edi, edi ; uval = 0; 578 add dword [esp + 28], 4 ; ++vals 579 jmp .val_loop ; } 580 581.finished: 582 mov [ebp + 16], esi ; br->consumed_words = cwords; 583 mov [ebp + 20], ecx ; br->consumed_bits = cbits; 584 mov eax, 1 585.end: 586 add esp, 4 587 pop edi 588 pop esi 589 pop ebx 590 pop ebp 591 ret 592 593; end 594