// Copyright (c) 2018 Intel Corporation
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//-----------------------------------------------------------------------------
// mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1
//
// Syntax : GNU as, AT&T operand order (src, dst).
// ABI    : System V AMD64 (six register arguments, two stack arguments).
//          Leaf function: it makes no calls.
//
// Huffman-encodes one 8x8 block of 16-bit coefficients into a byte stream,
// inserting a 0x00 byte after every 0xFF byte it emits.
//
// Arguments (names are descriptive only; the C prototype is not visible here)
//   %rdi        : coefficient block, 64 signed 16-bit values (read only)
//   %rsi        : base of the output byte buffer
//   %edx        : output buffer length in bytes (only the low 32 bits are used)
//   %rcx        : pointer to the 32-bit current write position; read on entry,
//                 written back on the normal exit only
//   %r8         : pointer to the 16-bit previous DC value; overwritten with
//                 this block's DC, restored on the failure exits taken after
//                 that store
//   %r9         : DC code table, 32-bit entries indexed by bit-size category:
//                 (entry >> 16) = code length, (entry & 0xFFFF) = code bits
//   8(%rsp)     : AC code table, same entry format, indexed by
//                 (zero_run << 4) | size; entry 0x00 is emitted when the rest
//                 of the block is zero, entry 0xF0 (byte offset 960) for each
//                 run of 16 zeros
//   16(%rsp)    : bit-buffer state: qword at +0 is read as the pending bits
//                 (only the low 32 bits are written back), dword at +8 is the
//                 number of valid pending bits
//                 (stack offsets above are relative to %rsp at entry; inside
//                 the body they are 128(%rsp) and 136(%rsp))
//
// Returns (%eax)
//   0 : block encoded; position and bit-buffer state updated.
//   1 : stopped early, either because the remaining output space failed one
//       of the margin checks or because a required table entry has a zero
//       code length. NOTE(review): how the caller interprets 1 is not visible
//       in this file - confirm against the C wrapper.
//
// Clobbers: rax, rcx, rdx, rsi, rdi, r8-r11, xmm1, flags.
// Saved/restored: rbp, rbx, r12-r15.
//
// Stack frame after the prologue (6 pushes + 72 bytes of locals):
//     0(%rsp)  coefficient block pointer
//     8(%rsp)  output length (zero-extended 32-bit)
//    16(%rsp)  pointer to current position
//    24(%rsp)  pointer to previous DC value
//    32(%rsp)  previous DC value as read on entry (for restore on failure)
//    40(%rsp)  DC table pointer
//    48(%rsp)  continuation address for the flush routine
//    56(%rsp)  scan limit for the AC loop
//
// Long-lived registers:
//   %r8   output buffer base          %r12  current output position
//   %r9   AC table                    %r11  bit accumulator (new bits enter at
//   %rsi  &ownTables                        the low end)
//   %ebp  free bits left in %r11      %r13d zero_run << 4
//   %edi  scan index into ownTables   %r14  value bits, %r15 code bits
//-----------------------------------------------------------------------------

.text

.p2align 4, 0x90

.globl mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1

mfxownpj_EncodeHuffman8x8_JPEG_16s1u_C1:
        push    %rbp
        push    %rbx
        push    %r12
        push    %r13
        push    %r14
        push    %r15
        sub     $72, %rsp

        // Spill the arguments that are needed after their registers are reused.
        mov     %rdi, (%rsp)
        mov     %edx, %eax                      // zero-extend the 32-bit length
        mov     %rax, 8(%rsp)
        mov     %rcx, 16(%rsp)
        mov     %r8, 24(%rsp)
        mov     %r9, 40(%rsp)
        mov     %rsi, %r8                       // r8 = output buffer base

        // Reload the pending bits from the state structure.
        movq    136(%rsp), %rax                 // rax = state pointer
        mov     $64, %ebp
        mov     $24, %ecx
        sub     8(%rax), %ebp                   // ebp = 64 - valid bits
        sub     8(%rax), %ecx                   // ecx = 24 - valid bits
        mov     (%rax), %r11
        mov     16(%rsp), %rax
        shr     %cl, %r11                       // right-align the valid bits
        mov     (%rax), %eax                    // eax = current position
        lea     ownTables(%rip), %rsi
        mov     %rax, %r12
        sub     8(%rsp), %eax
        cmp     $-24, %eax                      // need position <= length - 24
        jg      LEncHuffExitEndOfWork01gas_1    // nothing modified yet

        // ---- DC coefficient: encode (current DC - previous DC) ----
        mov     24(%rsp), %rax
        mov     (%rsp), %rdx
        movswl  (%rax), %ecx                    // ecx = previous DC
        movswl  (%rdx), %edx                    // edx = current DC
        mov     %ecx, 32(%rsp)                  // keep old value for restore
        mov     %dx, (%rax)                     // previous DC := current DC
        sub     %ecx, %edx                      // edx = difference
        xor     %ecx, %ecx
        mov     %edx, %ebx
        neg     %edx
        setg    %cl                             // cl = 1 when difference < 0
        cmovl   %ebx, %edx                      // edx = |difference|
        sub     %ecx, %ebx                      // negatives: value - 1
        mov     $32, %ecx
        cmp     $256, %edx
        jge     LEncHuffDcLong00gas_1
        movzbl  64(%rsi,%rdx), %edx             // edx = bit size of |difference|
LEncHuffDc00gas_1:
        sub     %edx, %ecx
        shl     %cl, %ebx                       // keep only the low 'size' bits
        shr     %cl, %ebx
        mov     %ebx, %r14d                     // r14 = value bits
        mov     40(%rsp), %rbx                  // rbx = DC table
        mov     (%rbx,%rdx,4), %ecx
        mov     %ecx, %r15d
        shr     $16, %ecx                       // ecx = code length
        jz      LEncHuffExitEndOfWork00gas_1    // zero length: no code defined
        and     mask0F(%rip), %r15              // r15 = code bits
        sub     %ecx, %ebp
        sub     %edx, %ebp
        shl     %cl, %r11
        mov     %edx, %ecx
        or      %r15, %r11                      // append code
        shl     %cl, %r11
        or      %r14, %r11                      // append value bits

        // ---- Locate the last non-zero AC coefficient ----
        // Coarse pass: test 32-bit pairs of coefficients, walking backwards,
        // and lower the starting scan index %edi as each pair proves zero.
        movq    128(%rsp), %r9                  // r9 = AC table
        xor     %edx, %edx
        mov     (%rsp), %rbx                    // rbx = coefficient block
        mov     $63, %edi

        or      124(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      108(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $1, %edi
        or      92(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      120(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      104(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      76(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $1, %edi
        or      60(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      88(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      116(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      100(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      72(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      44(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $1, %edi
        or      28(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      56(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      84(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      112(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      96(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      68(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      40(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      12(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      24(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      52(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      80(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $3, %edi
        or      64(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      36(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      8(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      20(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      48(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $3, %edi
        or      32(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      4(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi
        or      16(%rbx), %edx
        jnz     LEncHuffAcZeros00gas_1
        sub     $2, %edi

        // Fine pass: step back one coefficient at a time, in scan order, from
        // the coarse estimate until a non-zero word is found or index 0 is
        // reached.
LEncHuffAcZeros00gas_1:
        xor     %edx, %edx
        movzbl  (%rdi,%rsi), %ecx               // ecx = block index for scan pos
LEncHuffAcZeros01gas_1:
        orw     (%rbx,%rcx,2), %dx
        jnz     LEncHuffAcZeros02gas_1
        movzbl  -1(%rdi,%rsi), %ecx
        sub     $1, %edi
        jg      LEncHuffAcZeros01gas_1
LEncHuffAcZeros02gas_1:
        // Scan limit = (last non-zero position + 1), capped at 63. The extra
        // position makes the main loop see one trailing zero and take the
        // end-of-block path unless position 63 itself is non-zero.
        add     $1, %edi
        mov     $63, %eax
        cmp     $63, %edi
        cmovg   %eax, %edi
        mov     %edi, 56(%rsp)
        mov     $1, %edi                        // AC scan starts at position 1
        xor     %r13d, %r13d                    // zero run = 0
        movzbl  (%rdi,%rsi), %ecx

        // ---- Main AC loop ----
LEncHuffAc00gas_1:
        movswl  (%rbx,%rcx,2), %edx             // edx = coefficient
        movzbl  1(%rdi,%rsi), %ecx              // prefetch next block index
        test    %edx, %edx
        jnz     LEncHuffAc01gas_1
        mov     56(%rsp), %edx
        add     $16, %r13d                      // run++ (kept as run << 4)
        add     $1, %edi
        cmp     %edx, %edi
        jle     LEncHuffAc00gas_1
        jmp     LEncHuffAc10gas_1               // only zeros remain

LEncHuffAc01gas_1:
        cmp     $256, %r13d                     // run >= 16 ?
        jge     LEncHuffAc20gas_1
LEncHuffAc02gas_1:
        // Same magnitude / size computation as for the DC difference.
        xor     %ecx, %ecx
        mov     %edx, %ebx
        neg     %edx
        setg    %cl
        cmovl   %ebx, %edx
        sub     %ecx, %ebx
        mov     $32, %ecx
        cmp     $256, %edx
        jge     LEncHuffAcLong00gas_1
        movzbl  64(%rsi,%rdx), %edx             // edx = bit size
LEncHuffAc03gas_1:
        sub     %edx, %ecx
        shl     %cl, %ebx
        or      %edx, %r13d                     // r13 = (run << 4) | size
        shr     %cl, %ebx
        mov     %ebx, %r14d                     // r14 = value bits
        mov     (%r9,%r13,4), %ebx
        mov     %ebx, %r15d
        shr     $16, %ebx                       // ebx = code length
        jz      LEncHuffExitEndOfWork00gas_1
        and     mask0F(%rip), %r15
        cmp     %ebx, %ebp
        jl      LEncHuffCallWrite00gas_1        // not enough room: flush first
LEncHuffRetWrite00gas_1:
        mov     %ebx, %ecx
        sub     %ebx, %ebp
        mov     (%rsp), %rbx                    // rbx = coefficient block again
        shl     %cl, %r11
        or      %r15, %r11                      // append code
        cmp     %edx, %ebp
        jl      LEncHuffCallWrite01gas_1
LEncHuffRetWrite01gas_1:
        mov     %edx, %ecx
        add     $1, %edi
        shl     %cl, %r11
        or      %r14, %r11                      // append value bits
        sub     %edx, %ebp
        mov     56(%rsp), %edx
        xor     %r13d, %r13d                    // run = 0
        movzbl  (%rdi,%rsi), %ecx
        cmp     %edx, %edi
        jle     LEncHuffAc00gas_1
        jmp     LEncHuffExitNormgas_1

        // Remaining coefficients are all zero: emit AC table entry 0.
LEncHuffAc10gas_1:
        mov     (%r9), %ebx
        mov     %ebx, %r15d
        shr     $16, %ebx
        jz      LEncHuffExitEndOfWork00gas_1
        and     mask0F(%rip), %r15
        cmp     %ebx, %ebp
        jl      LEncHuffCallWrite03gas_1
LEncHuffRetWrite03gas_1:
        mov     %ebx, %ecx
        sub     %ebx, %ebp
        shl     %cl, %r11
        or      %r15, %r11
        jmp     LEncHuffExitNormgas_1

        // Zero run of 16 or more: emit AC table entry 0xF0 (offset 960 = 240*4)
        // once per 16 zeros, then continue with the remaining run.
LEncHuffAc20gas_1:
        mov     960(%r9), %ebx
        mov     %ebx, %r15d
        shr     $16, %ebx
        jz      LEncHuffExitEndOfWork00gas_1
        and     mask0F(%rip), %r15
        cmp     %ebx, %ebp
        jl      LEncHuffCallWrite02gas_1
LEncHuffRetWrite02gas_1:
        mov     %ebx, %ecx
        sub     %ebx, %ebp
        shl     %cl, %r11
        or      %r15, %r11
        sub     $256, %r13d                     // run -= 16
        cmp     $256, %r13d
        jge     LEncHuffAc20gas_1
        jmp     LEncHuffAc02gas_1

        // ---- Flush trampolines ----
        // Each stores its continuation address in 48(%rsp) and enters the
        // shared flush routine, which returns with an indirect jump.
LEncHuffCallWrite04gas_1:
        lea     LEncHuffRetWrite04gas_1(%rip), %rax
        mov     %rax, 48(%rsp)
        jmp     LEncHuffWrite00gas_1

LEncHuffCallWrite03gas_1:
        lea     LEncHuffRetWrite03gas_1(%rip), %rax
        mov     %rax, 48(%rsp)
        jmp     LEncHuffWrite00gas_1

LEncHuffCallWrite02gas_1:
        lea     LEncHuffRetWrite02gas_1(%rip), %rax
        mov     %rax, 48(%rsp)
        jmp     LEncHuffWrite00gas_1

LEncHuffCallWrite01gas_1:
        lea     LEncHuffRetWrite01gas_1(%rip), %rax
        mov     %rax, 48(%rsp)
        jmp     LEncHuffWrite00gas_1

LEncHuffCallWrite00gas_1:
        lea     LEncHuffRetWrite00gas_1(%rip), %rax
        mov     %rax, 48(%rsp)
        // falls through

        // ---- Flush whole bytes from the accumulator to the output ----
        // In : ebp = free bits, r11 = accumulator, r12 = position.
        // Out: ebp = free bits after removing whole bytes, r12 advanced.
        // Uses rax, rcx, r10, xmm1. %r11 itself is not modified.
LEncHuffWrite00gas_1:
        mov     %ebp, %ecx
        sub     $64, %ebp
        neg     %ebp                            // ebp = used bits
        mov     %r11, %r10
        shl     %cl, %r10                       // left-align the used bits
        movd    %r10, %xmm1
        bswap   %r10                            // first output byte -> low byte
        mov     %r12d, %eax
        mov     %r12d, %ecx
        subl    8(%rsp), %eax
        cmp     $-8, %eax                       // need position <= length - 8
        jg      LEncHuffExitEndOfWork00gas_1
        cmp     $32, %ebp
        jl      LEncHuffWrite11gas_1            // few bits: bytewise path

        // Fast path: usable only when no byte equals 0xFF (nothing to stuff).
        // pcmpeqb with a memory operand requires maskFF to be 16-byte aligned.
        pcmpeqb maskFF(%rip), %xmm1
        movd    %xmm1, %rax
        test    %rax, %rax
        jnz     LEncHuffWrite10gas_1
        mov     %r10, (%r8,%rcx)                // stores 8 bytes; only used/8 count
        mov     %ebp, %eax
        and     $7, %ebp                        // bits left over after whole bytes
        and     $-8, %eax
        sub     $64, %ebp
        shr     $3, %eax                        // whole bytes written
        add     %eax, %r12d
        mov     48(%rsp), %rax
        neg     %ebp                            // ebp = 64 - leftover bits
        jmp     *%rax

        // Slow path: at least one 0xFF byte, so the output may double in size.
LEncHuffWrite10gas_1:
        sub     8(%rsp), %ecx
        cmp     $-16, %ecx                      // need position <= length - 16
        mov     %r12d, %ecx                     // mov leaves the flags intact
        jg      LEncHuffExitEndOfWork00gas_1
LEncHuffWrite11gas_1:
        sub     $8, %ebp
        jl      LEncHuffWrite12gas_1            // fewer than 8 bits left
        mov     %r10b, %al
        shr     $8, %r10
        mov     %al, (%r8,%rcx)
        add     $1, %ecx
        cmp     $0xFF, %al
        jne     LEncHuffWrite11gas_1
        xor     %eax, %eax
        mov     %al, (%r8,%rcx)                 // stuffed zero byte after 0xFF
        add     $1, %ecx
        jmp     LEncHuffWrite11gas_1
LEncHuffWrite12gas_1:
        sub     $56, %ebp                       // with neg: ebp = 64 - leftover
        mov     %rcx, %r12
        neg     %ebp
        mov     48(%rsp), %rax
        jmp     *%rax

        // ---- Exits ----
LEncHuffExitEndOfWork00gas_1:
        // Undo the previous-DC update made at the start of the DC step.
        mov     24(%rsp), %rax
        mov     32(%rsp), %ecx
        mov     %cx, (%rax)
LEncHuffExitEndOfWork01gas_1:
        mov     $1, %eax
        jmp     LEncHuffExit00gas_1

LEncHuffExitNormgas_1:
        cmp     $56, %ebp                       // 8 or more bits pending?
        jle     LEncHuffCallWrite04gas_1
LEncHuffRetWrite04gas_1:
        mov     16(%rsp), %rax
        mov     %r12d, (%rax)                   // store the new position
        mov     %ebp, %ecx
        sub     $64, %ebp
        shl     %cl, %r11                       // left-align the pending bits
        neg     %ebp                            // ebp = pending bit count
        shr     $40, %r11                       // keep them in the low 24 bits
        movq    136(%rsp), %rax
        mov     %ebp, 8(%rax)
        mov     %r11d, (%rax)
        xor     %eax, %eax                      // return 0
LEncHuffExit00gas_1:
        add     $72, %rsp
        pop     %r15
        pop     %r14
        pop     %r13
        pop     %r12
        pop     %rbx
        pop     %rbp
        ret

        // Magnitudes >= 256: look up the size of the high byte and add 8.
LEncHuffDcLong00gas_1:
        shr     $8, %edx
        movzbl  64(%rsi,%rdx), %edx
        add     $8, %edx
        jmp     LEncHuffDc00gas_1

LEncHuffAcLong00gas_1:
        shr     $8, %edx
        movzbl  64(%rsi,%rdx), %edx
        add     $8, %edx
        jmp     LEncHuffAc03gas_1


// NOTE(review): the code above only ever reads these tables. They could live
// in a read-only section, but the section name is object-format specific, so
// the original placement is kept.
.data

.p2align 4, 0x90

// 16 bytes of 0xFF for the pcmpeqb byte test; must stay 16-byte aligned.
maskFF:
.quad 0xffffffffffffffff, 0xffffffffffffffff

// Extracts the 16-bit code field of a table entry.
mask0F:
.quad 0xffff

// Bytes 0..63   : scan position -> coefficient index within the 8x8 block
//                 (zig-zag order).
// Bytes 64..319 : number of significant bits of v, for v = 0..255.
ownTables:
.byte 0, 1, 8, 16, 9, 2, 3, 10
.byte 17, 24, 32, 25, 18, 11, 4, 5
.byte 12, 19, 26, 33, 40, 48, 41, 34
.byte 27, 20, 13, 6, 7, 14, 21, 28
.byte 35, 42, 49, 56, 57, 50, 43, 36
.byte 29, 22, 15, 23, 30, 37, 44, 51
.byte 58, 59, 52, 45, 38, 31, 39, 46
.byte 53, 60, 61, 54, 47, 55, 62, 63

.byte 0, 1, 2, 2, 3, 3, 3, 3
.fill 8, 1, 4
.fill 16, 1, 5
.fill 32, 1, 6
.fill 64, 1, 7
.fill 128, 1, 8