@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@                           **** WAVPACK ****                            @@
@@                  Hybrid Lossless Wavefile Compressor                   @@
@@                Copyright (c) 1998 - 2019 David Bryant.                 @@
@@                          All Rights Reserved.                          @@
@@      Distributed under the BSD Software License (see license.txt)      @@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

        .text
        .align
        .global         unpack_decorr_stereo_pass_cont_armv7
        .global         unpack_decorr_mono_pass_cont_armv7

/* This is an assembly optimized version of the following WavPack function:
 *
 * void decorr_stereo_pass_cont (struct decorr_pass *dpp,
 *                               int32_t *buffer,
 *                               int32_t sample_count,
 *                               int32_t long_math);
 *
 * It performs a single pass of stereo decorrelation on the provided buffer.
 * Note that this version of the function requires that up to 8 previous stereo
 * samples are visible and correct. In other words, it ignores the "samples_*"
 * fields in the decorr_pass structure and gets the history data directly
 * from the buffer. It does, however, return the appropriate history samples
 * to the decorr_pass structure before returning.
 *
 * This should work on all ARM architectures. This version of the code
 * checks the magnitude of the decorrelation sample with a pair of shifts
 * to avoid possible overflow (and therefore ignores the "long_math" arg).
 * Previously I used the SSAT instruction for this, but then discovered that
 * SSAT is not universally available (although on the armv7 I'm testing on
 * it is slightly faster than the shifts).
 *
 * A mono version follows below.
 */

/*
 * on entry:
 *
 *  r0 = struct decorr_pass *dpp
 *  r1 = int32_t *buffer
 *  r2 = int32_t sample_count   (stereo pairs; end = buffer + count * 8 bytes)
 *  r3 = int32_t long_math      (never read; r3 is reused as a work register)
 *
 * decorr_pass fields as accessed by this code (byte offsets from dpp):
 *
 *   #0 = term        #8  = weight_A      #16 = samples_A []
 *   #4 = delta       #12 = weight_B      #48 = samples_B []
 *
 * r4 - r8, r10, r11 and lr are pushed on entry and restored at common_exit,
 * where the saved lr is popped straight into pc to return. The weights are
 * written back to the structure on every exit path, including the
 * "sample_count == 0" and "unrecognized negative term" early exits.
 *
 * Every channel update below follows the same pattern ("val" is the
 * decorrelation value, "w" the weight, "sam" the sample read from the buffer):
 *
 *  1. val is shifted left and then arithmetically right by 11 bits. If that
 *     does not reproduce val (it needs more than 21 signed bits) the 64-bit
 *     path is taken.
 *  2. Fast path: "cmp val, #0" sets the flags, MLA forms val * w + 512, and
 *     that sum shifted right by 10 is added to sam. None of MLA, ADD or B
 *     alter the flags.
 *  3. 64-bit path: SMLAL adds val * w to the 64-bit pair hi:lo = 0:512 (lo is
 *     r11). The 64-bit result shifted right by 10 is rebuilt in 32 bits as
 *     (hi << 22) + (lo >> 10), and r11 is reloaded with 512 because SMLAL
 *     overwrote it. The flags are still "ne" from the magnitude compare.
 *  4. Hence at the join label "ne" means val != 0. When val == 0 the weighted
 *     term is (0 + 512) >> 10 = 0, so the sample is unchanged; conditional
 *     stores (STRNE) and the weight update are skipped in that case.
 *  5. The weight update is also skipped when sam == 0 (CMPNE / BEQ).
 *     Otherwise TEQ tests the sign of (val ^ sam): delta is subtracted from
 *     the weight when the signs differ (mi) and added when they match (pl).
 */

        .arm
        .type           unpack_decorr_stereo_pass_cont_armv7, STT_FUNC

unpack_decorr_stereo_pass_cont_armv7:

        stmfd   sp!, {r4 - r8, r10, r11, lr}

        mov     r5, r0                  @ r5 = dpp
        mov     r11, #512               @ r11 = 512 for rounding
        ldr     r6, [r0, #4]            @ r6 = dpp->delta
        ldr     r4, [r0, #8]            @ r4 = dpp->weight_A
        ldr     r0, [r0, #12]           @ r0 = dpp->weight_B
        cmp     r2, #0                  @ exit if no samples to process
        beq     common_exit             @  (weights are stored back unchanged)

        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
        ldr     r2, [r5, #0]            @ r2 = dpp->term
        cmp     r2, #0
        bmi     minus_term              @ negative terms are dispatched separately

        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
        ldr     r8, [r1, #-8]           @  (lr / r10 = 2nd previous left / right,
        ldr     r3, [r1, #-4]           @   r8 / r3  = previous left / right)
        cmp     r2, #17
        beq     term_17_loop
        cmp     r2, #18
        beq     term_18_loop
        cmp     r2, #2
        beq     term_2_loop
        b       term_default_loop       @ else handle default (1-8, except 2)

minus_term:
        mov     r10, #1024              @ r10 = -1024 for weight clipping
        rsb     r10, r10, #0            @  (only used for negative terms)
        cmn     r2, #1                  @ cmn adds, so this tests term == -1
        beq     term_minus_1
        cmn     r2, #2
        beq     term_minus_2
        cmn     r2, #3
        beq     term_minus_3
        b       common_exit             @ any other negative term: nothing processed

/*
 ******************************************************************************
 * Loop to handle term = 17 condition
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_17_loop:
        rsb     ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
        mov     lr, r8                  @ previous becomes 2nd previous
        ldr     r2, [r1], #4            @ get sample & update pointer
        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S117
        cmp     ip, #0                  @ flags (decorr value == 0 ?) are consumed at S118
        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
        add     r8, r2, r8, asr #10     @  shift, and add to new sample
        b       S118

S117:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, ip         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, r2, r8, lsl #22     @ sample + (64-bit result >> 10), built from
        add     r8, r8, r11, lsr #10    @  (hi << 22) + (lo >> 10)
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S118:   strne   r8, [r1, #-4]           @ if change possible, store sample back
        cmpne   r2, #0                  @ skip weight update if decorr value or sample is 0
        beq     S325
        teq     ip, r2                  @ update weight based on signs
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs match:  weight_A += delta

S325:   rsb     ip, r10, r3, asl #1     @ do same thing for right channel
        mov     r10, r3
        ldr     r2, [r1], #4
        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S119
        cmp     ip, #0
        mla     r3, ip, r0, r11
        add     r3, r2, r3, asr #10
        b       S120

S119:   mov     r3, #0                  @ 64-bit path, same as S117 but with weight_B
        smlal   r11, r3, r0, ip
        add     r3, r2, r3, lsl #22
        add     r3, r3, r11, lsr #10
        mov     r11, #512

S120:   strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     S329
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6

S329:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_17_loop            @  (unsigned compare of end vs current pointer)
        b       store_1718              @ common exit for terms 17 & 18

/*
 ******************************************************************************
 * Loop to handle term = 18 condition
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_18_loop:
        sub     ip, r8, lr              @ decorr value =
        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
        add     ip, r8, ip, asr #1      @  computed as prev + ((prev - 2nd prev) >> 1)
        ldr     r2, [r1], #4            @ get sample & update pointer
        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S121
        cmp     ip, #0                  @ flags (decorr value == 0 ?) are consumed at S122
        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
        add     r8, r2, r8, asr #10     @  shift, and add to new sample
        b       S122

S121:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, ip         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, r2, r8, lsl #22     @ sample + (64-bit result >> 10)
        add     r8, r8, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S122:   strne   r8, [r1, #-4]           @ if change possible, store sample back
        cmpne   r2, #0                  @ skip weight update if decorr value or sample is 0
        beq     S337
        teq     ip, r2                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6

S337:   sub     ip, r3, r10             @ do same thing for right channel
        mov     r10, r3
        add     ip, r3, ip, asr #1
        ldr     r2, [r1], #4
        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S123
        cmp     ip, #0
        mla     r3, ip, r0, r11
        add     r3, r2, r3, asr #10
        b       S124

S123:   mov     r3, #0                  @ 64-bit path, same as S121 but with weight_B
        smlal   r11, r3, r0, ip
        add     r3, r2, r3, lsl #22
        add     r3, r3, r11, lsr #10
        mov     r11, #512

S124:   strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     S341
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6

S341:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_18_loop

/* common exit for terms 17 & 18 */

store_1718:
        str     r3, [r5, #48]           @ store sample history into struct
        str     r8, [r5, #16]           @  samples_B [0] / samples_A [0] = previous right / left
        str     r10, [r5, #52]          @  samples_B [1] / samples_A [1] = 2nd previous
        str     lr, [r5, #20]           @  right / left
        b       common_exit             @ and return

/*
 ******************************************************************************
 * Loop to handle term = 2 condition
 * (note that this case can be handled by the default term handler (1-8), but
 * this special case is faster because it doesn't have to read memory twice)
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 = second previous right sample
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_2_loop:
        mov     ip, lr                  @ get decorrelation value
        mov     lr, r8                  @ previous becomes 2nd previous
        ldr     r2, [r1], #4            @ get sample & update pointer
        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S125
        cmp     ip, #0                  @ flags (decorr value == 0 ?) are consumed at S126
        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
        add     r8, r2, r8, asr #10     @  shift, and add to new sample
        b       S126

S125:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, ip         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, r2, r8, lsl #22     @ sample + (64-bit result >> 10)
        add     r8, r8, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S126:   strne   r8, [r1, #-4]           @ if change possible, store sample back
        cmpne   r2, #0                  @ skip weight update if decorr value or sample is 0
        beq     S225
        teq     ip, r2                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6

S225:   mov     ip, r10                 @ do same thing for right channel
        mov     r10, r3
        ldr     r2, [r1], #4
        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S127
        cmp     ip, #0
        mla     r3, ip, r0, r11
        add     r3, r2, r3, asr #10
        b       S128

S127:   mov     r3, #0                  @ 64-bit path, same as S125 but with weight_B
        smlal   r11, r3, r0, ip
        add     r3, r2, r3, lsl #22
        add     r3, r3, r11, lsr #10
        mov     r11, #512

S128:   strne   r3, [r1, #-4]
        cmpne   r2, #0
        beq     S229
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6

S229:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_2_loop
        b       default_term_exit       @ this exit updates all dpp->samples

/*
 ******************************************************************************
 * Loop to handle default term condition
 *
 * r0 = dpp->weight_B           r8 = result accumulator
 * r1 = bptr                    r9 =
 * r2 = dpp->term               r10 =
 * r3 = decorrelation value     r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current sample
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr =
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_default_loop:
        ldr     ip, [r1]                @ get original sample
        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term
                                        @  (same channel, "term" stereo pairs back)
        mov     r8, r3, lsl #11         @ check magnitude by shifting left then right
        cmp     r3, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S135
        cmp     r3, #0                  @ flags (decorr value == 0 ?) are consumed at S136
        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
        add     r8, ip, r8, asr #10     @  shift and add to new sample
        b       S136

S135:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, r3         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, ip, r8, lsl #22     @ sample + (64-bit result >> 10)
        add     r8, r8, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S136:   str     r8, [r1], #4            @ store update sample (unconditional here)
        cmpne   ip, #0                  @ skip weight update if decorr value or sample is 0
        beq     S350
        teq     ip, r3                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6

S350:   ldr     ip, [r1]                @ do the same thing for right channel
        ldr     r3, [r1, -r2, asl #3]
        mov     r8, r3, lsl #11         @ check magnitude by shifting left then right
        cmp     r3, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S137
        cmp     r3, #0
        mla     r8, r3, r0, r11
        add     r8, ip, r8, asr #10
        b       S138

S137:   mov     r8, #0                  @ 64-bit path, same as S135 but with weight_B
        smlal   r11, r8, r0, r3
        add     r8, ip, r8, lsl #22
        add     r8, r8, r11, lsr #10
        mov     r11, #512

S138:   str     r8, [r1], #4
        cmpne   ip, #0
        beq     S354
        teq     ip, r3
        submi   r0, r0, r6
        addpl   r0, r0, r6

S354:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_default_loop

/*
 * This exit is used by terms 1-8 to store the previous "term" samples (up to 8)
 * into the decorr pass structure history. It walks backward from the current
 * buffer position, so the most recent pair lands at index term - 1 and the
 * oldest at index 0. r6 (delta) is no longer needed and is reused as the
 * array base pointer; r2 is reloaded because the term = 2 loop overwrote it.
 */

default_term_exit:
        ldr     r2, [r5, #0]            @ r2 = dpp->term

S358:   sub     r2, r2, #1
        sub     r1, r1, #8              @ step back one stereo pair
        ldr     r3, [r1, #4]            @ get right sample and store in dpp->samples_B [r2]
        add     r6, r5, #48
        str     r3, [r6, r2, asl #2]
        ldr     r3, [r1, #0]            @ get left sample and store in dpp->samples_A [r2]
        add     r6, r5, #16
        str     r3, [r6, r2, asl #2]
        cmp     r2, #0
        bne     S358
        b       common_exit

/*
 ******************************************************************************
 * Loop to handle term = -1 condition
 *
 * r0 = dpp->weight_B           r8 =
 * r1 = bptr                    r9 =
 * r2 = intermediate result     r10 = -1024 (for clipping)
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current sample
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = updated left sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_minus_1:
        ldr     r3, [r1, #-4]           @ r3 = right sample preceding the buffer start

term_minus_1_loop:
        ldr     ip, [r1]                @ for left channel the decorrelation value
                                        @  is the previous right sample (in r3)
        mov     lr, r3, lsl #11         @ check magnitude by shifting left then right
        cmp     r3, lr, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S142
        cmp     r3, #0                  @ flags (decorr value == 0 ?) are consumed at S143
        mla     r2, r3, r4, r11
        add     lr, ip, r2, asr #10
        b       S143

S142:   mov     lr, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, lr, r4, r3         @ lr:r11 = 0:512 + weight_A * decorr value
        add     lr, ip, lr, lsl #22     @ sample + (64-bit result >> 10)
        add     lr, lr, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S143:   str     lr, [r1], #8            @ store updated left, step past the whole pair
        cmpne   ip, #0                  @ skip weight update if decorr value or sample is 0
        beq     S361
        teq     ip, r3                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6
        cmp     r4, #1024               @ then clip weight to +/-1024
        movgt   r4, #1024
        cmp     r4, r10
        movlt   r4, r10

S361:   ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
                                        @  is the just updated left sample (in lr)
        mov     r3, lr, lsl #11         @ check magnitude by shifting left then right
        cmp     lr, r3, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S144
        cmp     lr, #0
        mla     r3, lr, r0, r11
        add     r3, r2, r3, asr #10
        b       S145

S144:   mov     r3, #0                  @ 64-bit path, same as S142 but with weight_B
        smlal   r11, r3, r0, lr
        add     r3, r2, r3, lsl #22
        add     r3, r3, r11, lsr #10
        mov     r11, #512

S145:   strne   r3, [r1, #-4]           @ r3 = updated right (next left decorr value)
        cmpne   r2, #0
        beq     S369
        teq     r2, lr
        submi   r0, r0, r6
        addpl   r0, r0, r6
        cmp     r0, #1024               @ then clip weight to +/-1024
        movgt   r0, #1024
        cmp     r0, r10
        movlt   r0, r10

S369:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_minus_1_loop

        str     r3, [r5, #16]           @ else store right sample (in samples_A [0]) and exit
        b       common_exit

/*
 ******************************************************************************
 * Loop to handle term = -2 condition
 * (note that the channels are processed in the reverse order here)
 *
 * r0 = dpp->weight_B           r8 =
 * r1 = bptr                    r9 =
 * r2 = intermediate result     r10 = -1024 (for clipping)
 * r3 = previous left sample    r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current sample
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = updated right sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_minus_2:
        ldr     r3, [r1, #-8]           @ r3 = left sample preceding the buffer start

term_minus_2_loop:
        ldr     ip, [r1, #4]            @ for right channel the decorrelation value
                                        @  is the previous left sample (in r3)
        mov     lr, r3, lsl #11         @ check magnitude by shifting left then right
        cmp     r3, lr, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S146
        cmp     r3, #0                  @ flags (decorr value == 0 ?) are consumed at S147
        mla     r2, r3, r0, r11
        add     lr, ip, r2, asr #10
        b       S147

S146:   mov     lr, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, lr, r0, r3         @ lr:r11 = 0:512 + weight_B * decorr value
        add     lr, ip, lr, lsl #22     @ sample + (64-bit result >> 10)
        add     lr, lr, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S147:   strne   lr, [r1, #4]            @ if change possible, store right sample back
        cmpne   ip, #0                  @ skip weight update if decorr value or sample is 0
        beq     S380
        teq     ip, r3                  @ update weight based on signs
        submi   r0, r0, r6
        addpl   r0, r0, r6
        cmp     r0, #1024               @ then clip weight to +/-1024
        movgt   r0, #1024
        cmp     r0, r10
        movlt   r0, r10

S380:   ldr     r2, [r1, #0]            @ for left channel the decorrelation value
                                        @  is the just updated right sample (in lr)
        mov     r3, lr, lsl #11         @ check magnitude by shifting left then right
        cmp     lr, r3, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S148
        cmp     lr, #0
        mla     r3, lr, r4, r11
        add     r3, r2, r3, asr #10
        b       S149

S148:   mov     r3, #0                  @ 64-bit path, same as S146 but with weight_A
        smlal   r11, r3, r4, lr
        add     r3, r2, r3, lsl #22
        add     r3, r3, r11, lsr #10
        mov     r11, #512

S149:   str     r3, [r1], #8            @ store updated left, step past the whole pair
        cmpne   r2, #0
        beq     S388
        teq     r2, lr
        submi   r4, r4, r6
        addpl   r4, r4, r6
        cmp     r4, #1024               @ then clip weight to +/-1024
        movgt   r4, #1024
        cmp     r4, r10
        movlt   r4, r10

S388:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_minus_2_loop

        str     r3, [r5, #48]           @ else store left channel (in samples_B [0]) and exit
        b       common_exit

/*
 ******************************************************************************
 * Loop to handle term = -3 condition
 *
 * r0 = dpp->weight_B           r8 = previous left sample
 * r1 = bptr                    r9 =
 * r2 = updated left sample,    r10 = -1024 (for clipping)
 *      then current right
 * r3 = previous right sample   r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current left sample,
 *                                   then previous left (decorr value)
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr =
 * r7 = eptr                    pc =
 *******************************************************************************
 */

term_minus_3:
        ldr     r3, [r1, #-4]           @ load previous samples
        ldr     r8, [r1, #-8]           @  (r3 = right, r8 = left)

term_minus_3_loop:
        ldr     ip, [r1]                @ left channel: decorr value is previous right (r3)
        mov     r2, r3, lsl #11         @ check magnitude by shifting left then right
        cmp     r3, r2, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S160
        cmp     r3, #0                  @ flags (decorr value == 0 ?) are consumed at S161
        mla     r2, r3, r4, r11
        add     r2, ip, r2, asr #10
        b       S161

S160:   mov     r2, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r2, r4, r3         @ r2:r11 = 0:512 + weight_A * decorr value
        add     r2, ip, r2, lsl #22     @ sample + (64-bit result >> 10)
        add     r2, r2, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S161:   str     r2, [r1], #4            @ store updated left & update pointer
        cmpne   ip, #0                  @ skip weight update if decorr value or sample is 0
        beq     S399
        teq     ip, r3                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6
        cmp     r4, #1024               @ then clip weight to +/-1024
        movgt   r4, #1024
        cmp     r4, r10
        movlt   r4, r10

S399:   mov     ip, r8                  @ ip = previous left we use now
        mov     r8, r2                  @ r8 = current left we use next time
        ldr     r2, [r1], #4            @ get right sample & update pointer
        mov     r3, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r3, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S162
        cmp     ip, #0
        mla     r3, ip, r0, r11
        add     r3, r2, r3, asr #10
        b       S163

S162:   mov     r3, #0                  @ 64-bit path, same as S160 but with weight_B
        smlal   r11, r3, r0, ip
        add     r3, r2, r3, lsl #22
        add     r3, r3, r11, lsr #10
        mov     r11, #512

S163:   strne   r3, [r1, #-4]           @ r3 = updated right (next left decorr value)
        cmpne   r2, #0
        beq     S407
        teq     ip, r2
        submi   r0, r0, r6
        addpl   r0, r0, r6
        cmp     r0, #1024               @ then clip weight to +/-1024
        movgt   r0, #1024
        cmp     r0, r10
        movlt   r0, r10

S407:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     term_minus_3_loop

        str     r3, [r5, #16]           @ else store previous samples & exit
        str     r8, [r5, #48]           @  (right in samples_A [0], left in samples_B [0])

/*
 * Before finally exiting we must store weights back for next time
 */

common_exit:
        str     r4, [r5, #8]            @ dpp->weight_A
        str     r0, [r5, #12]           @ dpp->weight_B
        ldmfd   sp!, {r4 - r8, r10, r11, pc}    @ restore registers and return



/* This is a mono version of the function above. It does not handle negative terms.
 *
 * void decorr_mono_pass_cont (struct decorr_pass *dpp,
 *                             int32_t *buffer,
 *                             int32_t sample_count,
 *                             int32_t long_math);
 * on entry:
 *
 *  r0 = struct decorr_pass *dpp
 *  r1 = int32_t *buffer
 *  r2 = int32_t sample_count   (end = buffer + count * 4 bytes)
 *  r3 = int32_t long_math      (never read by this code)
 *
 * decorr_pass fields as accessed by this code (byte offsets from dpp):
 *
 *   #0 = term    #4 = delta    #8 = weight_A    #16 = samples_A []
 *
 * The two samples preceding "buffer" are always read as history. r4 - r8,
 * r11 and lr are pushed on entry and restored at mono_common_exit, where the
 * saved lr is popped straight into pc to return. weight_A is written back on
 * every exit path.
 *
 * In each loop the condition flags set by the zero test of the decorrelation
 * value (or left as "ne" by the magnitude check when the 64-bit path is
 * taken) are deliberately carried across MLA / SMLAL / ADD / B, none of which
 * alter them, to the conditional STRNE / CMPNE that follow.
 */

        .arm
        .type           unpack_decorr_mono_pass_cont_armv7, STT_FUNC

unpack_decorr_mono_pass_cont_armv7:

        stmfd   sp!, {r4 - r8, r11, lr}

        mov     r5, r0                  @ r5 = dpp
        mov     r11, #512               @ r11 = 512 for rounding
        ldr     r6, [r0, #4]            @ r6 = dpp->delta
        ldr     r4, [r0, #8]            @ r4 = dpp->weight_A
        cmp     r2, #0                  @ exit if no samples to process
        beq     mono_common_exit        @  (weight is stored back unchanged)

        add     r7, r1, r2, asl #2      @ r7 = buffer ending position
        ldr     r2, [r5, #0]            @ r2 = dpp->term

        ldr     lr, [r1, #-8]           @ load 2 sample history from buffer
        ldr     r8, [r1, #-4]           @  (lr = 2nd previous, r8 = previous)
        cmp     r2, #17
        beq     mono_term_17_loop
        cmp     r2, #18
        beq     mono_term_18_loop
        cmp     r2, #2
        beq     mono_term_2_loop
        b       mono_term_default_loop  @ else handle default (1-8, except 2)

/*
 ******************************************************************************
 * Loop to handle term = 17 condition
 *
 * r0 =                         r8 = previous sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 =
 * r3 =                         r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

mono_term_17_loop:
        rsb     ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
        mov     lr, r8                  @ previous becomes 2nd previous
        ldr     r2, [r1], #4            @ get sample & update pointer
        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S717
        cmp     ip, #0                  @ flags (decorr value == 0 ?) are consumed at S718
        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
        add     r8, r2, r8, asr #10     @  shift, and add to new sample
        b       S718

S717:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, ip         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, r2, r8, lsl #22     @ sample + (64-bit result >> 10), built from
        add     r8, r8, r11, lsr #10    @  (hi << 22) + (lo >> 10)
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S718:   strne   r8, [r1, #-4]           @ if change possible, store sample back
        cmpne   r2, #0                  @ skip weight update if decorr value or sample is 0
        beq     S129
        teq     ip, r2                  @ update weight based on signs
        submi   r4, r4, r6              @ signs differ: weight_A -= delta
        addpl   r4, r4, r6              @ signs match:  weight_A += delta

S129:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     mono_term_17_loop       @  (unsigned compare of end vs current pointer)
        b       mono_store_1718         @ common exit for terms 17 & 18

/*
 ******************************************************************************
 * Loop to handle term = 18 condition
 *
 * r0 =                         r8 = previous sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 =
 * r3 =                         r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

mono_term_18_loop:
        sub     ip, r8, lr              @ decorr value =
        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
        add     ip, r8, ip, asr #1      @  computed as prev + ((prev - 2nd prev) >> 1)
        ldr     r2, [r1], #4            @ get sample & update pointer
        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S817
        cmp     ip, #0                  @ flags (decorr value == 0 ?) are consumed at S818
        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
        add     r8, r2, r8, asr #10     @  shift, and add to new sample
        b       S818

S817:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, ip         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, r2, r8, lsl #22     @ sample + (64-bit result >> 10)
        add     r8, r8, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S818:   strne   r8, [r1, #-4]           @ if change possible, store sample back
        cmpne   r2, #0                  @ skip weight update if decorr value or sample is 0
        beq     S141
        teq     ip, r2                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6

S141:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     mono_term_18_loop

/* common exit for terms 17 & 18 */

mono_store_1718:
        str     r8, [r5, #16]           @ store sample history into struct
        str     lr, [r5, #20]           @  (samples_A [0] = previous, [1] = 2nd previous)
        b       mono_common_exit        @ and return

/*
 ******************************************************************************
 * Loop to handle term = 2 condition
 * (note that this case can be handled by the default term handler (1-8), but
 * this special case is faster because it doesn't have to read memory twice)
 *
 * r0 =                         r8 = previous sample
 * r1 = bptr                    r9 =
 * r2 = current sample          r10 =
 * r3 =                         r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = decorrelation value
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr = second previous sample
 * r7 = eptr                    pc =
 *******************************************************************************
 */

mono_term_2_loop:
        mov     ip, lr                  @ get decorrelation value
        mov     lr, r8                  @ previous becomes 2nd previous
        ldr     r2, [r1], #4            @ get sample & update pointer
        mov     r8, ip, lsl #11         @ check magnitude by shifting left then right
        cmp     ip, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S917
        cmp     ip, #0                  @ flags (decorr value == 0 ?) are consumed at S918
        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
        add     r8, r2, r8, asr #10     @  shift, and add to new sample
        b       S918

S917:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, ip         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, r2, r8, lsl #22     @ sample + (64-bit result >> 10)
        add     r8, r8, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S918:   strne   r8, [r1, #-4]           @ if change possible, store sample back
        cmpne   r2, #0                  @ skip weight update if decorr value or sample is 0
        beq     S029
        teq     ip, r2                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6

S029:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     mono_term_2_loop
        b       mono_default_term_exit  @ this exit updates all dpp->samples

/*
 ******************************************************************************
 * Loop to handle default term condition
 *
 * r0 =                         r8 = result accumulator
 * r1 = bptr                    r9 =
 * r2 = dpp->term               r10 =
 * r3 = decorrelation value     r11 = 512 (for rounding)
 * r4 = dpp->weight_A           ip = current sample
 * r5 = dpp                     sp =
 * r6 = dpp->delta              lr =
 * r7 = eptr                    pc =
 *******************************************************************************
 */

mono_term_default_loop:
        ldr     ip, [r1]                @ get original sample
        ldr     r3, [r1, -r2, asl #2]   @ get decorrelation value based on term
                                        @  (the sample "term" positions back)
        mov     r8, r3, lsl #11         @ check magnitude by shifting left then right
        cmp     r3, r8, asr #11         @  and comparing, branch to 64-bit math if different
        bne     S617
        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
        add     r8, ip, r8, asr #10     @  shift and add to new sample
        b       S618

S617:   mov     r8, #0                  @ use 64-bit multiply to avoid overflow
        smlal   r11, r8, r4, r3         @ r8:r11 = 0:512 + weight_A * decorr value
        add     r8, ip, r8, lsl #22     @ sample + (64-bit result >> 10)
        add     r8, r8, r11, lsr #10
        mov     r11, #512               @ restore rounding constant clobbered by smlal

S618:   str     r8, [r1], #4            @ store update sample (unconditional here)
        cmp     r3, #0                  @ unlike the loops above, the zero test of the
        cmpne   ip, #0                  @  decorr value is done here after the join
        beq     S154                    @ skip weight update if either value is 0
        teq     ip, r3                  @ update weight based on signs
        submi   r4, r4, r6
        addpl   r4, r4, r6

S154:   cmp     r7, r1                  @ loop back if more samples to do
        bhi     mono_term_default_loop

/*
 * This exit is used by terms 1-8 to store the previous "term" samples (up to 8)
 * into the decorr pass structure history. It walks backward from the current
 * buffer position, so the most recent sample lands at index term - 1 and the
 * oldest at index 0. r6 (delta) is no longer needed and is reused as the
 * array base pointer; r2 is reloaded because the term = 2 loop overwrote it.
 */

mono_default_term_exit:
        ldr     r2, [r5, #0]            @ r2 = dpp->term

S158:   sub     r2, r2, #1
        sub     r1, r1, #4              @ step back one sample
        ldr     r3, [r1, #0]            @ get sample and store in dpp->samples_A [r2]
        add     r6, r5, #16
        str     r3, [r6, r2, asl #2]
        cmp     r2, #0
        bne     S158
        b       mono_common_exit        @ (branches to the label that follows directly)

/*
 * Before finally exiting we must store weight back for next time
 */

mono_common_exit:
        str     r4, [r5, #8]            @ dpp->weight_A
        ldmfd   sp!, {r4 - r8, r11, pc} @ restore registers and return

#ifdef __ELF__
        .section        .note.GNU-stack,"",%progbits
#endif
