1/*****************************************************************************
2 * cabac-a.S: aarch64 cabac
3 *****************************************************************************
4 * Copyright (C) 2014-2021 x264 project
5 *
6 * Authors: Janne Grunau <janne-x264@jannau.net>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21 *
22 * This program is also available under a commercial proprietary license.
23 * For more information, contact us at licensing@x264.com.
24 *****************************************************************************/
25
26#include "asm.S"
27#include "asm-offsets.h"
28
29// w11 holds x264_cabac_t.i_low
30// w12 holds x264_cabac_t.i_range
31
// cabac_encode_decision_asm: code one bin against a context state, then
// renormalize and, if due, emit an output byte.
//   x0 = base pointer of the CABAC struct (every CABAC_* offset is relative
//        to it)
//   w1 = context index; the state byte is at x0 + CABAC_STATE + w1
//   w2 = b, the bin value. Bit 0 of (b ^ i_state) selects the path taken,
//        while all of w2 is ORed into the transition-table index.
//        NOTE(review): presumably b is always 0 or 1 - confirm with callers.
// No stack access and no calls; the visible instructions only write x2-x14.
// Execution falls through into cabac_encode_renorm and branches on to
// cabac_putbyte. cabac_encode_bypass_asm and cabac_encode_terminal_asm also
// branch to cabac_putbyte, so its entry registers (x0, w2, w11, w12) must
// not be changed.
32function cabac_encode_decision_asm, export=1
    // The next instructions interleave three independent computations:
    //   w3  = i_state, w12 = i_range, w11 = i_low
    //   x4  = bits [7:1] of i_state (i_state >> 1), w5 = i_range >> 6
    //   w4  = byte at x8 + 4*x4 + w5                       -> i_range_lps
    //   w9  = byte at cabac_transition + (b | i_state << 1) -> next state
    // NOTE(review): the -4 passed to movrel presumably biases the table
    // address so that (i_range >> 6) can index a 4-byte row without
    // subtracting 4 first; movrel is defined in asm.S - confirm.
33    add         w10, w1,  #CABAC_STATE
34    ldrb        w3,  [x0,  w10, uxtw]           // i_state
35    ldr         w12, [x0,  #CABAC_I_RANGE]
36    movrel      x8,  X264(cabac_range_lps), -4
37    movrel      x9,  X264(cabac_transition)
38    ubfx        x4,  x3,  #1,  #7
39    asr         w5,  w12, #6
40    add         x8,  x8,  x4, lsl #2
41    orr         w14, w2,  w3, lsl #1
42    ldrb        w4,  [x8,  w5,  uxtw]           // i_range_lps
43    ldr         w11, [x0,  #CABAC_I_LOW]
44    eor         w6,  w2,  w3                    // b ^ i_state
45    ldrb        w9,  [x9,  w14, uxtw]
    // w12 = i_range - i_range_lps; w7 = i_low + that reduced range.
    // When bit 0 of (b ^ i_state) is set (ne): i_range = i_range_lps and
    // i_low = w7. Otherwise the reduced range and the old i_low are kept.
46    sub         w12, w12, w4
47    add         w7,  w11, w12
48    tst         w6,  #1                         // (b ^ i_state) & 1
49    csel        w12, w4, w12, ne
50    csel        w11, w7, w11, ne
    // Write the next state (looked up above) back to the context's byte.
51    strb        w9,  [x0,  w10, uxtw]           // i_state
52
// Renormalization. Live here: x0 = struct base, w11 = i_low, w12 = i_range.
53cabac_encode_renorm:
    // w5 = clz(i_range) - 23 is the left shift that moves the highest set
    // bit of i_range to bit 8 (clz == 23 exactly when bit 8 is the top bit).
    // NOTE(review): assumes i_range is non-zero and below 0x200 here, so the
    // shift stays within 0..8 - confirm the invariant.
    // adds updates i_queue and sets the flags for b.ge: once the new
    // i_queue is >= 0 (signed), cabac_putbyte completes the update.
54    ldr         w2,  [x0, #CABAC_I_QUEUE]
55    clz         w5,  w12
56    sub         w5,  w5,  #23
57    lsl         w11, w11, w5
58    lsl         w12, w12, w5
59    adds        w2,  w2,  w5
60    b.ge        cabac_putbyte
61
    // i_queue still negative: just store the updated fields and return.
62    stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range
63    str         w2,  [x0, #CABAC_I_QUEUE]
64    ret
65
// cabac_putbyte: shared tail reached via b.ge from the entry points in this
// file. It ends in ret, so it returns to the caller of the exported function.
// On entry: x0 = struct base, w2 = updated i_queue, w11 = i_low,
// w12 = i_range. Before returning it stores i_low, i_range, i_queue and
// i_bytes_outstanding. Writes w2, w4-w8, w11, w13 and w14.
// (.align 5: on AArch64 gas the operand is a power-of-two exponent, i.e. the
// label is aligned to 32 bytes.)
66.align 5
67cabac_putbyte:
    //   w6  = i_bytes_outstanding
    //   w14 = i_queue + 10, the bit position in i_low where "out" starts
    //   w4  = out = i_low >> w14
    //   w11 = i_low with every bit at or above position w14 cleared
    //   w2  = i_queue - 8
    // If out == 0xff nothing is written: i_bytes_outstanding is incremented
    // (cinc ... eq) and control skips to the final stores at 0:.
68    ldr         w6,  [x0, #CABAC_I_BYTES_OUTSTANDING]
69    add         w14, w2,  #10
70    mov         w13, #-1
71    sub         w2,  w2,  #8
72    asr         w4,  w11, w14           // out
73    lsl         w13, w13, w14
74    subs        w5,  w4,  #0xff
75    bic         w11, w11, w13
76    cinc        w6,  w6,  eq
77    b.eq        0f
78
    // out != 0xff. x7 = write pointer loaded from CABAC_P. carry = out >> 8
    // is added to the byte already stored at p[-1]; w5 then becomes
    // carry - 1, the fill value for the outstanding bytes. If none are
    // outstanding (w6 == 0) the fill loop is skipped.
791:
80    ldr         x7,  [x0, #CABAC_P]
81    asr         w5,  w4,  #8            // carry
82    ldurb       w8,  [x7, #-1]
83    add         w8,  w8,  w5
84    sub         w5,  w5,  #1
85    sturb       w8,  [x7, #-1]
86    cbz         w6,  3f
    // Store the low byte of (carry - 1) - 0xff for carry 0, 0x00 for carry
    // 1 - once per outstanding byte, advancing p each time. The loop repeats
    // while the decremented count is still > 0.
872:
88    subs        w6,  w6,  #1
89    strb        w5,  [x7],  #1
90    b.gt        2b
    // Append the low byte of out and save the advanced write pointer.
913:
92    strb        w4,  [x7],  #1
93    str         x7,  [x0, #CABAC_P]
    // Common exit. w6 is either the incremented count (out == 0xff path) or
    // what is left after the fill loop (0 when the count started >= 0).
940:
95    stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range
96    stp         w2,  w6,  [x0, #CABAC_I_QUEUE]  // store i_queue, i_bytes_outstanding
97    ret
98endfunc
99
// cabac_encode_bypass_asm: code one bin without reading or updating any
// context state byte.
//   x0 = base pointer of the CABAC struct
//   w1 = b. It is ANDed with i_range, so i_range is added in full only when
//        every bit of b that overlaps i_range is set.
//        NOTE(review): presumably callers pass 0 or -1 - confirm.
// Computes i_low = (i_low << 1) + (b & i_range) and i_queue += 1.
// i_range is not modified, but it is loaded into w12 because cabac_putbyte
// stores w11/w12 as a pair.
100function cabac_encode_bypass_asm, export=1, align=5
    // w12 = i_range, w11 = i_low, w2 = i_queue: the registers that
    // cabac_putbyte reads on entry.
101    ldr         w12, [x0, #CABAC_I_RANGE]
102    ldr         w11, [x0, #CABAC_I_LOW]
103    ldr         w2,  [x0, #CABAC_I_QUEUE]
104    and         w1,  w1,  w12
105    add         w11, w1,  w11, lsl #1
    // adds sets the flags for b.ge: once i_queue + 1 is >= 0 (signed),
    // cabac_putbyte takes over and performs all the stores itself.
106    adds        w2,  w2,  #1
107    b.ge        cabac_putbyte
    // Otherwise only i_low and i_queue changed, so only they are stored.
108    str         w11, [x0, #CABAC_I_LOW]
109    str         w2,  [x0, #CABAC_I_QUEUE]
110    ret
111endfunc
112
// cabac_encode_terminal_asm: reduce i_range by 2 and renormalize by at most
// one bit.
//   x0 = base pointer of the CABAC struct (the only argument read)
// If bit 8 of (i_range - 2) is set, only i_range is stored and the function
// returns. Otherwise i_range and i_low are both doubled, i_queue is
// incremented, and cabac_putbyte is entered once i_queue is >= 0.
// NOTE(review): a one-bit shift and a bit-8 test are enough only if
// i_range - 2 always lies in 0x80..0x1ff - confirm the range invariant.
113function cabac_encode_terminal_asm, export=1, align=5
114    ldr         w12, [x0, #CABAC_I_RANGE]
115    sub         w12, w12, #2
    // tbz: branch to 1: when bit 8 of the reduced range is clear.
116    tbz         w12, #8, 1f
117
    // Bit 8 still set: no shift needed; i_low and i_queue stay untouched.
118    str         w12, [x0, #CABAC_I_RANGE]
119    ret
    // Shift by one bit. w2 = i_queue, w11 = i_low and w12 = i_range are the
    // registers cabac_putbyte reads on entry; adds sets the flags for b.ge
    // (the lsl that follows it does not modify them).
1201:
121    ldr         w2,  [x0, #CABAC_I_QUEUE]
122    ldr         w11, [x0, #CABAC_I_LOW]
123    lsl         w12, w12, #1
124    adds        w2,  w2,  #1
125    lsl         w11, w11, #1
126    b.ge        cabac_putbyte
127
    // i_queue still negative: store the three updated fields and return.
128    stp         w11, w12, [x0, #CABAC_I_LOW]    // store i_low, i_range
129    str         w2,  [x0, #CABAC_I_QUEUE]
130    ret
131endfunc
132