/* nrv2e_d32.S -- AArch64 (64-bit ARM) decompressor for NRV2E

   This file is part of the UPX executable compressor.

   Copyright (C) 1996-2020 Markus Franz Xaver Johannes Oberhumer
   Copyright (C) 1996-2020 Laszlo Molnar
   Copyright (C) 2000-2020 John F. Reiser
   All Rights Reserved.

   UPX and the UCL library are free software; you can redistribute them
   and/or modify them under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of
   the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   Markus F.X.J. Oberhumer              Laszlo Molnar
   <markus@oberhumer.com>               <ezerotven+github@gmail.com>

   John F. Reiser
   <jreiser@users.sourceforge.net>
*/
#include "macros.S"

#define SAFE 0  /* 1 for src+dst bounds checking: cost 76 bytes */

/* Register assignment.
   Names bound to xN are used as 64-bit values, names bound to wN as 32-bit
   values.  'dst'/'dstw' and 'tmp'/'tmpx' are two views of one register.
*/
#define lr   x30  /* return address; rewritten by each 'bl get1_n2e' */

#define src  x0  /* input cursor; advanced by the post-indexed loads */
#define len  w1  /* overlaps 'cnt' */
#define dst  x2  /* output cursor; advanced by the post-indexed stores */
#define dstw w2
#define tmp  w3
#define tmpx x3
#define bits w4  /* bit buffer consumed by get1_n2e */
#define off  w5  /* match offset, kept negative; added to dst when copying */
/*           w6  UNUSED unless the DEBUG form of CHECK_BYTE is enabled */
#define srclim x7  /* src + len_src, computed on entry */
#if 1==SAFE  /*{*/
#define dstlim x12  /* dst + *plen_dst, computed on entry */
#endif  /*}*/

#define cnt  w1  /* overlaps 'len' while reading an offset */

/* macros reduce "noise" when comparing this ARM code to corresponding THUMB code */
/* Two-operand forms: dst = dst OP src.  The 'S' variants also set the flags. */
#define ADD2( dst,src) add  dst,dst,src
#define ADD2S(dst,src) adds dst,dst,src
#define ADC2( dst,src) adc  dst,dst,src
#define ADC2S(dst,src) adcs dst,dst,src
#define SUB2( dst,src) sub  dst,dst,src
#define SUB2S(dst,src) subs dst,dst,src
/* Byte load/store; as used in this file the 2nd argument is a bracketed base
   register and the 3rd a post-index immediate, e.g. LDRB3(tmp,[src],#1). */
#define LDRB3(reg,psrc,incr) ldrb reg,psrc,incr
#define STRB3(reg,pdst,incr) strb reg,pdst,incr

/* Bounds checks; they expand to nothing unless SAFE is 1.
   CHECK_SRC leaves for bad_src_n2e when srclim <= src (unsigned).
   CHECK_DST leaves for bad_dst_n2e when dst >= dstlim (unsigned).
*/
#if 1==SAFE  /*{*/
#define CHECK_SRC  cmp srclim,src; bls bad_src_n2e /* Out: 1==Carry for get32_n2e */
#define CHECK_DST  cmp dst,dstlim; bhs bad_dst_n2e
#else  /*}{*/
#define CHECK_SRC  /*empty*/
#define CHECK_DST  /*empty*/
#endif  /*}*/

#if 0  /*{ DEBUG only: check newly-decompressed against original dst */
/* Stops at 'brk #0' when the byte about to be stored (tmp) differs from the
   byte already present at [dst].  Uses w6. */
#define CHECK_BYTE \
   ldrb  w6,[dst]; \
   cmp   w6,tmp; beq 0f; brk #0; 0:
#else  /*}{*/
#define CHECK_BYTE  /*empty*/
#endif  /*}*/

/* Bit input.  GETBIT calls get1_n2e, which returns the next input bit in the
   Carry flag.  getnextb(reg) then computes reg = 2*reg + bit using 'adc',
   which does not write the flags.  jnextb0/jnextb1 take a label and branch
   when the bit is 0/1 respectively.
*/
#undef GETBIT
#define GETBIT bl get1_n2e

#undef getnextb
#define getnextb(reg) GETBIT; ADC2(reg,reg) /* Out: condition code not changed */
#define   jnextb0     GETBIT; bcc
#define   jnextb1     GETBIT; bcs

ucl_nrv2e_decompress_32: .globl ucl_nrv2e_decompress_32  // AArch64
        .type ucl_nrv2e_decompress_32, %function
/* error = (*)(char const *src, uint32_t len_src, char *dst, uint32_t *plen_dst)
   In:  x0= src; w1= len_src; x2= dst; x3= plen_dst.
   Out: x0= (final input cursor) - (src + len_src); 0 if actual src length
        equals expected length.
   Actual decompressed length is stored through plen_dst.
   For SAFE mode: at call, *plen_dst must be allowed length of output buffer.
*/
        PUSH1(lr)  // 'bl get1_n2e' rewrites lr; the saved copy is restored before 'ret'
        PUSH2(x2,x3)  // original dst and plen_dst; popped again at eof_n2e
#define sp_DST0 0  /* stack offset of original dst */
        add srclim,src,len,uxtw  // srclim= eof_src;
#if 1==SAFE  /*{*/
        // tmp is w3, so this load replaces plen_dst in x3; the pointer itself
        // was saved by PUSH2 above.
        ldr tmp,[x3]  // len_dst
        add dstlim,dst,tmp,uxtw  // dstlim= dst + len_dst (32-bit length zero-extended)
#endif  /*}*/
        mov off,#-1  // off= -1 initial condition
        mov bits,#1<<31  // refill next time
        b top_n2e

/* Exit path.
   In:  src (x0)= input cursor; srclim (x7)= end of input; dst (x2)= output cursor;
        stack holds {orig_dst, plen_dst} then the saved lr.
   Out: x0= src - srclim; *plen_dst= number of bytes written.
   The written range is then made visible to instruction fetch.
*/
#if 1==SAFE  /*{*/
bad_dst_n2e:  // return value will be 2
        add src,srclim,#1
bad_src_n2e:  // return value will be 1
        ADD2(src,#1)
#endif  /*}*/
eof_n2e:
        POP2(x3,x4)  // x3= orig_dst; x4= plen_dst
        SUB2(src,srclim)  // 0 if actual src length equals expected length
        SUB2(dst,x3)  // actual dst length
        str dstw,[x4]
        mov x4,x0  // save result value

#define CACHE_STEP_N2E 64  /* stride of the maintenance loops, in bytes */
        add x1,x3,dst  // orig_dst + dst_len (one past the last byte written)
        // Start on a stride boundary.  Stepping from an unaligned orig_dst can
        // jump over the line that holds the final bytes of the output.
        and x3,x3,#-CACHE_STEP_N2E
        mov x0,x3
cache_n2e:
        dc cvau,x0  // Clean by VA to point of Unification
        add x0,x0,#CACHE_STEP_N2E  // next line
        cmp x0,x1; blo cache_n2e
        dsb ish  // all cleans complete before any invalidate starts

        mov x0,x3
icache_n2e:
        ic ivau,x0  // Invalidate by VA to point of Unification
        add x0,x0,#CACHE_STEP_N2E  // next line
        cmp x0,x1; blo icache_n2e
        dsb ish  // invalidates complete
        isb  // discard instructions already fetched by this core

        mov x0,x4  // result value
        POP1(lr)
        ret

/* get1_n2e: deliver the next input bit in the Carry flag.
   In:  bits= unread bits, left-justified, followed by a single 1 marker bit.
   Out: Carry= next bit; bits shifted left by one; src advanced by 4 on refill.
   Called with 'bl', so lr is overwritten by the caller's 'bl'.
*/
get1_n2e:
        ADD2S(bits,bits)  // shift left one place; Carry= the bit shifted out
        cbz bits,get32_n2e  // nothing left: that bit was the marker, so refill (flags untouched)
        ret
get32_n2e:  // In: Carry set [from adding 0x80000000 (1<<31) to itself]
        CHECK_SRC
        ldr bits,[src],#4  // next 32 bits of input; src += 4
        ADC2S(bits,bits)  // left shift 1 bit with CarryIn and CarryOut
        ret

/* Main decode loop.  Entered at top_n2e with off= -1 and bits= 1<<31.
   Each GETBIT/getnextb/jnextb use is a 'bl get1_n2e' that returns one input
   bit in Carry.  Leaves through eof_n2e (and, in SAFE mode, bad_src_n2e or
   bad_dst_n2e).
*/
lit_n2e:  // copy one byte from the input to the output
        CHECK_SRC; LDRB3(tmp,[src],#1)
        CHECK_BYTE
        CHECK_DST; STRB3(tmp,[dst],#1)
top_n2e:
        jnextb1 lit_n2e  // bit 1: literal byte follows
        mov cnt,#1; b getoff_n2e  // bit 0: match; start accumulating cnt from 1

off_n2e:  // continue: cnt= 2*(cnt - 1) + bit
        SUB2(cnt,#1)
        getnextb(cnt)
getoff_n2e:  // cnt= 2*cnt + bit; a following 1 bit ends the accumulation
        getnextb(cnt)
        jnextb0 off_n2e

        subs tmp,cnt,#3  // set Carry
        mov len,#0  // Carry unaffected
        blo offprev_n2e  // cnt was 2; tests Carry only
        CHECK_SRC; LDRB3(off,[src],#1)  // low 7+1 bits
        orr  off,off,tmp,lsl #8  // off= ((cnt - 3) << 8) | byte
        mvn off,off; cbz off,eof_n2e  // off= ~off; zero here marks end of stream
        // Bit 0 of ~off chooses the length path; 'asr' drops that bit, keeps
        // off negative and leaves the flags from 'tst' intact for 'bne'.
        tst off,#1; asr off,off,#1; bne lenlast_n2e
        b lenmore_n2e

offprev_n2e:  // reuse the offset of the previous match; len is 0 here
        jnextb1 lenlast_n2e
lenmore_n2e:
        mov len,#1
        jnextb1 lenlast_n2e
len_n2e:  // long form: len= 2*len + bit until a 1 bit terminates
        getnextb(len)
        jnextb0 len_n2e
        ADD2(len,#6-2)
        b gotlen_n2e

lenlast_n2e:
        getnextb(len)  // 0,1,2,3
        ADD2(len,#2)
gotlen_n2e:  // 'cmn': add the inputs, set condition codes, discard the sum
        cmn off,#5<<8  // within M2_MAX_OFFSET
        cinc len,len,cc  // too far away, so minimum match length is 3
near_n2e:
#if 1==SAFE  /*{*/
        // All arithmetic on pointers is 64-bit: tmpx is the x view of tmp.
        ldr tmpx,[sp,#sp_DST0]  // original dst
        sub tmpx,dst,tmpx  // bytes produced so far
        adds tmpx,tmpx,off,sxtw  // produced - distance (off is the negative distance)
        bmi bad_dst_n2e  // reaching back too far

        add tmpx,dst,len,uxtw  // one past the last byte this match will write
        cmp tmpx,dstlim; bhi bad_dst_n2e  // too much output
#endif  /*}*/
        add tmpx,dst,len,uxtw
        ldrb tmp,[tmpx,#-1]  // force cacheline allocate
copy_n2e:  // byte-by-byte so that source and destination may overlap
        ldrb tmp,[dst,off,sxtw]  // source is dst + off, off sign-extended to 64 bits
        CHECK_BYTE
        STRB3(tmp,[dst],#1)
        SUB2S(len,#1); bne copy_n2e
        b top_n2e

        .size ucl_nrv2e_decompress_32, .-ucl_nrv2e_decompress_32

/*
vi:ts=8:et:nowrap
 */