/* nrv2e_d32.S -- ARM decompressor for NRV2E

   This file is part of the UPX executable compressor.

   Copyright (C) 1996-2020 Markus Franz Xaver Johannes Oberhumer
   Copyright (C) 1996-2020 Laszlo Molnar
   Copyright (C) 2000-2020 John F. Reiser
   All Rights Reserved.

   UPX and the UCL library are free software; you can redistribute them
   and/or modify them under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of
   the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.
   If not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

   Markus F.X.J. Oberhumer              Laszlo Molnar
   <markus@oberhumer.com>               <ezerotven+github@gmail.com>

   John F. Reiser
   <jreiser@users.sourceforge.net>
*/
#include "macros.S"

/* Syntax/target: AArch64 (A64) for GNU as, run through the C preprocessor.
   ';' separates statements; comments are C-style or '//'.
   PUSH1/PUSH2/POP1/POP2 are not defined here; they are expected to come
   from "macros.S".
*/

#define SAFE 0  /* 1 for src+dst bounds checking: cost 76 bytes */

#define lr   x30

/* Register roles.  'len' and 'cnt' deliberately share w1. */
#define src  x0
#define len  w1  /* overlaps 'cnt' */
#define dst  x2
#define dstw w2
#define tmp  w3
#define tmpx x3
#define bits w4
#define off  w5
/*           r6  UNUSED in ARM code unless DEBUG mode */
#define srclim x7
#if 1==SAFE  /*{*/
#define dstlim x12
#endif  /*}*/

#define cnt  w1  /* overlaps 'len' while reading an offset */

/* Step (in bytes) used by the cache-maintenance loop after decompression.
   NOTE(review): a fixed 64 assumes the smallest D/I cache line is at least
   64 bytes; CTR_EL0 reports the real value -- confirm for all targets. */
#define CACHE_LINE_N2E 64

/* macros reduce "noise" when comparing this ARM code to corresponding THUMB code */
#define ADD2( dst,src) add  dst,dst,src
#define ADD2S(dst,src) adds dst,dst,src
#define ADC2( dst,src) adc  dst,dst,src
#define ADC2S(dst,src) adcs dst,dst,src
#define SUB2( dst,src) sub  dst,dst,src
#define SUB2S(dst,src) subs dst,dst,src
#define LDRB3(reg,psrc,incr) ldrb reg,psrc,incr
#define STRB3(reg,pdst,incr) strb reg,pdst,incr

#if 1==SAFE  /*{*/
#define CHECK_SRC  cmp srclim,src; bls bad_src_n2e  /* Out: 1==Carry for get32_n2e */
#define CHECK_DST  cmp dst,dstlim; bhs bad_dst_n2e
#else  /*}{*/
#define CHECK_SRC  /*empty*/
#define CHECK_DST  /*empty*/
#endif  /*}*/

#if 0  /*{ DEBUG only: check newly-decompressed against original dst */
#define CHECK_BYTE \
        ldrb w6,[dst]; \
        cmp w6,tmp; beq 0f; brk #0; 0:
#else  /*}{*/
#define CHECK_BYTE  /*empty*/
#endif  /*}*/

/* Bit reader front-ends.  GETBIT calls get1_n2e, which returns the next
   input bit in the Carry flag.  The call uses 'bl', so lr is overwritten;
   the entry code saves lr before the first GETBIT. */
#undef  GETBIT
#define GETBIT bl get1_n2e

#undef  getnextb
#define getnextb(reg) GETBIT; ADC2(reg,reg)  /* Out: condition code not changed */
#define   jnextb0     GETBIT; bcc
#define   jnextb1     GETBIT; bcs

ucl_nrv2e_decompress_32: .globl ucl_nrv2e_decompress_32  // AArch64 (A64) code
        .type ucl_nrv2e_decompress_32, %function
/* error = (*)(char const *src, uint32_t len_src, char *dst, uint32_t *plen_dst)
   Actual decompressed length is stored through plen_dst.
   For SAFE mode: at call, *plen_dst must be allowed length of output buffer.

   In:    x0 = src, w1 = len_src, x2 = dst, x3 = plen_dst
   Out:   x0 = (final src) - (src + len_src); 0 if the actual source length
               equals the expected length
          *plen_dst = number of bytes written to dst (32-bit store)
   Uses:  x0-x5, x7, condition flags (plus x12 if SAFE, w6 if DEBUG);
          lr is saved on entry and restored before 'ret'.
   After decompressing, the output range is cleaned/invalidated by VA
   (dc cvau / ic ivau) one CACHE_LINE_N2E step at a time.
*/
        PUSH1(lr)
        PUSH2(x2,x3)            // keep original dst and plen_dst for eof_n2e
#define sp_DST0 0  /* stack offset of original dst */
        add srclim,src,len,uxtw // srclim= eof_src;
#if 1==SAFE  /*{*/
        ldr tmp,[x3]                    // len_dst (32-bit), read through plen_dst
        add dstlim,dst,tmp,uxtw         // dstlim= dst + len_dst (64-bit pointer)
#endif  /*}*/
        mov off,#-1             // off= -1 initial condition
        mov bits,#1<<31         // refill next time
        b top_n2e

#if 1==SAFE  /*{*/
bad_dst_n2e:  // return value will be 2
        add src,srclim,#1
bad_src_n2e:  // return value will be 1
        ADD2(src,#1)
#endif  /*}*/
eof_n2e:
        POP2(x3,x4)             // x3= orig_dst; x4= plen_dst
        SUB2(src,srclim)        // 0 if actual src length equals expected length
        SUB2(dst,x3)            // actual dst length
        str dstw,[x4]           // *plen_dst = actual dst length
        mov x4,x0               // save result value

        // Start from the cache line that contains orig_dst.  Stepping from an
        // unaligned address could reach the end address before touching the
        // last, partially covered line.
        and x0,x3,#-CACHE_LINE_N2E      // orig_dst rounded down to a line
        add x1,x3,dst                   // orig_dst + dst_len
        // NOTE(review): there is no DSB between/after the dc and ic operations
        // and no ISB afterwards in this routine -- confirm that the caller
        // supplies the required barriers before executing the new code.
cache_n2e:
        dc cvau,x0              // Clean by VA to point of Unification
        ic ivau,x0              // Invalidate by VA to point of Unification
        add x0,x0,#CACHE_LINE_N2E       // next line
        cmp x0,x1; blo cache_n2e

        mov x0,x4               // result value
        POP1(lr)
        ret

/* get1_n2e: shift the next bit out of 'bits' into Carry.
   'bits' carries a sentinel 1 below the unread bits; when the shift leaves
   zero, the sentinel itself was shifted out and a new word is fetched. */
get1_n2e:
        ADD2S(bits,bits); cbz bits,get32_n2e; ret
get32_n2e:  // In: Carry set [from adding 0x80000000 (1<<31) to itself]
        CHECK_SRC; ldr bits,[src],#4
        ADC2S(bits,bits)        // left shift 1 bit with CarryIn and CarryOut
        ret

lit_n2e:                        // literal: copy one byte from src to dst
        CHECK_SRC; LDRB3(tmp,[src],#1)
        CHECK_BYTE
        CHECK_DST; STRB3(tmp,[dst],#1)
top_n2e:
        jnextb1 lit_n2e
        mov cnt,#1; b getoff_n2e

off_n2e:
        SUB2(cnt,#1)
        getnextb(cnt)
getoff_n2e:
        getnextb(cnt)
        jnextb0 off_n2e

        subs tmp,cnt,#3         // set Carry
        mov len,#0              // Carry unaffected
        blo offprev_n2e         // cnt was 2; tests Carry only
        CHECK_SRC; LDRB3(off,[src],#1)  // low 7+1 bits
        orr off,off,tmp,lsl #8
        mvn off,off; cbz off,eof_n2e    // off= ~off
        tst off,#1; asr off,off,#1; bne lenlast_n2e     // 'asr' leaves the flags from 'tst'
        b lenmore_n2e

offprev_n2e:                    // reuse the previous 'off'
        jnextb1 lenlast_n2e
lenmore_n2e:
        mov len,#1
        jnextb1 lenlast_n2e
len_n2e:
        getnextb(len)
        jnextb0 len_n2e
        ADD2(len,#6-2)
        b gotlen_n2e

lenlast_n2e:
        getnextb(len)           // 0,1,2,3
        ADD2(len,#2)
gotlen_n2e:  // 'cmn': add the inputs, set condition codes, discard the sum
        cmn off,#5<<8           // within M2_MAX_OFFSET
        cinc len,len,cc         // too far away, so minimum match length is 3
near_n2e:
#if 1==SAFE  /*{*/
        // All pointer arithmetic here is 64-bit; 'off' is a negative 32-bit
        // value and is sign-extended, 'len' is unsigned and zero-extended.
        ldr tmpx,[sp,#sp_DST0]          // original dst
        SUB2(tmpx,dst)                  // orig_dst - dst: minus the bytes written so far
        subs tmpx,tmpx,off,sxtw; bhi bad_dst_n2e  // reaching back too far

        add tmpx,dst,len,uxtw
        cmp tmpx,dstlim; bhi bad_dst_n2e  // too much output
#endif  /*}*/
        add tmpx,dst,len,uxtw
        ldrb tmp,[tmpx,#-1]     // force cacheline allocate
copy_n2e:                       // byte-wise copy of 'len' bytes from dst+off to dst
        ldrb tmp,[dst,off,sxtw]
        CHECK_BYTE
        STRB3(tmp,[dst],#1)
        SUB2S(len,#1); bne copy_n2e
        b top_n2e

        .size ucl_nrv2e_decompress_32, .-ucl_nrv2e_decompress_32

/*
vi:ts=8:et:nowrap
 */
