/* arm.v4t-linux.shlib-init.S -- Linux Elf shared library init & decompressor
*
*  This file is part of the UPX executable compressor.
*
*  Copyright (C) 1996-2020 Markus Franz Xaver Johannes Oberhumer
*  Copyright (C) 1996-2020 Laszlo Molnar
*  Copyright (C) 2000-2020 John F. Reiser
*  All Rights Reserved.
*
*  UPX and the UCL library are free software; you can redistribute them
*  and/or modify them under the terms of the GNU General Public License as
*  published by the Free Software Foundation; either version 2 of
*  the License, or (at your option) any later version.
*
*  This program is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  You should have received a copy of the GNU General Public License
*  along with this program; see the file COPYING.
*  If not, write to the Free Software Foundation, Inc.,
*  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*  Markus F.X.J. Oberhumer              Laszlo Molnar
*  <markus@oberhumer.com>               <ezerotven+github@gmail.com>
*
*  John F. Reiser
*  <jreiser@users.sourceforge.net>
*/

NBPW= 4
#define ARM_OLDABI 1
#include "arch/arm/v5a/macros.S"

#define bkpt .long 0xe7f001f0  /* reserved instr; Linux GNU eabi breakpoint */
#define bkpt_th .short 0xde01  /* reserved instr; Linux GNU eabi breakpoint */
sz_Elf32_Ehdr = 13*NBPW
sz_Elf32_Phdr =  8*NBPW

sz_b_info= 12
  sz_unc= 0
  sz_cpr= 4
  b_method= 8
sz_l_info= 12
sz_p_info= 12
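// The 12-byte b_info block header, as a C sketch for reference (offsets 0, 4,
// and 8 match the constants above; the names of the last bytes follow how
// L610 below reads them, and the final pad byte is an assumption):
//   struct b_info {
//       uint32_t sz_unc;    //  0: uncompressed length
//       uint32_t sz_cpr;    //  4: compressed length
//       uint8_t  b_method;  //  8: compression method
//       uint8_t  b_ftid;    //  9: filter id (0 ==> no filter)
//       uint8_t  b_cto8;    // 10: filter parameter
//       uint8_t  b_pad;     // 11: unused (assumed padding)
//   };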

PROT_READ=  1
PROT_WRITE= 2
PROT_EXEC=  4

MAP_PRIVATE= 2
MAP_FIXED=     0x10
MAP_ANONYMOUS= 0x20

PAGE_SHIFT= 12
PAGE_MASK=  (~0<<PAGE_SHIFT)
PAGE_SIZE= -PAGE_MASK

__NR_exit =      1 + __NR_SYSCALL_BASE
__NR_write =     4 + __NR_SYSCALL_BASE
__NR_mmap64 = 0xc0 + __NR_SYSCALL_BASE
__NR_mprotect =125 + __NR_SYSCALL_BASE
__NR_munmap =   91 + __NR_SYSCALL_BASE

__ARM_NR_BASE  = 0xf0000 + __NR_SYSCALL_BASE
__ARM_NR_cacheflush =  2 + __ARM_NR_BASE
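// __NR_SYSCALL_BASE is supplied by the build (macros.S or -D): 0 for the
// EABI, where the syscall number goes in r7 and the trap is "swi 0", and
// 0x900000 for the old ABI, where the number is encoded in the swi immediate
// itself (see msg_SELinux below for both forms).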

#define arg1 r0
#define arg2 r1
#define arg3 r2
#define arg4 r3
#define arg5 r4

#define edi r0
#define esi r1
#define edx r2
#define tmp r3
#define eax r4
#define ecx r5

#define SP(d) sp,#4*(_-d)  /* stack addressing mode */
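// `_` counts the words currently on the stack (maintained by push_/pop_
// below), and each o_*/p_* symbol records the value of `_` when its word was
// pushed, so SP(d) keeps addressing that word no matter how much is pushed
// afterwards.  Example: with _ = 16 and p_mprot = 15,
//   ldr arg1,[SP(p_mprot)]   assembles as   ldr arg1,[sp,#4]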

.macro thumb_sys7t N
#if defined(ARMEL_EABI4)
        mov r7,#\N
        swi 0
#elif defined(ARM_OLDABI)
        blx x\N
#else
        error \N  // ARM_OLDABI, ARMEL_EABI4, ARMEL_DARWIN ?
#endif

.endm

.macro call4 label
        .balign 4
        bl \label
.endm
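// call4 word-aligns the Thumb `bl` and branches over the code that follows
// the macro invocation, so the callee receives lr = address of that inline
// code.  wlen_subr (near the end of this file) later re-reads the aligned
// `bl` to recover the inline code's length when it must be copied.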

.macro push_ reg
        push {\reg}
_= 1+_  // one more word on stack
.endm

.macro pop_ reg
        pop {\reg}
_=-1+_  // one less word on stack
.endm

//#define lodsl ldr eax,[esi],#4
#define lodslu bl get4u

  section ELFMAINX
//  .long offset(b_info)|(asl_delta>>12)  src for f_exp
//D_INFO:
//  .long offset(.)  // detect relocation
//  .long offset(user DT_INIT)
//  .long offset(escape_hatch)  // override with round_up(2, PT_LOAD[0]{.p_memsz + .p_vaddr})
//  .long offset(dst for f_exp)

#define DEBUG 0
        .code 16  //; .balign 4
.real_start_ofELFMAINX:
        .thumb_func
_start: .globl _start
#if DEBUG  //{
        bkpt_th  // for debugging
#else  //}{
        nop
#endif  //}
        push {r0,r1,r2,r3,r4,r5,r6,r7,lr}
_=9
o_uinit= 1  // lr
        adr esi,here
        sub esi,#(here - _start) + 5*NBPW  // -NBPW + &D_INFO
here:
        ldr tmp,[esi,#0*NBPW]  // offset(b_info)|(asl_delta>>12)
                mov edi,#1
                               add ecx,esi,#NBPW  // &D_INFO
        ldr eax,[esi,#1*NBPW]
                and edi,tmp  // asl_delta>>12
                sub tmp,edi  // offset(b_info)
                               sub ecx,eax; //str ecx,[SP(o_reloc)]
                lsl edi,#12  // asl_delta
        ldr eax,[esi,#2*NBPW]; cmp eax,#0; beq 0f  // empty user_init
                               add eax,ecx  // reloc DT_INIT  for step 12
                               add eax,edi
                               str eax,[SP(o_uinit)]
                            0:
        ldr edi,[esi,#4*NBPW]; add edi,ecx  // dst for f_exp
        add esi,tmp,ecx  // &b_info  src for f_exp
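// At this point: ecx = relocation (runtime &D_INFO minus its link-time
// offset), esi = runtime &b_info, edi = runtime dst for decompression, and
// the saved-lr slot o_uinit holds the relocated user DT_INIT (biased by
// asl_delta) when one exists; otherwise it keeps the original lr.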

        sub sp,#3*NBPW  // 3 slots of space
        _=1+_  // &escape_hatch (step 10)
o_hatch=_   // 10
        _=2+_  // param space: munmap temp pages (step 9)
p_unmap=_  // 12

        push_ lr  // will be lr at entry to user_init
o_lr=_  // 13
        ldr eax,[esi,#sz_cpr]; add esi,#sz_b_info
        add esi,eax  // skip unpack helper block

        lodslu  // eax=dstlen
        lsl tmp,edi,#(32-PAGE_SHIFT)
        lsr tmp,tmp,#(32-PAGE_SHIFT)  // tmp= fragment
        add eax,tmp; push_ eax  // params: mprotect restored pages  step 8
        sub edi,tmp; push_ edi
p_mprot=_  // 15
        sub eax,tmp  // dstlen
        add edi,tmp  // dst
        lsr tmp,tmp,#2; push_ tmp  // w_fragment
o_wfrag=_  // 16

        call4 L610
f_unfilter:  // (char *ptr, uint len, uint cto, uint fid)
#define ptr r0
#define len r1
#define cto r2  /* FIXME: unused */
#define fid r3

#define t1 r2
#define t2 r3

#ifndef FILTER_ID  /*{*/
#define FILTER_ID 0x50  /* little-endian */
#endif  /*}*/
        lsl fid,fid,#24; lsr len,len,#2
        lsr fid,fid,#24; lsl len,len,#2
        cmp fid,#FILTER_ID; bne unf_done  // last use of fid
        b tst_unf
top_unf:
        sub len,len,#4
        ldr t1,[ptr,len]
        lsl t2,t1,#4
        lsr t2,t2,#4+24  // bits 27..24
        cmp t2,#0x0b; bne tst_unf  // not 'bl' subroutine call
        lsr len,len,#2; sub t2,t1,len  // convert to word-relative displacement
        lsl len,len,#2
        lsr t1,t1,#24; lsl t2,t2,#8
        lsl t1,t1,#24; lsr t2,t2,#8
        orr t1,t1,t2  // re-combine
        str t1,[ptr,len]
tst_unf:
        cmp len,#0
        bne top_unf
unf_done:
        ret

#undef ptr
#undef len
#undef cto
#undef fid

#undef t1
#undef t2
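// What the loop above undoes, as a rough C sketch (the word array w and the
// index i are illustrative; the matching filter is applied by the packer):
//   for (unsigned i = len/4; i-- > 0; )        // scan backwards by words
//       if (0x0b == (0x0f & (w[i] >> 24)))     // ARM 'bl' opcode field
//           w[i] = (w[i] & 0xff000000)         // keep cond + opcode byte
//                | (0x00ffffff & (w[i] - i));  // absolute -> relative words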

        .thumb_func
L610:
        push_ lr  // &f_unfilter (thumb mode)
o_unflt=_  // 17
        ldrb tmp,[esi,#b_method-4+1]; push_ tmp  // ftid
        ldrb tmp,[esi,#b_method-4+2]; push_ tmp  // cto8
        push_ eax  // dstlen  also for unfilter  step 7
        push_ edi  // dst    param for unfilter  step 7
p_unflt=_  // 21

        lodslu; mov ecx,eax  // ecx= srclen
        lodslu; push_ eax  // method,filter,cto,junk

        call4 L710
        .arm
f_decompress:
#define LINUX_ARM_CACHEFLUSH 1

  section NRV_HEAD
        // empty
  section NRV_TAIL
        // empty

  section NRV2E
#include "arch/arm/v4a/nrv2e_d8.S"

  section NRV2D
#include "arch/arm/v4a/nrv2d_d8.S"

  section NRV2B
#include "arch/arm/v4a/nrv2b_d8.S"

#include "arch/arm/v4a/lzma_d.S"
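// Of the decompressor variants above, the packer keeps only the one matching
// the method chosen at pack time; `supervise` calls it as
// f_decompress(src, srclen, dst, &dstlen).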

  section ELFMAINY
end_decompress: .globl end_decompress

msg_SELinux:
        mov r2,#L71 - L70  // length
        adr r1,L70  // message text
        mov r0,#2  // fd stderr
#if defined(ARMEL_EABI4)  /*{*/
        mov r7,#__NR_write
        swi 0
#else  /*}{*/
        swi __NR_write
#endif  /*}*/
die:
        mov r0,#127
#if defined(ARMEL_EABI4)  /*{*/
        mov r7,#__NR_exit
        swi 0
#else  /*}{*/
        swi __NR_exit
#endif  /*}*/
L70:
        .asciz "PROT_EXEC|PROT_WRITE failed.\n"
L71:
        /* IDENTSTR goes here */

  section ELFMAINZ
        .code 16; .balign 2
.real_start_ofELFMAINZ:

        .thumb_func
L710:
.real_start_ofL710:

//  1. allocate temporary pages
//  2. copy to temporary pages:
//       fragment of page below dst; compressed src;
//       decompress+unfilter; supervise
//  3. mmap destination pages for decompressed data
//  4. create escape hatch
//  5. jump to temporary pages
//  6. uncompress
//  7. unfilter
//  8. mprotect decompressed pages
//  9. setup args for unmap of temp pages
// 10. jump to escape hatch
// 11. unmap temporary pages
// 12. goto user DT_INIT
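// The same steps as a rough C-level sketch, in execution order (names are
// illustrative; the real arguments live in the stack frame built above):
//   tmp = mmap(0, total, PROT_READ|WRITE|EXEC, MAP_PRIVATE|ANON, -1, 0); // 1
//   copy fragment, src, f_decompress, f_unfilter, supervise into tmp;    // 2
//   goto copied supervise;                                               // 5
//   mmap(dstpage, frag+dstlen, RW, MAP_PRIVATE|ANON|FIXED, -1, 0);       // 3
//   copy fragment back; f_decompress(src, srclen, dst, &dstlen);         // 6
//   write the two hatch instructions just past the decompressed image;   // 4
//   if (ftid) f_unfilter(dst, dstlen, cto8, ftid);                       // 7
//   cacheflush(...); mprotect(dstpage, frag+dstlen, READ|EXEC);          // 8
//   r7 = __NR_munmap; goto hatch;                                        // 9,10
//   hatch: munmap(tmp, total); return to user DT_INIT (or caller);       // 11,12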

        mov tmp,lr; sub tmp,#1; push_ tmp  // &f_decompress (ARM mode)
o_uncpr=_  // 23
        add tmp,SP(p_unflt)+1*NBPW; push_ tmp  // &dstlen
        push_ edi  // dst
        push_ ecx  // srclen
        push_ esi  // src;  arglist ready for decompress  step 6
p_uncpr=_  // 27
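// The stack frame is now complete; word index == value of `_` when pushed:
//   27..24  src, srclen, dst, &dstlen        args for f_decompress (step 6)
//   23      &f_decompress (ARM mode)
//   22      method,ftid,cto8 word from b_info
//   21..18  dst, dstlen, cto8, ftid          args for f_unfilter (step 7)
//   17      &f_unfilter (Thumb mode)
//   16      w_fragment
//   15,14   page-aligned dst, frag+dstlen    args for mprotect (step 8)
//   13      saved lr of caller
//   12,11   addr, len of temporary pages     args for munmap (step 9)
//   10      &escape_hatch (step 10)
//    9..1   r0..r7,lr pushed at _start; slot 1 (o_uinit) holds the
//           relocated user DT_INIT, or the original lr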

        mov tmp,#3
        and tmp,esi  // length of prefix alignment
        add ecx,#3  // allow  suffix alignment
        add ecx,tmp  // prefix increases byte length
        lsr ecx,#2  // w_srclen
        ldr tmp,[SP(o_wfrag)]; add edx,tmp,ecx  // w_srclen + w_frag
        ldr tmp,[SP(o_uncpr)]; bl wlen_subr; add edx,ecx
        ldr tmp,[SP(o_unflt)]; bl wlen_subr; add edx,ecx

        call4 L220
SAVE_=_
supervise:  // moved at runtime before being executed
        // Allocate pages for result of decompressing.
        // These replace the compressed source and the following hole.
        mov arg5,#0; mvn arg5,arg5  // -1; cater to *BSD for fd of MAP_ANON
        mov arg4,#MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED
        mov arg3,#PROT_READ|PROT_WRITE
        ldr arg2,[SP(p_mprot)+4]  // dstlen
        ldr arg1,[SP(p_mprot)  ]  // dst
        mov r6,arg1  // required result
        thumb_sys7t __NR_mmap64; cmp r0,r6; beq 0f; bkpt_th; 0:

        // Restore fragment of page below dst
        ldr ecx,[SP(o_wfrag)]
        //mov edi,r0  // NOP: edi==r0
        ldr esi,[SP(p_unmap)]
        bl movsl

        pop {arg1,arg2,arg3,arg4, eax}
_=-5+_  // 22
        blx eax  // decompress
        pop_ tmp  // toss arg5

// Place the escape hatch
        pop  {arg1,arg2}  // addr, len
        mov tmp,#1
        push {arg1,arg2}
        add edi,arg1,arg2  // ldr edi,[SP(o_hatch)]
        add edi,#1
        bic edi,tmp  // round_up(2, .p_memsz + .p_vaddr)
        ldr tmp,hatch  // the 2 instructions
        str tmp,[edi]
        add edi,#1  // thumb mode
        str edi,[SP(o_hatch)]


//p_unflt  // 21
        pop {arg1,arg2,arg3,arg4, eax, r5}  // r5= w_fragment [discard]
_=-6+_  // 15
        tst arg4,arg4; beq 0f  // 0==ftid ==> no filter
        blx eax  // f_unfilter
0:
        ldr arg1,[sp,#0*NBPW]  // lo(dst)
        ldr arg2,[sp,#1*NBPW]  // len
        add arg2,arg1  // hi(dst)
        bl x__ARM_NR_cacheflush

//p_mprot  // 15
        pop {arg1,arg2, tmp}; mov lr,tmp
_=-3+_  // 12
        mov arg3,#PROT_READ|PROT_EXEC
        thumb_sys7t __NR_mprotect

//p_unmap
.if __NR_munmap <= 0xff
        mov r7,#__NR_munmap
.else
        mov r7,#__NR_munmap>>16
        lsl r7,#16
        add r7,#__NR_munmap - ((__NR_munmap>>16)<<16)
.endif
        pop {arg1,arg2, pc}  // goto hatch
_=-3+_  // 9
        .balign 4
hatch:
        swi 0  // 0xdf00; munmap
        pop {r0,r1,r2,r3,r4,r5,r6,r7,pc}  // 0xbdff; goto user DT_INIT
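// The two hatch instructions were copied into the tail of the destination
// pages (just past the decompressed image), so they stay mapped while the
// temporary pages that ran `supervise` are unmapped out from under it:
// swi 0 performs munmap (r7 was loaded above, r0/r1 by the pop), then the
// final pop restores the registers saved at _start and jumps through the
// o_uinit slot -- the relocated user DT_INIT, or the original lr.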

        .thumb_func
movsl_subr:
.real_start_ofmovsl_subr:
        lsr esi,esi,#2
        lsl esi,esi,#2  // word align [corrects for thumb-mode]
        push {lr}; mov tmp,esi; bl wlen_subr
        pop {tmp}; mov lr,tmp
// FALL THROUGH to the part of 'movsl' that trims to a multiple of 8 words.
// 7/8 of the time this is faster; 1/8 of the time it's slower.
9:
        ldr tmp,[esi,#0]; add esi,#4
        str tmp,[edi,#0]; add edi,#4
        sub ecx,#1
        .thumb_func
movsl:  // In:  edi= 4-byte aligned dst; esi= 4-byte aligned src; ecx= word count
.real_start_ofmovsl:
        mov tmp,#7; tst ecx,tmp; bne 9b
        lsr ecx,#3; beq 8f  // THUMB return when zero
        .balign 4; bx pc; nop  // enter ARM mode
        .arm
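        // ARM-mode ldm/stm below can move 8 words per iteration using the
        // high registers r8,r9,r12, which Thumb-1 ldmia/stmia cannot reach.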
        stmdb  sp!,{r2,   r4,r6, r7,r8,r9}  // tmp===r3, ecx===r5
7:
        ldmia esi!,{r2,r3,r4,r6, r7,r8,r9,r12}; subs ecx,ecx,#1
        stmia edi!,{r2,r3,r4,r6, r7,r8,r9,r12}; bne 7b
        ldmia  sp!,{r2,   r4,r6, r7,r8,r9}
9:
        ret
8:
        .thumb
        bx lr

#if !defined(ARMEL_EABI4)  /*{*/
        .arm
        .balign 4
x__NR_mmap64:
        do_sys7t __NR_mmap64
        bx lr
x__NR_munmap:
        do_sys7t __NR_munmap
        bx lr
x__NR_mprotect:
        do_sys7t __NR_mprotect
        bx lr
        .thumb
#endif  /*}*/

        .thumb_func
x__ARM_NR_cacheflush:
.real_start_ofx__ARM_NR_cacheflush:
        mov arg3,#0
        mov r7,#__ARM_NR_BASE>>16
        lsl r7,#16
        add r7,# __ARM_NR_cacheflush - __ARM_NR_BASE
        swi 0
        bx lr
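// cacheflush(start=arg1, end=arg2, flags=0) makes the I-cache see the freshly
// written instructions.  The syscall number is too wide for a Thumb `mov`
// immediate, so it is synthesized piecewise in r7.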

        .thumb_func
L220:
.real_start_ofL220:
_=SAVE_  // 27
        mov tmp,lr; sub tmp,#1; push_ tmp  // &supervise
o_super=_  // 28
        bl wlen_subr; add edx,ecx  // wlen_supervise
        lsl arg2,edx,#2  // convert to bytes

        // Allocate pages to hold temporary copy.
        mov arg5,#0; mvn arg5,arg5  // -1; cater to *BSD for fd of MAP_ANON
        mov arg4,#MAP_PRIVATE|MAP_ANONYMOUS
        mov arg3,#PROT_READ|PROT_WRITE|PROT_EXEC
        str arg2,[SP(p_unmap)+1*NBPW]  // length to unmap
        mov arg1,#0  // any addr
        thumb_sys7t __NR_mmap64; asr tmp,r0,#12; add tmp,#1; bne 0f; bkpt_th; 0:
        str r0,[SP(p_unmap)]  // address to unmap

        ldr esi,[SP(p_mprot)]
        //mov edi,r0  // edi= dst  NOP: edi==r0
        ldr ecx,[SP(o_wfrag)]  // w_fragment
        bl movsl  // copy the fragment

        ldr esi,[SP(p_uncpr)+0*NBPW]  // src
        ldr ecx,[SP(p_uncpr)+1*NBPW]  // len
        mov tmp,#3
        and tmp,esi  // length of prefix alignment
        sub esi,tmp  // down to word aligned
        add ecx,tmp  // prefix increases byte length
        add tmp,edi // skip prefix at destination
        str tmp,[SP(p_uncpr)+0*NBPW]  // dst
        add ecx,#3  // round up to full words
        lsr ecx,#2
        bl movsl  // copy all aligned words that contain compressed data

        mov edx,edi  // lo(dst) of copied code

        ldr esi,[SP(o_uncpr)]
        str edi,[SP(o_uncpr)]
        bl movsl_subr  // copy decompressor

        add tmp,edi,#1  // dst f_unfilter thumb mode
        ldr esi,[SP(o_unflt)]
        str tmp,[SP(o_unflt)]
        bl movsl_subr  // copy f_unfilter

        pop_ esi   // &supervise
        add r7,edi,#1  // &copied (thumb mode)
        bl movsl_subr  // copy supervisor

        mov arg2,edi  // hi(dst) of copied code
        mov arg1,edx  // lo(dst) of copied code
        mov lr,r7  // return address for ...
        b x__ARM_NR_cacheflush  // call with continuation return

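// get4u: fetch a 32-bit little-endian value at esi, which may be unaligned
// (an ARMv4T ldr would rotate a misaligned load), then advance esi by 4.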
get4u:
        ldrb eax,[esi,#3];
        ldrb tmp,[esi,#2]; lsl eax,#8; orr eax,tmp
        ldrb tmp,[esi,#1]; lsl eax,#8; orr eax,tmp
        ldrb tmp,[esi,#0]; lsl eax,#8; orr eax,tmp
        add esi,#4
        ret

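// wlen_subr: tmp holds a (possibly Thumb-tagged) pointer to code that was
// entered through a word-aligned `bl` emitted by call4.  Backing up one word
// lands on that bl; its two halfwords carry the high and low 11 bits of the
// halfword displacement to the branch target, i.e. the extent of the inline
// code, which is rounded up to whole 32-bit words and returned in ecx.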
wlen_subr:  // Out: ecx= nwords of inline subr at *tmp
        lsr tmp,tmp,#2
        lsl tmp,tmp,#2  // word align (correct for thumb mode)
        sub tmp,#4
        ldrh ecx,[tmp,#0]; lsl ecx,#32-11; lsr ecx,#32-11-11  // hi(disp)
        ldrh tmp,[tmp,#2]; lsl tmp,#32-11; lsr tmp,#32-11- 0  // lo(disp)
        add ecx,tmp  // disp
        add ecx,#1+1  // disp omits 1 word; prepare to round
        lsr ecx,#1  // round up to whole 32-bit words
        ret

/*__XTHEENDX__*/

/* vim:set ts=8 sw=8 et: */