1/*
2 *  powerpc64le-darwin.dylib-entry.S -- program entry point & decompress (PowerPC64 dylib)
3 *
4 *  This file is part of the UPX executable compressor.
5 *
6 *  Copyright (C) 2005-2020 John F. Reiser
7 *  All Rights Reserved.
8 *
9 *  UPX and the UCL library are free software; you can redistribute them
10 *  and/or modify them under the terms of the GNU General Public License as
11 *  published by the Free Software Foundation; either version 2 of
12 *  the License, or (at your option) any later version.
13 *
14 *  This program is distributed in the hope that it will be useful,
15 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
16 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 *  GNU General Public License for more details.
18 *
19 *  You should have received a copy of the GNU General Public License
20 *  along with this program; see the file COPYING.
21 *  If not, write to the Free Software Foundation, Inc.,
22 *  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 *
24 *  John F. Reiser
25 *  <jreiser@users.sourceforge.net>
26 *
27 */
28
29#ifndef BIG_ENDIAN  //{
30#define BIG_ENDIAN 1  /* Apple on PowerPC* is BIG_ENDIAN */
31#endif  //}
32NBPW= 8  // Number of Bytes Per Word
33
34#include "arch/powerpc/64le/macros.S"
35#include "arch/powerpc/64le/ppc_regs.h"
36
37/*************************************************************************
38// We have been CALLed as a subroutine from dyld; C-language rules apply.
39// -4*4+_start: .long offset(user_init_function)
40// -3*4+_start: .long offset(b_info of compressed Mach_headers)
41// -2*4+_start: .long length(compressed __TEXT)
42// -1*4+_start: .long 8+ total_length  # 8+ number of preceding bytes in file
43**************************************************************************/
44
45  section MACOS000
// Entry point of the stub.
// The link register on entry is copied to r2 before the call overwrites it;
// main2 later stores r2 in its frame as the return address.  After the call
// the link register holds the address of the decompress label, which main2
// uses as its position reference.  main2 also reads the word at
// -4(decompress) to decode the branch displacement, so the call has to stay
// a single instruction placed immediately before decompress.
46_start: .globl _start
47        mflr r2
48        call main  # must be exactly 1 instruction; link_register= &decompress
49decompress:
50  section NRV_HEAD
// Symbolic register names used by the decompressor code that follows.
51SZ_DLINE=128  # size of data cache line in Apple G5
52
53/* PowerPC has no 'cmplis': compare logical [unsigned] immediate shifted [by 16] */
54#define  hibit r0  /* holds 0x80000000 during decompress */
55
// Arguments on entry to decompress; dy_uncpr loads a0..a4 in this order
// (source, source length, destination, &destination length, method byte).
56#define src  a0
57#define lsrc a1
58#define dst  a2
59#define ldst a3  /* Out: actually a reference: &len_dst */
60#define meth a4
61
// Working registers.  Note that off shares register a4 with meth.
62#define off  a4
63#define len  a5
64#define bits a6
65#define disp a7
66
67  section NRV2E
// Each of the three NRV includes is placed in a section of its own.
// NOTE(review): presumably each file supplies the decompressor body for the
// method named by its section - confirm against the included files.
68#include "arch/powerpc/64le/nrv2e_d.S"
69
70  section NRV2D
71#include "arch/powerpc/64le/nrv2d_d.S"
72
73  section NRV2B
74#include "arch/powerpc/64le/nrv2b_d.S"
75
// NOTE(review): lzma_d.S is included without a section line here; presumably
// it declares its own sections internally - confirm.
76#include "arch/powerpc/64le/lzma_d.S"
77
// Release the working-register names.  src, lsrc, dst, ldst and meth stay
// defined because the NRV_TAIL code below still uses them.
78#undef off
79#undef len
80#undef bits
81#undef disp
82
83  section NRV_TAIL
// eof_nrv: computes the status value and stores the produced length.
//   a0      = src - lsrc + 1; zero means success (the +1 compensates for
//             the pre-decrementing/pre-incrementing lbzu, per the note below)
//   0(ldst) = dst - dst0 + 1, the number of bytes written
// tmp is the same register as lsrc (a1), so lsrc must be consumed by the
// first subf before tmp is written by the second one.
// dst0 stays defined past the end of this section; the CFLUSH code uses it.
// NOTE(review): this relies on 0(ldst) holding the original dst, on lsrc
// holding the expected final src, and on t3 holding the return address;
// presumably the included decompressor code establishes these - confirm.
84eof_nrv:
85#define dst0 a4
86#define tmp a1
87        ld dst0,0(ldst)  // original dst
88        mtlr t3  // return address
89        subf a0,lsrc,src
90        subf tmp,dst0,dst  // -1+ dst length
91        addi a0,a0,1  // return 0: good; else: bad  [+1: correct for lbzu]
92        addi tmp,tmp,1  // dst length
93        std  tmp,0(ldst)
94#undef tmp
95
96// CACHELINE=32 is the observed minimum line size of any cache.
97// Some caches may have larger lines, but it is cumbersome to lookup
98// {AT_DCACHEBSIZE, AT_ICACHEBSIZE, AT_UCACHEBSIZE: /usr/include/elf.h},
99// then save the correct size in a variable {where to put it?}, or to modify
100// the two instructions here.  If a cache has larger lines, then we expect
101// that the second dcbst (or icbi) on the same line will be fast.
102// If not, then too bad.
103
104  section CFLUSH  // In: a2=dst= &highest stored byte; a4=dst0= &lowest stored byte
// Walk the stored range one CACHELINE at a time.  The ori sets the low bits
// of dst0 so that it addresses the last byte of its cache line.  In each
// pass dcbst writes the data cache line back and icbi invalidates the
// matching instruction cache line.  The unsigned compare is made before the
// addi, so the loop repeats while the address just processed is below dst.
// The final ret uses the link register; eof_nrv loaded it with mtlr.
105CACHELINE=32
106        ori dst0,dst0,-1+ CACHELINE  // highest addr on cache line
107cfl_nrv:
108        dcbst  0,dst0  // initiate store (modified) cacheline to memory
109        cmpld cr0,dst0,dst  // did we cover the highest-addressed byte?
110        icbi   0,dst0  // discard instructions from cacheline
111        addi     dst0,dst0,CACHELINE  // highest addr on next line
112        blt  cr0,cfl_nrv  // not done yet
113#undef dst0
114        sync   // wait for all memory operations to finish
115        isync  // discard prefetched instructions (if any)
116cfl_ret:
117        ret
118
119  section ELFMAINY
120        // IDENTSTR goes here
121
122  section ELFMAINZ
// Sizes of the l_info, p_info and b_info records, followed by the byte
// offsets of the b_info fields that the code below reads.
123sz_l_info= 12
124sz_p_info= 12
125sz_b_info= 12
126  sz_unc= 0
127  sz_cpr= 4
128  b_method= 8
129  b_ftid=   9
130  b_cto8=  10
131
// Role names for registers 21..31 (main2 saves registers 3..31 first):
//   f_unc  address of decompress       f_uini  user init function
//   l_unm  length for munmap           a_unm   address for munmap
//   r_unc, r_cpr  running pointers into uncompressed and compressed data
//   s_unc, s_cpr  values of r_unc and r_cpr at the start of the current block
//   l_unc, l_cpr  uncompressed and compressed length of the current block
132// register numbers during entry
133#define f_unc 31
134#define f_uini 30
135#define l_unm 29
136#define a_unm 28
137#define r_unc 27
138#define r_cpr 26
139#define s_unc 25
140#define s_cpr 24
141#define l_unc 23
142#define l_cpr 22
143#define t_h   21  /* temporary */
144
// Protection and flag bits passed to mmap by main2.
145PROT_NONE  =0x00
146PROT_READ  =0x01
147PROT_WRITE =0x02
148PROT_EXEC  =0x04
149
150MAP_SHARED  =0x1
151MAP_PRIVATE =0x2
152MAP_ANON    =0x1000
153
// System call numbers; the code loads one into register 0 before sc.
154SYS_mmap    =197
155SYS_munmap=   73
156SYS_mprotect= 74
157
158main2:
// main2: reached through main after _start executed mflr r2 and call main.
// In:  r2 = link register value captured at _start (the return address)
//      LR = &decompress (left there by the call at _start)
// Steps:
//   1. Push a frame of 8*(1+ 32-a0) bytes.  Slot 0 holds r2; register N
//      (3..31) is stored at offset 8*(N-2).
//   2. Derive offset(main) and (main - decompress) from the branch
//      instruction before decompress and from the word before _start.
//   3. mmap an anonymous private read/write region of l_unm bytes.
//   4. Copy [movup2, main) to the matching offsets of the new region,
//      highest byte first, then jump to the copied movup2 with the count
//      for the remaining bytes already in the count register.
159    //teq r0,r0  // debugging
160        stdu r2,-8*(1+ 32-a0)(sp)  # retaddr
161//        stmw a0,4*1(sp)
162        std  3,3*8-8*2(sp)
163        std  4,4*8-8*2(sp)
164        std  5,5*8-8*2(sp)
165        std  6,6*8-8*2(sp)
166        std  7,7*8-8*2(sp)
167        std  8,8*8-8*2(sp)
168        std  9,9*8-8*2(sp)
169        std  10,10*8-8*2(sp)
170        std  11,11*8-8*2(sp)
171        std  12,12*8-8*2(sp)
172        std  13,13*8-8*2(sp)
173        std  14,14*8-8*2(sp)
174        std  15,15*8-8*2(sp)
175        std  16,16*8-8*2(sp)
176        std  17,17*8-8*2(sp)
177        std  18,18*8-8*2(sp)
178        std  19,19*8-8*2(sp)
179        std  20,20*8-8*2(sp)
180        std  21,21*8-8*2(sp)
181        std  22,22*8-8*2(sp)
182        std  23,23*8-8*2(sp)
183        std  24,24*8-8*2(sp)
184        std  25,25*8-8*2(sp)
185        std  26,26*8-8*2(sp)
186        std  27,27*8-8*2(sp)
187        std  28,28*8-8*2(sp)
188        std  29,29*8-8*2(sp)
189        std  30,30*8-8*2(sp)
190        std  31,31*8-8*2(sp)
191
// Position-independent bookkeeping: the rlwinm keeps bits 6..29 of the
// branch instruction, which is its displacement field.
192        mflr f_unc  # f_unc= &decompress
193        lwz  t_h, -4*1(f_unc)  # "call main" at _start
194        lwz  l_unm,-4*1+ _start - decompress(f_unc)  # 4+ offset(_start)
195        rlwinm t_h,t_h,0,6,29  # 4+ main - decompress
196        add  l_unm,l_unm,t_h  # offset(main); ASSUMES (8+_start)==decompress
197        addi t_h,t_h,-4   # main - decompress
198
// mmap(0, l_unm, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)
199        li  a0,0  # addr
200        mr  a1,l_unm  # length for munmap
201        li  a2,PROT_READ|PROT_WRITE
202        li  a3,MAP_ANON|MAP_PRIVATE
203        li  a4,-1
204        li  a5,0  # hi32(offset)
205        li  a6,0  # lo32(offset)
206        li  0,SYS_mmap
207        sc
// NOTE(review): presumably the kernel skips the next instruction when the
// call succeeds, so a0 becomes -1 only on failure - confirm.  The result is
// not checked before it is used as the copy destination.
208        li a0,-1  # failure
209        mr a_unm,a0  # address for munmap
210
211
// Byte count for the first copy is (main - movup2); a0 and a1 start one
// past the last byte and are pre-decremented by lbzu and stbu.
212        li   a2,main - movup2
213        mtctr a2
214        add  a1,a0 ,l_unm  # lwa(dst); new_page + offset(main)
215        add  a0,t_h,f_unc  # lwa(src); &main
216movup1:  # descending copy [movup2, main)
217        lbzu r0,-1(a0)
218        stbu r0,-1(a1)
219        bdnz+ movup1
220
// a1 now addresses the copy of movup2.  The remaining byte count is
// l_unm - (main - movup2), which is offset(movup2).
221        subf a2,a2,l_unm  # offset(movup2)
222        mtlr a1  # &copied movup2
223        mtctr a2  # offset(movup2)
224        blr  # goto the copied code
225
226movup2:  # descending copy [base, movup2)
// Entered in the copied code with a0 = source cursor, a1 = destination
// cursor and the count register = offset(movup2).  The loop finishes the
// descending byte copy, leaving a0 at the source base and a1 at the
// destination base.
227        lbzu r0,-1(a0)
228        stbu r0,-1(a1)
229        bdnz+ movup2
230
// Rebase f_unc from the source image to the copy (subtract a0, add a1) and
// turn the stored offset of the user init function into an address in the
// source image.
231        lwz  f_uini,-4*4+ _start - decompress(f_unc)  # offset(user_init_fn)
232        subf f_unc,a0,f_unc
233        add  f_unc,a1,f_unc  # relocated decompress
234        add  f_uini,f_uini,a0
235
// r_cpr addresses b_info inside the copy.  r_unc addresses the same offset
// in the source image, moved back by the sizes of l_info and p_info.
236        lwz  t1,-4*3+ _start - decompress(f_unc)  # offset(b_info)
237        add  r_cpr,a1,t1  # &b_info
238        add  r_unc,a0,t1  # &b_info
239        addi r_unc,r_unc,-sz_l_info -sz_p_info
240
// Advance r_cpr past the first b_info and the sz_cpr bytes that follow it.
241        // skip compressed Mach headers
242        lwz  t1,sz_cpr(r_cpr)
243        addi r_cpr,r_cpr,sz_b_info
244        add  r_cpr,r_cpr,t1
245dy_uncpr:
// dy_uncpr: decompress one block per pass until a b_info with sz_unc == 0.
// In:  r_cpr = address of the next b_info, r_unc = address for its output,
//      f_unc = address of the relocated decompress entry.
// Each pass:
//   1. Remember the block start in s_cpr and s_unc.
//   2. Fetch sz_unc with get4; zero ends the loop (beq uses the condition
//      codes that get4 sets).  Fetch sz_cpr the same way.  Advance r_unc
//      and r_cpr past this block.
//   3. Push l_unc as the length slot, call decompress through the link
//      register with a0=src, a1=compressed length, a2=dst, a3=&length slot,
//      a4=method byte, then pop the slot and the call frame.
//   4. If b_ftid is non-zero, call unfilter on the output just produced.
246        mr s_cpr,r_cpr
247        mr s_unc,r_unc
248        addi a0,r_cpr,sz_unc
249        call get4; beq dy_done
250        add r_unc,r_unc,a0
251        mr l_unc,a0
252        addi a0,r_cpr,sz_cpr
253        call get4
254                                        add r_cpr,r_cpr,a0
255        mr l_cpr,a0
256        addi r_cpr,r_cpr,sz_b_info
257
258        stdu l_unc,-8(sp)  # keep stack 8-byte aligned
259        mtlr f_unc
260        addi a0,s_cpr,sz_b_info  # src
261        mr a1,l_cpr
262        mr a2,s_unc  # dst
263        mr a3,sp  # &l_dst
264        lbz a4,b_method(s_cpr)
265        stdu sp,-SZ_FRAME(sp)
266        blrl  # uncompress
267        la sp,8+SZ_FRAME(sp)
268                // FIXME: check status
269
270        lbz a3,b_ftid(s_cpr)
271        cmplwi cr0,a3,0
272        beq dy_uncpr
273        lbz a2,b_cto8(s_cpr)
// sz_unc is a 4-byte field (sz_cpr follows at offset 4), so an 8-byte load
// from it would mix sz_cpr into the length.  Pass l_unc instead: it is the
// value get4 fetched for this block and the amount by which r_unc advanced.
// Like s_cpr and s_unc it is expected to survive the decompress call.
274        mr  a1,l_unc  # length of the block to unfilter
275        mr  a0,s_unc
276        bl unfilter
277        b dy_uncpr
278
279dy_done:
// dy_done: finish up after the last block.
// The bl leaves &dy_done1 in the link register.  dy_done2 then copies the
// words of [dy_done1, dy_done2) to just below s_unc (highest word first),
// loads the munmap arguments, and jumps to that copy.  The copy issues the
// munmap, restores the registers saved by main2, releases the frame of
// main2, and branches to the user init function with the link register
// set to the return address that _start captured in r2.
// NOTE(review): the copy is presumably needed because the munmap removes
// the region this code is executing from - confirm.
280        bl dy_done2
281dy_done1:  # escape hatch
282        sc  # munmap
283        li a0,~0  # failure
284//        lmw r0,0(sp)
// Slot 0 of the frame holds the r2 that main2 stored with stdu; register N
// (3..31) comes back from offset 8*(N-2), mirroring the stores in main2.
285        ld  2,2*8-8*2(sp)
286        ld  3,3*8-8*2(sp)
287        ld  4,4*8-8*2(sp)
288        ld  5,5*8-8*2(sp)
289        ld  6,6*8-8*2(sp)
290        ld  7,7*8-8*2(sp)
291        ld  8,8*8-8*2(sp)
292        ld  9,9*8-8*2(sp)
293        ld  10,10*8-8*2(sp)
294        ld  11,11*8-8*2(sp)
295        ld  12,12*8-8*2(sp)
296        ld  13,13*8-8*2(sp)
297        ld  14,14*8-8*2(sp)
298        ld  15,15*8-8*2(sp)
299        ld  16,16*8-8*2(sp)
300        ld  17,17*8-8*2(sp)
301        ld  18,18*8-8*2(sp)
302        ld  19,19*8-8*2(sp)
303        ld  20,20*8-8*2(sp)
304        ld  21,21*8-8*2(sp)
305        ld  22,22*8-8*2(sp)
306        ld  23,23*8-8*2(sp)
307        ld  24,24*8-8*2(sp)
308        ld  25,25*8-8*2(sp)
309        ld  26,26*8-8*2(sp)
310        ld  27,27*8-8*2(sp)
311        ld  28,28*8-8*2(sp)
312        ld  29,29*8-8*2(sp)
313        ld  30,30*8-8*2(sp)
314        ld  31,31*8-8*2(sp)
// Release exactly what main2 reserved with stdu r2,-8*(1+ 32-a0)(sp).
315        addi sp,sp,8*(1+ 32-a0)
// The return address was reloaded into register 2 above, not into r0.
316        mtlr 2  # &continuation in dyld
317        bctr  # goto user_init_function
318dy_done2:
// Count register = number of words in [dy_done1, dy_done2); a0 = &dy_done2.
319        li r0,(dy_done2 - dy_done1)/4
320        mflr a0
321        la a0,dy_done2 - dy_done1(a0)
322        mtctr r0
323dy_done3:
324        lwzu r0,-4(a0)
325        stwu r0,-4(s_unc)
326        bdnz+ dy_done3
327
// s_unc now addresses the copy of dy_done1.  The count register carries the
// user init function for the bctr at the end of the copy.
328        mtlr s_unc
329        mtctr f_uini  # user_init_function
330        mr a0,a_unm
331        mr a1,l_unm
332        li  0,SYS_munmap
333        blr  # goto relocated dy_done1
334
335get4:
// get4: assemble a 32-bit value from the four bytes at 0(a0)..3(a0), with
// the byte at 0(a0) as the most significant.  Only byte loads are used.
// In:  a0 = address of the first byte
// Out: a0 = assembled value; the recording form of mr also sets the
//      condition codes, so a caller can branch on zero right after the call
// Scratch: t1, t2
336        lbz t1,3(a0)
337        lbz t2,2(a0)
338        rlwimi t1,t2, 8,16,23
339        lbz t2,1(a0)
340        rlwimi t1,t2,16, 8,15
341        lbz t2,0(a0)
342        rlwimi t1,t2,24, 0, 7
343        mr. a0,t1  # set condition codes
344        blr
345
346unfilter:
// unfilter: the body comes from bxx.S.  dy_uncpr calls it with a0 = start
// of the block just decompressed, a1 = a length, a2 = the b_cto8 byte and
// a3 = the b_ftid byte of that block.
347#include "arch/powerpc/64le/bxx.S"
348
349main:
// main: target of the call at _start; it only branches to main2.  main2
// decodes the displacement of that call to locate this label, and its copy
// loops use main as the upper bound of the bytes they move.
// len_top is the distance from main to dy_top.
350        b main2
351dy_top:
352len_top  = dy_top - main
353
354/* vim:set ts=8 sw=8 et: */
355