/*
 *  powerpc64le-darwin.dylib-entry.S -- program entry point & decompress (PowerPC64 dylib)
 *
 *  This file is part of the UPX executable compressor.
 *
 *  Copyright (C) 2005-2020 John F. Reiser
 *  All Rights Reserved.
 *
 *  UPX and the UCL library are free software; you can redistribute them
 *  and/or modify them under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2 of
 *  the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.
 *  If not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 *  John F. Reiser
 *  <jreiser@users.sourceforge.net>
 *
 */

#ifndef BIG_ENDIAN  //{
#define BIG_ENDIAN 1  /* Apple on PowerPC* is BIG_ENDIAN */
#endif  //}
NBPW= 8  // Number of Bytes Per Word

#include "arch/powerpc/64le/macros.S"
#include "arch/powerpc/64le/ppc_regs.h"

/*************************************************************************
// We have been CALLed as a subroutine from dyld; C-language rules apply.
// -4*4+_start: .long offset(user_init_function)
// -3*4+_start: .long offset(b_info of compressed Mach_headers)
// -2*4+_start: .long length(compressed __TEXT)
// -1*4+_start: .long 8+ total_length  # 8+ number of preceding bytes in file
**************************************************************************/

// Overview of the control flow in this file:
//   _start   saves the link register in r2 and calls main, so that the
//            link register then holds &decompress.
//   main     is a single branch to main2.
//   main2    saves registers, maps an anonymous region, copies this stub
//            into it (movup1, movup2), and continues in the copy.
//   dy_uncpr decompresses (and optionally unfilters) one b_info block per
//            iteration until a block with sz_unc == 0 is found.
//   dy_done  copies the short epilogue dy_done1 to just below s_unc and
//            jumps to it; the epilogue issues munmap, restores registers,
//            and branches to the user init function.

  section MACOS000
_start: .globl _start
        mflr r2  // r2= return address into the caller; stored by main2
        call main  # must be exactly 1 instruction; link_register= &decompress
decompress:
  section NRV_HEAD
SZ_DLINE=128  # size of data cache line in Apple G5

/* PowerPC has no 'cmplis': compare logical [unsigned] immediate shifted [by 16] */
#define  hibit r0  /* holds 0x80000000 during decompress */

#define src  a0
#define lsrc a1
#define dst  a2
#define ldst a3  /* Out: actually a reference: &len_dst */
#define meth a4

#define off  a4
#define len  a5
#define bits a6
#define disp a7

  section NRV2E
#include "arch/powerpc/64le/nrv2e_d.S"

  section NRV2D
#include "arch/powerpc/64le/nrv2d_d.S"

  section NRV2B
#include "arch/powerpc/64le/nrv2b_d.S"

#include "arch/powerpc/64le/lzma_d.S"

#undef off
#undef len
#undef bits
#undef disp

// Common tail of the decompressors: compute the status in a0 and store
// the produced length through ldst.
  section NRV_TAIL
eof_nrv:
#define dst0 a4
#define tmp a1
        ld dst0,0(ldst)  // original dst
        mtlr t3  // return address
        subf a0,lsrc,src  // must read lsrc before tmp (same register) is written
        subf tmp,dst0,dst  // -1+ dst length
        addi a0,a0,1  // return 0: good; else: bad  [+1: correct for lbzu]
        addi tmp,tmp,1  //  dst length
        std tmp,0(ldst)
#undef tmp

// CACHELINE=32 is the observed minimum line size of any cache.
// Some caches may have larger lines, but it is cumbersome to lookup
// {AT_DCACHEBSIZE, AT_ICACHEBSIZE, AT_UCACHEBSIZE: /usr/include/elf.h},
// then save the correct size in a variable {where to put it?}, or to modify
// the two instructions here.  If a cache has larger lines, then we expect
// that the second dcbst (or icbi) on the same line will be fast.
// If not, then too bad.

  section CFLUSH  // In: a2=dst= &highest stored byte; a4=dst0= &lowest stored byte
CACHELINE=32
        ori dst0,dst0,-1+ CACHELINE  // highest addr on cache line
cfl_nrv:
        dcbst  0,dst0  // initiate store (modified) cacheline to memory
        cmpld cr0,dst0,dst  // did we cover the highest-addressed byte?
        icbi   0,dst0  // discard instructions from cacheline
        addi     dst0,dst0,CACHELINE  // highest addr on next line
        blt  cr0,cfl_nrv  // not done yet
#undef dst0
        sync   // wait for all memory operations to finish
        isync  // discard prefetched instructions (if any)
cfl_ret:
        ret

  section ELFMAINY
        // IDENTSTR goes here

  section ELFMAINZ
// Sizes of the packed-file headers, and field offsets within b_info.
sz_l_info= 12
sz_p_info= 12
sz_b_info= 12
  sz_unc= 0
  sz_cpr= 4
  b_method= 8
  b_ftid=   9
  b_cto8=  10

// register numbers during entry
#define f_unc 31
#define f_uini 30
#define l_unm 29
#define a_unm 28
#define r_unc 27
#define r_cpr 26
#define s_unc 25
#define s_cpr 24
#define l_unc 23
#define l_cpr 22
#define t_h   21  /* temporary */

PROT_NONE  =0x00
PROT_READ  =0x01
PROT_WRITE =0x02
PROT_EXEC  =0x04

MAP_SHARED  =0x1
MAP_PRIVATE =0x2
MAP_ANON    =0x1000

SYS_mmap    =197
SYS_munmap=   73
SYS_mprotect= 74

// main2: real entry code, reached through "b main2" at main.
// In: r2= return address saved by _start; link register= &decompress.
// Frame layout (pushed here, popped at dy_done1):
//   0(sp)           r2, the return address
//   8*(N-2)(sp)     register N, for N in 3..31
// Frame size is 8*(1+ 32-a0) bytes; the epilogue must pop the same amount.
main2:
        //teq r0,r0  // debugging
        stdu r2,-8*(1+ 32-a0)(sp)  # retaddr
//      stmw a0,4*1(sp)
        std   3,3*8-8*2(sp)
        std   4,4*8-8*2(sp)
        std   5,5*8-8*2(sp)
        std   6,6*8-8*2(sp)
        std   7,7*8-8*2(sp)
        std   8,8*8-8*2(sp)
        std   9,9*8-8*2(sp)
        std  10,10*8-8*2(sp)
        std  11,11*8-8*2(sp)
        std  12,12*8-8*2(sp)
        std  13,13*8-8*2(sp)
        std  14,14*8-8*2(sp)
        std  15,15*8-8*2(sp)
        std  16,16*8-8*2(sp)
        std  17,17*8-8*2(sp)
        std  18,18*8-8*2(sp)
        std  19,19*8-8*2(sp)
        std  20,20*8-8*2(sp)
        std  21,21*8-8*2(sp)
        std  22,22*8-8*2(sp)
        std  23,23*8-8*2(sp)
        std  24,24*8-8*2(sp)
        std  25,25*8-8*2(sp)
        std  26,26*8-8*2(sp)
        std  27,27*8-8*2(sp)
        std  28,28*8-8*2(sp)
        std  29,29*8-8*2(sp)
        std  30,30*8-8*2(sp)
        std  31,31*8-8*2(sp)

        mflr f_unc  # f_unc= &decompress
        lwz t_h,  -4*1(f_unc)  # "call main" at _start
        lwz l_unm,-4*1+ _start - decompress(f_unc)  # 4+ offset(_start)
        rlwinm t_h,t_h,0,6,29  # 4+ main - decompress
        add l_unm,l_unm,t_h  # offset(main); ASSUMES (8+_start)==decompress
        addi t_h,t_h,-4  # main - decompress

        // mmap(0, l_unm, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0)
        li a0,0  # addr
        mr a1,l_unm  # length for munmap
        li a2,PROT_READ|PROT_WRITE
        li a3,MAP_ANON|MAP_PRIVATE
        li a4,-1
        li a5,0  # hi32(offset)
        li a6,0  # lo32(offset)
        li 0,SYS_mmap
        sc
          li a0,-1  # failure
        // NOTE(review): the indented instruction after sc is presumably
        // executed only when the system call fails -- confirm against the
        // kernel convention.  The -1 result is not checked below.
        mr a_unm,a0  # address for munmap


        li a2,main - movup2
        mtctr a2
        add a1,a0   ,l_unm  # lwa(dst); new_page + offset(main)
        add a0,t_h,f_unc    # lwa(src); &main
movup1:  # descending copy [movup2, main)
        lbzu r0,-1(a0)
        stbu r0,-1(a1)
        bdnz+ movup1

        subf a2,a2,l_unm  # offset(movup2)
        mtlr a1  # &copied movup2
        mtctr a2  # offset(movup2)
        blr  # goto the copied code

movup2:  # descending copy [base, movup2)
        lbzu r0,-1(a0)
        stbu r0,-1(a1)
        bdnz+ movup2

        // Here a0= base of the original image, a1= base of the copy.
        lwz f_uini,-4*4+ _start - decompress(f_unc)  # offset(user_init_fn)
        subf f_unc,a0,f_unc
        add  f_unc,a1,f_unc  # relocated decompress
        add f_uini,f_uini,a0

        lwz t1,-4*3+ _start - decompress(f_unc)  # offset(b_info)
        add r_cpr,a1,t1  # &b_info
        add r_unc,a0,t1  # &b_info
        addi r_unc,r_unc,-sz_l_info -sz_p_info

        // skip compressed Mach headers
        lwz t1,sz_cpr(r_cpr)
        addi r_cpr,r_cpr,sz_b_info
        add  r_cpr,r_cpr,t1

// Loop: one b_info block per iteration.
//   s_cpr/s_unc= start of this block (compressed / uncompressed)
//   r_cpr/r_unc= running pointers, advanced past this block
//   l_cpr/l_unc= sz_cpr/sz_unc of this block, as read by get4
dy_uncpr:
        mr s_cpr,r_cpr
        mr s_unc,r_unc
        addi a0,r_cpr,sz_unc
        call get4; beq dy_done  // sz_unc == 0 marks the end
        add r_unc,r_unc,a0
        mr l_unc,a0
        addi a0,r_cpr,sz_cpr
        call get4
        add r_cpr,r_cpr,a0
        mr l_cpr,a0
        addi r_cpr,r_cpr,sz_b_info

        stdu l_unc,-8(sp)  # keep stack 8-byte aligned
        mtlr f_unc
        addi a0,s_cpr,sz_b_info  # src
        mr   a1,l_cpr
        mr   a2,s_unc  # dst
        mr   a3,sp  # &l_dst
        lbz a4,b_method(s_cpr)
        stdu sp,-SZ_FRAME(sp)
        blrl  # uncompress
        la sp,8+SZ_FRAME(sp)  // drop the callee frame and the l_dst slot
        // FIXME: check status

        lbz a3,b_ftid(s_cpr)
        cmplwi cr0,a3,0
        beq dy_uncpr  // no filter was applied to this block
        lbz a2,b_cto8(s_cpr)
        // sz_unc is a 4-byte field; an 8-byte load here would also pull in
        // sz_cpr.  Reuse the length that get4 already fetched for this block.
        mr  a1,l_unc
        mr  a0,s_unc
        bl unfilter
        b dy_uncpr

dy_done:
        bl dy_done2  // link register= &dy_done1
// dy_done1: epilogue.  It executes from the copy that dy_done3 places
// just below s_unc.
// In: r0= SYS_munmap; a0,a1= address and length to unmap;
//     ctr= user init function; sp= frame pushed by main2.
dy_done1:  # escape hatch
        sc  # munmap
          li a0,~0  # failure
//      lmw r0,0(sp)
        ld   2,2*8-8*2(sp)  // r2= return address saved by main2
        ld   3,3*8-8*2(sp)
        ld   4,4*8-8*2(sp)
        ld   5,5*8-8*2(sp)
        ld   6,6*8-8*2(sp)
        ld   7,7*8-8*2(sp)
        ld   8,8*8-8*2(sp)
        ld   9,9*8-8*2(sp)
        ld  10,10*8-8*2(sp)
        ld  11,11*8-8*2(sp)
        ld  12,12*8-8*2(sp)
        ld  13,13*8-8*2(sp)
        ld  14,14*8-8*2(sp)
        ld  15,15*8-8*2(sp)
        ld  16,16*8-8*2(sp)
        ld  17,17*8-8*2(sp)
        ld  18,18*8-8*2(sp)
        ld  19,19*8-8*2(sp)
        ld  20,20*8-8*2(sp)
        ld  21,21*8-8*2(sp)
        ld  22,22*8-8*2(sp)
        ld  23,23*8-8*2(sp)
        ld  24,24*8-8*2(sp)
        ld  25,25*8-8*2(sp)
        ld  26,26*8-8*2(sp)
        ld  27,27*8-8*2(sp)
        ld  28,28*8-8*2(sp)
        ld  29,29*8-8*2(sp)
        ld  30,30*8-8*2(sp)
        ld  31,31*8-8*2(sp)
        addi sp,sp,8*(1+ 32-a0)  // pop exactly the frame pushed by main2
        mtlr r2  # &continuation in dyld
        bctr  # goto user_init_function

// dy_done2: copy the words [dy_done1, dy_done2) downward to end at s_unc,
// load the munmap arguments, and jump to the copy.
dy_done2:
        li r0,(dy_done2 - dy_done1)/4  // number of words to copy
        mflr a0  // &dy_done1
        la a0,dy_done2 - dy_done1(a0)  // one past the last word
        mtctr r0
dy_done3:
        lwzu r0,-4(a0)
        stwu r0,-4(s_unc)
        bdnz+ dy_done3

        mtlr s_unc  // s_unc now addresses the copied dy_done1
        mtctr f_uini  # user_init_function
        mr a0,a_unm
        mr a1,l_unm
        li 0,SYS_munmap
        blr  # goto relocated dy_done1

// get4: assemble a 32-bit value from the 4 bytes at 0(a0), taking the
// byte at offset 0 as the most significant.
// In:  a0= address
// Out: a0= value; cr0 reflects the value (from "mr.")
// Clobbers: t1, t2
get4:
        lbz t1,3(a0)
        lbz t2,2(a0)
        rlwimi t1,t2, 8,16,23
        lbz t2,1(a0)
        rlwimi t1,t2,16, 8,15
        lbz t2,0(a0)
        rlwimi t1,t2,24, 0, 7
        mr. a0,t1  # set condition codes
        blr

// unfilter: called above with a0= buffer, a1= length, a2= cto8, a3= ftid.
unfilter:
#include "arch/powerpc/64le/bxx.S"

main:
        b main2
dy_top:
len_top = dy_top - main

/* vim:set ts=8 sw=8 et: */