1/* 2 * Branch/Call/Jump (BCJ) filter decoders 3 * 4 * Authors: Lasse Collin <lasse.collin@tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * Translation to Go: Michael Cross <https://github.com/xi2> 8 * 9 * This file has been put into the public domain. 10 * You can do whatever you want with this file. 11 */ 12 13package xz 14 15/* from linux/lib/xz/xz_dec_bcj.c *************************************/ 16 17type xzDecBCJ struct { 18 /* Type of the BCJ filter being used */ 19 typ xzFilterID 20 /* 21 * Return value of the next filter in the chain. We need to preserve 22 * this information across calls, because we must not call the next 23 * filter anymore once it has returned xzStreamEnd 24 */ 25 ret xzRet 26 /* 27 * Absolute position relative to the beginning of the uncompressed 28 * data (in a single .xz Block). 29 */ 30 pos int 31 /* x86 filter state */ 32 x86PrevMask uint32 33 /* Temporary space to hold the variables from xzBuf */ 34 out []byte 35 outPos int 36 temp struct { 37 /* Amount of already filtered data in the beginning of buf */ 38 filtered int 39 /* 40 * Buffer to hold a mix of filtered and unfiltered data. This 41 * needs to be big enough to hold Alignment + 2 * Look-ahead: 42 * 43 * Type Alignment Look-ahead 44 * x86 1 4 45 * PowerPC 4 0 46 * IA-64 16 0 47 * ARM 4 0 48 * ARM-Thumb 2 2 49 * SPARC 4 0 50 */ 51 buf []byte // slice buf will be backed by bufArray 52 bufArray [16]byte 53 } 54} 55 56/* 57 * This is used to test the most significant byte of a memory address 58 * in an x86 instruction. 59 */ 60func bcjX86TestMSByte(b byte) bool { 61 return b == 0x00 || b == 0xff 62} 63 64func bcjX86Filter(s *xzDecBCJ, buf []byte) int { 65 var maskToAllowedStatus = []bool{ 66 true, true, true, false, true, false, false, false, 67 } 68 var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3} 69 var i int 70 var prevPos int = -1 71 var prevMask uint32 = s.x86PrevMask 72 var src uint32 73 var dest uint32 74 var j uint32 75 var b byte 76 if len(buf) <= 4 { 77 return 0 78 } 79 for i = 0; i < len(buf)-4; i++ { 80 if buf[i]&0xfe != 0xe8 { 81 continue 82 } 83 prevPos = i - prevPos 84 if prevPos > 3 { 85 prevMask = 0 86 } else { 87 prevMask = (prevMask << (uint(prevPos) - 1)) & 7 88 if prevMask != 0 { 89 b = buf[i+4-int(maskToBitNum[prevMask])] 90 if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) { 91 prevPos = i 92 prevMask = prevMask<<1 | 1 93 continue 94 } 95 } 96 } 97 prevPos = i 98 if bcjX86TestMSByte(buf[i+4]) { 99 src = getLE32(buf[i+1:]) 100 for { 101 dest = src - uint32(s.pos+i+5) 102 if prevMask == 0 { 103 break 104 } 105 j = uint32(maskToBitNum[prevMask]) * 8 106 b = byte(dest >> (24 - j)) 107 if !bcjX86TestMSByte(b) { 108 break 109 } 110 src = dest ^ (1<<(32-j) - 1) 111 } 112 dest &= 0x01FFFFFF 113 dest |= 0 - dest&0x01000000 114 putLE32(dest, buf[i+1:]) 115 i += 4 116 } else { 117 prevMask = prevMask<<1 | 1 118 } 119 } 120 prevPos = i - prevPos 121 if prevPos > 3 { 122 s.x86PrevMask = 0 123 } else { 124 s.x86PrevMask = prevMask << (uint(prevPos) - 1) 125 } 126 return i 127} 128 129func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int { 130 var i int 131 var instr uint32 132 for i = 0; i+4 <= len(buf); i += 4 { 133 instr = getBE32(buf[i:]) 134 if instr&0xFC000003 == 0x48000001 { 135 instr &= 0x03FFFFFC 136 instr -= uint32(s.pos + i) 137 instr &= 0x03FFFFFC 138 instr |= 0x48000001 139 putBE32(instr, buf[i:]) 140 } 141 } 142 return i 143} 144 145var bcjIA64BranchTable = [...]byte{ 146 0, 0, 0, 0, 0, 0, 0, 0, 147 0, 0, 0, 0, 0, 0, 0, 0, 148 4, 4, 6, 6, 0, 0, 7, 7, 149 4, 4, 0, 0, 4, 4, 0, 0, 150} 151 152func bcjIA64Filter(s *xzDecBCJ, buf []byte) int { 153 var branchTable = bcjIA64BranchTable[:] 154 /* 155 * The local variables take a little bit stack space, but it's less 156 * than what LZMA2 decoder takes, so it doesn't make sense to reduce 157 * stack usage here without doing that for the LZMA2 decoder too. 158 */ 159 /* Loop counters */ 160 var i int 161 var j int 162 /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ 163 var slot uint32 164 /* Bitwise offset of the instruction indicated by slot */ 165 var bitPos uint32 166 /* bit_pos split into byte and bit parts */ 167 var bytePos uint32 168 var bitRes uint32 169 /* Address part of an instruction */ 170 var addr uint32 171 /* Mask used to detect which instructions to convert */ 172 var mask uint32 173 /* 41-bit instruction stored somewhere in the lowest 48 bits */ 174 var instr uint64 175 /* Instruction normalized with bit_res for easier manipulation */ 176 var norm uint64 177 for i = 0; i+16 <= len(buf); i += 16 { 178 mask = uint32(branchTable[buf[i]&0x1f]) 179 for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 { 180 if (mask>>slot)&1 == 0 { 181 continue 182 } 183 bytePos = bitPos >> 3 184 bitRes = bitPos & 7 185 instr = 0 186 for j = 0; j < 6; j++ { 187 instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j)) 188 } 189 norm = instr >> bitRes 190 if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 { 191 addr = uint32((norm >> 13) & 0x0fffff) 192 addr |= (uint32(norm>>36) & 1) << 20 193 addr <<= 4 194 addr -= uint32(s.pos + i) 195 addr >>= 4 196 norm &= ^(uint64(0x8fffff) << 13) 197 norm |= uint64(addr&0x0fffff) << 13 198 norm |= uint64(addr&0x100000) << (36 - 20) 199 instr &= 1<<bitRes - 1 200 instr |= norm << bitRes 201 for j = 0; j < 6; j++ { 202 buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j))) 203 } 204 } 205 } 206 } 207 return i 208} 209 210func bcjARMFilter(s *xzDecBCJ, buf []byte) int { 211 var i int 212 var addr uint32 213 for i = 0; i+4 <= len(buf); i += 4 { 214 if buf[i+3] == 0xeb { 215 addr = uint32(buf[i]) | uint32(buf[i+1])<<8 | 216 uint32(buf[i+2])<<16 217 addr <<= 2 218 addr -= uint32(s.pos + i + 8) 219 addr >>= 2 220 buf[i] = byte(addr) 221 buf[i+1] = byte(addr >> 8) 222 buf[i+2] = byte(addr >> 16) 223 } 224 } 225 return i 226} 227 228func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int { 229 var i int 230 var addr uint32 231 for i = 0; i+4 <= len(buf); i += 2 { 232 if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 { 233 addr = uint32(buf[i+1]&0x07)<<19 | 234 uint32(buf[i])<<11 | 235 uint32(buf[i+3]&0x07)<<8 | 236 uint32(buf[i+2]) 237 addr <<= 1 238 addr -= uint32(s.pos + i + 4) 239 addr >>= 1 240 buf[i+1] = byte(0xf0 | (addr>>19)&0x07) 241 buf[i] = byte(addr >> 11) 242 buf[i+3] = byte(0xf8 | (addr>>8)&0x07) 243 buf[i+2] = byte(addr) 244 i += 2 245 } 246 } 247 return i 248} 249 250func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int { 251 var i int 252 var instr uint32 253 for i = 0; i+4 <= len(buf); i += 4 { 254 instr = getBE32(buf[i:]) 255 if instr>>22 == 0x100 || instr>>22 == 0x1ff { 256 instr <<= 2 257 instr -= uint32(s.pos + i) 258 instr >>= 2 259 instr = (0x40000000 - instr&0x400000) | 260 0x40000000 | (instr & 0x3FFFFF) 261 putBE32(instr, buf[i:]) 262 } 263 } 264 return i 265} 266 267/* 268 * Apply the selected BCJ filter. Update *pos and s.pos to match the amount 269 * of data that got filtered. 270 */ 271func bcjApply(s *xzDecBCJ, buf []byte, pos *int) { 272 var filtered int 273 buf = buf[*pos:] 274 switch s.typ { 275 case idBCJX86: 276 filtered = bcjX86Filter(s, buf) 277 case idBCJPowerPC: 278 filtered = bcjPowerPCFilter(s, buf) 279 case idBCJIA64: 280 filtered = bcjIA64Filter(s, buf) 281 case idBCJARM: 282 filtered = bcjARMFilter(s, buf) 283 case idBCJARMThumb: 284 filtered = bcjARMThumbFilter(s, buf) 285 case idBCJSPARC: 286 filtered = bcjSPARCFilter(s, buf) 287 default: 288 /* Never reached */ 289 } 290 *pos += filtered 291 s.pos += filtered 292} 293 294/* 295 * Flush pending filtered data from temp to the output buffer. 296 * Move the remaining mixture of possibly filtered and unfiltered 297 * data to the beginning of temp. 298 */ 299func bcjFlush(s *xzDecBCJ, b *xzBuf) { 300 var copySize int 301 copySize = len(b.out) - b.outPos 302 if copySize > s.temp.filtered { 303 copySize = s.temp.filtered 304 } 305 copy(b.out[b.outPos:], s.temp.buf[:copySize]) 306 b.outPos += copySize 307 s.temp.filtered -= copySize 308 copy(s.temp.buf, s.temp.buf[copySize:]) 309 s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize] 310} 311 312/* 313 * Decode raw stream which has a BCJ filter as the first filter. 314 * 315 * The BCJ filter functions are primitive in sense that they process the 316 * data in chunks of 1-16 bytes. To hide this issue, this function does 317 * some buffering. 318 */ 319func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet { 320 var outStart int 321 /* 322 * Flush pending already filtered data to the output buffer. Return 323 * immediately if we couldn't flush everything, or if the next 324 * filter in the chain had already returned xzStreamEnd. 325 */ 326 if s.temp.filtered > 0 { 327 bcjFlush(s, b) 328 if s.temp.filtered > 0 { 329 return xzOK 330 } 331 if s.ret == xzStreamEnd { 332 return xzStreamEnd 333 } 334 } 335 /* 336 * If we have more output space than what is currently pending in 337 * temp, copy the unfiltered data from temp to the output buffer 338 * and try to fill the output buffer by decoding more data from the 339 * next filter in the chain. Apply the BCJ filter on the new data 340 * in the output buffer. If everything cannot be filtered, copy it 341 * to temp and rewind the output buffer position accordingly. 342 * 343 * This needs to be always run when len(temp.buf) == 0 to handle a special 344 * case where the output buffer is full and the next filter has no 345 * more output coming but hasn't returned xzStreamEnd yet. 346 */ 347 if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 { 348 outStart = b.outPos 349 copy(b.out[b.outPos:], s.temp.buf) 350 b.outPos += len(s.temp.buf) 351 s.ret = chain(b) 352 if s.ret != xzStreamEnd && s.ret != xzOK { 353 return s.ret 354 } 355 bcjApply(s, b.out[:b.outPos], &outStart) 356 /* 357 * As an exception, if the next filter returned xzStreamEnd, 358 * we can do that too, since the last few bytes that remain 359 * unfiltered are meant to remain unfiltered. 360 */ 361 if s.ret == xzStreamEnd { 362 return xzStreamEnd 363 } 364 s.temp.buf = s.temp.bufArray[:b.outPos-outStart] 365 b.outPos -= len(s.temp.buf) 366 copy(s.temp.buf, b.out[b.outPos:]) 367 /* 368 * If there wasn't enough input to the next filter to fill 369 * the output buffer with unfiltered data, there's no point 370 * to try decoding more data to temp. 371 */ 372 if b.outPos+len(s.temp.buf) < len(b.out) { 373 return xzOK 374 } 375 } 376 /* 377 * We have unfiltered data in temp. If the output buffer isn't full 378 * yet, try to fill the temp buffer by decoding more data from the 379 * next filter. Apply the BCJ filter on temp. Then we hopefully can 380 * fill the actual output buffer by copying filtered data from temp. 381 * A mix of filtered and unfiltered data may be left in temp; it will 382 * be taken care on the next call to this function. 383 */ 384 if b.outPos < len(b.out) { 385 /* Make b.out temporarily point to s.temp. */ 386 s.out = b.out 387 s.outPos = b.outPos 388 b.out = s.temp.bufArray[:] 389 b.outPos = len(s.temp.buf) 390 s.ret = chain(b) 391 s.temp.buf = s.temp.bufArray[:b.outPos] 392 b.out = s.out 393 b.outPos = s.outPos 394 if s.ret != xzOK && s.ret != xzStreamEnd { 395 return s.ret 396 } 397 bcjApply(s, s.temp.buf, &s.temp.filtered) 398 /* 399 * If the next filter returned xzStreamEnd, we mark that 400 * everything is filtered, since the last unfiltered bytes 401 * of the stream are meant to be left as is. 402 */ 403 if s.ret == xzStreamEnd { 404 s.temp.filtered = len(s.temp.buf) 405 } 406 bcjFlush(s, b) 407 if s.temp.filtered > 0 { 408 return xzOK 409 } 410 } 411 return s.ret 412} 413 414/* 415 * Allocate memory for BCJ decoders. xzDecBCJReset must be used before 416 * calling xzDecBCJRun. 417 */ 418func xzDecBCJCreate() *xzDecBCJ { 419 return new(xzDecBCJ) 420} 421 422/* 423 * Decode the Filter ID of a BCJ filter and check the start offset is 424 * valid. Returns xzOK if the given Filter ID and offset is 425 * supported. Otherwise xzOptionsError is returned. 426 */ 427func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet { 428 switch id { 429 case idBCJX86: 430 case idBCJPowerPC: 431 case idBCJIA64: 432 case idBCJARM: 433 case idBCJARMThumb: 434 case idBCJSPARC: 435 default: 436 /* Unsupported Filter ID */ 437 return xzOptionsError 438 } 439 // check offset is a multiple of alignment 440 switch id { 441 case idBCJPowerPC, idBCJARM, idBCJSPARC: 442 if offset%4 != 0 { 443 return xzOptionsError 444 } 445 case idBCJIA64: 446 if offset%16 != 0 { 447 return xzOptionsError 448 } 449 case idBCJARMThumb: 450 if offset%2 != 0 { 451 return xzOptionsError 452 } 453 } 454 s.typ = id 455 s.ret = xzOK 456 s.pos = offset 457 s.x86PrevMask = 0 458 s.temp.filtered = 0 459 s.temp.buf = nil 460 return xzOK 461} 462