1/*
2 * Branch/Call/Jump (BCJ) filter decoders
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 *          Igor Pavlov <http://7-zip.org/>
6 *
7 * Translation to Go: Michael Cross <https://github.com/xi2>
8 *
9 * This file has been put into the public domain.
10 * You can do whatever you want with this file.
11 */
12
13package xz
14
15/* from linux/lib/xz/xz_dec_bcj.c *************************************/
16
17type xzDecBCJ struct {
18	/* Type of the BCJ filter being used */
19	typ xzFilterID
20	/*
21	 * Return value of the next filter in the chain. We need to preserve
22	 * this information across calls, because we must not call the next
23	 * filter anymore once it has returned xzStreamEnd
24	 */
25	ret xzRet
26	/*
27	 * Absolute position relative to the beginning of the uncompressed
28	 * data (in a single .xz Block).
29	 */
30	pos int
31	/* x86 filter state */
32	x86PrevMask uint32
33	/* Temporary space to hold the variables from xzBuf */
34	out    []byte
35	outPos int
36	temp   struct {
37		/* Amount of already filtered data in the beginning of buf */
38		filtered int
39		/*
40		 * Buffer to hold a mix of filtered and unfiltered data. This
41		 * needs to be big enough to hold Alignment + 2 * Look-ahead:
42		 *
43		 * Type         Alignment   Look-ahead
44		 * x86              1           4
45		 * PowerPC          4           0
46		 * IA-64           16           0
47		 * ARM              4           0
48		 * ARM-Thumb        2           2
49		 * SPARC            4           0
50		 */
51		buf      []byte // slice buf will be backed by bufArray
52		bufArray [16]byte
53	}
54}
55
56/*
57 * This is used to test the most significant byte of a memory address
58 * in an x86 instruction.
59 */
60func bcjX86TestMSByte(b byte) bool {
61	return b == 0x00 || b == 0xff
62}
63
64func bcjX86Filter(s *xzDecBCJ, buf []byte) int {
65	var maskToAllowedStatus = []bool{
66		true, true, true, false, true, false, false, false,
67	}
68	var maskToBitNum = []byte{0, 1, 2, 2, 3, 3, 3, 3}
69	var i int
70	var prevPos int = -1
71	var prevMask uint32 = s.x86PrevMask
72	var src uint32
73	var dest uint32
74	var j uint32
75	var b byte
76	if len(buf) <= 4 {
77		return 0
78	}
79	for i = 0; i < len(buf)-4; i++ {
80		if buf[i]&0xfe != 0xe8 {
81			continue
82		}
83		prevPos = i - prevPos
84		if prevPos > 3 {
85			prevMask = 0
86		} else {
87			prevMask = (prevMask << (uint(prevPos) - 1)) & 7
88			if prevMask != 0 {
89				b = buf[i+4-int(maskToBitNum[prevMask])]
90				if !maskToAllowedStatus[prevMask] || bcjX86TestMSByte(b) {
91					prevPos = i
92					prevMask = prevMask<<1 | 1
93					continue
94				}
95			}
96		}
97		prevPos = i
98		if bcjX86TestMSByte(buf[i+4]) {
99			src = getLE32(buf[i+1:])
100			for {
101				dest = src - uint32(s.pos+i+5)
102				if prevMask == 0 {
103					break
104				}
105				j = uint32(maskToBitNum[prevMask]) * 8
106				b = byte(dest >> (24 - j))
107				if !bcjX86TestMSByte(b) {
108					break
109				}
110				src = dest ^ (1<<(32-j) - 1)
111			}
112			dest &= 0x01FFFFFF
113			dest |= 0 - dest&0x01000000
114			putLE32(dest, buf[i+1:])
115			i += 4
116		} else {
117			prevMask = prevMask<<1 | 1
118		}
119	}
120	prevPos = i - prevPos
121	if prevPos > 3 {
122		s.x86PrevMask = 0
123	} else {
124		s.x86PrevMask = prevMask << (uint(prevPos) - 1)
125	}
126	return i
127}
128
129func bcjPowerPCFilter(s *xzDecBCJ, buf []byte) int {
130	var i int
131	var instr uint32
132	for i = 0; i+4 <= len(buf); i += 4 {
133		instr = getBE32(buf[i:])
134		if instr&0xFC000003 == 0x48000001 {
135			instr &= 0x03FFFFFC
136			instr -= uint32(s.pos + i)
137			instr &= 0x03FFFFFC
138			instr |= 0x48000001
139			putBE32(instr, buf[i:])
140		}
141	}
142	return i
143}
144
145var bcjIA64BranchTable = [...]byte{
146	0, 0, 0, 0, 0, 0, 0, 0,
147	0, 0, 0, 0, 0, 0, 0, 0,
148	4, 4, 6, 6, 0, 0, 7, 7,
149	4, 4, 0, 0, 4, 4, 0, 0,
150}
151
152func bcjIA64Filter(s *xzDecBCJ, buf []byte) int {
153	var branchTable = bcjIA64BranchTable[:]
154	/*
155	 * The local variables take a little bit stack space, but it's less
156	 * than what LZMA2 decoder takes, so it doesn't make sense to reduce
157	 * stack usage here without doing that for the LZMA2 decoder too.
158	 */
159	/* Loop counters */
160	var i int
161	var j int
162	/* Instruction slot (0, 1, or 2) in the 128-bit instruction word */
163	var slot uint32
164	/* Bitwise offset of the instruction indicated by slot */
165	var bitPos uint32
166	/* bit_pos split into byte and bit parts */
167	var bytePos uint32
168	var bitRes uint32
169	/* Address part of an instruction */
170	var addr uint32
171	/* Mask used to detect which instructions to convert */
172	var mask uint32
173	/* 41-bit instruction stored somewhere in the lowest 48 bits */
174	var instr uint64
175	/* Instruction normalized with bit_res for easier manipulation */
176	var norm uint64
177	for i = 0; i+16 <= len(buf); i += 16 {
178		mask = uint32(branchTable[buf[i]&0x1f])
179		for slot, bitPos = 0, 5; slot < 3; slot, bitPos = slot+1, bitPos+41 {
180			if (mask>>slot)&1 == 0 {
181				continue
182			}
183			bytePos = bitPos >> 3
184			bitRes = bitPos & 7
185			instr = 0
186			for j = 0; j < 6; j++ {
187				instr |= uint64(buf[i+j+int(bytePos)]) << (8 * uint(j))
188			}
189			norm = instr >> bitRes
190			if (norm>>37)&0x0f == 0x05 && (norm>>9)&0x07 == 0 {
191				addr = uint32((norm >> 13) & 0x0fffff)
192				addr |= (uint32(norm>>36) & 1) << 20
193				addr <<= 4
194				addr -= uint32(s.pos + i)
195				addr >>= 4
196				norm &= ^(uint64(0x8fffff) << 13)
197				norm |= uint64(addr&0x0fffff) << 13
198				norm |= uint64(addr&0x100000) << (36 - 20)
199				instr &= 1<<bitRes - 1
200				instr |= norm << bitRes
201				for j = 0; j < 6; j++ {
202					buf[i+j+int(bytePos)] = byte(instr >> (8 * uint(j)))
203				}
204			}
205		}
206	}
207	return i
208}
209
210func bcjARMFilter(s *xzDecBCJ, buf []byte) int {
211	var i int
212	var addr uint32
213	for i = 0; i+4 <= len(buf); i += 4 {
214		if buf[i+3] == 0xeb {
215			addr = uint32(buf[i]) | uint32(buf[i+1])<<8 |
216				uint32(buf[i+2])<<16
217			addr <<= 2
218			addr -= uint32(s.pos + i + 8)
219			addr >>= 2
220			buf[i] = byte(addr)
221			buf[i+1] = byte(addr >> 8)
222			buf[i+2] = byte(addr >> 16)
223		}
224	}
225	return i
226}
227
228func bcjARMThumbFilter(s *xzDecBCJ, buf []byte) int {
229	var i int
230	var addr uint32
231	for i = 0; i+4 <= len(buf); i += 2 {
232		if buf[i+1]&0xf8 == 0xf0 && buf[i+3]&0xf8 == 0xf8 {
233			addr = uint32(buf[i+1]&0x07)<<19 |
234				uint32(buf[i])<<11 |
235				uint32(buf[i+3]&0x07)<<8 |
236				uint32(buf[i+2])
237			addr <<= 1
238			addr -= uint32(s.pos + i + 4)
239			addr >>= 1
240			buf[i+1] = byte(0xf0 | (addr>>19)&0x07)
241			buf[i] = byte(addr >> 11)
242			buf[i+3] = byte(0xf8 | (addr>>8)&0x07)
243			buf[i+2] = byte(addr)
244			i += 2
245		}
246	}
247	return i
248}
249
250func bcjSPARCFilter(s *xzDecBCJ, buf []byte) int {
251	var i int
252	var instr uint32
253	for i = 0; i+4 <= len(buf); i += 4 {
254		instr = getBE32(buf[i:])
255		if instr>>22 == 0x100 || instr>>22 == 0x1ff {
256			instr <<= 2
257			instr -= uint32(s.pos + i)
258			instr >>= 2
259			instr = (0x40000000 - instr&0x400000) |
260				0x40000000 | (instr & 0x3FFFFF)
261			putBE32(instr, buf[i:])
262		}
263	}
264	return i
265}
266
267/*
268 * Apply the selected BCJ filter. Update *pos and s.pos to match the amount
269 * of data that got filtered.
270 */
271func bcjApply(s *xzDecBCJ, buf []byte, pos *int) {
272	var filtered int
273	buf = buf[*pos:]
274	switch s.typ {
275	case idBCJX86:
276		filtered = bcjX86Filter(s, buf)
277	case idBCJPowerPC:
278		filtered = bcjPowerPCFilter(s, buf)
279	case idBCJIA64:
280		filtered = bcjIA64Filter(s, buf)
281	case idBCJARM:
282		filtered = bcjARMFilter(s, buf)
283	case idBCJARMThumb:
284		filtered = bcjARMThumbFilter(s, buf)
285	case idBCJSPARC:
286		filtered = bcjSPARCFilter(s, buf)
287	default:
288		/* Never reached */
289	}
290	*pos += filtered
291	s.pos += filtered
292}
293
294/*
295 * Flush pending filtered data from temp to the output buffer.
296 * Move the remaining mixture of possibly filtered and unfiltered
297 * data to the beginning of temp.
298 */
299func bcjFlush(s *xzDecBCJ, b *xzBuf) {
300	var copySize int
301	copySize = len(b.out) - b.outPos
302	if copySize > s.temp.filtered {
303		copySize = s.temp.filtered
304	}
305	copy(b.out[b.outPos:], s.temp.buf[:copySize])
306	b.outPos += copySize
307	s.temp.filtered -= copySize
308	copy(s.temp.buf, s.temp.buf[copySize:])
309	s.temp.buf = s.temp.buf[:len(s.temp.buf)-copySize]
310}
311
312/*
313 * Decode raw stream which has a BCJ filter as the first filter.
314 *
315 * The BCJ filter functions are primitive in sense that they process the
316 * data in chunks of 1-16 bytes. To hide this issue, this function does
317 * some buffering.
318 */
319func xzDecBCJRun(s *xzDecBCJ, b *xzBuf, chain func(*xzBuf) xzRet) xzRet {
320	var outStart int
321	/*
322	 * Flush pending already filtered data to the output buffer. Return
323	 * immediately if we couldn't flush everything, or if the next
324	 * filter in the chain had already returned xzStreamEnd.
325	 */
326	if s.temp.filtered > 0 {
327		bcjFlush(s, b)
328		if s.temp.filtered > 0 {
329			return xzOK
330		}
331		if s.ret == xzStreamEnd {
332			return xzStreamEnd
333		}
334	}
335	/*
336	 * If we have more output space than what is currently pending in
337	 * temp, copy the unfiltered data from temp to the output buffer
338	 * and try to fill the output buffer by decoding more data from the
339	 * next filter in the chain. Apply the BCJ filter on the new data
340	 * in the output buffer. If everything cannot be filtered, copy it
341	 * to temp and rewind the output buffer position accordingly.
342	 *
343	 * This needs to be always run when len(temp.buf) == 0 to handle a special
344	 * case where the output buffer is full and the next filter has no
345	 * more output coming but hasn't returned xzStreamEnd yet.
346	 */
347	if len(s.temp.buf) < len(b.out)-b.outPos || len(s.temp.buf) == 0 {
348		outStart = b.outPos
349		copy(b.out[b.outPos:], s.temp.buf)
350		b.outPos += len(s.temp.buf)
351		s.ret = chain(b)
352		if s.ret != xzStreamEnd && s.ret != xzOK {
353			return s.ret
354		}
355		bcjApply(s, b.out[:b.outPos], &outStart)
356		/*
357		 * As an exception, if the next filter returned xzStreamEnd,
358		 * we can do that too, since the last few bytes that remain
359		 * unfiltered are meant to remain unfiltered.
360		 */
361		if s.ret == xzStreamEnd {
362			return xzStreamEnd
363		}
364		s.temp.buf = s.temp.bufArray[:b.outPos-outStart]
365		b.outPos -= len(s.temp.buf)
366		copy(s.temp.buf, b.out[b.outPos:])
367		/*
368		 * If there wasn't enough input to the next filter to fill
369		 * the output buffer with unfiltered data, there's no point
370		 * to try decoding more data to temp.
371		 */
372		if b.outPos+len(s.temp.buf) < len(b.out) {
373			return xzOK
374		}
375	}
376	/*
377	 * We have unfiltered data in temp. If the output buffer isn't full
378	 * yet, try to fill the temp buffer by decoding more data from the
379	 * next filter. Apply the BCJ filter on temp. Then we hopefully can
380	 * fill the actual output buffer by copying filtered data from temp.
381	 * A mix of filtered and unfiltered data may be left in temp; it will
382	 * be taken care on the next call to this function.
383	 */
384	if b.outPos < len(b.out) {
385		/* Make b.out temporarily point to s.temp. */
386		s.out = b.out
387		s.outPos = b.outPos
388		b.out = s.temp.bufArray[:]
389		b.outPos = len(s.temp.buf)
390		s.ret = chain(b)
391		s.temp.buf = s.temp.bufArray[:b.outPos]
392		b.out = s.out
393		b.outPos = s.outPos
394		if s.ret != xzOK && s.ret != xzStreamEnd {
395			return s.ret
396		}
397		bcjApply(s, s.temp.buf, &s.temp.filtered)
398		/*
399		 * If the next filter returned xzStreamEnd, we mark that
400		 * everything is filtered, since the last unfiltered bytes
401		 * of the stream are meant to be left as is.
402		 */
403		if s.ret == xzStreamEnd {
404			s.temp.filtered = len(s.temp.buf)
405		}
406		bcjFlush(s, b)
407		if s.temp.filtered > 0 {
408			return xzOK
409		}
410	}
411	return s.ret
412}
413
414/*
415 * Allocate memory for BCJ decoders. xzDecBCJReset must be used before
416 * calling xzDecBCJRun.
417 */
418func xzDecBCJCreate() *xzDecBCJ {
419	return new(xzDecBCJ)
420}
421
422/*
423 * Decode the Filter ID of a BCJ filter and check the start offset is
424 * valid. Returns xzOK if the given Filter ID and offset is
425 * supported. Otherwise xzOptionsError is returned.
426 */
427func xzDecBCJReset(s *xzDecBCJ, id xzFilterID, offset int) xzRet {
428	switch id {
429	case idBCJX86:
430	case idBCJPowerPC:
431	case idBCJIA64:
432	case idBCJARM:
433	case idBCJARMThumb:
434	case idBCJSPARC:
435	default:
436		/* Unsupported Filter ID */
437		return xzOptionsError
438	}
439	// check offset is a multiple of alignment
440	switch id {
441	case idBCJPowerPC, idBCJARM, idBCJSPARC:
442		if offset%4 != 0 {
443			return xzOptionsError
444		}
445	case idBCJIA64:
446		if offset%16 != 0 {
447			return xzOptionsError
448		}
449	case idBCJARMThumb:
450		if offset%2 != 0 {
451			return xzOptionsError
452		}
453	}
454	s.typ = id
455	s.ret = xzOK
456	s.pos = offset
457	s.x86PrevMask = 0
458	s.temp.filtered = 0
459	s.temp.buf = nil
460	return xzOK
461}
462