1 #region Copyright & License Information
2 /*
3  * Copyright 2007-2020 The OpenRA Developers (see AUTHORS)
4  * This file is part of OpenRA, which is free software. It is made
5  * available to you under the terms of the GNU General Public License
6  * as published by the Free Software Foundation, either version 3 of
7  * the License, or (at your option) any later version. For more
8  * information, see COPYING.
9  */
10 #endregion
11 #region Additional Copyright & License Information
12 /*
13  * This file is based on the blast routines (version 1.1 by Mark Adler)
14  * included in zlib/contrib
15  */
16 #endregion
17 
18 using System;
19 using System.IO;
20 
21 namespace OpenRA.Mods.Common.FileFormats
22 {
23 	public static class Blast
24 	{
25 		public static readonly int MAXBITS = 13; // maximum code length
26 		public static readonly int MAXWIN = 4096; // maximum window size
27 
28 		static byte[] litlen =
29 		{
30 			11, 124, 8, 7, 28, 7, 188, 13, 76, 4,
31 			10, 8, 12, 10, 12, 10, 8, 23, 8, 9,
32 			7, 6, 7, 8, 7, 6, 55, 8, 23, 24,
33 			12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
34 			7, 24, 6, 11, 9, 6, 7, 22, 7, 11,
35 			38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
36 			8, 12, 5, 38, 5, 38, 5, 11, 7, 5,
37 			6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
38 			44, 253, 253, 253, 252, 252, 252, 13, 12, 45,
39 			12, 45, 12, 61, 12, 45, 44, 173
40 		};
41 
42 		// bit lengths of length codes 0..15
43 		static byte[] lenlen = { 2, 35, 36, 53, 38, 23 };
44 
45 		// bit lengths of distance codes 0..63
46 		static byte[] distlen = { 2, 20, 53, 230, 247, 151, 248 };
47 
48 		// base for length codes
49 		static short[] lengthbase =
50 		{
51 			3, 2, 4, 5, 6, 7, 8, 9, 10, 12,
52 			16, 24, 40, 72, 136, 264
53 		};
54 
55 		// extra bits for length codes
56 		static byte[] extra =
57 		{
58 			0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
59 			3, 4, 5, 6, 7, 8
60 		};
61 
62 		static Huffman litcode = new Huffman(litlen, litlen.Length, 256);
63 		static Huffman lencode = new Huffman(lenlen, lenlen.Length, 16);
64 		static Huffman distcode = new Huffman(distlen, distlen.Length, 64);
65 
66 		/// <summary>PKWare Compression Library stream.</summary>
67 		/// <param name="input">Compressed input stream.</param>
68 		/// <param name="output">Stream to write the decompressed output.</param>
69 		/// <param name="onProgress">Progress callback, invoked with (read bytes, written bytes).</param>
Decompress(Stream input, Stream output, Action<long, long> onProgress = null)70 		public static void Decompress(Stream input, Stream output, Action<long, long> onProgress = null)
71 		{
72 			var br = new BitReader(input);
73 
74 			// Are literals coded?
75 			var coded = br.ReadBits(8);
76 
77 			if (coded < 0 || coded > 1)
78 				throw new NotImplementedException("Invalid data stream");
79 			var encodedLiterals = coded == 1;
80 
81 			// log2(dictionary size) - 6
82 			var dict = br.ReadBits(8);
83 			if (dict < 4 || dict > 6)
84 				throw new InvalidDataException("Invalid dictionary size");
85 
86 			// output state
87 			ushort next = 0; // index of next write location in out[]
88 			var first = true; // true to check distances (for first 4K)
89 			var outBuffer = new byte[MAXWIN]; // output buffer and sliding window
90 
91 			var inputStart = input.Position;
92 			var outputStart = output.Position;
93 
94 			// decode literals and length/distance pairs
95 			do
96 			{
97 				// length/distance pair
98 				if (br.ReadBits(1) == 1)
99 				{
100 					// Length
101 					var symbol = Decode(lencode, br);
102 					var len = lengthbase[symbol] + br.ReadBits(extra[symbol]);
103 
104 					// Magic number for "done"
105 					if (len == 519)
106 					{
107 						for (var i = 0; i < next; i++)
108 							output.WriteByte(outBuffer[i]);
109 
110 						if (onProgress != null)
111 							onProgress(input.Position - inputStart, output.Position - outputStart);
112 						break;
113 					}
114 
115 					// Distance
116 					symbol = len == 2 ? 2 : dict;
117 					var dist = Decode(distcode, br) << symbol;
118 					dist += br.ReadBits(symbol);
119 					dist++;
120 
121 					if (first && dist > next)
122 						throw new InvalidDataException("Attempt to jump before data");
123 
124 					// copy length bytes from distance bytes back
125 					do
126 					{
127 						var dest = next;
128 						var source = dest - dist;
129 
130 						var copy = MAXWIN;
131 						if (next < dist)
132 						{
133 							source += copy;
134 							copy = dist;
135 						}
136 
137 						copy -= next;
138 						if (copy > len)
139 							copy = len;
140 
141 						len -= copy;
142 						next += (ushort)copy;
143 
144 						// copy with old-fashioned memcpy semantics
145 						// in case of overlapping ranges. this is NOT
146 						// the same as Array.Copy()
147 						while (copy-- > 0)
148 							outBuffer[dest++] = outBuffer[source++];
149 
150 						// Flush window to outstream
151 						if (next == MAXWIN)
152 						{
153 							for (var i = 0; i < next; i++)
154 								output.WriteByte(outBuffer[i]);
155 							next = 0;
156 							first = false;
157 
158 							if (onProgress != null)
159 								onProgress(input.Position - inputStart, output.Position - outputStart);
160 						}
161 					}
162 					while (len != 0);
163 				}
164 				else
165 				{
166 					// literal value
167 					var symbol = encodedLiterals ? Decode(litcode, br) : br.ReadBits(8);
168 					outBuffer[next++] = (byte)symbol;
169 					if (next == MAXWIN)
170 					{
171 						for (var i = 0; i < next; i++)
172 							output.WriteByte(outBuffer[i]);
173 						next = 0;
174 						first = false;
175 
176 						if (onProgress != null)
177 							onProgress(input.Position - inputStart, output.Position - outputStart);
178 					}
179 				}
180 			}
181 			while (true);
182 		}
183 
184 		// Decode a code using Huffman table h.
Decode(Huffman h, BitReader br)185 		static int Decode(Huffman h, BitReader br)
186 		{
187 			var code = 0; // len bits being decoded
188 			var first = 0; // first code of length len
189 			var index = 0; // index of first code of length len in symbol table
190 			short next = 1;
191 			while (true)
192 			{
193 				code |= br.ReadBits(1) ^ 1; // invert code
194 				int count = h.Count[next++];
195 				if (code < first + count)
196 					return h.Symbol[index + (code - first)];
197 
198 				index += count;
199 				first += count;
200 				first <<= 1;
201 				code <<= 1;
202 			}
203 		}
204 	}
205 
206 	class BitReader
207 	{
208 		readonly Stream stream;
209 		byte bitBuffer = 0;
210 		int bitCount = 0;
211 
BitReader(Stream stream)212 		public BitReader(Stream stream)
213 		{
214 			this.stream = stream;
215 		}
216 
ReadBits(int count)217 		public int ReadBits(int count)
218 		{
219 			var ret = 0;
220 			var filled = 0;
221 			while (filled < count)
222 			{
223 				if (bitCount == 0)
224 				{
225 					bitBuffer = stream.ReadUInt8();
226 					bitCount = 8;
227 				}
228 
229 				ret |= (bitBuffer & 1) << filled;
230 				bitBuffer >>= 1;
231 				bitCount--;
232 				filled++;
233 			}
234 
235 			return ret;
236 		}
237 	}
238 
239 	/*
240 	 * Given a list of repeated code lengths rep[0..n-1], where each byte is a
241 	 * count (high four bits + 1) and a code length (low four bits), generate the
242 	 * list of code lengths.  This compaction reduces the size of the object code.
243 	 * Then given the list of code lengths length[0..n-1] representing a canonical
244 	 * Huffman code for n symbols, construct the tables required to decode those
245 	 * codes.  Those tables are the number of codes of each length, and the symbols
246 	 * sorted by length, retaining their original order within each length.
247 	 */
248 	class Huffman
249 	{
250 		public short[] Count; // number of symbols of each length
251 		public short[] Symbol; // canonically ordered symbols
252 
Huffman(byte[] rep, int n, short symbolCount)253 		public Huffman(byte[] rep, int n, short symbolCount)
254 		{
255 			var length = new short[256]; // code lengths
256 			var s = 0; // current symbol
257 
258 			// convert compact repeat counts into symbol bit length list
259 			foreach (var code in rep)
260 			{
261 				var num = (code >> 4) + 1; // Number of codes (top four bits plus 1)
262 				var len = (byte)(code & 15); // Code length (low four bits)
263 				do
264 					length[s++] = len;
265 				while (--num > 0);
266 			}
267 
268 			n = s;
269 
270 			// count number of codes of each length
271 			Count = new short[Blast.MAXBITS + 1];
272 			for (var i = 0; i < n; i++)
273 				Count[length[i]]++;
274 
275 			// no codes!
276 			if (Count[0] == n)
277 				return;
278 
279 			// check for an over-subscribed or incomplete set of lengths
280 			var left = 1; // one possible code of zero length
281 			for (var len = 1; len <= Blast.MAXBITS; len++)
282 			{
283 				left <<= 1;	// one more bit, double codes left
284 				left -= Count[len];	// deduct count from possible codes
285 				if (left < 0)
286 					throw new InvalidDataException("over subscribed code set");
287 			}
288 
289 			// generate offsets into symbol table for each length for sorting
290 			var offs = new short[Blast.MAXBITS + 1];
291 			for (var len = 1; len < Blast.MAXBITS; len++)
292 				offs[len + 1] = (short)(offs[len] + Count[len]);
293 
294 			// put symbols in table sorted by length, by symbol order within each length
295 			Symbol = new short[symbolCount];
296 			for (short i = 0; i < n; i++)
297 				if (length[i] != 0)
298 					Symbol[offs[length[i]]++] = i;
299 		}
300 	}
301 }
302