1 /*
2 
3 Copyright (c) 2011, 2012, Simon Howard
4 
5 Permission to use, copy, modify, and/or distribute this software
6 for any purpose with or without fee is hereby granted, provided
7 that the above copyright notice and this permission notice appear
8 in all copies.
9 
10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17 CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 
19  */
20 
21 // Automated tester for 'ghost' archive formats.
22 //
23 // Some old tools support 'ghost' archive formats: compression algorithms
24 // that can be decompressed, but for which no version of the tool can be
25 // found that actually generates archives that use them. These formats were
26 // likely beta versions of the mainstream algorithms that are more widely
27 // used.
28 //
29 // Ideally, these archive formats should still be supported. However, testing
30 // poses a problem, because test archives cannot be generated to test the
31 // decompression code. This tool provides an alternative: it uses the
32 // decompression code to generate a random archive which, assuming the
33 // correctness of the code, should be a valid archive. The original legacy
34 // tool can then be run via DOSbox to extract the archive. The data extracted
35 // by the two tools can then be compared.
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <assert.h>
41 #include <time.h>
42 #include <unistd.h>
43 
44 #include "lib/lha_arch.h"
45 #include "lha_decoder.h"
46 #include "crc32.h"
47 
// Filename to use for archive file. The generated archive is written
// under this name, and the same name is substituted into the DOSbox
// command.

#define TEST_ARCHIVE_FILENAME "testdata.lzh"

// Filename to use for archived file. This is the name stored in the
// archive header, and the name of the file whose CRC is checked after
// the DOSbox command has run.

#define TEST_FILENAME "TESTDATA.BIN"

// Minimum length of a level 0 LHA header: as laid out by
// generate_archive(), 22 bytes of fixed fields precede the filename
// and a 2-byte CRC follows it.

#define MIN_HEADER_LEN  24

// Number of bytes of random "compressed" data to generate. Also written
// into the header as the packed length.

#define COMPRESSED_DATA_LEN    (64 * 1024)

// Length of uncompressed data (for header). Also the number of bytes
// that must decode successfully before generated data is accepted.

#define UNCOMPRESSED_DATA_LEN  (64 * 1024)

// Byte location at which to align compressed files within the archive.
// NOTE(review): not referenced by any code visible in this file --
// confirm whether it is still needed.

#define ARCHIVED_FILE_ALIGN  1024
71 
// State for the read_data() callback: a memory buffer that is handed
// out piece by piece as an input stream.
//
// buf_pos is a size_t, like buf_len, because it is subtracted from
// buf_len and advanced by size_t byte counts.

typedef struct {
	uint8_t *buf;      // Start of the buffer holding the stream data.
	size_t buf_len;    // Total number of bytes in the buffer.
	size_t buf_pos;    // Number of bytes already handed out.
} ReadDataCallback;
77 
// Store a 32-bit value into buf[0..3], least significant byte first.

static void write_uint32(uint8_t *buf, uint32_t value)
{
	unsigned int shift;

	// Emit one byte per iteration, starting from the low-order byte.

	for (shift = 0; shift < 32; shift += 8) {
		*buf++ = (value >> shift) & 0xff;
	}
}
87 
// Store a 16-bit value into buf[0..1], least significant byte first.

static void write_uint16(uint8_t *buf, uint16_t value)
{
	const uint8_t low = (uint8_t) (value & 0xff);
	const uint8_t high = (uint8_t) (value >> 8);

	buf[0] = low;
	buf[1] = high;
}
95 
// Fill the specified buffer with random data.
//
// Each byte is the low eight bits of one rand() call, so the output is
// determined by the current srand() seed. A zero buf_len writes nothing.

static void fill_random(uint8_t *buf, size_t buf_len)
{
	// The index has the same type as the length: a narrower index
	// would wrap before reaching a length larger than its range.
	size_t i;

	for (i = 0; i < buf_len; ++i) {
		buf[i] = (uint8_t) (rand() & 0xff);
	}
}
106 
107 /**
108  * Callback function used by LHADecoder to read more data.
109  *
110  * @param buf        Buffer in which to read the data.
111  * @param buf_len    Length of the buffer.
112  * @param user_data  Pointer to the ReadDataCallback structure containing
113  *                   the stream decompression state.
114  * @return           Number of bytes read.
115  */
116 
read_data(void * buf,size_t buf_len,void * user_data)117 static size_t read_data(void *buf, size_t buf_len, void *user_data)
118 {
119 	ReadDataCallback *callback_data = user_data;
120 	size_t to_copy;
121 
122 	to_copy = callback_data->buf_len - callback_data->buf_pos;
123 
124 	if (to_copy > buf_len) {
125 		to_copy = buf_len;
126 	}
127 
128 	memcpy(buf, callback_data->buf + callback_data->buf_pos, to_copy);
129 	callback_data->buf_pos += to_copy;
130 
131 	return to_copy;
132 }
133 
134 /**
135  * Generate some test data that can be successfully decompressed using
136  * the specified algorithm type.
137  *
138  * @param type             The algorithm to use for decompression.
139  * @param buf              Pointer to the buffer to store the data.
140  * @param buf_len          Length of the buffer.
141  * @param uncompressed_len Number of bytes to decompress before stopping.
142  * @param crc16            Pointer to a variable to store the 16-bit CRC.
143  * @param crc32            Pointer to a variable to store the 32-bit CRC.
144  */
145 
generate_data(char * type,uint8_t * buf,size_t buf_len,size_t uncompressed_len,uint16_t * crc16,uint32_t * crc32)146 static void generate_data(char *type, uint8_t *buf, size_t buf_len,
147                           size_t uncompressed_len,
148                           uint16_t *crc16, uint32_t *crc32)
149 {
150 	LHADecoderType *dtype;
151 	ReadDataCallback callback_data;
152 	LHADecoder *decoder;
153 	size_t decoded_stream_len;
154 
155 	dtype = lha_decoder_for_name(type);
156 	assert(dtype != NULL);
157 
158 	// Fill the buffer with random data:
159 
160 	fill_random(buf, buf_len);
161 
162 	// Decompress as much data as possible. If we fail to decompress
163 	// all the data, modify some of the data from the point just
164 	// before we stopped and try again.
165 	// This is essentially a genetic algorithm.
166 
167 	for (;;) {
168 		callback_data.buf = buf;
169 		callback_data.buf_len = buf_len;
170 		callback_data.buf_pos = 0;
171 
172 		decoder = lha_decoder_new(dtype, read_data, &callback_data,
173 					  uncompressed_len);
174 
175 		decoded_stream_len = 0;
176 		*crc32 = 0;
177 
178 		for (;;) {
179 			uint8_t decode_buf[128];
180 			size_t decoded_bytes;
181 
182 			decoded_bytes = lha_decoder_read(decoder, decode_buf,
183 							 sizeof(decode_buf));
184 			if (decoded_bytes == 0) {
185 				break;
186 			}
187 
188 			crc32_buf(crc32, decode_buf, decoded_bytes);
189 
190 			decoded_stream_len += decoded_bytes;
191 		}
192 
193 		*crc16 = lha_decoder_get_crc(decoder);
194 
195 		lha_decoder_free(decoder);
196 
197 		// Successfully decompressed all the data? We are done.
198 
199 		if (decoded_stream_len >= uncompressed_len) {
200 			break;
201 		}
202 
203 		// Modify some data from the end of the stream and try again.
204 
205 		if (callback_data.buf_pos < 6) {
206 			fill_random(callback_data.buf, 6);
207 		} else {
208 			fill_random(callback_data.buf
209 			            + callback_data.buf_pos - 6, 6);
210 		}
211 	}
212 }
213 
/**
 * Calculate the LHA level 0 header checksum for the specified buffer.
 *
 * The checksum is the sum of all bytes in the buffer, modulo 256.
 * An empty buffer gives a checksum of zero.
 *
 * @param buf         Pointer to buffer containing the data to checksum.
 *                    The buffer is only read, never modified.
 * @param buf_len     Length of the buffer.
 * @return            Checksum of the buffer.
 */

static uint8_t calculate_checksum(const uint8_t *buf, size_t buf_len)
{
	unsigned int sum = 0;

	// The index has the same type as the length so that it cannot
	// wrap before reaching buf_len.
	size_t i;

	for (i = 0; i < buf_len; ++i) {
		sum = (sum + buf[i]) & 0xff;
	}

	return (uint8_t) sum;
}
235 
236 /**
237  * Generate an archive file.
238  *
239  * @param out_filename     Location to save the generate file.
240  * @param type             The algorithm to use for decompression.
241  * @param filename         Filename to use for the archived file.
242  * @return                 32-bit CRC of the decompressed data.
243  */
244 
generate_archive(char * out_filename,char * type,char * filename)245 static uint32_t generate_archive(char *out_filename, char *type,
246                                  char *filename)
247 {
248 	FILE *outfile;
249 	uint8_t *buf;
250 	size_t buf_len;
251 	uint16_t crc16;
252 	uint32_t crc32;
253 	unsigned int skip_len;
254 
255 	skip_len = COMPRESSED_DATA_LEN;
256 	buf_len = COMPRESSED_DATA_LEN + MIN_HEADER_LEN + strlen(filename);
257 
258 	buf = malloc(buf_len);
259 	assert(buf != NULL);
260 
261 	generate_data(type, buf + MIN_HEADER_LEN + strlen(filename),
262 	              COMPRESSED_DATA_LEN, UNCOMPRESSED_DATA_LEN,
263 	              &crc16, &crc32);
264 
265 	// Construct header:
266 
267 	buf[0] = MIN_HEADER_LEN + strlen(filename) - 2;     // Header length
268 	memcpy(buf + 2, type, 5);                           // Compression type
269 	write_uint32(buf + 7, COMPRESSED_DATA_LEN);         // Packed len
270 	write_uint32(buf + 11, UNCOMPRESSED_DATA_LEN);      // Original len
271 	write_uint32(buf + 15, 0);                          // Mod date/time
272 	write_uint16(buf + 19, 0x0020);                     // DOS attribute
273 	buf[21] = strlen(filename);                         // Filename
274 	memcpy(buf + 22, filename, strlen(filename));
275 	write_uint16(buf + 22 + strlen(filename), crc16);   // CRC
276 	buf[1] = calculate_checksum(buf + 2,
277 	                            MIN_HEADER_LEN + strlen(filename) - 2);
278 
279 	outfile = fopen(out_filename, "wb");
280 	fwrite(buf, buf_len, 1, outfile);
281 	fclose(outfile);
282 
283 	free(buf);
284 
285 	return crc32;
286 }
287 
/**
 * Invoke DOSbox to run the specified command.
 *
 * The command is expanded with the filename and then wrapped in a
 * DOSbox command line that mounts the current directory as drive C:.
 * If either expanded string does not fit its buffer, an error is
 * printed and the program exits rather than running a truncated
 * command.
 *
 * @param command       Command string of the form 'cmd %s'.
 * @param filename      Filename of the file to extract.
 */

static void run_dosbox(char *command, char *filename)
{
	char cmdbuf1[64];
	char cmdbuf2[128];
	int len;

	// 'command' is deliberately used as the format string: it is
	// expected to contain a single %s, replaced by the filename.

	len = snprintf(cmdbuf1, sizeof(cmdbuf1), command, filename);

	if (len < 0 || (size_t) len >= sizeof(cmdbuf1)) {
		fprintf(stderr, "\n\nCommand is too long:\n"
		                "Command: %s\nFilename: %s\n",
		                command, filename);
		exit(-1);
	}

	// NOTE(review): the expanded command is placed inside single
	// quotes, so a command containing a single quote will break the
	// shell quoting.

	len = snprintf(cmdbuf2, sizeof(cmdbuf2),
	               "dosbox -c 'mount c .' -c 'c:' -c '%s' -c exit",
	               cmdbuf1);

	if (len < 0 || (size_t) len >= sizeof(cmdbuf2)) {
		fprintf(stderr, "\n\nDOSbox command line is too long:\n"
		                "Command: %s\n", cmdbuf1);
		exit(-1);
	}

	// The exit status is not examined here.

	system(cmdbuf2);
}
305 
/**
 * Check the extracted file has the expected CRC32.
 *
 * If the file cannot be opened or read, or its CRC differs from the
 * expected value, a diagnostic is printed and the program exits.
 *
 * @param archive       Path to the archive file (used in diagnostics).
 * @param filename      Filename of the file to check.
 * @param crc           Expected 32-bit CRC.
 */

static void check_file_crc(char *archive, char *filename, uint32_t crc)
{
	uint8_t buf[64];
	FILE *fstream;
	uint32_t check_crc;
	size_t bytes;
	int read_failed;

	fstream = fopen(filename, "rb");

	if (fstream == NULL) {
		fprintf(stderr, "\n\nFailed to extract file:\n"
		                "Archive: %s\nFilename: %s\n",
		                archive, filename);
		exit(-1);
	}

	check_crc = 0;

	// Loop on the fread() result rather than on feof(): after a read
	// error fread() keeps returning 0 while feof() stays false, so a
	// feof()-driven loop would never terminate.

	while ((bytes = fread(buf, 1, sizeof(buf), fstream)) > 0) {
		crc32_buf(&check_crc, buf, bytes);
	}

	// Record whether the loop ended on an error or on end-of-file
	// before the stream is closed.

	read_failed = ferror(fstream);

	fclose(fstream);

	if (read_failed) {
		fprintf(stderr, "\n\nFailed to read extracted file:\n"
		                "Archive: %s\nFilename: %s\n",
		                archive, filename);
		exit(-1);
	}

	if (check_crc != crc) {
		// Cast so the arguments match the %x conversions.

		fprintf(stderr, "\n\nExtracted file failed CRC check:\n"
		                "Archive: %s\nFilename: %s\n"
		                "Expected CRC: %08x, Actual CRC: %08x\n",
		                archive, filename,
		                (unsigned int) crc, (unsigned int) check_crc);
		exit(-1);
	}
}
347 
main(int argc,char * argv[])348 int main(int argc, char *argv[])
349 {
350 	uint32_t crc;
351 
352 	if (argc < 4) {
353 		printf("Usage: %s <compression type> <test directory> "
354 		       "<command>\n", argv[0]);
355 		printf("  where command is of the form 'cmd %%s'\n");
356 		exit(-1);
357 	}
358 
359 	assert(chdir(argv[2]) != 0);
360 	srand(time(NULL));
361 
362 	for (;;) {
363 		remove(TEST_ARCHIVE_FILENAME);
364 		remove(TEST_FILENAME);
365 
366 		crc = generate_archive(TEST_ARCHIVE_FILENAME, argv[1],
367 		                       TEST_FILENAME);
368 
369 		run_dosbox(argv[3], TEST_ARCHIVE_FILENAME);
370 		check_file_crc(TEST_ARCHIVE_FILENAME, TEST_FILENAME, crc);
371 	}
372 
373 	return 0;
374 }
375 
376