1 /*
2
3 Copyright (c) 2011, 2012, Simon Howard
4
5 Permission to use, copy, modify, and/or distribute this software
6 for any purpose with or without fee is hereby granted, provided
7 that the above copyright notice and this permission notice appear
8 in all copies.
9
10 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11 WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12 WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13 AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14 CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15 LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16 NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17 CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
19 */
20
21 // Automated tester for 'ghost' archive formats.
22 //
23 // Some old tools support 'ghost' archive formats: compression algorithms
24 // that can be decompressed, but for which no version of the tool can be
25 // found that actually generates archives that use them. These formats were
26 // likely beta versions of the mainstream algorithms that are more widely
27 // used.
28 //
29 // Ideally, these archive formats should still be supported. However, testing
30 // poses a problem, because test archives cannot be generated to test the
31 // decompression code. This tool provides an alternative: it uses the
32 // decompression code to generate a random archive which, assuming the
33 // correctness of the code, should be a valid archive. The original legacy
34 // tool can then be run via DOSbox to extract the archive. The data extracted
35 // by the two tools can then be compared.
36
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <assert.h>
41 #include <time.h>
42 #include <unistd.h>
43
44 #include "lib/lha_arch.h"
45 #include "lha_decoder.h"
46 #include "crc32.h"
47
// Name of the archive file written on every test iteration:

#define TEST_ARCHIVE_FILENAME "testdata.lzh"

// Name given to the single file stored inside that archive:

#define TEST_FILENAME "TESTDATA.BIN"

// Smallest possible size, in bytes, of a level 0 LHA header
// (i.e. the header size when the filename is empty):

#define MIN_HEADER_LEN 24

// How many bytes of random "compressed" data are placed in the archive:

#define COMPRESSED_DATA_LEN (64 * 1024)

// Uncompressed length that is recorded in the archive header:

#define UNCOMPRESSED_DATA_LEN (64 * 1024)

// Byte boundary at which to align compressed files within the archive.
// (Not referenced by any of the code visible in this file.)

#define ARCHIVED_FILE_ALIGN 1024
71
// State handed to the read_data() callback: an in-memory buffer that
// stands in for the compressed input stream, plus the current read
// position within it. The position is a size_t so that it has the same
// width as the buffer length it is compared against.

typedef struct {
	uint8_t *buf;      // Start of the in-memory stream.
	size_t buf_len;    // Total number of bytes available in buf.
	size_t buf_pos;    // Offset of the next byte to hand out.
} ReadDataCallback;
77
// Store a 32-bit value into buf[0..3] in little-endian order
// (least significant byte first).

static void write_uint32(uint8_t *buf, uint32_t value)
{
	unsigned int byte_index;

	for (byte_index = 0; byte_index < 4; ++byte_index) {
		buf[byte_index] = (uint8_t) ((value >> (8 * byte_index)) & 0xff);
	}
}
87
// Store a 16-bit value into buf[0..1] in little-endian order
// (low byte first, then high byte).

static void write_uint16(uint8_t *buf, uint16_t value)
{
	const uint8_t low_byte = (uint8_t) (value & 0xff);
	const uint8_t high_byte = (uint8_t) ((value >> 8) & 0xff);

	buf[0] = low_byte;
	buf[1] = high_byte;
}
95
// Fill the specified buffer with pseudo-random bytes taken from rand().
// The sequence produced therefore depends on the seed last given to
// srand(). A length of zero leaves the buffer untouched.

static void fill_random(uint8_t *buf, size_t buf_len)
{
	// The index has the same type as buf_len so that the loop cannot
	// wrap around before reaching the end of a very large buffer.
	size_t i;

	for (i = 0; i < buf_len; ++i) {
		buf[i] = (uint8_t) (rand() & 0xff);
	}
}
106
107 /**
108 * Callback function used by LHADecoder to read more data.
109 *
110 * @param buf Buffer in which to read the data.
111 * @param buf_len Length of the buffer.
112 * @param user_data Pointer to the ReadDataCallback structure containing
113 * the stream decompression state.
114 * @return Number of bytes read.
115 */
116
read_data(void * buf,size_t buf_len,void * user_data)117 static size_t read_data(void *buf, size_t buf_len, void *user_data)
118 {
119 ReadDataCallback *callback_data = user_data;
120 size_t to_copy;
121
122 to_copy = callback_data->buf_len - callback_data->buf_pos;
123
124 if (to_copy > buf_len) {
125 to_copy = buf_len;
126 }
127
128 memcpy(buf, callback_data->buf + callback_data->buf_pos, to_copy);
129 callback_data->buf_pos += to_copy;
130
131 return to_copy;
132 }
133
134 /**
135 * Generate some test data that can be successfully decompressed using
136 * the specified algorithm type.
137 *
138 * @param type The algorithm to use for decompression.
139 * @param buf Pointer to the buffer to store the data.
140 * @param buf_len Length of the buffer.
141 * @param uncompressed_len Number of bytes to decompress before stopping.
142 * @param crc16 Pointer to a variable to store the 16-bit CRC.
143 * @param crc32 Pointer to a variable to store the 32-bit CRC.
144 */
145
generate_data(char * type,uint8_t * buf,size_t buf_len,size_t uncompressed_len,uint16_t * crc16,uint32_t * crc32)146 static void generate_data(char *type, uint8_t *buf, size_t buf_len,
147 size_t uncompressed_len,
148 uint16_t *crc16, uint32_t *crc32)
149 {
150 LHADecoderType *dtype;
151 ReadDataCallback callback_data;
152 LHADecoder *decoder;
153 size_t decoded_stream_len;
154
155 dtype = lha_decoder_for_name(type);
156 assert(dtype != NULL);
157
158 // Fill the buffer with random data:
159
160 fill_random(buf, buf_len);
161
162 // Decompress as much data as possible. If we fail to decompress
163 // all the data, modify some of the data from the point just
164 // before we stopped and try again.
165 // This is essentially a genetic algorithm.
166
167 for (;;) {
168 callback_data.buf = buf;
169 callback_data.buf_len = buf_len;
170 callback_data.buf_pos = 0;
171
172 decoder = lha_decoder_new(dtype, read_data, &callback_data,
173 uncompressed_len);
174
175 decoded_stream_len = 0;
176 *crc32 = 0;
177
178 for (;;) {
179 uint8_t decode_buf[128];
180 size_t decoded_bytes;
181
182 decoded_bytes = lha_decoder_read(decoder, decode_buf,
183 sizeof(decode_buf));
184 if (decoded_bytes == 0) {
185 break;
186 }
187
188 crc32_buf(crc32, decode_buf, decoded_bytes);
189
190 decoded_stream_len += decoded_bytes;
191 }
192
193 *crc16 = lha_decoder_get_crc(decoder);
194
195 lha_decoder_free(decoder);
196
197 // Successfully decompressed all the data? We are done.
198
199 if (decoded_stream_len >= uncompressed_len) {
200 break;
201 }
202
203 // Modify some data from the end of the stream and try again.
204
205 if (callback_data.buf_pos < 6) {
206 fill_random(callback_data.buf, 6);
207 } else {
208 fill_random(callback_data.buf
209 + callback_data.buf_pos - 6, 6);
210 }
211 }
212 }
213
/**
 * Calculate the LHA level 0 header checksum for the specified buffer:
 * the sum of all the bytes, truncated to 8 bits.
 *
 * @param buf      Pointer to buffer containing the data to checksum.
 *                 The buffer is only read, never modified.
 * @param buf_len  Length of the buffer. Zero yields a checksum of 0.
 * @return         Checksum of the buffer.
 */

static uint8_t calculate_checksum(const uint8_t *buf, size_t buf_len)
{
	uint8_t result;
	size_t i;    // Same type as buf_len, so the index cannot wrap early.

	result = 0;

	for (i = 0; i < buf_len; ++i) {
		result = (uint8_t) ((result + buf[i]) & 0xff);
	}

	return result;
}
235
236 /**
237 * Generate an archive file.
238 *
239 * @param out_filename Location to save the generate file.
240 * @param type The algorithm to use for decompression.
241 * @param filename Filename to use for the archived file.
242 * @return 32-bit CRC of the decompressed data.
243 */
244
generate_archive(char * out_filename,char * type,char * filename)245 static uint32_t generate_archive(char *out_filename, char *type,
246 char *filename)
247 {
248 FILE *outfile;
249 uint8_t *buf;
250 size_t buf_len;
251 uint16_t crc16;
252 uint32_t crc32;
253 unsigned int skip_len;
254
255 skip_len = COMPRESSED_DATA_LEN;
256 buf_len = COMPRESSED_DATA_LEN + MIN_HEADER_LEN + strlen(filename);
257
258 buf = malloc(buf_len);
259 assert(buf != NULL);
260
261 generate_data(type, buf + MIN_HEADER_LEN + strlen(filename),
262 COMPRESSED_DATA_LEN, UNCOMPRESSED_DATA_LEN,
263 &crc16, &crc32);
264
265 // Construct header:
266
267 buf[0] = MIN_HEADER_LEN + strlen(filename) - 2; // Header length
268 memcpy(buf + 2, type, 5); // Compression type
269 write_uint32(buf + 7, COMPRESSED_DATA_LEN); // Packed len
270 write_uint32(buf + 11, UNCOMPRESSED_DATA_LEN); // Original len
271 write_uint32(buf + 15, 0); // Mod date/time
272 write_uint16(buf + 19, 0x0020); // DOS attribute
273 buf[21] = strlen(filename); // Filename
274 memcpy(buf + 22, filename, strlen(filename));
275 write_uint16(buf + 22 + strlen(filename), crc16); // CRC
276 buf[1] = calculate_checksum(buf + 2,
277 MIN_HEADER_LEN + strlen(filename) - 2);
278
279 outfile = fopen(out_filename, "wb");
280 fwrite(buf, buf_len, 1, outfile);
281 fclose(outfile);
282
283 free(buf);
284
285 return crc32;
286 }
287
/**
 * Invoke DOSbox to run the specified command.
 *
 * The command is expanded with the filename, wrapped in a DOSbox
 * invocation that mounts the current directory as drive C:, and run
 * through system(). If either command string does not fit its buffer,
 * or system() reports that the command could not be launched, an error
 * is printed and the program exits.
 *
 * @param command   Command string of the form 'cmd %s'. It is used as
 *                  a printf-style format string and so must contain no
 *                  conversion other than a single %s.
 * @param filename  Filename of the file to extract.
 */

static void run_dosbox(char *command, char *filename)
{
	char cmdbuf1[64];
	char cmdbuf2[128];
	int len;

	// NOTE(review): 'command' comes from the command line and is used
	// as a format string by design. It is also placed between single
	// quotes in the shell command below without any escaping, so it
	// must not contain a single quote character.

	len = snprintf(cmdbuf1, sizeof(cmdbuf1), command, filename);

	if (len < 0 || (size_t) len >= sizeof(cmdbuf1)) {
		fprintf(stderr, "DOS command is too long (limit is %u "
		                "characters)\n",
		        (unsigned int) (sizeof(cmdbuf1) - 1));
		exit(-1);
	}

	len = snprintf(cmdbuf2, sizeof(cmdbuf2),
	               "dosbox -c 'mount c .' -c 'c:' -c '%s' -c exit",
	               cmdbuf1);

	if (len < 0 || (size_t) len >= sizeof(cmdbuf2)) {
		fprintf(stderr, "DOSbox command line is too long\n");
		exit(-1);
	}

	// A return value of -1 means the command could not be launched at
	// all. Failures inside DOSbox itself are detected afterwards, when
	// the extracted file is checked.

	if (system(cmdbuf2) == -1) {
		perror("system");
		exit(-1);
	}
}
305
/**
 * Check the extracted file has the expected CRC32.
 *
 * The file is read in small chunks and its 32-bit CRC is compared with
 * the expected value. If the file cannot be opened, cannot be read, or
 * the CRC differs, a diagnostic is printed to stderr and the program
 * exits.
 *
 * @param archive   Path to the archive file (used in messages only).
 * @param filename  Filename of the file to check.
 * @param crc       Expected 32-bit CRC.
 */

static void check_file_crc(char *archive, char *filename, uint32_t crc)
{
	uint8_t buf[64];
	FILE *fstream;
	uint32_t check_crc;
	size_t bytes;

	fstream = fopen(filename, "rb");

	if (fstream == NULL) {
		fprintf(stderr, "\n\nFailed to extract file:\n"
		                "Archive: %s\nFilename: %s\n",
		        archive, filename);
		exit(-1);
	}

	check_crc = 0;

	// Loop on the result of fread() rather than on feof(): after a read
	// error feof() never becomes true, which would loop forever.

	while ((bytes = fread(buf, 1, sizeof(buf), fstream)) > 0) {
		crc32_buf(&check_crc, buf, bytes);
	}

	// fread() returning zero means either end of file or an error;
	// only the former gives a CRC that is worth comparing.

	if (ferror(fstream)) {
		fprintf(stderr, "\n\nError reading extracted file:\n"
		                "Archive: %s\nFilename: %s\n",
		        archive, filename);
		exit(-1);
	}

	fclose(fstream);

	if (check_crc != crc) {
		// The casts make the arguments match the %x conversions
		// whatever the underlying type of uint32_t is.

		fprintf(stderr, "\n\nExtracted file failed CRC check:\n"
		                "Archive: %s\nFilename: %s\n"
		                "Expected CRC: %08x, Actual CRC: %08x\n",
		        archive, filename,
		        (unsigned int) crc, (unsigned int) check_crc);
		exit(-1);
	}
}
347
main(int argc,char * argv[])348 int main(int argc, char *argv[])
349 {
350 uint32_t crc;
351
352 if (argc < 4) {
353 printf("Usage: %s <compression type> <test directory> "
354 "<command>\n", argv[0]);
355 printf(" where command is of the form 'cmd %%s'\n");
356 exit(-1);
357 }
358
359 assert(chdir(argv[2]) != 0);
360 srand(time(NULL));
361
362 for (;;) {
363 remove(TEST_ARCHIVE_FILENAME);
364 remove(TEST_FILENAME);
365
366 crc = generate_archive(TEST_ARCHIVE_FILENAME, argv[1],
367 TEST_FILENAME);
368
369 run_dosbox(argv[3], TEST_ARCHIVE_FILENAME);
370 check_file_crc(TEST_ARCHIVE_FILENAME, TEST_FILENAME, crc);
371 }
372
373 return 0;
374 }
375
376