1 // $Id: zip.cpp,v 1.22 2002/07/30 16:30:03 ericb Exp $
2 //
3 // This software is subject to the terms of the IBM Jikes Compiler
4 // License Agreement available at the following URL:
5 // http://ibm.com/developerworks/opensource/jikes.
6 // Copyright (C) 1996, 1998, 1999, 2000, 2001 International Business
7 // Machines Corporation and others. All Rights Reserved.
8 // You must accept the terms of that agreement to use this software.
9 //
10
11 #include "zip.h"
12 #include "control.h"
13 #include "symbol.h"
14
15 #ifdef HAVE_JIKES_NAMESPACE
16 namespace Jikes { // Open namespace Jikes block
17 #endif
18
19 //************************************************************
20 //
21 // The ZipFile methods follow
22 //
23 //************************************************************
24 #ifdef UNIX_FILE_SYSTEM
25 int (*ZipFile::uncompress_file[10]) (FILE *, char *, long) =
26 {
27 UncompressFile0,
28 UncompressFile1,
29 UncompressFile2,
30 UncompressFile3,
31 UncompressFile4,
32 UncompressFile5,
33 UncompressFile6,
34 UncompressFile7,
35 UncompressFile8,
36 UncompressFile9
37 };
38
GetU1()39 inline u1 ZipFile::GetU1()
40 {
41 return getc(zipfile);
42 }
43
Skip(u4 length)44 inline void ZipFile::Skip(u4 length)
45 {
46 for (u4 i = 0; i < length; i++)
47 getc(zipfile);
48 }
49
50 #elif defined(WIN32_FILE_SYSTEM) // ! UNIX_FILE_SYSTEM
51
52 int (*ZipFile::uncompress_file[10]) (char *, char *, long) =
53 {
54 UncompressFile0,
55 UncompressFile1,
56 UncompressFile2,
57 UncompressFile3,
58 UncompressFile4,
59 UncompressFile5,
60 UncompressFile6,
61 UncompressFile7,
62 UncompressFile8,
63 UncompressFile9
64 };
65
66 inline u1 ZipFile::GetU1()
67 {
68 return *file_buffer++;
69 }
70
71 inline void ZipFile::Skip(u4 length)
72 {
73 file_buffer += length;
74 }
75 #endif // WIN32_FILE_SYSTEM
76
77
GetU2()78 inline u2 ZipFile::GetU2()
79 {
80 u4 val = GetU1();
81 val |= (((u4) GetU1()) << 8);
82
83 return val;
84 }
85
86
GetU4()87 inline u4 ZipFile::GetU4()
88 {
89 u4 val = GetU1();
90 val |= (((u4) GetU1()) << 8);
91 val |= (((u4) GetU1()) << 16);
92 val |= (((u4) GetU1()) << 24);
93
94 return val;
95 }
96
97
ZipFile(FileSymbol * file_symbol)98 ZipFile::ZipFile(FileSymbol *file_symbol) : buffer(NULL)
99 {
100 Zip *zip = file_symbol -> Zipfile();
101
102 assert(zip -> IsValid());
103
104 #ifdef UNIX_FILE_SYSTEM
105 zipfile = zip -> zipfile;
106 int rc = fseek(zipfile, file_symbol -> offset, SEEK_SET);
107
108 assert(rc == 0);
109
110 #elif defined(WIN32_FILE_SYSTEM)
111 file_buffer = &zip -> zipbuffer[file_symbol -> offset];
112 #endif
113
114 Skip(8); // u4 magic = GetU4();
115 // u2 version_needed_to_extract = GetU2();
116 // u2 general_purpose_bits = GetU2();
117 u2 compression_method = GetU2();
118 Skip(16); // u2 time = GetU2();
119 // u2 date = GetU2();
120 // u4 crc32 = GetU4();
121 // u4 compressed_size = GetU4();
122 // u4 uncompressed_size = GetU4();
123 u2 filename_length = GetU2();
124 u2 extra_field_length = GetU2();
125 Skip(filename_length + extra_field_length);
126
127 #ifdef UNIX_FILE_SYSTEM
128 this -> buffer = new char[file_symbol -> uncompressed_size];
129 if (! uncompress_file[compression_method < 9 ? compression_method : 9](zipfile, this -> buffer, file_symbol -> uncompressed_size))
130 {
131 delete [] this -> buffer;
132 this -> buffer = NULL;
133 }
134 #elif defined(WIN32_FILE_SYSTEM)
135 if (compression_method > 0)
136 {
137 this -> buffer = new char[file_symbol -> uncompressed_size];
138 if (! uncompress_file[compression_method < 9 ? compression_method : 9](file_buffer,
139 this -> buffer,
140 file_symbol -> uncompressed_size))
141 {
142 delete [] this -> buffer;
143 this -> buffer = NULL;
144 this -> file_buffer = NULL;
145 }
146 }
147 #endif
148 }
149
150
~ZipFile()151 ZipFile::~ZipFile()
152 {
153 delete [] buffer;
154 }
155
156
157 //********************************************************************
158 //
159 // The Zip methods follow:
160 //
161 //********************************************************************
GetU1()162 inline u1 Zip::GetU1()
163 {
164 return *buffer_ptr++;
165 }
166
167
GetU2()168 inline u2 Zip::GetU2()
169 {
170 u4 val = GetU1();
171 val |= (((u4) GetU1()) << 8);
172
173 return val;
174 }
175
176
GetU4()177 inline u4 Zip::GetU4()
178 {
179 u4 val = GetU1();
180 val |= (((u4) GetU1()) << 8);
181 val |= (((u4) GetU1()) << 16);
182 val |= (((u4) GetU1()) << 24);
183
184 return val;
185 }
186
187
Skip(u4 length)188 inline void Zip::Skip(u4 length)
189 {
190 buffer_ptr += length;
191 }
192
193
ProcessSubdirectoryEntries(DirectorySymbol * directory_symbol,char * name,int name_length)194 inline DirectorySymbol *Zip::ProcessSubdirectoryEntries(DirectorySymbol *directory_symbol, char *name, int name_length)
195 {
196 wchar_t *directory_name = new wchar_t[name_length];
197
198 for (int start = 0, end; start < name_length; start = end + 1)
199 {
200 end = start;
201 for (int i = 0; end < name_length && name[end] != U_SLASH; i++, end++)
202 directory_name[i] = name[end];
203 NameSymbol *name_symbol = control.FindOrInsertName(directory_name, end - start);
204 DirectorySymbol *subdirectory_symbol = directory_symbol -> FindDirectorySymbol(name_symbol);
205 if (! subdirectory_symbol)
206 subdirectory_symbol = directory_symbol -> InsertDirectorySymbol(name_symbol, false);
207 directory_symbol = subdirectory_symbol;
208 }
209
210 delete [] directory_name;
211
212 return directory_symbol;
213 }
214
215
ProcessFilename(char * name,int name_length)216 inline NameSymbol *Zip::ProcessFilename(char *name, int name_length)
217 {
218 wchar_t *input_filename = new wchar_t[name_length];
219 for (int i = 0; i < name_length; i++)
220 input_filename[i] = name[i];
221 NameSymbol *name_symbol = control.FindOrInsertName(input_filename, name_length);
222
223 delete [] input_filename;
224
225 return name_symbol;
226 }
227
228
ProcessDirectoryEntry()229 inline void Zip::ProcessDirectoryEntry()
230 {
231 Skip(8); // u2 version_made_by = GetU2();
232 // u2 version_needed_to_extract = GetU2();
233 // u2 general_purpose_bits = GetU2();
234 // u2 compression_method = GetU2();
235 u2 last_mod_file_time = GetU2();
236 u2 last_mod_file_date = GetU2();
237 Skip(4); // u4 crc32 = GetU4();
238 Skip(4); // u4 compressed_size = GetU4();
239 u4 uncompressed_size = GetU4();
240 u2 file_name_length = GetU2();
241 u2 extra_field_length = GetU2();
242 u2 file_comment_length = GetU2();
243 Skip(8); // u2 disk_number_start = GetU2();
244 // u2 internal_file_attributes = GetU2();
245 // u4 external_file_attributes = GetU4();
246 u4 relative_offset_of_local_header = GetU4();
247
248 u4 date_time = ((u4) last_mod_file_date) << 16 | last_mod_file_time;
249 char *name = buffer_ptr;
250
251 Skip(file_name_length + extra_field_length + file_comment_length);
252
253 //
254 // Note that we need to process all subdirectory entries
255 // that appear in the zip file, and not just the ones that
256 // contain java and class files. Recall that in java the
257 // dot notation is used in specifying a package. Therefore,
258 // in processing a qualified-name that represents a package,
259 // we need to recognize each name as a subpackage. E.g.,
260 // when processing "java.lang", we need to recognize "java"
261 // as a package before looking for "lang"...
262
263 // start at the "." directory.
264 DirectorySymbol *directory_symbol = root_directory;
265 // -1 to remove last '/'
266 if (name[file_name_length - 1] == U_SLASH)
267 ProcessSubdirectoryEntries(directory_symbol,
268 name,
269 file_name_length - 1);
270 else
271 {
272 bool java_file = (file_name_length >= FileSymbol::java_suffix_length &&
273 FileSymbol::IsJavaSuffix(&name[file_name_length - FileSymbol::java_suffix_length])),
274 class_file = (file_name_length >= FileSymbol::class_suffix_length &&
275 FileSymbol::IsClassSuffix(&name[file_name_length - FileSymbol::class_suffix_length]));
276
277 if (java_file || class_file)
278 {
279 int name_length = file_name_length - (java_file ? FileSymbol::java_suffix_length : FileSymbol::class_suffix_length);
280 int i;
281 for (i = name_length - 1; i >= 0 && name[i] != U_SLASH; i--)
282 ;
283 if (i > 0) // directory specified?
284 directory_symbol = ProcessSubdirectoryEntries(directory_symbol,
285 name, i);
286 NameSymbol *name_symbol = ProcessFilename(&name[i + 1],
287 name_length - (i + 1));
288
289 //
290 // Search for a file of that name in the directory.
291 // If one is not found, then insert ... Otherwise,
292 // either a class file of that name was previously
293 // processed and now we found a java file with the
294 // same name or vice-versa... In that case keep
295 // (or replace with) the file with the most recent
296 // date stamp.
297 //
298 FileSymbol *file_symbol = directory_symbol ->
299 FindFileSymbol(name_symbol);
300 if (! file_symbol)
301 {
302 file_symbol = directory_symbol -> InsertFileSymbol(name_symbol);
303
304 file_symbol -> directory_symbol = directory_symbol;
305 if (java_file)
306 file_symbol -> SetJava();
307 else file_symbol -> SetClassOnly();
308
309 file_symbol -> uncompressed_size = uncompressed_size;
310 file_symbol -> offset = relative_offset_of_local_header;
311 file_symbol -> date_time = date_time;
312 }
313 else if (file_symbol -> date_time < date_time)
314 {
315 if (java_file)
316 file_symbol -> SetJava();
317 else file_symbol -> SetClass();
318
319 file_symbol -> uncompressed_size = uncompressed_size;
320 file_symbol -> offset = relative_offset_of_local_header;
321 file_symbol -> date_time = date_time;
322 }
323 }
324 }
325 }
326
327
Zip(Control & control_,char * zipfile_name)328 Zip::Zip(Control &control_, char *zipfile_name) : control(control_),
329 magic(0),
330 zipbuffer(NULL)
331 {
332 #ifdef UNIX_FILE_SYSTEM
333 zipfile = SystemFopen(zipfile_name, "rb");
334 if (zipfile)
335 {
336 int rc = fseek(zipfile, -END_SIZE, SEEK_END);
337 if (rc == 0)
338 {
339 zipbuffer = new char[END_SIZE];
340 buffer_ptr = zipbuffer;
341 SystemFread(buffer_ptr, sizeof(char), END_SIZE, zipfile);
342
343 magic = GetU4();
344 }
345 }
346 #elif defined(WIN32_FILE_SYSTEM)
347 zipfile = CreateFile(zipfile_name,
348 GENERIC_READ,
349 FILE_SHARE_READ,
350 NULL,
351 OPEN_EXISTING,
352 FILE_ATTRIBUTE_READONLY,
353 NULL);
354 if (zipfile != INVALID_HANDLE_VALUE)
355 {
356 mapfile = CreateFileMapping(zipfile, NULL, PAGE_READONLY, 0, 0, NULL);
357 zipbuffer = (mapfile == INVALID_HANDLE_VALUE ?
358 NULL :
359 (char *) MapViewOfFile(mapfile,
360 FILE_MAP_READ,
361 0, 0, 0)
362 );
363 if (zipbuffer)
364 {
365 buffer_ptr = &zipbuffer[GetFileSize(zipfile, NULL) - END_SIZE];
366 magic = GetU4();
367 }
368 }
369 #endif
370
371 // The following was posted to the dev list, but was just
372 // too good to not put in here, the next person to have to
373 // deal with this crap will appreciate it. -=Chris
374 //
375 // From: Mo DeJong <supermo@bayarea.net>
376 //
377 // Ode to a zip file:
378 //
379 // I can't read it forwards
380 // I can't read it backwards
381 // I must know where to begin
382 // so I need to look in the middle
383 // to find the middle, I must know the end
384 // but I don't know where that is, so I guess
385 //
386 // -------------------------------------------------
387
388
389 // This may or may not be a valid zip file. The zip file might have
390 // a file comment so we can't be sure where the END header is located.
391 // We check for the LOC header at byte 0 to make sure this is a valid
392 // zip file and then scan over the file backwards in search of the
393 // END header.
394
395 if (zipbuffer != NULL && ! IsValid()) {
396 u4 sig = 0;
397
398 #ifdef UNIX_FILE_SYSTEM
399 int res = fseek(zipfile, 0, SEEK_SET);
400 assert(res == 0);
401
402 char *tmpbuffer = new char[LOC_SIZE];
403 buffer_ptr = tmpbuffer;
404 SystemFread(buffer_ptr, sizeof(char), LOC_SIZE, zipfile);
405 sig = GetU4();
406 delete [] tmpbuffer;
407 buffer_ptr = NULL;
408
409 if (sig == LOC_SIG)
410 {
411 int block_size = 8192;
412 tmpbuffer = new char[block_size];
413 char *holdbuffer = new char[8];
414 char *index_ptr;
415
416 res = fseek(zipfile, 0, SEEK_END);
417 assert(res == 0);
418
419 long zip_file_size = ftell(zipfile);
420 int num_loops = zip_file_size / block_size;
421 magic = 0;
422
423 for (; magic == 0 && num_loops >= 0 ; num_loops--) {
424
425 if ((ftell(zipfile) - block_size) < 0)
426 {
427 block_size = ftell(zipfile);
428 res = fseek(zipfile, 0L, SEEK_SET);
429 }
430 else
431 {
432 res = fseek(zipfile, -block_size, SEEK_CUR);
433 }
434
435 assert(res == 0);
436 SystemFread(tmpbuffer, sizeof(char), block_size, zipfile);
437 res = fseek(zipfile, -block_size, SEEK_CUR); // undo fread
438 assert(res == 0);
439
440 for (index_ptr = tmpbuffer + block_size - 1;
441 index_ptr >= tmpbuffer;
442 index_ptr--)
443 {
444 if (*index_ptr == 'P')
445 {
446 // Check for header signature that spans buffer
447 int span = (tmpbuffer + block_size) - index_ptr;
448
449 if (span < 4)
450 {
451 memmove(holdbuffer+span, holdbuffer, 3);
452 memmove(holdbuffer, index_ptr, span);
453 buffer_ptr = holdbuffer;
454 }
455 else
456 {
457 buffer_ptr = index_ptr;
458 }
459
460 sig = GetU4();
461
462 if (sig == END_SIG)
463 {
464 // Found the END header, put it in zipbuffer.
465 buffer_ptr = zipbuffer;
466 fseek(zipfile, block_size-span, SEEK_CUR);
467 SystemFread(buffer_ptr, sizeof(char),
468 END_SIZE, zipfile);
469
470 magic = GetU4();
471 break;
472 }
473 }
474 }
475
476 // Copy first 3 bytes into holdbuffer in case sig spans
477 holdbuffer[0] = tmpbuffer[0];
478 holdbuffer[1] = tmpbuffer[1];
479 holdbuffer[2] = tmpbuffer[2];
480 }
481
482 delete [] tmpbuffer;
483 delete [] holdbuffer;
484 }
485 #elif defined(WIN32_FILE_SYSTEM)
486 buffer_ptr = &zipbuffer[0];
487 sig = GetU4();
488
489 if (sig == LOC_SIG)
490 {
491 buffer_ptr = &zipbuffer[GetFileSize(zipfile, NULL) - END_SIZE];
492 for ( ; buffer_ptr >= zipbuffer; buffer_ptr--)
493 {
494 if (*buffer_ptr == 'P')
495 {
496 sig = GetU4();
497 if (sig == END_SIG)
498 {
499 magic = sig;
500 break;
501 }
502 else
503 buffer_ptr -= 4;
504 }
505 }
506 }
507 #endif
508 }
509
510 ReadDirectory();
511 }
512
513
~Zip()514 Zip::~Zip()
515 {
516 #ifdef UNIX_FILE_SYSTEM
517 delete [] zipbuffer;
518 if (zipfile)
519 fclose(zipfile);
520 #elif defined(WIN32_FILE_SYSTEM)
521 if (zipfile != INVALID_HANDLE_VALUE)
522 {
523 if (mapfile != INVALID_HANDLE_VALUE)
524 {
525 if (zipbuffer)
526 UnmapViewOfFile(zipbuffer);
527 CloseHandle(mapfile);
528 }
529 CloseHandle(zipfile);
530 }
531 #endif
532
533 delete root_directory;
534 }
535
536
537 //
538 // Upon successful termination of this function, IsValid() should yield true.
539 // Each CEN header would have been read so the magic number would get reset
540 // when the END header is again read.
541 //
ReadDirectory()542 void Zip::ReadDirectory()
543 {
544 // Not a sourcepath (since we don't read java files from zip files)
545 root_directory = new DirectorySymbol(control.dot_name_symbol, NULL, false);
546
547 if (IsValid())
548 {
549 Skip(8); // u2 number_of_this_disk = GetU2();
550 // u2 number_of_the_disk_with_the_star = GetU2();
551 // u2 start_of_the_central_directory = GetU2();
552 // u2 total_number_of_entries_in_the_ = GetU2();
553 u4 central_directory_size = GetU4();
554
555 #ifdef UNIX_FILE_SYSTEM
556 u4 central_directory_offset = GetU4();
557 Skip(2); // u2 comment_length = GetU2();
558 int rc = fseek(zipfile, central_directory_offset, SEEK_SET);
559
560 assert(rc == 0);
561
562 delete [] zipbuffer;
563 zipbuffer = new char[central_directory_size + END_SIZE];
564 buffer_ptr = zipbuffer;
565 SystemFread(buffer_ptr, sizeof(char),
566 central_directory_size + END_SIZE,
567 zipfile);
568 #elif defined(WIN32_FILE_SYSTEM)
569 Skip(6); // u4 central_directory_offset = GetU4();
570 // u2 comment_length = GetU2();
571 buffer_ptr -= END_SIZE + central_directory_size;
572 #endif
573 for (magic = GetU4(); magic == CEN_SIG; magic = GetU4())
574 ProcessDirectoryEntry();
575 }
576 }
577
578 #ifdef HAVE_JIKES_NAMESPACE
579 } // Close namespace Jikes block
580 #endif
581
582