1 // $Id: zip.cpp,v 1.22 2002/07/30 16:30:03 ericb Exp $
2 //
3 // This software is subject to the terms of the IBM Jikes Compiler
4 // License Agreement available at the following URL:
5 // http://ibm.com/developerworks/opensource/jikes.
6 // Copyright (C) 1996, 1998, 1999, 2000, 2001 International Business
7 // Machines Corporation and others.  All Rights Reserved.
8 // You must accept the terms of that agreement to use this software.
9 //
10 
11 #include "zip.h"
12 #include "control.h"
13 #include "symbol.h"
14 
15 #ifdef HAVE_JIKES_NAMESPACE
16 namespace Jikes { // Open namespace Jikes block
17 #endif
18 
19 //************************************************************
20 //
21 // The ZipFile methods follow
22 //
23 //************************************************************
24 #ifdef UNIX_FILE_SYSTEM
25 int (*ZipFile::uncompress_file[10]) (FILE *, char *, long) =
26 {
27     UncompressFile0,
28     UncompressFile1,
29     UncompressFile2,
30     UncompressFile3,
31     UncompressFile4,
32     UncompressFile5,
33     UncompressFile6,
34     UncompressFile7,
35     UncompressFile8,
36     UncompressFile9
37 };
38 
GetU1()39 inline u1 ZipFile::GetU1()
40 {
41     return getc(zipfile);
42 }
43 
Skip(u4 length)44 inline void ZipFile::Skip(u4 length)
45 {
46     for (u4 i = 0; i < length; i++)
47         getc(zipfile);
48 }
49 
50 #elif defined(WIN32_FILE_SYSTEM) // ! UNIX_FILE_SYSTEM
51 
52 int (*ZipFile::uncompress_file[10]) (char *, char *, long) =
53 {
54     UncompressFile0,
55     UncompressFile1,
56     UncompressFile2,
57     UncompressFile3,
58     UncompressFile4,
59     UncompressFile5,
60     UncompressFile6,
61     UncompressFile7,
62     UncompressFile8,
63     UncompressFile9
64 };
65 
66 inline u1 ZipFile::GetU1()
67 {
68     return *file_buffer++;
69 }
70 
71 inline void ZipFile::Skip(u4 length)
72 {
73     file_buffer += length;
74 }
75 #endif // WIN32_FILE_SYSTEM
76 
77 
GetU2()78 inline u2 ZipFile::GetU2()
79 {
80     u4 val = GetU1();
81     val |= (((u4) GetU1()) << 8);
82 
83     return val;
84 }
85 
86 
GetU4()87 inline u4 ZipFile::GetU4()
88 {
89     u4 val = GetU1();
90     val |= (((u4) GetU1()) << 8);
91     val |= (((u4) GetU1()) << 16);
92     val |= (((u4) GetU1()) << 24);
93 
94     return val;
95 }
96 
97 
ZipFile(FileSymbol * file_symbol)98 ZipFile::ZipFile(FileSymbol *file_symbol) : buffer(NULL)
99 {
100     Zip *zip = file_symbol -> Zipfile();
101 
102     assert(zip -> IsValid());
103 
104 #ifdef UNIX_FILE_SYSTEM
105     zipfile = zip -> zipfile;
106     int rc = fseek(zipfile, file_symbol -> offset, SEEK_SET);
107 
108     assert(rc == 0);
109 
110 #elif defined(WIN32_FILE_SYSTEM)
111     file_buffer = &zip -> zipbuffer[file_symbol -> offset];
112 #endif
113 
114     Skip(8); // u4 magic                     = GetU4();
115              // u2 version_needed_to_extract = GetU2();
116              // u2 general_purpose_bits      = GetU2();
117     u2 compression_method                    = GetU2();
118     Skip(16); // u2 time                     = GetU2();
119               // u2 date                     = GetU2();
120               // u4 crc32                    = GetU4();
121               // u4 compressed_size          = GetU4();
122               // u4 uncompressed_size        = GetU4();
123     u2 filename_length                       = GetU2();
124     u2 extra_field_length                    = GetU2();
125     Skip(filename_length + extra_field_length);
126 
127 #ifdef UNIX_FILE_SYSTEM
128     this -> buffer = new char[file_symbol -> uncompressed_size];
129     if (! uncompress_file[compression_method < 9 ? compression_method : 9](zipfile, this -> buffer, file_symbol -> uncompressed_size))
130     {
131         delete [] this -> buffer;
132         this -> buffer = NULL;
133     }
134 #elif defined(WIN32_FILE_SYSTEM)
135     if (compression_method > 0)
136     {
137         this -> buffer = new char[file_symbol -> uncompressed_size];
138         if (! uncompress_file[compression_method < 9 ? compression_method : 9](file_buffer,
139                                                                                this -> buffer,
140                                                                                file_symbol -> uncompressed_size))
141         {
142             delete [] this -> buffer;
143             this -> buffer = NULL;
144             this -> file_buffer = NULL;
145         }
146     }
147 #endif
148 }
149 
150 
~ZipFile()151 ZipFile::~ZipFile()
152 {
153     delete [] buffer;
154 }
155 
156 
157 //********************************************************************
158 //
159 // The Zip methods follow:
160 //
161 //********************************************************************
GetU1()162 inline u1 Zip::GetU1()
163 {
164     return *buffer_ptr++;
165 }
166 
167 
GetU2()168 inline u2 Zip::GetU2()
169 {
170     u4 val = GetU1();
171     val |= (((u4) GetU1()) << 8);
172 
173     return val;
174 }
175 
176 
GetU4()177 inline u4 Zip::GetU4()
178 {
179     u4 val = GetU1();
180     val |= (((u4) GetU1()) << 8);
181     val |= (((u4) GetU1()) << 16);
182     val |= (((u4) GetU1()) << 24);
183 
184     return val;
185 }
186 
187 
Skip(u4 length)188 inline void Zip::Skip(u4 length)
189 {
190     buffer_ptr += length;
191 }
192 
193 
ProcessSubdirectoryEntries(DirectorySymbol * directory_symbol,char * name,int name_length)194 inline DirectorySymbol *Zip::ProcessSubdirectoryEntries(DirectorySymbol *directory_symbol, char *name, int name_length)
195 {
196     wchar_t *directory_name = new wchar_t[name_length];
197 
198     for (int start = 0, end; start < name_length; start = end + 1)
199     {
200         end = start;
201         for (int i = 0; end < name_length && name[end] != U_SLASH; i++, end++)
202              directory_name[i] = name[end];
203         NameSymbol *name_symbol = control.FindOrInsertName(directory_name, end - start);
204         DirectorySymbol *subdirectory_symbol = directory_symbol -> FindDirectorySymbol(name_symbol);
205         if (! subdirectory_symbol)
206             subdirectory_symbol = directory_symbol -> InsertDirectorySymbol(name_symbol, false);
207         directory_symbol = subdirectory_symbol;
208     }
209 
210     delete [] directory_name;
211 
212     return directory_symbol;
213 }
214 
215 
ProcessFilename(char * name,int name_length)216 inline NameSymbol *Zip::ProcessFilename(char *name, int name_length)
217 {
218     wchar_t *input_filename = new wchar_t[name_length];
219     for (int i = 0; i < name_length; i++)
220         input_filename[i] = name[i];
221     NameSymbol *name_symbol = control.FindOrInsertName(input_filename, name_length);
222 
223     delete [] input_filename;
224 
225     return name_symbol;
226 }
227 
228 
ProcessDirectoryEntry()229 inline void Zip::ProcessDirectoryEntry()
230 {
231     Skip(8); // u2 version_made_by           = GetU2();
232              // u2 version_needed_to_extract = GetU2();
233              // u2 general_purpose_bits      = GetU2();
234              // u2 compression_method        = GetU2();
235     u2 last_mod_file_time                    = GetU2();
236     u2 last_mod_file_date                    = GetU2();
237     Skip(4); // u4 crc32                     = GetU4();
238     Skip(4); // u4 compressed_size           = GetU4();
239     u4 uncompressed_size                     = GetU4();
240     u2 file_name_length                      = GetU2();
241     u2 extra_field_length                    = GetU2();
242     u2 file_comment_length                   = GetU2();
243     Skip(8); // u2 disk_number_start         = GetU2();
244              // u2 internal_file_attributes  = GetU2();
245              // u4 external_file_attributes  = GetU4();
246     u4 relative_offset_of_local_header       = GetU4();
247 
248     u4 date_time = ((u4) last_mod_file_date) << 16 | last_mod_file_time;
249     char *name = buffer_ptr;
250 
251     Skip(file_name_length + extra_field_length + file_comment_length);
252 
253     //
254     // Note that we need to process all subdirectory entries
255     // that appear in the zip file, and not just the ones that
256     // contain java and class files. Recall that in java the
257     // dot notation is used in specifying a package. Therefore,
258     // in processing a qualified-name that represents a package,
259     // we need to recognize each name as a subpackage. E.g.,
260     // when processing "java.lang", we need to recognize "java"
261     // as a package before looking for "lang"...
262 
263     // start at the "." directory.
264     DirectorySymbol *directory_symbol = root_directory;
265     // -1 to remove last '/'
266     if (name[file_name_length - 1] == U_SLASH)
267         ProcessSubdirectoryEntries(directory_symbol,
268                                    name,
269                                    file_name_length - 1);
270     else
271     {
272         bool java_file = (file_name_length >= FileSymbol::java_suffix_length &&
273                           FileSymbol::IsJavaSuffix(&name[file_name_length - FileSymbol::java_suffix_length])),
274              class_file = (file_name_length >= FileSymbol::class_suffix_length &&
275                            FileSymbol::IsClassSuffix(&name[file_name_length - FileSymbol::class_suffix_length]));
276 
277         if (java_file || class_file)
278         {
279             int name_length = file_name_length - (java_file ? FileSymbol::java_suffix_length : FileSymbol::class_suffix_length);
280             int i;
281             for (i = name_length - 1; i >= 0 && name[i] != U_SLASH; i--)
282                 ;
283             if (i > 0) // directory specified?
284                 directory_symbol = ProcessSubdirectoryEntries(directory_symbol,
285                                                               name, i);
286             NameSymbol *name_symbol = ProcessFilename(&name[i + 1],
287                                                       name_length - (i + 1));
288 
289             //
290             // Search for a file of that name in the directory.
291             // If one is not found, then insert ... Otherwise,
292             // either a class file of that name was previously
293             // processed and now we found a java file with the
294             // same name or vice-versa... In that case keep
295             // (or replace with) the file with the most recent
296             // date stamp.
297             //
298             FileSymbol *file_symbol = directory_symbol ->
299                 FindFileSymbol(name_symbol);
300             if (! file_symbol)
301             {
302                 file_symbol = directory_symbol -> InsertFileSymbol(name_symbol);
303 
304                 file_symbol -> directory_symbol = directory_symbol;
305                 if (java_file)
306                      file_symbol -> SetJava();
307                 else file_symbol -> SetClassOnly();
308 
309                 file_symbol -> uncompressed_size = uncompressed_size;
310                 file_symbol -> offset = relative_offset_of_local_header;
311                 file_symbol -> date_time = date_time;
312             }
313             else if (file_symbol -> date_time < date_time)
314             {
315                 if (java_file)
316                      file_symbol -> SetJava();
317                 else file_symbol -> SetClass();
318 
319                 file_symbol -> uncompressed_size = uncompressed_size;
320                 file_symbol -> offset = relative_offset_of_local_header;
321                 file_symbol -> date_time = date_time;
322             }
323         }
324     }
325 }
326 
327 
Zip(Control & control_,char * zipfile_name)328 Zip::Zip(Control &control_, char *zipfile_name) : control(control_),
329                                                   magic(0),
330                                                   zipbuffer(NULL)
331 {
332 #ifdef UNIX_FILE_SYSTEM
333     zipfile = SystemFopen(zipfile_name, "rb");
334     if (zipfile)
335     {
336         int rc = fseek(zipfile, -END_SIZE, SEEK_END);
337         if (rc == 0)
338         {
339             zipbuffer = new char[END_SIZE];
340             buffer_ptr = zipbuffer;
341             SystemFread(buffer_ptr, sizeof(char), END_SIZE, zipfile);
342 
343             magic = GetU4();
344         }
345     }
346 #elif defined(WIN32_FILE_SYSTEM)
347     zipfile = CreateFile(zipfile_name,
348                          GENERIC_READ,
349                          FILE_SHARE_READ,
350                          NULL,
351                          OPEN_EXISTING,
352                          FILE_ATTRIBUTE_READONLY,
353                          NULL);
354     if (zipfile != INVALID_HANDLE_VALUE)
355     {
356         mapfile = CreateFileMapping(zipfile, NULL, PAGE_READONLY, 0, 0, NULL);
357         zipbuffer = (mapfile == INVALID_HANDLE_VALUE ?
358                      NULL :
359                      (char *) MapViewOfFile(mapfile,
360                                             FILE_MAP_READ,
361                                             0, 0, 0)
362                      );
363         if (zipbuffer)
364         {
365             buffer_ptr = &zipbuffer[GetFileSize(zipfile, NULL) - END_SIZE];
366             magic = GetU4();
367         }
368     }
369 #endif
370 
371     // The following was posted to the dev list, but was just
372     // too good to not put in here, the next person to have to
373     // deal with this crap will appreciate it. -=Chris
374     //
375     // From: Mo DeJong <supermo@bayarea.net>
376     //
377     //   Ode to a zip file:
378     //
379     //   I can't read it forwards
380     //   I can't read it backwards
381     //   I must know where to begin
382     //   so I need to look in the middle
383     //   to find the middle, I must know the end
384     //   but I don't know where that is, so I guess
385     //
386     // -------------------------------------------------
387 
388 
389     // This may or may not be a valid zip file. The zip file might have
390     // a file comment so we can't be sure where the END header is located.
391     // We check for the LOC header at byte 0 to make sure this is a valid
392     // zip file and then scan over the file backwards in search of the
393     // END header.
394 
395     if (zipbuffer != NULL && ! IsValid()) {
396         u4 sig = 0;
397 
398 #ifdef UNIX_FILE_SYSTEM
399         int res = fseek(zipfile, 0, SEEK_SET);
400         assert(res == 0);
401 
402         char *tmpbuffer = new char[LOC_SIZE];
403         buffer_ptr = tmpbuffer;
404         SystemFread(buffer_ptr, sizeof(char), LOC_SIZE, zipfile);
405         sig = GetU4();
406         delete [] tmpbuffer;
407         buffer_ptr = NULL;
408 
409         if (sig == LOC_SIG)
410         {
411             int block_size = 8192;
412             tmpbuffer = new char[block_size];
413             char *holdbuffer = new char[8];
414             char *index_ptr;
415 
416             res = fseek(zipfile, 0, SEEK_END);
417             assert(res == 0);
418 
419             long zip_file_size = ftell(zipfile);
420             int num_loops = zip_file_size / block_size;
421             magic = 0;
422 
423             for (; magic == 0 && num_loops >= 0 ; num_loops--) {
424 
425                 if ((ftell(zipfile) - block_size) < 0)
426                 {
427                     block_size = ftell(zipfile);
428                     res = fseek(zipfile, 0L, SEEK_SET);
429                 }
430                 else
431                 {
432                     res = fseek(zipfile, -block_size, SEEK_CUR);
433                 }
434 
435                 assert(res == 0);
436                 SystemFread(tmpbuffer, sizeof(char), block_size, zipfile);
437                 res = fseek(zipfile, -block_size, SEEK_CUR); // undo fread
438                 assert(res == 0);
439 
440                 for (index_ptr = tmpbuffer + block_size - 1;
441                      index_ptr >= tmpbuffer;
442                      index_ptr--)
443                 {
444                     if (*index_ptr == 'P')
445                     {
446                         // Check for header signature that spans buffer
447                         int span = (tmpbuffer + block_size) - index_ptr;
448 
449                         if (span < 4)
450                         {
451                             memmove(holdbuffer+span, holdbuffer, 3);
452                             memmove(holdbuffer, index_ptr, span);
453                             buffer_ptr = holdbuffer;
454                         }
455                         else
456                         {
457                             buffer_ptr = index_ptr;
458                         }
459 
460                         sig = GetU4();
461 
462                         if (sig == END_SIG)
463                         {
464                             // Found the END header, put it in zipbuffer.
465                             buffer_ptr = zipbuffer;
466                             fseek(zipfile, block_size-span, SEEK_CUR);
467                             SystemFread(buffer_ptr, sizeof(char),
468                                 END_SIZE, zipfile);
469 
470                             magic = GetU4();
471                             break;
472                         }
473                     }
474                 }
475 
476                 // Copy first 3 bytes into holdbuffer in case sig spans
477                 holdbuffer[0] = tmpbuffer[0];
478                 holdbuffer[1] = tmpbuffer[1];
479                 holdbuffer[2] = tmpbuffer[2];
480             }
481 
482             delete [] tmpbuffer;
483             delete [] holdbuffer;
484         }
485 #elif defined(WIN32_FILE_SYSTEM)
486         buffer_ptr = &zipbuffer[0];
487         sig = GetU4();
488 
489         if (sig == LOC_SIG)
490         {
491             buffer_ptr = &zipbuffer[GetFileSize(zipfile, NULL) - END_SIZE];
492             for ( ; buffer_ptr >= zipbuffer; buffer_ptr--)
493             {
494                 if (*buffer_ptr == 'P')
495                 {
496                     sig = GetU4();
497                     if (sig == END_SIG)
498                     {
499                        magic = sig;
500                        break;
501                     }
502                     else
503                        buffer_ptr -= 4;
504                 }
505             }
506         }
507 #endif
508     }
509 
510     ReadDirectory();
511 }
512 
513 
~Zip()514 Zip::~Zip()
515 {
516 #ifdef UNIX_FILE_SYSTEM
517     delete [] zipbuffer;
518     if (zipfile)
519         fclose(zipfile);
520 #elif defined(WIN32_FILE_SYSTEM)
521     if (zipfile != INVALID_HANDLE_VALUE)
522     {
523         if (mapfile != INVALID_HANDLE_VALUE)
524         {
525             if (zipbuffer)
526                 UnmapViewOfFile(zipbuffer);
527             CloseHandle(mapfile);
528         }
529         CloseHandle(zipfile);
530     }
531 #endif
532 
533     delete root_directory;
534 }
535 
536 
537 //
538 // Upon successful termination of this function, IsValid() should yield true.
539 // Each CEN header would have been read so the magic number would get reset
540 // when the END header is again read.
541 //
ReadDirectory()542 void Zip::ReadDirectory()
543 {
544     // Not a sourcepath (since we don't read java files from zip files)
545     root_directory = new DirectorySymbol(control.dot_name_symbol, NULL, false);
546 
547     if (IsValid())
548     {
549         Skip(8); // u2 number_of_this_disk              = GetU2();
550                  // u2 number_of_the_disk_with_the_star = GetU2();
551                  // u2 start_of_the_central_directory   = GetU2();
552                  // u2 total_number_of_entries_in_the_  = GetU2();
553         u4 central_directory_size                       = GetU4();
554 
555 #ifdef UNIX_FILE_SYSTEM
556         u4 central_directory_offset                     = GetU4();
557         Skip(2); // u2 comment_length                   = GetU2();
558         int rc = fseek(zipfile, central_directory_offset, SEEK_SET);
559 
560         assert(rc == 0);
561 
562         delete [] zipbuffer;
563         zipbuffer = new char[central_directory_size + END_SIZE];
564         buffer_ptr = zipbuffer;
565         SystemFread(buffer_ptr, sizeof(char),
566                     central_directory_size + END_SIZE,
567                     zipfile);
568 #elif defined(WIN32_FILE_SYSTEM)
569         Skip(6); // u4 central_directory_offset         = GetU4();
570                  // u2 comment_length                   = GetU2();
571         buffer_ptr -= END_SIZE + central_directory_size;
572 #endif
573         for (magic = GetU4(); magic == CEN_SIG; magic = GetU4())
574              ProcessDirectoryEntry();
575     }
576 }
577 
578 #ifdef HAVE_JIKES_NAMESPACE
579 } // Close namespace Jikes block
580 #endif
581 
582