1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2009 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.0 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_0.txt. |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 | |
15 | **** WARNING **** |
16 | |
17 | This module makes use of unRAR - free utility for RAR archives. |
18 | Its license states that you MUST NOT use its code to develop |
19 | a RAR (WinRAR) compatible archiver. |
20 | Please, read unRAR license for full information. |
21 | unRAR & RAR copyrights are owned by Eugene Roshal |
22 +----------------------------------------------------------------------+
23 | Author: Gustavo Lopes <cataphract@php.net> |
24 +----------------------------------------------------------------------+
25 */
26
27 #ifdef HAVE_CONFIG_H
28 # include "config.h"
29 #endif
30
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34
35 #include <php.h>
36 #include <wchar.h>
37 #include "php_rar.h"
38
39 #if HAVE_RAR
40
41 /* {{{ Structure definitions */
42
43 typedef struct _rar_find_state {
44 rar_find_output out;
45 rar_file_t *rar;
46 size_t index; /* next unread in entries_array or entries_array_s */
47 } rar_find_state;
48
49 struct _rar_unique_entry {
50 size_t id; /* position in the entries_array */
51 struct RARHeaderDataEx entry; /* last entry */
52 unsigned long packed_size;
53 int depth; /* number of directory separators */
54 size_t name_wlen; /* excluding L'\0' terminator */
55 };
56
/* The listing of an archive.
 *
 * last_accessed points at the entry that was accessed last. It makes the
 * common pattern "traverse a directory and stat every entry obtained in each
 * iteration" cheap: exact name searches then hit this one-element cache
 * (100% cache hits in directory traversal tests 064 and 065). */
struct _rar_entries {
	/* number of committed entries */
	size_t						num_entries;
	/* entries in archive order; non-NULL whenever num_entries > 0 */
	struct _rar_unique_entry	**entries_array;
	/* sorted version for bsearch; NULL until it is built */
	struct _rar_unique_entry	**entries_array_s;
	/* one-element cache for exact name searches; may be NULL */
	struct _rar_unique_entry	*last_accessed;
	/* tells whether the archive is broken */
	int							list_result;
};
68 /* }}} */
69
70
71 /* {{{ Function prototypes for functions with internal linkage */
72 static void _rar_nav_get_depth_and_length(wchar_t *filenamew, const size_t file_size,
73 int *depth_out, size_t *wlen_out TSRMLS_DC);
74 static int _rar_nav_get_depth(const wchar_t *filenamew, const size_t file_size);
75 static int _rar_nav_compare_entries(const void *op1, const void *op2 TSRMLS_DC);
76 #if PHP_MAJOR_VERSION >= 7
77 static void _rar_nav_swap_entries(void *op1, void *op2);
78 #endif
79 static int _rar_nav_compare_entries_std(const void *op1, const void *op2);
80 static inline int _rar_nav_compare_values(const wchar_t *str1, const int depth1,
81 const wchar_t *str2, const int depth2,
82 const size_t max_size);
83 static int _rar_nav_directory_match(const wchar_t *dir, const size_t dir_len,
84 const wchar_t *entry, const size_t entry_len);
85 static size_t _rar_nav_position_on_dir_start(const wchar_t *dir_name,
86 int dir_depth,
87 size_t dir_size,
88 struct _rar_unique_entry **entries,
89 size_t low, size_t high);
90 /* }}} */
91
92
93 /* {{{ Functions with external linkage */
94
95 /* {{{ _rar_entry_count */
/* Returns the number of entries in the listing of rar. rar->entries is
 * dereferenced without a check, so the listing must already exist. */
size_t _rar_entry_count(rar_file_t *rar)
{
	const struct _rar_entries *listing = rar->entries;

	return listing->num_entries;
}
99 /* }}} */
100
101 /* {{{ _rar_entry_search_start */
/* Allocates a zeroed search cursor for rar and stores it in *state.
 *
 * When bit 0x02 of mode is set, the archive has entries and no sorted array
 * exists yet, a sorted copy of entries_array is built (entries_array_s); it
 * is what the bsearch and directory lookups of the advance function read. */
void _rar_entry_search_start(rar_file_t *rar,
	unsigned mode,
	rar_find_output **state TSRMLS_DC)
{
	rar_find_state		**slot = (rar_find_state **) state;
	rar_find_state		*cursor;
	struct _rar_entries	*ents;

	assert(slot != NULL);

	cursor = ecalloc(1, sizeof *cursor);
	cursor->rar = rar;
	cursor->out.position = -1;
	*slot = cursor;

	ents = rar->entries;
	assert(ents != NULL);
	assert(ents->num_entries == 0 || ents->entries_array != NULL);

	/* nothing to sort, sorting not requested, or already sorted */
	if (!(mode & 0x02U) || ents->num_entries == 0 ||
			ents->entries_array_s != NULL) {
		return;
	}

	ents->entries_array_s = emalloc(ents->num_entries *
		sizeof ents->entries_array_s[0]);
	memcpy(ents->entries_array_s, ents->entries_array,
		ents->num_entries * sizeof ents->entries_array[0]);

#if PHP_MAJOR_VERSION < 7
	zend_qsort(ents->entries_array_s, ents->num_entries,
		sizeof *ents->entries_array_s, _rar_nav_compare_entries
		TSRMLS_CC);
#else
	zend_qsort(ents->entries_array_s, ents->num_entries,
		sizeof *ents->entries_array_s, _rar_nav_compare_entries,
		_rar_nav_swap_entries);
#endif
}
130 /* }}} */
131
132 /* {{{ _rar_entry_search_seek */
/* Clears the output of the cursor and makes pos the next index to be read. */
void _rar_entry_search_seek(rar_find_output *state, size_t pos)
{
	rar_find_state *cursor = (rar_find_state *) state;

	/* pos is unsigned, so there is no lower bound to check */
	cursor->index = pos;

	cursor->out.packed_size = 0;
	cursor->out.header = NULL;
	cursor->out.position = -1;
	cursor->out.found = 0;
	cursor->out.eof = 0;
}
144 /* }}} */
145
146 /* {{{ _rar_entry_search_end */
/* Releases a search cursor. A NULL state is accepted because the cursor may
 * not have been initialized due to error conditions in
 * rararch_it_get_iterator that jumped out of the function. */
void _rar_entry_search_end(rar_find_output *state)
{
	if (state == NULL)
		return;

	efree(state);
}
155 /* }}} */
156
157 /* {{{ _rar_entry_search_rewind */
/* Clears the output of the cursor and restarts reading at index 0. */
void _rar_entry_search_rewind(rar_find_output *state)
{
	rar_find_state *cursor = (rar_find_state *) state;

	cursor->index = 0;

	cursor->out.packed_size = 0;
	cursor->out.header = NULL;
	cursor->out.position = -1;
	cursor->out.found = 0;
	cursor->out.eof = 0;
}
168 /* }}} */
169
170 /* {{{ _rar_entry_search_advance */
/* Advances the search cursor and reports the result in the cursor's output.
 *
 * state           cursor created by _rar_entry_search_start
 * file            name to look for; NULL asks for the next entry
 * file_size       length of file + 1; 0 makes the function measure it
 * directory_match non-zero: file names a directory (no trailing separator,
 *                 empty string for the root) and its next direct child is
 *                 wanted
 *
 * On success out.found is 1 and out.position/header/packed_size describe the
 * entry; the cursor index is left one past it. Otherwise out.found is 0 and
 * out.eof is 1. The exact name and directory modes read entries_array_s, so
 * the sorted array must have been built beforehand. */
void _rar_entry_search_advance(rar_find_output *state,
	const wchar_t * const file, /* NULL = give next */
	size_t file_size, /* length + 1; 0 if unknown */
	int directory_match)
{
	rar_find_state		*rstate = (rar_find_state *) state;
	struct _rar_entries	*entries;
	int					found = FALSE;
	int					in_sorted = FALSE; /* which array rstate->index refers to */
	size_t				filenamewsize;

	assert(state != NULL);
	assert(file == NULL || file_size == 0 || file[file_size - 1] == L'\0');

	entries = rstate->rar->entries;
	assert(entries != NULL);

	if ((file != NULL) && (file_size == 0))
		file_size = wcslen(file) + 1;

	/* reset output */
	memset(&rstate->out, 0, sizeof rstate->out);

	filenamewsize = sizeof(entries->entries_array[0]->entry.FileNameW) /
		sizeof(entries->entries_array[0]->entry.FileNameW[0]); /* = 1024 */
	/* NOTE(review): out was zeroed just above, so out.eof is always 0 here;
	 * only the index and the name length can end the search at this point */
	if (rstate->out.eof || (rstate->index >= entries->num_entries) ||
			(file_size > filenamewsize)) {
		rstate->out.found = 0;
		rstate->out.eof = 1;
		return;
	}

	/* three different cases:
	 * (1) ask next
	 * (2) ask by name
	 * (3) ask next directory child */

	if (!directory_match && (file == NULL)) {
		/* ask next */
		in_sorted = FALSE;
		found = TRUE;
		/* populate cache for exact name access */
		entries->last_accessed = entries->entries_array[rstate->index];
	}
	else if (!directory_match) {
		/* ask by exact name */
		struct _rar_unique_entry temp_entry,
								 *temp_entry_ptr = &temp_entry,
								 **found_entry;
		/* try to hit cache */
		if (entries->last_accessed != NULL) {
			if ((entries->last_accessed->name_wlen == file_size - 1) &&
					wmemcmp(entries->last_accessed->entry.FileNameW, file,
					file_size) == 0) {
				/* cache hit; id is an index into the unsorted array */
				in_sorted = FALSE;
				found = TRUE;
				rstate->index = entries->last_accessed->id;
			}
			else {
				entries->last_accessed = NULL;
			}
		}

		if (!found) { /* the cache didn't do; use binary search */
			assert(entries->entries_array_s != NULL);
			/* only the name and the depth of the key are compared */
			wmemcpy(temp_entry.entry.FileNameW, file, file_size);
			temp_entry.depth = _rar_nav_get_depth(file, file_size);
			found_entry = bsearch(&temp_entry_ptr,
				&entries->entries_array_s[rstate->index],
				entries->num_entries - rstate->index,
				sizeof entries->entries_array_s[0],
				_rar_nav_compare_entries_std);
			if (found_entry != NULL) {
				in_sorted = TRUE;
				found = TRUE;
				rstate->index = (size_t) (found_entry - entries->entries_array_s);
			}
		}
	}
	else {
		/* ask by next directory child */
		const size_t				dir_len = file_size - 1;
		struct _rar_unique_entry	*cur;

		assert(file != NULL);
		assert(entries->entries_array_s != NULL);

		cur = entries->entries_array_s[rstate->index];
		in_sorted = TRUE;
		if (_rar_nav_directory_match(file, dir_len,
				cur->entry.FileNameW, cur->name_wlen)) {
			found = TRUE;
			/* populate cache for exact name access */
			entries->last_accessed = cur;
		}
		else {
			/* no directory match for current; find out whether cur already
			 * sorts after every possible child of the directory */
			const int	dir_depth = _rar_nav_get_depth(file, file_size);
			int			past;

			if (dir_len == 0) {
				/* root: children have depth 0 and sort first, so a
				 * non-matching entry means they are exhausted */
				past = TRUE;
			}
			else if (cur->depth != dir_depth + 1) {
				/* the sort order is depth first */
				past = cur->depth > dir_depth + 1;
			}
			else {
				/* Same depth as the children, whose names all start with
				 * the directory name followed by the separator. The
				 * character after the shared prefix decides: an entry such
				 * as "a-b/c" sorts BEFORE the children of "a" whenever
				 * '-' < separator, so it must not end the search. */
				const int prefix_cmp = wcsncmp(cur->entry.FileNameW, file,
					dir_len);
				if (prefix_cmp != 0) {
					past = prefix_cmp > 0;
				}
				else { /* index dir_len is in bounds: file_size <= 1024 */
					past = cur->entry.FileNameW[dir_len] >
						(wchar_t) SPATHDIVIDER[0];
				}
			}

			if (!past) {
				/* kept as size_t: the callee signals failure with
				 * (size_t) -1 and an int would truncate large indexes */
				const size_t pos = _rar_nav_position_on_dir_start(file,
					dir_depth, file_size, entries->entries_array_s,
					rstate->index, entries->num_entries);
				if (pos != (size_t) -1) {
					found = TRUE;
					rstate->index = pos;
					/* populate cache for exact name access */
					entries->last_accessed = entries->entries_array_s[pos];
				}
			}
			/* else: past the entries of the directory; nothing to do */
		}
	}

	if (found == FALSE) {
		rstate->out.found = 0;
		rstate->out.eof = 1;
	}
	else {
		struct _rar_unique_entry *cur;
		if (in_sorted)
			cur = entries->entries_array_s[rstate->index];
		else
			cur = entries->entries_array[rstate->index];
		rstate->out.found = 1;
		rstate->out.position = cur->id;
		rstate->out.header = &cur->entry;
		rstate->out.packed_size = cur->packed_size;
		rstate->index++;
	}
}
307 /* }}} */
308
309 /* {{{ _rar_delete_entries - accepts an allocated entries list */
/* Frees the listing owned by rar: every entry (with its RedirName copy, if
 * any), both entry arrays and the listing structure itself. Does nothing
 * when there is no listing. rar->entries is reset to NULL afterwards so that
 * no dangling pointer is left behind for later NULL checks. */
void _rar_delete_entries(rar_file_t *rar TSRMLS_DC)
{
	struct _rar_entries	*ents = rar->entries;
	size_t				i;

	if (ents == NULL)
		return;

	if (ents->entries_array != NULL) {
		for (i = 0; i < ents->num_entries; i++) {
			struct _rar_unique_entry *ue = ents->entries_array[i];

			if (ue->entry.RedirName != NULL) {
				efree(ue->entry.RedirName);
			}
			efree(ue);
		}
		efree(ents->entries_array);
	}

	/* the sorted array only holds copies of the pointers freed above */
	if (ents->entries_array_s != NULL) {
		efree(ents->entries_array_s);
	}

	efree(ents);
	rar->entries = NULL;
}
329 /* }}} */
330
331 /* guarantees correct initialization of rar->entries on failure
332 * If the passed rar_file_t structure has the allow_broken option, it
333 * always returns success (ERAR_END_ARCHIVE) */
_rar_list_files(rar_file_t * rar TSRMLS_DC)334 int _rar_list_files(rar_file_t *rar TSRMLS_DC) /* {{{ */
335 {
336 int result = 0;
337 size_t capacity = 0;
338 int first_file_check = TRUE;
339 unsigned long packed_size = 0UL;
340 struct _rar_entries *ents;
341
342 if (rar->entries != NULL) {
343 /* we've already listed this file's entries */
344 if (rar->allow_broken)
345 return ERAR_END_ARCHIVE;
346 else
347 return rar->entries->list_result;
348 }
349
350 assert(rar->entries == NULL);
351 rar->entries = emalloc(sizeof *rar->entries);
352 ents = rar->entries;
353 ents->num_entries = 0;
354 ents->entries_array = NULL;
355 ents->entries_array_s = NULL;
356 ents->last_accessed = NULL;
357
358 while (result == 0) {
359 struct _rar_unique_entry *ue;
360 struct RARHeaderDataEx entry = {0};
361 wchar_t redir_name[1024] = L"";
362 entry.RedirName = redir_name;
363 entry.RedirNameSize = sizeof(redir_name) / sizeof(redir_name[0]);
364 result = RARReadHeaderEx(rar->arch_handle, &entry);
365 /* value of 2nd argument is irrelevant in RAR_OM_LIST_[SPLIT] mode */
366 if (result == 0) {
367 result = RARProcessFile(rar->arch_handle, RAR_SKIP, NULL, NULL);
368 }
369 if (result != 0)
370 break;
371
372 if (first_file_check) {
373 if (entry.Flags & RHDF_SPLITBEFORE)
374 continue;
375 else
376 first_file_check = FALSE;
377 }
378
379 /* reset packed size if not split before */
380 if ((entry.Flags & RHDF_SPLITBEFORE) == 0)
381 packed_size = 0UL;
382
383 /* we would exceed size of ulong. cap at ulong_max
384 * equivalent to packed_size + entry.PackSize > ULONG_MAX,
385 * but without overflowing */
386 if (ULONG_MAX - packed_size < entry.PackSize)
387 packed_size = ULONG_MAX;
388 else {
389 packed_size += entry.PackSize;
390 if (entry.PackSizeHigh != 0) {
391 #if ULONG_MAX > 0xffffffffUL
392 packed_size += ((unsigned long) entry.PackSizeHigh) << 32;
393 #else
394 packed_size = ULONG_MAX; /* cap */
395 #endif
396 }
397 }
398
399 if (entry.Flags & RHDF_SPLITAFTER) /* do not commit */
400 continue;
401
402 /* commit the entry */
403 assert(capacity >= ents->num_entries);
404 if (capacity == ents->num_entries) { /* 0, 2, 6, 14, 30... */
405 capacity = (capacity + 1) * 2;
406 ents->entries_array = safe_erealloc(ents->entries_array, capacity,
407 sizeof(*ents->entries_array), 0);
408 }
409 assert(capacity > ents->num_entries);
410
411 ents->entries_array[ents->num_entries] = ue =
412 emalloc(sizeof *ents->entries_array[0]);
413 memcpy(&ue->entry, &entry, sizeof ents->entries_array[0]->entry);
414 ue->id = ents->num_entries;
415 ue->packed_size = packed_size;
416 _rar_nav_get_depth_and_length(entry.FileNameW,
417 sizeof(entry.FileNameW) / sizeof(entry.FileNameW[0]), /* = 1024 */
418 &ue->depth, &ue->name_wlen TSRMLS_CC);
419 if (redir_name[0] != L'\0') {
420 size_t size = (wcslen(redir_name) + 1) * sizeof(redir_name[0]);
421 ue->entry.RedirName = emalloc(size);
422 memcpy(ue->entry.RedirName, redir_name, size);
423 } else {
424 ue->entry.RedirName = NULL;
425 ue->entry.RedirNameSize = 0;
426 }
427 ents->num_entries++;
428 }
429
430 rar->entries->list_result = result;
431
432 return rar->allow_broken ? ERAR_END_ARCHIVE : result;
433 }
434 /* }}} */
435
436 /* end functions with external linkage }}} */
437
438
439 /* {{{ Functions with internal linkage */
440
/* Counts the path separators of filenamew and measures its length,
 * normalizing the name in place where needed.
 *
 * filenamew  name buffer; may be modified (see below)
 * file_size  capacity of the buffer in wide characters, at least 1
 * depth_out  receives the number of separators left in the name
 * wlen_out   if not NULL, receives the length without the terminator
 *
 * A name without terminator is cut at the last buffer position (with a
 * warning); a trailing separator is removed. Both are unexpected inputs. */
static void _rar_nav_get_depth_and_length(wchar_t *filenamew, const size_t file_size,
	int *depth_out, size_t *wlen_out TSRMLS_DC) /* {{{ */
{
	size_t i;
	int depth = 0;

	assert(file_size >= 1);

	for (i = 0; i < file_size; i++) {
		if (filenamew[i] == L'\0')
			break;
		if (filenamew[i] == SPATHDIVIDER[0])
			depth++;
	}

	if (i == file_size) { /* should not happen */
		php_error_docref(NULL TSRMLS_CC, E_WARNING,
			"The library gave an unterminated file name. "
			"This is a bug, please report it.");
		i--;
		/* the character being dropped was counted above; take it back if
		 * it was a separator so that depth matches the remaining name */
		if (filenamew[i] == SPATHDIVIDER[0])
			depth--;
		filenamew[i] = L'\0';
	}

	if ((i >= 1) && (filenamew[i-1] == SPATHDIVIDER[0])) {
		/* entry name ended in path divider. shouldn't happen */
		i--;
		filenamew[i] = L'\0';
		depth--;
	}

	*depth_out = depth;
	if (wlen_out != NULL)
		*wlen_out = (size_t) i;
}
475 /* }}} */
476
_rar_nav_get_depth(const wchar_t * filenamew,const size_t file_size)477 static int _rar_nav_get_depth(const wchar_t *filenamew, const size_t file_size) /* {{{ */
478 {
479 size_t i;
480 int depth = 0;
481
482 for (i = 0; i < file_size; i++) {
483 if (filenamew[i] == L'\0')
484 break;
485 if (filenamew[i] == SPATHDIVIDER[0])
486 depth++;
487 }
488 assert(i < file_size);
489
490 return depth;
491 }
492 /* }}} */
493
/* Comparison callback handed to zend_qsort. op1 and op2 point at elements of
 * an array of entry pointers; the entries are ordered by depth, then name. */
static int _rar_nav_compare_entries(const void *op1, const void *op2 TSRMLS_DC) /* {{{ */
{
	const struct _rar_unique_entry *const *lhs =
		(const struct _rar_unique_entry *const *) op1;
	const struct _rar_unique_entry *const *rhs =
		(const struct _rar_unique_entry *const *) op2;
	const size_t name_capacity = sizeof((*lhs)->entry.FileNameW) /
		sizeof((*lhs)->entry.FileNameW[0]); /* 1024 */

	return _rar_nav_compare_values((*lhs)->entry.FileNameW, (*lhs)->depth,
		(*rhs)->entry.FileNameW, (*rhs)->depth, name_capacity);
}
503 /* }}} */
504
505 #if PHP_MAJOR_VERSION >= 7
/* Swap callback handed to zend_qsort: exchanges the two entry pointers that
 * op1 and op2 point at. The entries themselves are not touched. */
static void _rar_nav_swap_entries(void *op1, void *op2) /* {{{ */
{
	struct _rar_unique_entry **slot_a = (struct _rar_unique_entry **) op1;
	struct _rar_unique_entry **slot_b = (struct _rar_unique_entry **) op2;
	struct _rar_unique_entry *held = *slot_b;

	*slot_b = *slot_a;
	*slot_a = held;
}
517 /* }}} */
518 #endif
519
_rar_nav_compare_entries_std(const void * op1,const void * op2)520 static int _rar_nav_compare_entries_std(const void *op1, const void *op2) /* {{{ */
521 {
522 const struct _rar_unique_entry *a = *((struct _rar_unique_entry **) op1),
523 *b = *((struct _rar_unique_entry **) op2);
524
525 return _rar_nav_compare_values(a->entry.FileNameW, a->depth,
526 b->entry.FileNameW, b->depth,
527 sizeof(a->entry.FileNameW) / sizeof(a->entry.FileNameW[0]) /*1024*/);
528 }
529 /* }}} */
530
/* Orders two (name, depth) pairs: the smaller depth sorts first (result -1
 * or 1); for equal depths the result is that of wcsncmp over at most
 * max_size characters. */
static inline int _rar_nav_compare_values(const wchar_t *str1, const int depth1,
	const wchar_t *str2, const int depth2,
	const size_t max_size) /* {{{ */
{
	if (depth1 > depth2)
		return 1;
	if (depth1 < depth2)
		return -1;

	return wcsncmp(str1, str2, max_size);
}
542 /* }}} */
543
544 /* does not assume null termination */
_rar_nav_directory_match(const wchar_t * dir,const size_t dir_len,const wchar_t * entry,const size_t entry_len)545 static int _rar_nav_directory_match(const wchar_t *dir, const size_t dir_len,
546 const wchar_t *entry, const size_t entry_len) /* {{{ */
547 {
548 const wchar_t *chr,
549 *entry_rem;
550 size_t entry_rem_len;
551
552 /* dir does not end with the path separator */
553
554 if (dir_len > 0) {
555 if (entry_len <= dir_len) /* don't match the dir itself */
556 return FALSE;
557 /* assert(entry_len > dir_len > 0) */
558 if (wmemcmp(dir, entry, dir_len) != 0)
559 return FALSE;
560 /* directory name does not follow path sep or path sep ends the name */
561 if (entry[dir_len] != SPATHDIVIDER[0] || entry_len == dir_len + 1)
562 return FALSE;
563 /* assert(entry_len > dir_len + 1) */
564 entry_rem = &entry[dir_len + 1];
565 entry_rem_len = entry_len - (dir_len + 1);
566 }
567 else {
568 entry_rem = entry;
569 entry_rem_len = entry_len;
570 }
571
572 chr = wmemchr(entry_rem, SPATHDIVIDER[0], entry_rem_len);
573 /* must have no / after the directory */
574 return (chr == NULL);
575 }
576 /* }}} */
577
_rar_nav_position_on_dir_start(const wchar_t * dir_name,int dir_depth,size_t dir_size,struct _rar_unique_entry ** entries,size_t low,size_t high)578 static size_t _rar_nav_position_on_dir_start(const wchar_t *dir_name,
579 int dir_depth,
580 size_t dir_size,
581 struct _rar_unique_entry **entries,
582 size_t low, size_t high) /* {{{ */
583 {
584 size_t mid;
585 int comp;
586 size_t orig_high = high;
587
588 if (dir_size == 1) { /* root */
589 if (low >= high)
590 return -1;
591
592 if (entries[low]->depth == 0)
593 return low;
594 else
595 return -1;
596 }
597
598 while (low < high) {
599 mid = low + (high - low) / 2;
600 comp = _rar_nav_compare_values(dir_name, dir_depth + 1,
601 entries[mid]->entry.FileNameW, entries[mid]->depth,
602 dir_size);
603 if (comp > 0)
604 low = mid + 1;
605 else
606 high = mid;
607 }
608
609 if (low >= orig_high)
610 return -1;
611
612 if (_rar_nav_directory_match(dir_name, dir_size - 1,
613 entries[low]->entry.FileNameW, entries[low]->name_wlen))
614 return low;
615 else
616 return -1;
617 }
618 /* }}} */
619
620
621 /* end functions with internal linkage */
622
623 #endif /* HAVE_RAR */
624
625 #ifdef __cplusplus
626 }
627 #endif
628
629 /*
630 * Local variables:
631 * tab-width: 4
632 * c-basic-offset: 4
633 * End:
634 * vim600: noet sw=4 ts=4 fdm=marker
635 * vim<600: noet sw=4 ts=4
636 */
637
638
639