1 /*
2     Copyright (C) 2000 Masanao Izumo <mo@goice.co.jp>
3 
4     This program is free software; you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation; either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 
19 #include "config.h"
20 #include <stdio.h>
21 #include <stdlib.h>
22 #ifndef NO_STRING_H
23 #include <string.h>
24 #else
25 #include <strings.h>
26 #endif
27 #include "libarc/mblock.h"
28 #include "zip.h"
29 #include "libarc/arc.h"
30 
/*
 * safe_strdup() comes from the build when HAVE_SAFE_MALLOC is defined;
 * otherwise it is simply an alias for strdup().
 */
#ifndef HAVE_SAFE_MALLOC
#define safe_strdup strdup
#else
extern char *safe_strdup(char *);
#endif /* !HAVE_SAFE_MALLOC */

/*
 * Maximum number of lines examined while looking for a uuencode
 * "begin" line or an HQX banner at the start of a part.
 */
#if !defined(MAX_CHECK_LINES)
#define MAX_CHECK_LINES 1024
#endif /* !MAX_CHECK_LINES */
40 
41 struct StringStackElem
42 {
43     struct StringStackElem *next;
44     char str[1];		/* variable length */
45 };
46 
47 struct StringStack
48 {
49     struct StringStackElem *elem;
50     MBlockList pool;
51 };
52 
/* String stack: set up / tear down. */
static void init_string_stack(struct StringStack *stk);
static void delete_string_stack(struct StringStack *stk);

/* String stack: push a copy of str[0..len-1], peek at the top, drop the top. */
static void push_string_stack(struct StringStack *stk, char *str, int len);
static char *top_string_stack(struct StringStack *stk);
static void pop_string_stack(struct StringStack *stk);
58 
59 struct MIMEHeaderStream
60 {
61     URL url;
62     char *field;
63     char *value;
64     char *line;
65     int bufflen;
66     int eof;
67     MBlockList pool;
68 };
69 
70 static void init_mime_stream(struct MIMEHeaderStream *hdr, URL url);
71 static int  next_mime_header(struct MIMEHeaderStream *hdr);
72 static void end_mime_stream(struct MIMEHeaderStream *hdr);
73 static int seek_next_boundary(URL url, char *boundary, long *endpoint);
74 static int whole_read_line(URL url, char *buff, int bufsiz);
75 static void *arc_mime_decode(void *data, long size,
76 			     int comptype, long *newsize);
77 
next_mime_entry(void)78 ArchiveEntryNode *next_mime_entry(void)
79 {
80     ArchiveEntryNode *head, *tail;
81     URL url;
82     int part;
83     struct StringStack boundary;
84     struct MIMEHeaderStream hdr;
85     int c;
86 
87     if(arc_handler.counter != 0)
88 	return NULL;
89 
90     head = tail = NULL;
91     url = arc_handler.url; /* url_seek must be safety */
92 
93     init_string_stack(&boundary);
94     url_rewind(url);
95     c = url_getc(url);
96     if(c != '\0')
97 	url_rewind(url);
98     else
99 	url_skip(url, 128-1);	/* skip macbin header */
100 
101     part = 1;
102     for(;;)
103     {
104 	char *new_boundary, *encoding, *name, *filename;
105 	char *p;
106 	MBlockList pool;
107 	long data_start, data_end, savepoint;
108 	int last_check, comptype, arctype;
109 	void *part_data;
110 	long part_data_size;
111 
112 	new_boundary = encoding = name = filename = NULL;
113 	init_mblock(&pool);
114 	init_mime_stream(&hdr, url);
115 	while(next_mime_header(&hdr))
116 	{
117 	    if(strncmp(hdr.field, "Content-", 8) != 0)
118 		continue;
119 	    if(strcmp(hdr.field + 8, "Type") == 0)
120 	    {
121 		if((p = strchr(hdr.value, ';')) == NULL)
122 		    continue;
123 		*p++ = '\0';
124 		while(*p == ' ')
125 		    p++;
126 		if(strncasecmp(hdr.value, "multipart/mixed", 15) == 0)
127 		{
128 		    /* Content-Type: multipart/mixed; boundary="XXXX" */
129 		    if(strncasecmp(p, "boundary=", 9) == 0)
130 		    {
131 			p += 9;
132 			if(*p == '"')
133 			{
134 			    p++;
135 			    new_boundary = p;
136 			    if((p = strchr(p, '"')) == NULL)
137 				continue;
138 			}
139 			else
140 			{
141 			    new_boundary = p;
142 			    while(*p > '"' && *p < 0x7f)
143 				p++;
144 			}
145 
146 			*p = '\0';
147 			new_boundary = strdup_mblock(&pool, new_boundary);
148 		    }
149 		}
150 		else if(strcasecmp(hdr.value, "multipart/mixed") == 0)
151 		{
152 		    /* Content-Type: XXXX/YYYY; name="ZZZZ" */
153 		    if(strncasecmp(p, "name=\"", 6) == 0)
154 		    {
155 			p += 6;
156 			name = p;
157 			if((p = strchr(p, '"')) == NULL)
158 			    continue;
159 			*p = '\0';
160 			name = strdup_mblock(&pool, name);
161 		    }
162 		}
163 	    }
164 	    else if(strcmp(hdr.field + 8, "Disposition") == 0)
165 	    {
166 		if((p = strchr(hdr.value, ';')) == NULL)
167 		    continue;
168 		*p++ = '\0';
169 		while(*p == ' ')
170 		    p++;
171 		if((p = strstr(p, "filename=\"")) == NULL)
172 		    continue;
173 		p += 10;
174 		filename = p;
175 		if((p = strchr(p, '"')) == NULL)
176 		    continue;
177 		*p = '\0';
178 		filename = strdup_mblock(&pool, filename);
179 	    }
180 	    else if(strcmp(hdr.field + 8, "Transfer-Encoding") == 0)
181 	    {
182 		/* Content-Transfer-Encoding: X */
183 		/* X := X-uuencode, base64, quoted-printable, ... */
184 		encoding = strdup_mblock(&pool, hdr.value);
185 	    }
186 	}
187 
188 	if(hdr.eof)
189 	{
190 	    reuse_mblock(&pool);
191 	    end_mime_stream(&hdr);
192 	    delete_string_stack(&boundary);
193 	    return head;
194 	}
195 
196 	if(filename == NULL)
197 	    filename = name;
198 
199 	if(new_boundary)
200 	    push_string_stack(&boundary, new_boundary, strlen(new_boundary));
201 
202 	data_start = url_tell(url);
203 	last_check = seek_next_boundary(url, top_string_stack(&boundary),
204 					&data_end);
205 
206 	savepoint = url_tell(url);
207 
208 	/* find data type */
209 	comptype = -1;
210 	if(encoding != NULL)
211 	{
212 	    if(strcmp("base64", encoding) == 0)
213 		comptype = ARCHIVEC_B64;
214 	    else if(strcmp("quoted-printable", encoding) == 0)
215 		comptype = ARCHIVEC_QS;
216 	    else if(strcmp("X-uuencode", encoding) == 0)
217 	    {
218 		char buff[BUFSIZ];
219 		int i;
220 
221 		comptype = ARCHIVEC_UU;
222 		url_seek(url, data_start, SEEK_SET);
223 		url_set_readlimit(url, data_end - data_start);
224 
225 		/* find '^begin \d\d\d \S+' */
226 		for(i = 0; i < MAX_CHECK_LINES; i++)
227 		{
228 		    if(whole_read_line(url, buff, sizeof(buff)) == -1)
229 			break; /* ?? */
230 		    if(strncmp(buff, "begin ", 6) == 0)
231 		    {
232 			data_start = url_tell(url);
233 			p = strchr(buff + 6, ' ');
234 			if(p != NULL)
235 			    filename = strdup_mblock(&pool, p + 1);
236 			break;
237 		    }
238 		}
239 		url_set_readlimit(url, -1);
240 	    }
241 	}
242 
243 	if(comptype == -1)
244 	{
245 	    char buff[BUFSIZ];
246 	    int i;
247 
248 	    url_seek(url, data_start, SEEK_SET);
249 	    url_set_readlimit(url, data_end - data_start);
250 
251 	    for(i = 0; i < MAX_CHECK_LINES; i++)
252 	    {
253 		if(whole_read_line(url, buff, sizeof(buff)) == -1)
254 		    break; /* ?? */
255 		if(strncmp(buff, "begin ", 6) == 0)
256 		{
257 		    comptype = ARCHIVEC_UU;
258 		    data_start = url_tell(url);
259 		    p = strchr(buff + 6, ' ');
260 		    if(p != NULL)
261 			filename = strdup_mblock(&pool, p + 1);
262 		    break;
263 		}
264 		else if((strncmp(buff, "(This file", 10) == 0) ||
265 			(strncmp(buff, "(Convert with", 13) == 0))
266 		{
267 		    int c;
268 		    while((c = url_getc(url)) != EOF)
269 		    {
270 			if(c == ':')
271 			{
272 			    comptype = ARCHIVEC_HQX;
273 			    data_start = url_tell(url);
274 			    break;
275 			}
276 			else if(c == '\n')
277 			{
278 			    if(++i >= MAX_CHECK_LINES)
279 				break;
280 			}
281 		    }
282 		    if(comptype != -1)
283 			break;
284 		}
285 	    }
286 	    url_set_readlimit(url, -1);
287 	}
288 
289 	if(comptype == -1)
290 	    comptype = ARCHIVEC_STORED;
291 
292 	if(filename == NULL)
293 	{
294 	    char buff[32];
295 	    sprintf(buff, "part%d", part);
296 	    filename = strdup_mblock(&pool, buff);
297 	    arctype = -1;
298 	}
299 	else
300 	{
301 	    arctype = get_archive_type(filename);
302 	    switch(arctype)
303 	      {
304 	      case ARCHIVE_TAR:
305 	      case ARCHIVE_TGZ:
306 	      case ARCHIVE_ZIP:
307 	      case ARCHIVE_LZH:
308 		break;
309 	      default:
310 		arctype = -1;
311 		break;
312 	      }
313 	}
314 
315 	if(data_start == data_end)
316 	  {
317 	    ArchiveEntryNode *entry;
318 	    entry = new_entry_node(filename, strlen(filename));
319 	    entry->comptype = ARCHIVEC_STORED;
320 	    entry->compsize = 0;
321 	    entry->origsize = 0;
322 	    entry->start = 0;
323 	    entry->cache = safe_strdup("");
324 	    if(head == NULL)
325 		head = tail = entry;
326 	    else
327 		tail = tail->next = entry;
328 	    goto next_entry;
329 	  }
330 
331 	url_seek(url, data_start, SEEK_SET);
332 	part_data = url_dump(url, data_end - data_start, &part_data_size);
333 	part_data = arc_mime_decode(part_data, part_data_size,
334 				    comptype, &part_data_size);
335 	if(part_data == NULL)
336 	  goto next_entry;
337 
338 	if(arctype == -1)
339 	{
340 	  int gzmethod, gzhdrsiz, len, gz;
341 	  ArchiveEntryNode *entry;
342 
343 	  len = strlen(filename);
344 	  if(len >= 3 && strcasecmp(filename + len - 3, ".gz") == 0)
345 	    {
346 	      gz = 1;
347 	      filename[len - 3] = '\0';
348 	    }
349 	  else
350 	    gz = 0;
351 	  entry = new_entry_node(filename, strlen(filename));
352 
353 	  if(gz)
354 	    gzmethod = parse_gzip_header_bytes(part_data, part_data_size,
355 					       &gzhdrsiz);
356 	  else
357 	    gzmethod = -1;
358 	  if(gzmethod == ARCHIVEC_DEFLATED)
359 	    {
360 	      entry->comptype = ARCHIVEC_DEFLATED;
361 	      entry->compsize = part_data_size - gzhdrsiz;
362 	      entry->origsize = -1;
363 	      entry->start = gzhdrsiz;
364 	      entry->cache = part_data;
365 	    }
366 	  else
367 	    {
368 	      entry->comptype = ARCHIVEC_DEFLATED;
369 	      entry->origsize = part_data_size;
370 	      entry->start = 0;
371 	      entry->cache = arc_compress(part_data, part_data_size,
372 					 ARC_DEFLATE_LEVEL, &entry->compsize);
373 	      free(part_data);
374 	      if(entry->cache == NULL)
375 		{
376 		  free_entry_node(entry);
377 		  goto next_entry;
378 		}
379 	    }
380 	    if(head == NULL)
381 		head = tail = entry;
382 	    else
383 		tail = tail->next = entry;
384 	}
385 	else
386 	{
387 	    URL arcurl;
388 	    ArchiveEntryNode *entry;
389 	    ArchiveHandler orig;
390 
391 	    arcurl = url_mem_open(part_data, part_data_size, 1);
392 	    orig = arc_handler; /* save */
393 	    entry = arc_parse_entry(arcurl, arctype);
394 	    arc_handler = orig; /* restore */
395 	    if(head == NULL)
396 		head = tail = entry;
397 	    else
398 		tail = tail->next = entry;
399 	    while(tail->next)
400 	      tail = tail->next;
401 	}
402 
403       next_entry:
404 	url_seek(url, savepoint, SEEK_SET);
405 	part++;
406 	reuse_mblock(&pool);
407 	end_mime_stream(&hdr);
408 
409 	if(last_check)
410 	{
411 	    pop_string_stack(&boundary);
412 	    if(top_string_stack(&boundary) == NULL)
413 		break;
414 	}
415     }
416     delete_string_stack(&boundary);
417     return head;
418 }
419 
init_string_stack(struct StringStack * stk)420 static void init_string_stack(struct StringStack *stk)
421 {
422     stk->elem = NULL;
423     init_mblock(&stk->pool);
424 }
425 
push_string_stack(struct StringStack * stk,char * str,int len)426 static void push_string_stack(struct StringStack *stk, char *str, int len)
427 {
428     struct StringStackElem *elem;
429 
430     elem = (struct StringStackElem *)
431 	new_segment(&stk->pool, sizeof(struct StringStackElem) + len + 1);
432     memcpy(elem->str, str, len);
433     elem->str[len] = '\0';
434     elem->next = stk->elem;
435     stk->elem = elem;
436 }
437 
top_string_stack(struct StringStack * stk)438 static char *top_string_stack(struct StringStack *stk)
439 {
440     if(stk->elem == NULL)
441 	return NULL;
442     return stk->elem->str;
443 }
444 
pop_string_stack(struct StringStack * stk)445 static void pop_string_stack(struct StringStack *stk)
446 {
447     if(stk->elem == NULL)
448 	return;
449     stk->elem = stk->elem->next;
450 }
451 
delete_string_stack(struct StringStack * stk)452 static void delete_string_stack(struct StringStack *stk)
453 {
454     reuse_mblock(&stk->pool);
455 }
456 
/*
 * Prepare HDR for reading headers from URL: no buffers allocated yet,
 * end-of-stream flag cleared, fresh memory pool.
 */
static void init_mime_stream(struct MIMEHeaderStream *hdr, URL url)
{
    init_mblock(&hdr->pool);
    hdr->eof = 0;
    hdr->line = NULL;
    hdr->value = NULL;
    hdr->field = NULL;
    hdr->url = url;
}
464 
/*
 * Read one line from URL into BUFF (capacity BUFSIZ bytes) and strip
 * the trailing "\n" or "\r\n".  When the line does not fit, the part
 * that fits is kept and the remainder, up to and including the next
 * newline or end of stream, is consumed and thrown away.
 *
 * Returns the length of the text left in BUFF, or -1 when url_gets()
 * could not read anything.
 */
static int whole_read_line(URL url, char *buff, int bufsiz)
{
    int n;

    if(url_gets(url, buff, bufsiz) == NULL)
        return -1;

    n = strlen(buff);
    if(n == 0)
        return 0;

    if(buff[n - 1] != '\n')
    {
        /* no newline in the buffer: discard the rest of the line */
        for(;;)
        {
            int ch = url_getc(url);

            if(ch == EOF || ch == '\n')
                break;
        }
        return n;
    }

    buff[--n] = '\0';
    if(n > 0 && buff[n - 1] == '\r')
        buff[--n] = '\0';
    return n;
}
492 
next_mime_header(struct MIMEHeaderStream * hdr)493 static int next_mime_header(struct MIMEHeaderStream *hdr)
494 {
495     int len, c, n;
496     char *p;
497 
498     if(hdr->eof)
499 	return 0;
500 
501     if(hdr->line == NULL)
502     {
503 	hdr->line = (char *)new_segment(&hdr->pool, MIN_MBLOCK_SIZE);
504 	len = whole_read_line(hdr->url, hdr->line, MIN_MBLOCK_SIZE);
505 	if(len <= 0)
506 	{
507 	    if(len == -1)
508 		hdr->eof = 1;
509 	    return 0;
510 	}
511 	hdr->field = (char *)new_segment(&hdr->pool, MIN_MBLOCK_SIZE);
512 	hdr->bufflen = 0;
513     }
514 
515     if((hdr->bufflen = strlen(hdr->line)) == 0)
516 	return 0;
517 
518     memcpy(hdr->field, hdr->line, hdr->bufflen);
519     hdr->field[hdr->bufflen] = '\0';
520 
521     for(;;)
522     {
523 	len = whole_read_line(hdr->url, hdr->line, MIN_MBLOCK_SIZE);
524 	if(len <= 0)
525 	{
526 	    if(len == -1)
527 		hdr->eof = 1;
528 	    break;
529 	}
530 	c = *hdr->line;
531 	if(c == '>' || ('A' <= c && c <= 'Z') ||  ('a' <= c && c <= 'z'))
532 	    break;
533 	if(c != ' ' && c != '\t')
534 	    return 0; /* ?? */
535 
536 	n = MIN_MBLOCK_SIZE - 1 - hdr->bufflen;
537 	if(n > 0)
538 	{
539 	    int i;
540 
541 	    if(len > n)
542 		len = n;
543 
544 	    /* s/\t/ /g; */
545 	    p = hdr->line;
546 	    for(i = 0; i < len; i++)
547 		if(p[i] == '\t')
548 		    p[i] = ' ';
549 
550 	    memcpy(hdr->field + hdr->bufflen, p, len);
551 	    hdr->bufflen += len;
552 	    hdr->field[hdr->bufflen] = '\0';
553 	}
554     }
555     p = hdr->field;
556     while(*p && *p != ':')
557 	p++;
558     if(!*p)
559 	return 0;
560     *p++ = '\0';
561     while(*p && *p == ' ')
562 	p++;
563     hdr->value = p;
564     return 1;
565 }
566 
end_mime_stream(struct MIMEHeaderStream * hdr)567 static void end_mime_stream(struct MIMEHeaderStream *hdr)
568 {
569     reuse_mblock(&hdr->pool);
570 }
571 
/*
 * Advance URL to just past the next line that starts with "--BOUNDARY".
 *
 * *ENDPOINT receives the stream offset at which that line starts, or
 * the offset reached when the stream ends first.  With a NULL BOUNDARY
 * the stream is moved to its end and *ENDPOINT is that position.
 *
 * Returns 1 when the matched line continues with "--" right after the
 * boundary text (the closing delimiter), 0 in every other case.
 */
static int seek_next_boundary(URL url, char *boundary, long *endpoint)
{
    MBlockList pool;
    char *line;
    int blen, len, is_last;

    if(boundary == NULL)
    {
        url_seek(url, 0, SEEK_END);
        *endpoint = url_tell(url);
        return 0;
    }

    init_mblock(&pool);
    line = (char *)new_segment(&pool, MIN_MBLOCK_SIZE);
    blen = strlen(boundary);
    is_last = 0;

    for(;;)
    {
        /* remember where this line starts before consuming it */
        *endpoint = url_tell(url);
        len = whole_read_line(url, line, MIN_MBLOCK_SIZE);
        if(len < 0)
            break;

        if(len >= blen + 2 &&
           line[0] == '-' && line[1] == '-' &&
           strncmp(line + 2, boundary, blen) == 0)
        {
            if(line[blen + 2] == '-' && line[blen + 3] == '-')
                is_last = 1;
            break;
        }
    }

    reuse_mblock(&pool);
    return is_last;
}
610 
arc_mime_decode(void * data,long size,int comptype,long * newsize)611 static void *arc_mime_decode(void *data, long size,
612 			     int comptype, long *newsize)
613 {
614   URL url;
615 
616   if(comptype == ARCHIVEC_STORED)
617     return data;
618 
619   if(data == NULL)
620     return NULL;
621 
622   if((url = url_mem_open(data, size, 1)) == NULL)
623     return NULL;
624 
625   switch(comptype)
626     {
627     case ARCHIVEC_UU:		/* uu encoded */
628       url = url_uudecode_open(url, 1);
629       break;
630     case ARCHIVEC_B64:		/* base64 encoded */
631       url = url_b64decode_open(url, 1);
632       break;
633     case ARCHIVEC_QS:		/* quoted string encoded */
634       url = url_hqxdecode_open(url, 1, 1);
635       break;
636     case ARCHIVEC_HQX:		/* HQX encoded */
637       url = url_qsdecode_open(url, 1);
638       break;
639     default:
640       url_close(url);
641       return NULL;
642     }
643   data = url_dump(url, -1, newsize);
644   url_close(url);
645   return data;
646 }
647