1 /*
2  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3  *  Copyright (C) 2007-2013 Sourcefire, Inc.
4  *
5  *  Authors: Nigel Horne
6  *
7  *  Summary: Extract files stored in TAR archives (an archive format, not a compression format).
8  *
9  *  Acknowledgements: ClamAV untar code is based on a public domain minitar utility
10  *                    by Charles G. Waldman.
11  *
12  *  This program is free software; you can redistribute it and/or modify
13  *  it under the terms of the GNU General Public License version 2 as
14  *  published by the Free Software Foundation.
15  *
16  *  This program is distributed in the hope that it will be useful,
17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  *  GNU General Public License for more details.
20  *
21  *  You should have received a copy of the GNU General Public License
22  *  along with this program; if not, write to the Free Software
23  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24  *  MA 02110-1301, USA.
25  */
26 
27 #if HAVE_CONFIG_H
28 #include "clamav-config.h"
29 #endif
30 
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h> /* for NAME_MAX */
#endif
42 
43 #include "clamav.h"
44 #include "others.h"
45 #include "untar.h"
46 #include "mbox.h"
47 #include "blob.h"
48 #include "scanners.h"
49 #include "matcher.h"
50 
/* Sizes and byte offsets of the tar header fields that the functions in this file read. */
51 #define TARHEADERSIZE 512 /* minimum number of bytes that must be read for a block to be parsed as a header */
52 /* BLOCKSIZE must be >= TARHEADERSIZE */
53 #define BLOCKSIZE TARHEADERSIZE /* number of bytes requested from the file map per read */
54 #define TARSIZEOFFSET 124 /* offset within the header of the octal text giving the member size */
55 #define TARSIZELEN 12 /* length in bytes of the size field */
56 #define TARCHECKSUMOFFSET 148 /* offset within the header of the octal text giving the header checksum */
57 #define TARCHECKSUMLEN 8 /* length in bytes of the checksum field */
58 #define TARFILETYPEOFFSET 156 /* offset within the header of the one-byte type flag */
59 
/**
 * Parse an octal number from the text of a tar header field.
 *
 * Leading whitespace is skipped and parsing stops at the first character
 * that is not an octal digit, as with the sscanf("%o") call this replaces.
 * Unlike that call, a value too large for an int is rejected rather than
 * being silently truncated (e.g. a 4 GiB size used to wrap to 0 on common
 * platforms).
 *
 * @param str NUL-terminated field text
 * @return the parsed value, or -1 if str is NULL, holds no octal digits,
 *         or encodes a value outside the range 0..INT_MAX
 */
static int
octal(const char *str)
{
    char *end = NULL;
    unsigned long value;

    if (str == NULL)
        return -1;

    value = strtoul(str, &end, 8);
    if (end == str)
        return -1; /* nothing was converted */

    /* Also catches strtoul()'s ULONG_MAX overflow result and negated input. */
    if (value > (unsigned long)INT_MAX)
        return -1;

    return (int)value;
}
69 
70 /**
71  * Retrieve checksum values from a tar header block.
72  * @param header Header data block, padded with zeroes to reach BLOCKSIZE
73  * @return int value of checksum, -1 (from octal()) if bad value
74  */
75 static int
getchecksum(const char * header)76 getchecksum(const char *header)
77 {
78     char ochecksum[TARCHECKSUMLEN + 1];
79     int checksum = -1;
80 
81     strncpy(ochecksum, header + TARCHECKSUMOFFSET, TARCHECKSUMLEN);
82     ochecksum[TARCHECKSUMLEN] = '\0';
83     checksum                  = octal(ochecksum);
84     return checksum;
85 }
86 
87 /**
88  * Calculate checksum values for tar header blocks.
89  * @param header Header data block, padded with zeroes to reach BLOCKSIZE
90  * @param targetsum Check value to match (as int not octal!)
91  * @return 0 if checksum matches target, -1 if not
92  */
93 static int
testchecksum(const char * header,int targetsum)94 testchecksum(const char *header, int targetsum)
95 {
96     const unsigned char *posix;
97     const signed char *legacy;
98     int posix_sum = 0, legacy_sum = 0;
99     int i;
100 
101     // targetsum -1 represents an error from octal()
102     if (targetsum == -1) {
103         return -1;
104     }
105 
106     /* Build checksums. POSIX is unsigned; some legacy tars use signed. */
107     posix  = (unsigned char *)header;
108     legacy = (signed char *)header;
109     for (i = 0; i < BLOCKSIZE; i++) {
110         if ((i >= TARCHECKSUMOFFSET) && (i < TARCHECKSUMOFFSET + TARCHECKSUMLEN)) {
111             /* Use ascii value of space in place of checksum value */
112             posix_sum += 32;
113             legacy_sum += 32;
114         } else {
115             posix_sum += posix[i];
116             legacy_sum += legacy[i];
117         }
118     }
119 
120     if ((targetsum == posix_sum) || (targetsum == legacy_sum)) {
121         return 0;
122     }
123     return -1;
124 }
125 
cli_untar(const char * dir,unsigned int posix,cli_ctx * ctx)126 cl_error_t cli_untar(const char *dir, unsigned int posix, cli_ctx *ctx)
127 {
128     cl_error_t ret;
129     size_t size         = 0;
130     int size_int        = 0;
131     int fout            = -1;
132     int in_block        = 0;
133     int last_header_bad = 0;
134     int limitnear       = 0;
135     unsigned int files  = 0;
136     char fullname[PATH_MAX + 1];
137     char name[101];
138     size_t pos      = 0;
139     size_t currsize = 0;
140     char zero[BLOCKSIZE];
141     unsigned int num_viruses = 0;
142 
143     cli_dbgmsg("In untar(%s)\n", dir);
144     memset(zero, 0, sizeof(zero));
145 
146     for (;;) {
147         const char *block;
148         size_t nread;
149 
150         block = fmap_need_off_once_len(ctx->fmap, pos, BLOCKSIZE, &nread);
151         cli_dbgmsg("cli_untar: pos = %lu\n", (unsigned long)pos);
152 
153         if (!in_block && !nread)
154             break;
155 
156         if (!nread)
157             block = zero;
158 
159         if (!block) {
160             if (fout >= 0)
161                 close(fout);
162             cli_errmsg("cli_untar: block read error\n");
163             return CL_EREAD;
164         }
165         pos += nread;
166 
167         if (!in_block) {
168             char type;
169             int directory, skipEntry = 0;
170             int checksum = -1;
171             char magic[7], osize[TARSIZELEN + 1];
172             currsize = 0;
173 
174             if (fout >= 0) {
175                 lseek(fout, 0, SEEK_SET);
176                 ret = cli_magic_scan_desc(fout, fullname, ctx, name);
177                 close(fout);
178                 if (!ctx->engine->keeptmp)
179                     if (cli_unlink(fullname)) return CL_EUNLINK;
180                 if (ret == CL_VIRUS) {
181                     if (!SCAN_ALLMATCHES)
182                         return CL_VIRUS;
183                     else
184                         num_viruses++;
185                 }
186                 fout = -1;
187             }
188 
189             if (block[0] == '\0') /* We're done */
190                 break;
191             if ((ret = cli_checklimits("cli_untar", ctx, 0, 0, 0)) != CL_CLEAN)
192                 return ret;
193 
194             if (nread < TARHEADERSIZE) {
195                 return CL_CLEAN;
196             }
197 
198             checksum = getchecksum(block);
199             cli_dbgmsg("cli_untar: Candidate checksum = %d, [%o in octal]\n", checksum, checksum);
200             if (testchecksum(block, checksum) != 0) {
201                 // If checksum is bad, dump and look for next header block
202                 cli_dbgmsg("cli_untar: Invalid checksum in tar header. Skip to next...\n");
203                 if (last_header_bad == 0) {
204                     last_header_bad++;
205                     cli_dbgmsg("cli_untar: Invalid checksum found inside archive!\n");
206                 }
207                 continue;
208             } else {
209                 last_header_bad = 0;
210                 cli_dbgmsg("cli_untar: Checksum %d is valid.\n", checksum);
211             }
212 
213             if (posix) {
214                 strncpy(magic, block + 257, 5);
215                 magic[5] = '\0';
216                 if (strcmp(magic, "ustar") != 0) {
217                     cli_dbgmsg("cli_untar: Incorrect magic string '%s' in tar header\n", magic);
218                     return CL_EFORMAT;
219                 }
220             }
221 
222             type = block[TARFILETYPEOFFSET];
223 
224             switch (type) {
225                 default:
226                     cli_dbgmsg("cli_untar: unknown type flag %c\n", type);
227                     /* fall-through */
228                 case '0':  /* plain file */
229                 case '\0': /* plain file */
230                 case '7':  /* contiguous file */
231                 case 'M':  /* continuation of a file from another volume; might as well scan it. */
232                     files++;
233                     directory = 0;
234                     break;
235                 case '1': /* Link to already archived file */
236                 case '5': /* directory */
237                 case '2': /* sym link */
238                 case '3': /* char device */
239                 case '4': /* block device */
240                 case '6': /* fifo special */
241                 case 'V': /* Volume header */
242                     directory = 1;
243                     break;
244                 case 'K':
245                 case 'L':
246                     /* GNU extension - ././@LongLink
247 					 * Discard the blocks with the extended filename,
248 					 * the last header will contain parts of it anyway
249 					 */
250                 case 'N': /* Old GNU format way of storing long filenames. */
251                 case 'A': /* Solaris ACL */
252                 case 'E': /* Solaris Extended attribute s*/
253                 case 'I': /* Inode only */
254                 case 'g': /* Global extended header */
255                 case 'x': /* Extended attributes */
256                 case 'X': /* Extended attributes (POSIX) */
257                     directory = 0;
258                     skipEntry = 1;
259                     break;
260             }
261 
262             if (directory) {
263                 in_block = 0;
264                 continue;
265             }
266 
267             strncpy(osize, block + TARSIZEOFFSET, TARSIZELEN);
268             osize[TARSIZELEN] = '\0';
269             size_int          = octal(osize);
270             if (size_int < 0) {
271                 cli_dbgmsg("cli_untar: Invalid size in tar header\n");
272                 skipEntry++;
273             } else {
274                 size = (size_t)size_int;
275                 cli_dbgmsg("cli_untar: size = %zu\n", size);
276                 ret = cli_checklimits("cli_untar", ctx, size, 0, 0);
277                 switch (ret) {
278                     case CL_EMAXFILES: // Scan no more files
279                         skipEntry++;
280                         limitnear = 0;
281                         break;
282                     case CL_EMAXSIZE: // Either single file limit or total byte limit would be exceeded
283                         cli_dbgmsg("cli_untar: would exceed limit, will try up to max");
284                         limitnear = 1;
285                         break;
286                     default: // Ok based on reported content size
287                         limitnear = 0;
288                         break;
289                 }
290             }
291 
292             if (skipEntry) {
293                 const int nskip = (size % BLOCKSIZE || !size) ? size + BLOCKSIZE - (size % BLOCKSIZE) : size;
294 
295                 if (nskip < 0) {
296                     cli_dbgmsg("cli_untar: got negative skip size, giving up\n");
297                     return CL_CLEAN;
298                 }
299                 cli_dbgmsg("cli_untar: skipping entry\n");
300                 pos += nskip;
301                 continue;
302             }
303 
304             strncpy(name, block, 100);
305             name[100] = '\0';
306             if (cli_matchmeta(ctx, name, size, size, 0, files, 0, NULL) == CL_VIRUS) {
307                 if (!SCAN_ALLMATCHES)
308                     return CL_VIRUS;
309                 else
310                     num_viruses++;
311             }
312 
313             snprintf(fullname, sizeof(fullname) - 1, "%s" PATHSEP "tar%02u", dir, files);
314             fullname[sizeof(fullname) - 1] = '\0';
315             fout                           = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);
316 
317             if (fout < 0) {
318                 char err[128];
319                 cli_errmsg("cli_untar: Can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
320                 return CL_ETMPFILE;
321             }
322 
323             cli_dbgmsg("cli_untar: extracting to %s\n", fullname);
324 
325             in_block = 1;
326         } else { /* write or continue writing file contents */
327             size_t nbytes, nwritten;
328             int skipwrite = 0;
329             char err[128];
330 
331             nbytes = (size > 512) ? 512 : size;
332             if (nread && (nread < nbytes))
333                 nbytes = nread;
334 
335             if (limitnear > 0) {
336                 currsize += nbytes;
337                 cli_dbgmsg("cli_untar: Approaching limit...\n");
338                 if (cli_checklimits("cli_untar", ctx, (unsigned long)currsize, 0, 0) != CL_SUCCESS) {
339                     // Limit would be exceeded by this file, suppress writing beyond limit
340                     // Need to keep reading to get to end of file chunk
341                     skipwrite++;
342                 }
343             }
344 
345             if (skipwrite == 0) {
346                 nwritten = cli_writen(fout, block, nbytes);
347 
348                 if (nwritten != nbytes) {
349                     cli_errmsg("cli_untar: only wrote %zu bytes to file %s (out of disc space?): %s\n",
350                                nwritten, fullname, cli_strerror(errno, err, sizeof(err)));
351                     close(fout);
352                     return CL_EWRITE;
353                 }
354             }
355             if (nbytes > size) {
356                 cli_warnmsg("cli_untar: More bytes written than requested!\n");
357                 size = 0;
358             } else {
359                 size -= nbytes;
360             }
361             if ((size != 0) && (nread == 0)) {
362                 // Truncated tar file, so end file content like tar behavior
363                 cli_dbgmsg("cli_untar: No bytes read! Forcing end of file content.\n");
364                 size = 0;
365             }
366         }
367         if (size == 0)
368             in_block = 0;
369     }
370     if (fout >= 0) {
371         lseek(fout, 0, SEEK_SET);
372         ret = cli_magic_scan_desc(fout, fullname, ctx, name);
373         close(fout);
374         if (!ctx->engine->keeptmp)
375             if (cli_unlink(fullname)) return CL_EUNLINK;
376         if (ret == CL_VIRUS)
377             return CL_VIRUS;
378     }
379     if (num_viruses)
380         return CL_VIRUS;
381     return CL_CLEAN;
382 }
383