1 /*
2 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 * Copyright (C) 2007-2013 Sourcefire, Inc.
4 *
5 * Authors: Nigel Horne
6 *
7 * Summary: Extract files compressed with TAR compression format.
8 *
9 * Acknowledgements: ClamAV untar code is based on a public domain minitar utility
10 * by Charles G. Waldman.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 * MA 02110-1301, USA.
25 */
26
27 #if HAVE_CONFIG_H
28 #include "clamav-config.h"
29 #endif
30
#include <stdio.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h> /* for NAME_MAX */
#endif
42
43 #include "clamav.h"
44 #include "others.h"
45 #include "untar.h"
46 #include "mbox.h"
47 #include "blob.h"
48 #include "scanners.h"
49 #include "matcher.h"
50
51 #define TARHEADERSIZE 512 /* bytes in one tar header block */
52 /* BLOCKSIZE must be >= TARHEADERSIZE */
53 #define BLOCKSIZE TARHEADERSIZE /* unit in which cli_untar() reads the archive */
54 #define TARSIZEOFFSET 124 /* byte offset of the octal size field within a header */
55 #define TARSIZELEN 12 /* length in bytes of the size field */
56 #define TARCHECKSUMOFFSET 148 /* byte offset of the octal checksum field within a header */
57 #define TARCHECKSUMLEN 8 /* length in bytes of the checksum field */
58 #define TARFILETYPEOFFSET 156 /* byte offset of the one-character entry type flag */
59
/**
 * Parse an unsigned octal number from the start of a NUL-terminated string.
 *
 * Leading white space is skipped and parsing stops at the first character
 * that is not an octal digit, so fields terminated by a space or a NUL are
 * both accepted.
 *
 * @param str NUL-terminated text to parse
 * @return the parsed value, or -1 if no digits were found or the value does
 *         not fit in a non-negative int (including overflow and inputs with
 *         a leading minus sign)
 */
static int
octal(const char *str)
{
    char *end = NULL;
    unsigned long value;

    errno = 0;
    value = strtoul(str, &end, 8);

    /* No conversion performed: empty string or no leading octal digit. */
    if (end == str)
        return -1;

    /*
     * Reject anything an int cannot represent rather than letting it wrap:
     * a 12 character tar size field can encode values far above INT_MAX.
     */
    if (errno == ERANGE || value > (unsigned long)INT_MAX)
        return -1;

    return (int)value;
}
69
70 /**
71 * Retrieve checksum values from a tar header block.
72 * @param header Header data block, padded with zeroes to reach BLOCKSIZE
73 * @return int value of checksum, -1 (from octal()) if bad value
74 */
75 static int
getchecksum(const char * header)76 getchecksum(const char *header)
77 {
78 char ochecksum[TARCHECKSUMLEN + 1];
79 int checksum = -1;
80
81 strncpy(ochecksum, header + TARCHECKSUMOFFSET, TARCHECKSUMLEN);
82 ochecksum[TARCHECKSUMLEN] = '\0';
83 checksum = octal(ochecksum);
84 return checksum;
85 }
86
87 /**
88 * Calculate checksum values for tar header blocks.
89 * @param header Header data block, padded with zeroes to reach BLOCKSIZE
90 * @param targetsum Check value to match (as int not octal!)
91 * @return 0 if checksum matches target, -1 if not
92 */
93 static int
testchecksum(const char * header,int targetsum)94 testchecksum(const char *header, int targetsum)
95 {
96 const unsigned char *posix;
97 const signed char *legacy;
98 int posix_sum = 0, legacy_sum = 0;
99 int i;
100
101 // targetsum -1 represents an error from octal()
102 if (targetsum == -1) {
103 return -1;
104 }
105
106 /* Build checksums. POSIX is unsigned; some legacy tars use signed. */
107 posix = (unsigned char *)header;
108 legacy = (signed char *)header;
109 for (i = 0; i < BLOCKSIZE; i++) {
110 if ((i >= TARCHECKSUMOFFSET) && (i < TARCHECKSUMOFFSET + TARCHECKSUMLEN)) {
111 /* Use ascii value of space in place of checksum value */
112 posix_sum += 32;
113 legacy_sum += 32;
114 } else {
115 posix_sum += posix[i];
116 legacy_sum += legacy[i];
117 }
118 }
119
120 if ((targetsum == posix_sum) || (targetsum == legacy_sum)) {
121 return 0;
122 }
123 return -1;
124 }
125
cli_untar(const char * dir,unsigned int posix,cli_ctx * ctx)126 cl_error_t cli_untar(const char *dir, unsigned int posix, cli_ctx *ctx)
127 {
128 cl_error_t ret;
129 size_t size = 0;
130 int size_int = 0;
131 int fout = -1;
132 int in_block = 0;
133 int last_header_bad = 0;
134 int limitnear = 0;
135 unsigned int files = 0;
136 char fullname[PATH_MAX + 1];
137 char name[101];
138 size_t pos = 0;
139 size_t currsize = 0;
140 char zero[BLOCKSIZE];
141 unsigned int num_viruses = 0;
142
143 cli_dbgmsg("In untar(%s)\n", dir);
144 memset(zero, 0, sizeof(zero));
145
146 for (;;) {
147 const char *block;
148 size_t nread;
149
150 block = fmap_need_off_once_len(ctx->fmap, pos, BLOCKSIZE, &nread);
151 cli_dbgmsg("cli_untar: pos = %lu\n", (unsigned long)pos);
152
153 if (!in_block && !nread)
154 break;
155
156 if (!nread)
157 block = zero;
158
159 if (!block) {
160 if (fout >= 0)
161 close(fout);
162 cli_errmsg("cli_untar: block read error\n");
163 return CL_EREAD;
164 }
165 pos += nread;
166
167 if (!in_block) {
168 char type;
169 int directory, skipEntry = 0;
170 int checksum = -1;
171 char magic[7], osize[TARSIZELEN + 1];
172 currsize = 0;
173
174 if (fout >= 0) {
175 lseek(fout, 0, SEEK_SET);
176 ret = cli_magic_scan_desc(fout, fullname, ctx, name);
177 close(fout);
178 if (!ctx->engine->keeptmp)
179 if (cli_unlink(fullname)) return CL_EUNLINK;
180 if (ret == CL_VIRUS) {
181 if (!SCAN_ALLMATCHES)
182 return CL_VIRUS;
183 else
184 num_viruses++;
185 }
186 fout = -1;
187 }
188
189 if (block[0] == '\0') /* We're done */
190 break;
191 if ((ret = cli_checklimits("cli_untar", ctx, 0, 0, 0)) != CL_CLEAN)
192 return ret;
193
194 if (nread < TARHEADERSIZE) {
195 return CL_CLEAN;
196 }
197
198 checksum = getchecksum(block);
199 cli_dbgmsg("cli_untar: Candidate checksum = %d, [%o in octal]\n", checksum, checksum);
200 if (testchecksum(block, checksum) != 0) {
201 // If checksum is bad, dump and look for next header block
202 cli_dbgmsg("cli_untar: Invalid checksum in tar header. Skip to next...\n");
203 if (last_header_bad == 0) {
204 last_header_bad++;
205 cli_dbgmsg("cli_untar: Invalid checksum found inside archive!\n");
206 }
207 continue;
208 } else {
209 last_header_bad = 0;
210 cli_dbgmsg("cli_untar: Checksum %d is valid.\n", checksum);
211 }
212
213 if (posix) {
214 strncpy(magic, block + 257, 5);
215 magic[5] = '\0';
216 if (strcmp(magic, "ustar") != 0) {
217 cli_dbgmsg("cli_untar: Incorrect magic string '%s' in tar header\n", magic);
218 return CL_EFORMAT;
219 }
220 }
221
222 type = block[TARFILETYPEOFFSET];
223
224 switch (type) {
225 default:
226 cli_dbgmsg("cli_untar: unknown type flag %c\n", type);
227 /* fall-through */
228 case '0': /* plain file */
229 case '\0': /* plain file */
230 case '7': /* contiguous file */
231 case 'M': /* continuation of a file from another volume; might as well scan it. */
232 files++;
233 directory = 0;
234 break;
235 case '1': /* Link to already archived file */
236 case '5': /* directory */
237 case '2': /* sym link */
238 case '3': /* char device */
239 case '4': /* block device */
240 case '6': /* fifo special */
241 case 'V': /* Volume header */
242 directory = 1;
243 break;
244 case 'K':
245 case 'L':
246 /* GNU extension - ././@LongLink
247 * Discard the blocks with the extended filename,
248 * the last header will contain parts of it anyway
249 */
250 case 'N': /* Old GNU format way of storing long filenames. */
251 case 'A': /* Solaris ACL */
252 case 'E': /* Solaris Extended attribute s*/
253 case 'I': /* Inode only */
254 case 'g': /* Global extended header */
255 case 'x': /* Extended attributes */
256 case 'X': /* Extended attributes (POSIX) */
257 directory = 0;
258 skipEntry = 1;
259 break;
260 }
261
262 if (directory) {
263 in_block = 0;
264 continue;
265 }
266
267 strncpy(osize, block + TARSIZEOFFSET, TARSIZELEN);
268 osize[TARSIZELEN] = '\0';
269 size_int = octal(osize);
270 if (size_int < 0) {
271 cli_dbgmsg("cli_untar: Invalid size in tar header\n");
272 skipEntry++;
273 } else {
274 size = (size_t)size_int;
275 cli_dbgmsg("cli_untar: size = %zu\n", size);
276 ret = cli_checklimits("cli_untar", ctx, size, 0, 0);
277 switch (ret) {
278 case CL_EMAXFILES: // Scan no more files
279 skipEntry++;
280 limitnear = 0;
281 break;
282 case CL_EMAXSIZE: // Either single file limit or total byte limit would be exceeded
283 cli_dbgmsg("cli_untar: would exceed limit, will try up to max");
284 limitnear = 1;
285 break;
286 default: // Ok based on reported content size
287 limitnear = 0;
288 break;
289 }
290 }
291
292 if (skipEntry) {
293 const int nskip = (size % BLOCKSIZE || !size) ? size + BLOCKSIZE - (size % BLOCKSIZE) : size;
294
295 if (nskip < 0) {
296 cli_dbgmsg("cli_untar: got negative skip size, giving up\n");
297 return CL_CLEAN;
298 }
299 cli_dbgmsg("cli_untar: skipping entry\n");
300 pos += nskip;
301 continue;
302 }
303
304 strncpy(name, block, 100);
305 name[100] = '\0';
306 if (cli_matchmeta(ctx, name, size, size, 0, files, 0, NULL) == CL_VIRUS) {
307 if (!SCAN_ALLMATCHES)
308 return CL_VIRUS;
309 else
310 num_viruses++;
311 }
312
313 snprintf(fullname, sizeof(fullname) - 1, "%s" PATHSEP "tar%02u", dir, files);
314 fullname[sizeof(fullname) - 1] = '\0';
315 fout = open(fullname, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);
316
317 if (fout < 0) {
318 char err[128];
319 cli_errmsg("cli_untar: Can't create temporary file %s: %s\n", fullname, cli_strerror(errno, err, sizeof(err)));
320 return CL_ETMPFILE;
321 }
322
323 cli_dbgmsg("cli_untar: extracting to %s\n", fullname);
324
325 in_block = 1;
326 } else { /* write or continue writing file contents */
327 size_t nbytes, nwritten;
328 int skipwrite = 0;
329 char err[128];
330
331 nbytes = (size > 512) ? 512 : size;
332 if (nread && (nread < nbytes))
333 nbytes = nread;
334
335 if (limitnear > 0) {
336 currsize += nbytes;
337 cli_dbgmsg("cli_untar: Approaching limit...\n");
338 if (cli_checklimits("cli_untar", ctx, (unsigned long)currsize, 0, 0) != CL_SUCCESS) {
339 // Limit would be exceeded by this file, suppress writing beyond limit
340 // Need to keep reading to get to end of file chunk
341 skipwrite++;
342 }
343 }
344
345 if (skipwrite == 0) {
346 nwritten = cli_writen(fout, block, nbytes);
347
348 if (nwritten != nbytes) {
349 cli_errmsg("cli_untar: only wrote %zu bytes to file %s (out of disc space?): %s\n",
350 nwritten, fullname, cli_strerror(errno, err, sizeof(err)));
351 close(fout);
352 return CL_EWRITE;
353 }
354 }
355 if (nbytes > size) {
356 cli_warnmsg("cli_untar: More bytes written than requested!\n");
357 size = 0;
358 } else {
359 size -= nbytes;
360 }
361 if ((size != 0) && (nread == 0)) {
362 // Truncated tar file, so end file content like tar behavior
363 cli_dbgmsg("cli_untar: No bytes read! Forcing end of file content.\n");
364 size = 0;
365 }
366 }
367 if (size == 0)
368 in_block = 0;
369 }
370 if (fout >= 0) {
371 lseek(fout, 0, SEEK_SET);
372 ret = cli_magic_scan_desc(fout, fullname, ctx, name);
373 close(fout);
374 if (!ctx->engine->keeptmp)
375 if (cli_unlink(fullname)) return CL_EUNLINK;
376 if (ret == CL_VIRUS)
377 return CL_VIRUS;
378 }
379 if (num_viruses)
380 return CL_VIRUS;
381 return CL_CLEAN;
382 }
383