1 #include "cado.h" // IWYU pragma: keep
2 // IWYU pragma: no_include <bits/types/struct_rusage.h>
3 #include <cstdlib>
4 #include <climits>
5 #include <cstdio> // FILE // IWYU pragma: keep
6 #include <cstring>
7 #include <sys/types.h> // pid_t
8 #include <sys/wait.h> // WIFEXITED WEXITSTATUS (on freebsd at least)
9 #include <unistd.h> // close getpid
10 #include <sys/stat.h> // stat // IWYU pragma: keep
11 #ifdef HAVE_GETRUSAGE
12 #include <sys/time.h> // IWYU pragma: keep
13 #include <sys/resource.h> // IWYU pragma: keep
14 #endif
15 #include <cerrno>
16
17 #include "fmt/format.h"
18
19
20 #include "macros.h"
21 #include "gzip.h"
22 #include "misc.h"
23 #include "cado_popen.h"
24 #include "cado_pipe_streambuf.hpp"
25
/* One entry of the table of recognized file name suffixes.
 *
 * pfmt_in and pfmt_out are printf-style templates with one %s placeholder;
 * they are expanded with asprintf() into the command that reads (resp.
 * writes) a file carrying this suffix. Both are null for files that are
 * accessed directly, without going through an external command.
 */
struct suffix_handler {
    const char * suffix;    /* file name suffix, e.g. ".gz" */
    const char * pfmt_in;   /* template of the command used for reading */
    const char * pfmt_out;  /* template of the command used for writing */
};
31
/* Path of the antebuffer helper program. An empty string means that no
 * antebuffer is configured. */
static char antebuffer[PATH_MAX];

/* Numeric argument placed right after the antebuffer path on the command
 * lines built in this file. Per the historical comment it is the base-2
 * logarithm of the buffer size: the default 24 stands for 2^24 bytes,
 * i.e. 16 MiB. */
static int antebuffer_buffer_size = 24;
34
/* Disabled code: nothing between this #if 0 and the matching #endif is
 * compiled. */
#if 0
const char * suffix = NULL;

/* Returns a strdup()-ed copy of the part of name that starts at its last
 * '.', or of the empty string if name contains no '.'.
 * NOTE(review): despite the "noalloc" name this does allocate -- confirm
 * the intent before re-enabling this code. */
const char * copy_suffix_noalloc(const char * name)
{
    const char * p = strrchr(name, '.');
    if (p == NULL)
        p = name + strlen(name);
    return strdup(p);
}

/* Returns a strdup()-ed copy of what copy_suffix_noalloc() returns.
 * NOTE(review): the intermediate copy made by copy_suffix_noalloc() is
 * never freed here. */
const char * copy_suffix_alloc(const char * name)
{
    return strdup(copy_suffix_noalloc(name));
}
/* Truncates name in place at its last '.', if there is one, and returns
 * name. */
const char * path_remove_suffix(char * name)
{
    char * p = strrchr(name, '.');
    if (p) *p=0;
    return name;
}

#endif
58
59 struct suffix_handler supported_compression_formats[] = {
60 { ".gz", "gzip -dc %s", "gzip -c --fast > %s", },
61 { ".bz2", "bzip2 -dc %s", "bzip2 -c --fast > %s", },
62 /* zstd seems to be uniformly better than any other alternative */
63 { ".zstd", "zstdcat %s", "zstd --fast > %s", },
64 /* xz is really slow */
65 { ".xz", "xzcat %s", "xz --fast > %s", },
66 { ".lzma", "lzma -dc %s", "lzma -c -0 > %s", },
67 /* These two have to be present */
68 { "", NULL, NULL },
69 { NULL, NULL, NULL },
70 };
71
/* Return a pointer, inside path, to the component that follows the last
 * '/'. When path contains no '/', path itself is returned. Nothing is
 * allocated; a path that ends with '/' yields an empty string.
 */
const char * path_basename(const char * path)
{
    const char * last_slash = strrchr(path, '/');
    return last_slash ? last_slash + 1 : path;
}
82
is_supported_compression_format(const char * s)83 int is_supported_compression_format(const char * s)
84 {
85 struct suffix_handler * r = supported_compression_formats;
86 for( ; r->suffix ; r++) {
87 if (strcmp(r->suffix, s) == 0)
88 return 1;
89 }
90 return 0;
91 }
92
filename_matches_one_compression_format(const char * path)93 int filename_matches_one_compression_format(const char * path)
94 {
95 const struct suffix_handler * r = supported_compression_formats;
96
97 for( ; r->suffix ; r++) {
98 if (!*r->suffix) continue;
99 if (has_suffix(path, r->suffix)) return 1;
100 }
101 return 0;
102 }
103
get_suffix_from_filename(char * s,char const ** sfx)104 void get_suffix_from_filename (char *s, char const **sfx)
105 {
106 const struct suffix_handler * r = supported_compression_formats;
107 for( ; r->suffix ; r++)
108 {
109 if (has_suffix(s, r->suffix))
110 {
111 *sfx = r->suffix;
112 return;
113 }
114 }
115
116 /* If we arrive here, it's because "" is not among the suffixes */
117 abort();
118 return;
119 }
120
try_antebuffer_path()121 static int try_antebuffer_path()
122 {
123 int rc = access(antebuffer, X_OK);
124 if (rc >= 0) {
125 fprintf(stderr, "antebuffer set to %s\n", antebuffer);
126 return 1;
127 }
128 fprintf(stderr, "access to %s: %s\n", antebuffer, strerror(errno));
129 *antebuffer = 0;
130 return 0;
131 }
132
set_antebuffer_path(const char * executable_filename,const char * path_antebuffer)133 int set_antebuffer_path (const char *executable_filename, const char *path_antebuffer)
134 {
135 *antebuffer = 0;
136 antebuffer[PATH_MAX-1]='\0';
137 /* First, if we have path_antebuffer, we must have antebuffer or error */
138 if (path_antebuffer) {
139 struct stat sbuf[1];
140 int rc = stat(path_antebuffer, sbuf);
141 if (rc < 0) {
142 fprintf(stderr, "%s: path_antebuffer=\"%s\" access error: %s\n",
143 __func__, path_antebuffer, strerror(errno));
144 } else {
145 /* Older versions had path_antebuffer be a directory. We still
146 * support this, but only as a compatibility measure. */
147 if (S_ISDIR(sbuf->st_mode)) {
148 #ifdef EXECUTABLE_SUFFIX
149 snprintf(antebuffer, PATH_MAX-1, "%s/antebuffer" EXECUTABLE_SUFFIX, path_antebuffer);
150 #else
151 snprintf(antebuffer, PATH_MAX-1, "%s/antebuffer", path_antebuffer);
152 #endif
153 } else {
154 strncpy(antebuffer, path_antebuffer, PATH_MAX-1);
155 }
156 if (try_antebuffer_path()) return 1;
157 }
158 }
159 /* Second option: if we failed for any reason, and if $0 was given to
160 * us, use that as a potential fallback */
161 if (executable_filename) {
162 char dummy[PATH_MAX];
163 char dummy2[PATH_MAX + 64];
164 const char * slash = strrchr(executable_filename, '/');
165 if (slash) {
166 int len = MIN(PATH_MAX - 1, slash - executable_filename);
167 strncpy(dummy, executable_filename, len);
168 dummy[len]='\0';
169 } else {
170 dummy[0]='.';
171 dummy[1]='\0';
172 }
173 #ifdef EXECUTABLE_SUFFIX
174 snprintf(dummy2, sizeof(dummy2), "%s/../utils/antebuffer" EXECUTABLE_SUFFIX, dummy);
175 #else
176 snprintf(dummy2, sizeof(dummy2), "%s/../utils/antebuffer", dummy);
177 #endif
178 if (realpath(dummy2, antebuffer) && try_antebuffer_path())
179 return 1;
180 }
181 /* Third option: walk $PATH */
182 if ((path_resolve("antebuffer", antebuffer)) != NULL && try_antebuffer_path()) {
183 return 1;
184 }
185 *antebuffer = 0;
186 fprintf(stderr, "No antebuffer configured\n");
187 *antebuffer='\0';
188 return 0;
189 }
190
191 /* Return a list of unix commands to _read_ a set of files. Consecutive
192 * files sharing the same decompression mechanism are grouped into a
193 * single command line.
194 *
195 * antebuffer file1.gz file2.gz file3.gz | gzip -dc
196 * antebuffer file4.bz2 | gzip -dc
197 * antebuffer file5.gz | gzip -dc
198 * antebuffer file6.gz | cat // useless use of cat, should be fixed.
199 *
200 * Note that antebuffer may also not be defined. In that case, the
201 * simpler command formats like "gzip -dc file1.gz file2.gz file3.gz" are
202 * used.
203 *
204 * All strings returned are meant to be passed to popen(). The return
205 * value is a malloc()-ed array of malloc()-ed strings, and the caller is
206 * in charge of freeing it (with filelist_clear, for instance).
207 */
prepare_grouped_command_lines(char ** list_of_files)208 char **prepare_grouped_command_lines(char **list_of_files)
209 {
210 const struct suffix_handler *r = supported_compression_formats;
211 char ** new_commands = NULL;
212 size_t n_new_commands = 0;
213
214 /* Allow a few bytes extra for popen's "/bin/sh" "-c" prefix */
215 ASSERT_ALWAYS(get_arg_max() >= 20);
216 size_t arg_max = get_arg_max() - 20;
217
218 for(char ** grouphead = list_of_files ; *grouphead ; ) {
219 char *cmd_prefix = NULL, *cmd_postfix = NULL;
220 size_t prefix_len, postfix_len;
221 const struct suffix_handler * this_suffix = r;
222 for (; this_suffix && this_suffix->suffix; this_suffix++)
223 if (has_suffix(*grouphead, this_suffix->suffix))
224 break;
225 ASSERT_ALWAYS(this_suffix);
226 size_t filenames_total_size = 0;
227 char ** grouptail;
228
229 if (*antebuffer) {
230 if (this_suffix->pfmt_in) {
231 /* antebuffer 24 file1.gz file2.gz file3.gz | gzip -dc - */
232 int rc = asprintf(&cmd_prefix, "%s %d ", antebuffer, antebuffer_buffer_size);
233 ASSERT_ALWAYS(rc >= 0);
234 char *tmp;
235 rc = asprintf(&tmp, this_suffix->pfmt_in, "-");
236 ASSERT_ALWAYS(rc >= 0);
237 rc = asprintf(&cmd_postfix, " | %s", tmp);
238 ASSERT_ALWAYS(rc >= 0);
239 free(tmp);
240 } else {
241 /* antebuffer 24 file1.txt file2.txt file3.txt */
242 /* avoid piping through cat */
243 int rc = asprintf(&cmd_prefix, "%s %d ", antebuffer, antebuffer_buffer_size);
244 ASSERT_ALWAYS(rc >= 0);
245 }
246 } else {
247 if (this_suffix->pfmt_in) {
248 /* gzip -dc file1.gz file2.gz file3.gz */
249 int rc = asprintf(&cmd_prefix, this_suffix->pfmt_in, "");
250 ASSERT_ALWAYS(rc >= 0);
251 } else {
252 /* cat file1.txt file2.txt file3.txt */
253 /* There's potential for this to qualify as a useless use
254 * of cat, but anyway we don't expect to meet this case
255 * often.
256 */
257 int rc = asprintf(&cmd_prefix, "cat ");
258 ASSERT_ALWAYS(rc >= 0);
259 }
260 }
261 prefix_len = cmd_prefix ? strlen(cmd_prefix) : 0;
262 postfix_len = cmd_postfix ? strlen(cmd_postfix) : 0;
263
264 for(grouptail = grouphead ; *grouptail ; grouptail++) {
265 const struct suffix_handler * other_suffix = r;
266 for (; other_suffix && other_suffix->suffix; other_suffix++)
267 if (has_suffix(*grouptail, other_suffix->suffix))
268 break;
269 if (other_suffix != this_suffix)
270 break;
271 /* Add 1 for a space */
272 size_t ds = strlen(*grouptail) + 1;
273 if (filenames_total_size + prefix_len + postfix_len + ds > arg_max)
274 break;
275 filenames_total_size += ds;
276 }
277 /* Now all file names referenced by pointers in the interval
278 * [grouphead..grouptail[ have the same suffix. Create a new
279 * command for unpacking them.
280 */
281 new_commands = (char**) realloc(new_commands, ++n_new_commands * sizeof(char*));
282
283 /* intermediary string for the list of file names */
284 char * tmp = (char*) malloc(filenames_total_size + 1);
285 size_t k = 0;
286 for(char ** g = grouphead ; g != grouptail ; g++) {
287 k += snprintf(tmp + k, filenames_total_size + 1 - k, "%s ", *g);
288 }
289 tmp[k-1]='\0'; /* turn final space to a null byte */
290 filenames_total_size--; /* and adjust filenames_total_size for deleted space */
291
292 char * cmd;
293 int rc;
294
295 rc = asprintf(&cmd, "%s%s%s",
296 cmd_prefix ? cmd_prefix : "",
297 tmp,
298 cmd_postfix ? cmd_postfix : "");
299 ASSERT_ALWAYS(rc >= 0);
300 ASSERT_ALWAYS(strlen(cmd) <= arg_max);
301 ASSERT_ALWAYS(strlen(cmd) == filenames_total_size + prefix_len + postfix_len);
302 new_commands[n_new_commands-1] = cmd;
303 free(tmp);
304 if (cmd_prefix) free(cmd_prefix);
305 if (cmd_postfix) free(cmd_postfix);
306 grouphead = grouptail;
307 }
308 new_commands = (char**) realloc(new_commands, ++n_new_commands * sizeof(char*));
309 new_commands[n_new_commands-1] = NULL;
310 return new_commands;
311 }
312
313 FILE*
fopen_maybe_compressed2(const char * name,const char * mode,int * p_pipeflag,char const ** suf)314 fopen_maybe_compressed2 (const char * name, const char * mode, int* p_pipeflag, char const ** suf)
315 {
316 const struct suffix_handler * r = supported_compression_formats;
317 FILE * f;
318
319 // coverity[fs_check_call]
320 if (strchr(mode, 'r') && access(name, R_OK) != 0)
321 return NULL;
322
323 for( ; r->suffix ; r++) {
324 if (!has_suffix(name, r->suffix)) continue;
325 if (suf) *suf = r->suffix;
326 char * command = NULL;
327 char * tempname = NULL;
328 int ret;
329
330 /* Just *any* file that we write to will get a .tmp.$PID suffix
331 */
332 if (strchr(mode, 'w')) {
333 ret = asprintf(&tempname, "%s.tmp.%d", name, getpid());
334 ASSERT_ALWAYS(ret >= 0);
335 name = tempname;
336 }
337
338 if (strchr(mode, 'r') && r->pfmt_in) {
339 int ret = asprintf(&command, r->pfmt_in, name);
340 ASSERT_ALWAYS(ret >= 0);
341 } else if (strchr(mode, 'w') && r->pfmt_out) {
342 ret = asprintf(&command, r->pfmt_out, name);
343 ASSERT_ALWAYS(ret >= 0);
344 }
345
346 if (command) {
347 /* apparently popen() under Linux does not accept the 'b' modifier */
348 char pmode[2] = "x";
349 pmode[0] = mode[0];
350 f = cado_popen(command, pmode);
351 if (p_pipeflag) *p_pipeflag = 1;
352 #ifdef F_SETPIPE_SZxxx
353 /* The pipe capacity is 2^16 by default; we can increase it,
354 * but it does not seem to make a difference, thus we don't
355 * change it by default (patch from Alain Filbois). */
356 fcntl (fileno (f), F_SETPIPE_SZ, 1UL << 20);
357 #endif
358 free(command);
359 } else {
360 f = fopen(name, mode);
361 if (p_pipeflag) *p_pipeflag = 0;
362 }
363 if (tempname)
364 free(tempname);
365 return f;
366 }
367 /* If we arrive here, it's because "" is not among the suffixes */
368 abort();
369 return NULL;
370 }
371
372
373 FILE*
fopen_maybe_compressed(const char * name,const char * mode)374 fopen_maybe_compressed (const char * name, const char * mode)
375 {
376 return fopen_maybe_compressed2(name, mode, NULL, NULL);
377 }
378
379 #ifdef HAVE_GETRUSAGE
380 int
fclose_maybe_compressed2(FILE * f,const char * name,struct rusage * rr)381 fclose_maybe_compressed2 (FILE * f, const char * name, struct rusage * rr)
382 #else
383 /* if we don't even have getrusage, then no fclose_maybe_compressed2 is
384 * exposed. Yet, we use one as a code shortcut
385 */
386 static int
387 fclose_maybe_compressed2 (FILE * f, const char * name, void * rr MAYBE_UNUSED)
388 #endif
389 {
390 const struct suffix_handler * r = supported_compression_formats;
391
392 for( ; r->suffix ; r++) {
393 if (!has_suffix(name, r->suffix)) continue;
394 /* It doesn't really make sense to imagine that one of these two
395 * may exist and not the other */
396 ASSERT_ALWAYS((r->pfmt_out == NULL) == (r->pfmt_in == NULL));
397
398 char * tempname;
399 int ret = asprintf(&tempname, "%s.tmp.%d", name, getpid());
400 struct stat sbuf[1];
401 ASSERT_ALWAYS(ret >= 0);
402
403 if (r->pfmt_in || r->pfmt_out) {
404 #ifdef HAVE_GETRUSAGE
405 if (rr)
406 ret = cado_pclose2(f, rr);
407 else
408 #endif
409 ret = cado_pclose(f);
410
411 #if defined(WIFEXITED) && defined(WEXITSTATUS)
412 /* Unless child process finished normally and with exit ret 0,
413 we return an error */
414 if (ret == -1 || !WIFEXITED(ret) || WEXITSTATUS(ret) != 0)
415 return EOF;
416 #else
417 /* What do under MinGW? -1 definitely means an error, but how do
418 we parse the other possible ret codes? */
419 if (ret == -1)
420 return EOF;
421 #endif
422
423 } else {
424 #ifdef HAVE_GETRUSAGE
425 if (rr) memset(rr, 0, sizeof(*rr));
426 #endif
427 ret = fclose(f);
428 if (ret != 0)
429 return ret;
430 }
431
432 /* do the rename only if the child completed successfully */
433
434 // coverity[fs_check_call]
435 if (stat(tempname, sbuf) == 0) {
436 ret = rename(tempname, name);
437 if (ret != 0) return EOF;
438 }
439
440 return 0;
441 }
442 /* If we arrive here, it's because "" is not among the suffixes */
443 abort();
444 return EOF;
445 }
446
447 int
fclose_maybe_compressed(FILE * f,const char * name)448 fclose_maybe_compressed (FILE * f, const char * name)
449 {
450 return fclose_maybe_compressed2(f, name, NULL);
451 }
452
453 #include <stdexcept>
454 #include <ios> // std::ios_base::openmode // IWYU pragma: keep
455 #include <fstream> // filebuf
456 #include "portability.h" // strdup // IWYU pragma: keep
457
streambase_maybe_compressed(const char * name,std::ios_base::openmode mode)458 streambase_maybe_compressed::streambase_maybe_compressed(const char * name, std::ios_base::openmode mode)
459 {
460 open(name, mode);
461 init(buf);
462 }
463
open(const char * name,std::ios_base::openmode mode)464 void streambase_maybe_compressed::open(const char * name, std::ios_base::openmode mode)
465 {
466 orig_name = name;
467 const struct suffix_handler * r = supported_compression_formats;
468 if (mode & std::ios_base::out && r->pfmt_out) {
469 // fmtlib's fmt::format oddly mentions that it can throw a format
470 // error, while its constexpr nature should be able to mark it as
471 // impossible.
472 // coverity[exception_thrown]
473 tempname = fmt::format(FMT_STRING("{}.tmp.{}"), name, getpid());
474 name = tempname.c_str();
475 }
476
477 if (mode & std::ios_base::in && access(name, R_OK) != 0)
478 throw std::runtime_error("cannot open file for reading");
479 /* creating is ok, of course
480 if (mode & std::ios_base::out && access(name, W_OK) != 0)
481 throw std::runtime_error("cannot open file for writing");
482 */
483 for( ; r->suffix ; r++) {
484 if (!has_suffix(orig_name.c_str(), r->suffix)) continue;
485 char * command = NULL;
486 if (mode & std::ios_base::in && r->pfmt_in) {
487 int ret = asprintf(&command, r->pfmt_in, name);
488 ASSERT_ALWAYS(ret >= 0);
489 }
490 if (mode & std::ios_base::out && r->pfmt_out) {
491 int ret = asprintf(&command, r->pfmt_out, name);
492 ASSERT_ALWAYS(ret >= 0);
493 }
494
495 if (command) {
496 /* apparently popen() under Linux does not accept the 'b' modifier */
497 pbuf.reset(new cado_pipe_streambuf(command, mode));
498 buf = pbuf.get();
499 pipe = true;
500 free(command);
501 } else {
502 fbuf.reset(new std::filebuf());
503 fbuf->open(name, mode);
504 buf = fbuf.get();
505 pipe = false;
506 }
507 /* hmmm */
508 return;
509 }
510 };
511
close()512 void streambase_maybe_compressed::close()
513 {
514 if (pipe) pbuf->close();
515 else fbuf->close();
516 if (!tempname.empty()) {
517 int rc = rename(tempname.c_str(), orig_name.c_str());
518 ASSERT_ALWAYS(rc == 0);
519 tempname.clear();
520 }
521 }
522
523 // we're in a dtor, exceptions can turn your computer into a coconut.
524 // yet we have an ASSERT_ALWAYS in close()
525 // coverity[exn_spec_violation]
~streambase_maybe_compressed()526 streambase_maybe_compressed::~streambase_maybe_compressed()
527 {
528 sync();
529 close();
530 }
531