1 /*  $Id: archive.c 10342 2019-05-30 14:18:48Z iulius $
2 **
3 **  Read batchfiles on standard input and archive them.
4 */
5 
6 #include "config.h"
7 #include "clibrary.h"
8 #include <errno.h>
9 #include <sys/stat.h>
10 #include <time.h>
11 
12 #include "inn/buffer.h"
13 #include "inn/innconf.h"
14 #include "inn/messages.h"
15 #include "inn/qio.h"
16 #include "inn/vector.h"
17 #include "inn/wire.h"
18 #include "inn/libinn.h"
19 #include "inn/paths.h"
20 #include "inn/storage.h"
21 
22 
/*
**  Run-time settings for the archiver, taken from the configuration defaults
**  and the command line.  The member order matters: main() initializes this
**  struct positionally.
*/
struct config {
    /* Directory under which all archived articles are stored. */
    const char *root;

    /* Wildmat pattern selecting the groups to archive, or NULL for all. */
    const char *pattern;

    /* Stream receiving index entries, or NULL if no index is kept. */
    FILE *index;

    /* If true, append articles to one file per group and month. */
    bool concat;

    /* If true, keep the dots in group names rather than nesting directories. */
    bool flat;
};
31 
32 
33 /*
34 **  Try to make one directory.  Return false on error.
35 */
36 static bool
MakeDir(char * Name)37 MakeDir(char *Name)
38 {
39     struct stat         Sb;
40 
41     if (mkdir(Name, GROUPDIR_MODE) >= 0)
42         return true;
43 
44     /* See if it failed because it already exists. */
45     return stat(Name, &Sb) >= 0 && S_ISDIR(Sb.st_mode);
46 }
47 
48 
/*
**  Given the path of a file such as comp/foo/bar/1123, make sure the
**  directory that will hold it (comp/foo/bar) exists, creating any missing
**  parent directories along the way.  Returns true on success and false if
**  the path contains no slash or a directory could not be created.
*/
static bool
mkpath(const char *file)
{
    char *dir, *slash, *cursor;
    bool ok = false;

    /* Work on a private copy with the final component chopped off. */
    dir = xstrdup(file);
    slash = strrchr(dir, '/');
    if (slash == NULL)
        goto done;
    *slash = '\0';

    /* Fast path: usually every parent is already present, so creating the
       last directory directly succeeds. */
    ok = MakeDir(dir);
    if (ok)
        goto done;

    /* Slow path: walk the string and create each leading prefix in turn by
       temporarily terminating the string at every slash.  A slash in the
       very first position is skipped since it names no prefix. */
    for (cursor = dir; *cursor != '\0'; cursor++) {
        if (cursor == dir || *cursor != '/')
            continue;
        *cursor = '\0';
        ok = MakeDir(dir);
        *cursor = '/';
        if (!ok)
            goto done;
    }

    /* All parents exist now; create the directory itself. */
    ok = MakeDir(dir);

done:
    free(dir);
    return ok;
}
87 
88 
89 /*
90 **  Write an article from memory into a file on disk.  Takes the handle of the
91 **  article, the file name into which to write it, and a flag saying whether
92 **  to concatenate the message to the end of an existing file if any.
93 */
94 static bool
write_article(ARTHANDLE * article,const char * file,bool concat)95 write_article(ARTHANDLE *article, const char *file, bool concat)
96 {
97     FILE *out;
98     char *text = NULL;
99     size_t length = 0;
100 
101     /* Open the output file. */
102     out = fopen(file, concat ? "a" : "w");
103     if (out == NULL && errno == ENOENT) {
104         if (!mkpath(file)) {
105             syswarn("cannot mkdir for %s", file);
106             return false;
107         }
108         out = fopen(file, concat ? "a" : "w");
109     }
110     if (out == NULL) {
111         syswarn("cannot open %s for writing", file);
112         return false;
113     }
114 
115     /* Get the data in wire format and write it out to the file. */
116     text = wire_to_native(article->data, article->len, &length);
117     if (concat)
118         fprintf(out, "-----------\n");
119     if (fwrite(text, length, 1, out) != 1) {
120         syswarn("cannot write to %s", file);
121         fclose(out);
122         if (!concat)
123             unlink(file);
124         free(text);
125         return false;
126     }
127     free(text);
128 
129     /* Flush and close the output. */
130     if (ferror(out) || fflush(out) == EOF) {
131         syswarn("cannot flush %s", file);
132         fclose(out);
133         if (!concat)
134             unlink(file);
135         return false;
136     }
137     if (fclose(out) == EOF) {
138         syswarn("cannot close %s", file);
139         if (!concat)
140             unlink(file);
141         return false;
142     }
143     return true;
144 }
145 
146 
147 /*
148 **  Link an article.  First try a hard link, then a soft link, and if both
149 **  fail, write the article out again to the new path.
150 */
151 static bool
link_article(const char * oldpath,const char * newpath,ARTHANDLE * art)152 link_article(const char *oldpath, const char *newpath, ARTHANDLE *art)
153 {
154     if (link(oldpath, newpath) < 0) {
155         if (!mkpath(newpath)) {
156             syswarn("cannot mkdir for %s", newpath);
157             return false;
158         }
159         if (link(oldpath, newpath) < 0)
160             if (symlink(oldpath, newpath) < 0)
161                 if (!write_article(art, newpath, false))
162                     return false;
163     }
164     return true;
165 }
166 
167 
168 /*
169 **  Write out a single header to stdout, applying the standard overview
170 **  transformation to it.  This code is partly stolen from overdata.c; it
171 **  would be nice to find a way to only write this in one place.
172 */
173 static void
write_index_header(FILE * index,ARTHANDLE * art,const char * header)174 write_index_header(FILE *index, ARTHANDLE *art, const char *header)
175 {
176     const char *start, *end, *p;
177 
178     start = wire_findheader(art->data, art->len, header, false);
179     if (start == NULL) {
180         fprintf(index, "<none>");
181         return;
182     }
183     end = wire_endheader(start, art->data + art->len - 1);
184     if (end == NULL) {
185         fprintf(index, "<none>");
186         return;
187     }
188     for (p = start; p <= end; p++) {
189         if (*p == '\r' && p < end && p[1] == '\n') {
190             p++;
191             continue;
192         }
193         if (*p == '\0' || *p == '\t' || *p == '\r' || *p == '\n')
194             putc(' ', index);
195         else
196             putc(*p, index);
197     }
198 }
199 
200 
201 /*
202 **  Write an index entry to standard output.  This is the path (without the
203 **  archive root), the message-ID of the article, and the subject.
204 */
205 static void
write_index(FILE * index,ARTHANDLE * art,const char * file)206 write_index(FILE *index, ARTHANDLE *art, const char *file)
207 {
208     fprintf(index, "%s ", file);
209     write_index_header(index, art, "Subject");
210     putc(' ', index);
211     write_index_header(index, art, "Message-ID");
212     fprintf(index, "\n");
213     if (ferror(index) || fflush(index) == EOF)
214         syswarn("cannot write index for %s", file);
215 }
216 
217 
218 /*
219 **  Build the archive path for a particular article.  Takes a pointer to the
220 **  (nul-terminated) group name and a pointer to the article number as a
221 **  string, as well as the config struct.  Also takes a buffer to use to build
222 **  the path, which may be NULL to allocate a new buffer.  Returns the path to
223 **  which to write the article as a buffer (but still nul-terminated).
224 */
225 static struct buffer *
build_path(const char * group,const char * number,struct config * config,struct buffer * path)226 build_path(const char *group, const char *number, struct config *config,
227            struct buffer *path)
228 {
229     char *p;
230 
231     /* Initialize the path buffer to config-root followed by /.  */
232     if (path == NULL)
233         path = buffer_new();
234     buffer_set(path, config->root, strlen(config->root));
235     buffer_append(path, "/", 1);
236 
237     /* Append the group name, replacing dots with slashes unless we're using a
238        flat structure. */
239     p = path->data + path->left;
240     buffer_append(path, group, strlen(group));
241     if (!config->flat)
242         for (; (size_t) (p - path->data) < path->left; p++)
243             if (*p == '.')
244                 *p = '/';
245 
246     /* If we're saving by date, append the date now.  Otherwise, append the
247        group number. */
248     if (config->concat) {
249         struct tm *tm;
250         time_t now;
251         int year, month;
252 
253         now = time(NULL);
254         tm = localtime(&now);
255         year = tm->tm_year + 1900;
256         month = tm->tm_mon + 1;
257         buffer_append_sprintf(path, "/%04d%02d", year, month);
258     } else {
259         buffer_append(path, "/", 1);
260         buffer_append(path, number, strlen(number));
261     }
262     buffer_append(path, "", 1);
263     return path;
264 }
265 
266 
267 /*
268 **  Process a single article, saving it to the appropriate file or files (if
269 **  crossposted).
270 */
271 static void
process_article(ARTHANDLE * art,const char * token,struct config * config)272 process_article(ARTHANDLE *art, const char *token, struct config *config)
273 {
274     char *start, *end, *xref, *delim, *p, *first;
275     const char *group;
276     size_t i;
277     struct cvector *groups;
278     struct buffer *path = NULL;
279 
280     /* Determine the groups from the Xref: header.  In groups will be the split
281        Xref: header; from the second string on should be a group, a colon, and
282        an article number. */
283     start = wire_findheader(art->data, art->len, "Xref", true);
284     if (start == NULL) {
285         warn("cannot find Xref: header in %s", token);
286         return;
287     }
288     end = wire_endheader(start, art->data + art->len - 1);
289     xref = xstrndup(start, end - start);
290     for (p = xref; *p != '\0'; p++)
291         if (*p == '\r' || *p == '\n')
292             *p = ' ';
293     groups = cvector_split_space(xref, NULL);
294     if (groups->count < 2) {
295         warn("bogus Xref: header in %s", token);
296         free(xref);
297         return;
298     }
299 
300     /* Walk through each newsgroup, saving the article in the appropriate
301        location. */
302     first = NULL;
303     for (i = 1; i < groups->count; i++) {
304         group = groups->strings[i];
305         delim = strchr(group, ':');
306         if (delim == NULL) {
307             warn("bogus Xref: entry %s in %s", group, token);
308             continue;
309         }
310         *delim = '\0';
311 
312         /* Skip newsgroups that don't match our pattern, if provided. */
313         if (config->pattern != NULL) {
314             if (uwildmat_poison(group, config->pattern) != UWILDMAT_MATCH)
315                 continue;
316         }
317 
318         /* Get the path to which to write the article. */
319         path = build_path(group, delim + 1, config, path);
320 
321         /* If this isn't the first group, and we're not saving by date, try to
322            just link or symlink between the archive directories rather than
323            writing out multiple copies. */
324         if (first == NULL || config->concat) {
325             if (!write_article(art, path->data, config->concat))
326                 continue;
327             if (groups->count > 2)
328                 first = xstrdup(path->data);
329         } else {
330             if (!link_article(first, path->data, art))
331                 continue;
332         }
333 
334         /* Write out the index if desired. */
335         if (config->index)
336             write_index(config->index, art,
337                         path->data + strlen(config->root) + 1);
338     }
339     free(xref);
340     cvector_free(groups);
341     if (path != NULL)
342         buffer_free(path);
343     if (first != NULL)
344         free(first);
345 }
346 
347 
348 int
main(int argc,char * argv[])349 main(int argc, char *argv[])
350 {
351     struct config config = { NULL, NULL, NULL, 0, 0 };
352     int option, status;
353     bool redirect = true;
354     QIOSTATE *qp;
355     char *line, *file;
356     TOKEN token;
357     ARTHANDLE *art;
358     FILE *spool;
359     char buffer[BUFSIZ];
360 
361     /* First thing, set up our identity. */
362     message_program_name = "archive";
363 
364     /* Set defaults. */
365     if (!innconf_read(NULL))
366         exit(1);
367     config.root = innconf->patharchive;
368     umask(NEWSUMASK);
369 
370     /* Parse options. */
371     while ((option = getopt(argc, argv, "a:cfi:p:r")) != EOF)
372         switch (option) {
373         default:
374             die("usage error");
375             break;
376         case 'a':
377             config.root = optarg;
378             break;
379         case 'c':
380             config.flat = true;
381             config.concat = true;
382             break;
383         case 'f':
384             config.flat = true;
385             break;
386         case 'i':
387             config.index = fopen(optarg, "a");
388             if (config.index == NULL)
389                 sysdie("cannot open index %s for output", optarg);
390             break;
391         case 'p':
392             config.pattern = optarg;
393             break;
394         case 'r':
395             redirect = false;
396             break;
397         }
398 
399     /* Parse arguments, which should just be the batch file. */
400     argc -= optind;
401     argv += optind;
402     if (argc > 1)
403         die("usage error");
404     if (redirect) {
405         file = concatpath(innconf->pathlog, INN_PATH_ERRLOG);
406         freopen(file, "a", stderr);
407     }
408     if (argc == 1)
409         if (freopen(argv[0], "r", stdin) == NULL)
410             sysdie("cannot open %s for input", argv[0]);
411 
412     /* Initialize the storage manager. */
413     if (!SMinit())
414         die("cannot initialize storage manager: %s", SMerrorstr);
415 
416     /* Read input. */
417     qp = QIOfdopen(fileno(stdin));
418     if (qp == NULL)
419         sysdie("cannot reopen input");
420     while ((line = QIOread(qp)) != NULL) {
421         if (*line == '\0' || *line == '#')
422             continue;
423 
424         /* Currently, we only handle tokens.  It would be good to handle
425            regular files as well, if for no other reason than for testing, but
426            we need a good way of faking an ARTHANDLE from a file. */
427         if (IsToken(line)) {
428             token = TextToToken(line);
429             art = SMretrieve(token, RETR_ALL);
430             if (art == NULL) {
431                 warn("cannot retrieve %s", line);
432                 continue;
433             }
434             process_article(art, line, &config);
435             SMfreearticle(art);
436         } else {
437             warn("%s is not a token", line);
438         }
439     }
440 
441     /* Close down the storage manager API. */
442     SMshutdown();
443 
444     /* If we read all our input, try to remove the file, and we're done. */
445     if (!QIOerror(qp)) {
446         fclose(stdin);
447         if (argv[0])
448             unlink(argv[0]);
449         exit(0);
450     }
451 
452     /* Otherwise, make an appropriate spool file. */
453     if (argv[0] == NULL)
454         file = concatpath(innconf->pathoutgoing, "archive");
455     else if (argv[0][0] == '/')
456         file = concat(argv[0], ".bch", (char *) 0);
457     else
458         file = concat(innconf->pathoutgoing, "/", argv[0], ".bch", (char *) 0);
459     spool = fopen(file, "a");
460     if (spool == NULL)
461         sysdie("cannot spool to %s", file);
462 
463     /* Write the rest of stdin to the spool file. */
464     status = 0;
465     if (fprintf(spool, "%s\n", line != NULL ? line : "") == EOF) {
466         syswarn("cannot start spool");
467         status = 1;
468     }
469     while (fgets(buffer, sizeof(buffer), stdin) != NULL)
470         if (fputs(buffer, spool) == EOF) {
471             syswarn("cannot write to spool");
472             status = 1;
473             break;
474         }
475     if (fclose(spool) == EOF) {
476         syswarn("cannot close spool");
477         status = 1;
478     }
479 
480     /* If we had a named input file, try to rename the spool. */
481     if (argv[0] != NULL && rename(file, argv[0]) < 0) {
482         syswarn("cannot rename spool");
483         status = 1;
484     }
485 
486     exit(status);
487 }
488