1 /*  $Id: tdx-util.c 9019 2010-03-19 21:27:15Z iulius $
2 **
3 **  Utility for managing a tradindexed overview spool.
4 **
5 **  This utility can manipulate a tradindexed overview spool in various ways,
6 **  including some ways that are useful for recovery from crashes.  It allows
7 **  the user to view the contents of the various data structures that
8 **  tradindexed stores on disk.
9 */
10 
11 #include "config.h"
12 #include "clibrary.h"
13 #include <ctype.h>
14 #include <dirent.h>
15 #include <sys/stat.h>
16 
17 #include "inn/buffer.h"
18 #include "inn/libinn.h"
19 #include "inn/history.h"
20 #include "inn/innconf.h"
21 #include "inn/messages.h"
22 #include "inn/newsuser.h"
23 #include "inn/ov.h"
24 #include "inn/paths.h"
25 #include "inn/vector.h"
26 #include "inn/wire.h"
27 #include "ovinterface.h"
28 #include "tdx-private.h"
29 #include "tdx-structure.h"
30 
31 /*
32 **  Dump the main index data, either all of it or that for a particular group
33 **  if the group argument is non-NULL.
34 */
35 static void
dump_index(const char * group)36 dump_index(const char *group)
37 {
38     struct group_index *index;
39 
40     index = tdx_index_open(OV_READ);
41     if (index == NULL)
42         return;
43     if (group == NULL)
44         tdx_index_dump(index, stdout);
45     else {
46         const struct group_entry *entry;
47 
48         entry = tdx_index_entry(index, group);
49         if (entry == NULL) {
50             warn("cannot find group %s", group);
51             return;
52         }
53         tdx_index_print(group, entry, stdout);
54     }
55     tdx_index_close(index);
56 }
57 
58 
59 /*
60 **  Dump the data index file for a particular group.
61 */
62 static void
dump_group_index(const char * group)63 dump_group_index(const char *group)
64 {
65     struct group_index *index;
66     struct group_entry *entry;
67     struct group_data *data;
68 
69     index = tdx_index_open(OV_READ);
70     if (index == NULL)
71         return;
72     entry = tdx_index_entry(index, group);
73     if (entry == NULL) {
74         warn("cannot find group %s in the index", group);
75         return;
76     }
77     data = tdx_data_open(index, group, entry);
78     if (data == NULL) {
79         warn("cannot open group %s", group);
80         return;
81     }
82     tdx_data_index_dump(data, stdout);
83     tdx_data_close(data);
84     tdx_index_close(index);
85 }
86 
87 
88 /*
89 **  Dump the overview data for a particular group.  If number is 0, dump the
90 **  overview data for all current articles; otherwise, only dump the data for
91 **  that particular article.  Include the article number, token, arrived time,
92 **  and expires time (if any) in the overview data as additional fields
93 **  normally.  If overchan is true, instead write out the data in the format
94 **  that overchan would expect as input.
95 */
96 static void
dump_overview(const char * group,ARTNUM low,ARTNUM high,bool overchan)97 dump_overview(const char *group, ARTNUM low, ARTNUM high, bool overchan)
98 {
99     struct group_index *index;
100     struct group_data *data;
101     struct group_entry *entry;
102     struct article article;
103     struct search *search;
104     char *p;
105     char datestring[256];
106 
107     index = tdx_index_open(OV_READ);
108     if (index == NULL)
109         return;
110     entry = tdx_index_entry(index, group);
111     if (entry == NULL) {
112         warn("cannot find group %s", group);
113         return;
114     }
115     data = tdx_data_open(index, group, entry);
116     if (data == NULL) {
117         warn("cannot open group %s", group);
118         return;
119     }
120     data->refcount++;
121 
122     if (low == 0)
123         low = entry->low;
124     if (high == 0)
125         high = entry->high;
126     search = tdx_search_open(data, low, high, entry->high);
127 
128     if (search == NULL) {
129         if (low == high)
130             puts("Article not found");
131         else
132             warn("cannot open search in %s: %lu - %lu", group, low, high);
133         return;
134     }
135     while (tdx_search(search, &article)) {
136         if (overchan) {
137             p = memchr(article.overview, '\t', article.overlen - 3);
138             if (p == NULL)
139                 continue;
140             p++;
141             printf("%s %lu %lu ", TokenToText(article.token),
142                    (unsigned long) article.arrived,
143                    (unsigned long) article.expires);
144             fwrite(p, article.overlen - 2 - (p - article.overview), 1, stdout);
145         } else {
146             fwrite(article.overview, article.overlen - 2, 1, stdout);
147             printf("\tArticle: %lu\tToken: %s", article.number,
148                    TokenToText(article.token));
149             makedate(article.arrived, true, datestring, sizeof(datestring));
150             printf("\tArrived: %s", datestring);
151             if (article.expires != 0) {
152                 makedate(article.expires, true, datestring,
153                          sizeof(datestring));
154                 printf("\tExpires: %s", datestring);
155             }
156         }
157         printf("\n");
158     }
159     tdx_search_close(search);
160     tdx_data_close(data);
161     tdx_index_close(index);
162 }
163 
164 
/*
**  Check a string to see if it's a valid number, meaning that it is
**  non-empty and consists solely of decimal digits.  Signs, whitespace, and
**  the empty string are all rejected.  Returns true if the string is a
**  number and false otherwise.
*/
static bool
check_number(const char *string)
{
    const char *p;

    /* A string with no digits at all is not a number. */
    if (*string == '\0')
        return false;

    /* The cast keeps isdigit well-defined for characters with the high bit
       set on platforms where char is signed. */
    for (p = string; *p != '\0'; p++)
        if (!isdigit((unsigned char) *p))
            return false;
    return true;
}
178 
179 
/*
**  Find the message ID in the group overview data and return a copy of it.
**  The message ID is taken to be the text between the fourth and fifth tabs,
**  where the search for the first tab starts at the second character of the
**  data.  overview must be a nul-terminated string.  Returns NULL if the data
**  doesn't contain enough tabs (including when it is empty).  Caller is
**  responsible for freeing the returned string.
*/
static char *
extract_messageid(const char *overview)
{
    const char *p, *end;
    int count;

    for (p = overview, count = 0; count < 4; count++) {
        /* Only reachable on the first pass (afterwards p points at a tab):
           don't step past the terminator of an empty string. */
        if (*p == '\0')
            return NULL;
        p = strchr(p + 1, '\t');
        if (p == NULL)
            return NULL;
    }

    /* p is on the tab preceding the message ID; the ID runs up to, but not
       including, the next tab. */
    p++;
    end = strchr(p, '\t');
    if (end == NULL)
        return NULL;
    return xstrndup(p, end - p);
}
201 
202 
203 /*
204 **  Compare two file names assuming they're numbers, used to sort the list of
205 **  articles numerically.  Suitable for use as a comparison function for
206 **  qsort.
207 */
208 static int
file_compare(const void * p1,const void * p2)209 file_compare(const void *p1, const void *p2)
210 {
211     const char *file1 = *((const char * const *) p1);
212     const char *file2 = *((const char * const *) p2);
213     ARTNUM n1, n2;
214 
215     n1 = strtoul(file1, NULL, 10);
216     n2 = strtoul(file2, NULL, 10);
217     if (n1 > n2)
218         return 1;
219     else if (n1 < n2)
220         return -1;
221     else
222         return 0;
223 }
224 
225 
226 /*
227 **  Get a list of articles in a directory, sorted by article number.
228 */
229 static struct vector *
article_list(const char * directory)230 article_list(const char *directory)
231 {
232     DIR *articles;
233     struct dirent *file;
234     struct vector *list;
235 
236     list = vector_new();
237     articles = opendir(directory);
238     if (articles == NULL)
239         sysdie("cannot open directory %s", directory);
240     while ((file = readdir(articles)) != NULL) {
241         if (!check_number(file->d_name))
242             continue;
243         vector_add(list, file->d_name);
244     }
245     closedir(articles);
246 
247     qsort(list->strings, list->count, sizeof(list->strings[0]), file_compare);
248     return list;
249 }
250 
251 
252 /*
253 **  Create a new newsgroup in the overview database.  Takes the group flag and
254 **  the high and low water marks.
255 */
256 static void
group_create(const char * group,ARTNUM low,ARTNUM high,char flag)257 group_create(const char *group, ARTNUM low, ARTNUM high, char flag)
258 {
259     struct group_index *index;
260 
261     index = tdx_index_open(true);
262     if (index == NULL)
263         die("cannot open group index");
264     if (low == 0)
265         low = 1;
266     if (!tdx_index_add(index, group, low, high, &flag))
267         die("cannot create group %s", group);
268     tdx_index_close(index);
269 }
270 
271 
272 /*
273 **  Rebuild the overview data for a particular group.  Takes a path to a
274 **  directory containing all the articles, as individual files, that should be
275 **  in that group.  The names of the files should be the article numbers in
276 **  the group.
277 */
278 static void
group_rebuild(const char * group,const char * path)279 group_rebuild(const char *group, const char *path)
280 {
281     char *filename, *histpath, *article, *wireformat, *p;
282     size_t size, length, file;
283     int flags;
284     struct buffer *overview = NULL;
285     struct vector *extra, *files;
286     struct history *history;
287     struct group_index *index;
288     struct group_data *data;
289     struct group_entry *entry, info;
290     struct article artdata;
291     struct stat st;
292 
293     index = tdx_index_open(OV_READ);
294     if (index == NULL)
295         die("cannot open group index");
296     entry = tdx_index_entry(index, group);
297     if (entry == NULL) {
298         if (!tdx_index_add(index, group, 1, 0, NF_FLAG_OK_STRING))
299             die("cannot create group %s", group);
300         entry = tdx_index_entry(index, group);
301         if (entry == NULL)
302             die("cannot find group %s", group);
303     }
304     info = *entry;
305     data = tdx_data_rebuild_start(group);
306     if (data == NULL)
307         die("cannot start data rebuild for %s", group);
308     if (!tdx_index_rebuild_start(index, entry))
309         die("cannot start index rebuild for %s", group);
310 
311     histpath = concatpath(innconf->pathdb, INN_PATH_HISTORY);
312     flags = HIS_RDONLY | HIS_ONDISK;
313     history = HISopen(histpath, innconf->hismethod, flags);
314     if (history == NULL)
315         sysdie("cannot open history %s", histpath);
316     free(histpath);
317 
318     extra = overview_extra_fields(true);
319     files = article_list(path);
320 
321     info.count = 0;
322     info.high = 0;
323     info.low = 0;
324     for (file = 0; file < files->count; file++) {
325         filename = concatpath(path, files->strings[file]);
326         article = ReadInFile(filename, &st);
327         size = st.st_size;
328         if (article == NULL) {
329             syswarn("cannot read in %s", filename);
330             free(filename);
331             continue;
332         }
333 
334         /* Check to see if the article is not in wire format.  If it isn't,
335            convert it.  We only check the first line ending. */
336         p = strchr(article, '\n');
337         if (p != NULL && (p == article || p[-1] != '\r')) {
338             wireformat = wire_from_native(article, size, &length);
339             free(article);
340             article = wireformat;
341             size = length;
342         }
343 
344         artdata.number = strtoul(files->strings[file], NULL, 10);
345         if (artdata.number > info.high)
346             info.high = artdata.number;
347         if (artdata.number < info.low || info.low == 0)
348             info.low = artdata.number;
349         info.count++;
350         overview = overview_build(artdata.number, article, size, extra,
351                                   overview);
352         artdata.overview = overview->data;
353         artdata.overlen = overview->left;
354         p = extract_messageid(overview->data);
355         if (p == NULL) {
356             warn("cannot find message ID in %s", filename);
357             free(filename);
358             free(article);
359             continue;
360         }
361         if (HISlookup(history, p, &artdata.arrived, NULL, &artdata.expires,
362                       &artdata.token)) {
363             if (!tdx_data_store(data, &artdata))
364                 warn("cannot store data for %s", filename);
365         } else {
366             warn("cannot find article %s in history", p);
367         }
368         free(p);
369         free(filename);
370         free(article);
371     }
372     vector_free(files);
373     vector_free(extra);
374 
375     info.indexinode = data->indexinode;
376     info.base = data->base;
377     if (!tdx_index_rebuild_finish(index, entry, &info))
378         die("cannot update group index for %s", group);
379     if (!tdx_data_rebuild_finish(group))
380         die("cannot finish rebuilding data for group %s", group);
381     tdx_data_close(data);
382     HISclose(history);
383 }
384 
385 
386 /*
387 **  Parse an article number or range, returning the low and high values in the
388 **  provided arguments.  Returns true if the number or range was parsed
389 **  successfully, false otherwise.  Allows such constructs as "-", "20-", or
390 **  "-50" and leaves the unspecified ends of the range set to zero.
391 **
392 **  This code is similar to code in nnrpd and possibly should move into libinn
393 **  as common code.
394 */
395 static bool
parse_range(char * range,ARTNUM * low,ARTNUM * high)396 parse_range(char *range, ARTNUM *low, ARTNUM *high)
397 {
398     char *hyphen, *end;
399 
400     *low = 0;
401     *high = 0;
402     hyphen = strchr(range, '-');
403     if (hyphen == NULL) {
404         *low = strtoul(range, &end, 10);
405         if (*low == 0 || *end != '\0')
406             return false;
407         *high = *low;
408         return true;
409     } else {
410         *hyphen = '\0';
411         if (*range != '\0') {
412             *low = strtoul(range, &end, 10);
413             if (*low == 0 || *end != '\0')
414                 return false;
415         }
416         if (hyphen[1] != '\0') {
417             *high = strtoul(hyphen + 1, &end, 10);
418             if (*high == 0 || *end != '\0')
419                 return false;
420         }
421         *hyphen = '-';
422         return true;
423     }
424 }
425 
426 
427 /*
428 **  Main routine.  Load inn.conf, parse the arguments, and dispatch to the
429 **  appropriate function.
430 */
431 int
main(int argc,char * argv[])432 main(int argc, char *argv[])
433 {
434     int option;
435     char flag = 'y';
436     char mode = '\0';
437     const char *newsgroup = NULL;
438     const char *path = NULL;
439     ARTNUM artlow = 0;
440     ARTNUM arthigh = 0;
441 
442     message_program_name = "tdx-util";
443 
444     if (!innconf_read(NULL))
445         exit(1);
446 
447     /* Parse options. */
448     opterr = 0;
449     while ((option = getopt(argc, argv, "a:f:n:p:AFR:cgiOo")) != EOF) {
450         switch (option) {
451         case 'a':
452             if (!parse_range(optarg, &artlow, &arthigh))
453                 die("invalid article number or range %s", optarg);
454             break;
455         case 'f':
456             flag = optarg[0];
457             break;
458         case 'n':
459             newsgroup = optarg;
460             break;
461         case 'p':
462             innconf->pathoverview = xstrdup(optarg);
463             break;
464         case 'A':
465             if (mode != '\0')
466                 die("only one mode option allowed");
467             mode = 'A';
468             break;
469         case 'F':
470             if (mode != '\0')
471                 die("only one mode option allowed");
472             mode = 'F';
473             break;
474         case 'R':
475             if (mode != '\0')
476                 die("only one mode option allowed");
477             mode = 'R';
478             path = optarg;
479             break;
480         case 'c':
481             if (mode != '\0')
482                 die("only one mode option allowed");
483             mode = 'c';
484             break;
485         case 'g':
486             if (mode != '\0')
487                 die("only one mode option allowed");
488             mode = 'g';
489             break;
490         case 'i':
491             if (mode != '\0')
492                 die("only one mode option allowed");
493             mode = 'i';
494             break;
495         case 'O':
496             if (mode != '\0')
497                 die("only one mode option allowed");
498             mode = 'O';
499             break;
500         case 'o':
501             if (mode != '\0')
502                 die("only one mode option allowed");
503             mode = 'o';
504             break;
505         default:
506             die("invalid option %c", optopt);
507             break;
508         }
509     }
510 
511     /* Some modes require a group be specified. */
512     if (strchr("cgoOR", mode) != NULL && newsgroup == NULL)
513         die("group must be specified for -%c", mode);
514 
515     /* Run the specified function. */
516     switch (mode) {
517     case 'A':
518         tdx_index_audit(false);
519         break;
520     case 'F':
521         if (getenv("INN_TESTSUITE") == NULL)
522             ensure_news_user_grp(true, true);
523         tdx_index_audit(true);
524         break;
525     case 'R':
526         if (getenv("INN_TESTSUITE") == NULL)
527             ensure_news_user_grp(true, true);
528         group_rebuild(newsgroup, path);
529         break;
530     case 'c':
531         if (getenv("INN_TESTSUITE") == NULL)
532             ensure_news_user_grp(true, true);
533         group_create(newsgroup, artlow, arthigh, flag);
534         break;
535     case 'i':
536         dump_index(newsgroup);
537         break;
538     case 'g':
539         dump_group_index(newsgroup);
540         break;
541     case 'O':
542         dump_overview(newsgroup, artlow, arthigh, true);
543         break;
544     case 'o':
545         dump_overview(newsgroup, artlow, arthigh, false);
546         break;
547     default:
548         die("a mode option must be specified");
549         break;
550     }
551     exit(0);
552 }
553