1 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
2 /* All Rights Reserved */
3
4
5 /*
6 * Copyright (c) 1980 Regents of the University of California.
7 * All rights reserved. The Berkeley software License Agreement
8 * specifies the terms and conditions for redistribution.
9 */
10
11 /*
12 * Copyright (c) 1983, 1984 1985, 1986, 1987, 1988, Sun Microsystems, Inc.
13 * All Rights Reserved.
14 */
15
16 /* from OpenSolaris "inv1.c 1.5 05/06/02 SMI" */
17
18 /*
19 * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany
20 *
21 * Sccsid @(#)inv1.c 1.3 (gritter) 10/22/05
22 */
23
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <stdio.h>
29 #include <locale.h>
30 #include <assert.h>
31 #ifdef __FreeBSD__
32 #include <paths.h>
33 #endif
34 #include <stdlib.h>
35 #include <string.h>
36 #include "refer..c"
37
/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates a set of files:
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * These files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken
 * from the first non-option argument ("Index" if there is none).
 * If the files exist they are updated, unless -n is given or the
 * old xxx.ib cannot be opened.
 *
 * Options:
 *	-hN	size of hash table (default 256)
 *	-n	new, don't append
 *	-a	append to old file (default)
 *	-v	verbose output
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin; with -iu, F is unlinked
 *		after it has been read
 *
 * Scratch files junk<pid>i, junk<pid>j and junk<pid>k are created in
 * the current directory; sort(1) is told to use sortdir for its own
 * temporaries.
 *
 * Returns 0 on completion.
 *
 * NOTE(review): every error path goes through err() and assumes it
 * terminates the program, exactly as the pre-existing checks did;
 * confirm against its definition.
 */
int
main(int argc, char **argv)
{
	FILE *fa, *fb, *fc, *fta, *ftb;
	FILE *fd = NULL;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;	/* input file to unlink after reading (-iu) */
	int chatty = 0, docs, hashes;
	long keys;
	int iflong = 0;
	char *sortdir;

#ifdef __FreeBSD__
	sortdir = _PATH_VARTMP;
#else
	sortdir = (access("/crp/tmp", 06) == 0) ? "/crp/tmp" : "/var/tmp";
#endif
	while (argc > 1 && argv[1][0] == '-')
	{
		switch (argv[1][1])
		{
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			/* The file name is a separate argument; make sure it exists. */
			if (argc < 3)
				err("Missing input file name after %s", argv[1]);
			/*
			 * Closing descriptor 0 first makes open() hand back 0,
			 * so the file silently takes the place of stdin.
			 */
			close(0);
			if (open(argv[2], O_RDONLY) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* unlink */
				rmfile = argv[2];
			/* Skip the file name in addition to the option itself. */
			argc--;
			argv++;
			break;
		default:	/* unknown options are ignored */
			break;
		}
		argc--;
		argv++;
	}

	/* Derive the four output names from the common prefix. */
	n_strcpy(nma, argc >= 2 ? argv[1] : "Index", sizeof(nma));
	n_strcpy(nmb, nma, sizeof(nmb));
	n_strcpy(nmc, nma, sizeof(nmc));
	n_strcpy(nmd, nma, sizeof(nmd));
	n_strcat(nma, ".ia", sizeof(nma));
	n_strcat(nmb, ".ib", sizeof(nmb));
	n_strcat(nmc, ".ic", sizeof(nmc));
	n_strcat(nmd, ".id", sizeof(nmd));

	/* fta receives the new keys: either a pipe into sort or a scratch file. */
	snprintf(tmpa, sizeof tmpa, "junk%di", (int)getpid());
	if (pipein)
	{
		snprintf(com, sizeof com, "sort -T %s -o %s", sortdir, tmpa);
		fta = popen(com, "w");
		if (fta == NULL)
			err("Can't run %s", com);
	}
	else	/* use tmp file */
	{
		fta = fopen(tmpa, "w");
		if (fta == NULL)
			err("Can't get scratch file %s", tmpa);
	}

	fb = NULL;
	if (appflg)
	{
		if ((fb = fopen(nmb, "r")))
		{
			snprintf(tmpb, sizeof tmpb, "junk%dj", (int)getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL)
				err("Can't get scratch file %s", tmpb);
			/*
			 * The old hash size replaces whatever -h asked for.
			 * NOTE(review): a failed fopen of the .ia file is
			 * handed to recopy as NULL; confirm recopy copes.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"), nhash);
			fclose(ftb);
		}
		else
			appflg = 0;	/* nothing to append to: start afresh */
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		err("Can't open %s", nmc);
	if (keepkey)
	{
		fd = fopen(nmd, "w");
		if (fd == NULL)
			err("Can't open %s", nmd);
	}
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);

	/*
	 * A popen() stream must be closed with pclose() and nothing else;
	 * pclose() also waits for sort to finish writing tmpa.  Only the
	 * plain scratch file is closed with fclose() and sorted afterwards.
	 */
	if (pipein)
	{
		pclose(fta);
	}
	else
	{
		fclose(fta);
		snprintf(com, sizeof com,
		    "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}

	if (appflg)
	{
		/* Merge the old (tmpb) and new (tmpc) sorted lists into tmpa. */
		snprintf(tmpc, sizeof tmpc, "junk%dk", (int)getpid());
		snprintf(com, sizeof com, "mv %s %s", tmpa, tmpc);
		system(com);
		snprintf(com, sizeof com, "sort -T %s -m %s %s -o %s", sortdir,
		    tmpb, tmpc, tmpa);
		system(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL)
		err("Can't read scratch file %s", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		err("Can't open %s", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		err("Can't open %s", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf ("%ld key occurrences,  %d hashes, %d docs\n",
		    keys, hashes, docs);

	return 0;
}
187