1 /*-
2 * %sccs.include.proprietary.c%
3 */
4
5 #ifndef lint
6 static char sccsid[] = "@(#)inv1.c 4.4 (Berkeley) 04/18/91";
7 #endif /* not lint */
8
9 #include <stdio.h>
10 #include <assert.h>
11 #include "pathnames.h"
12
main(argc,argv)13 main(argc, argv)
14 char *argv[];
15 {
16 /* Make inverted file indexes. Reads a stream from mkey which
17 * gives record pointer items and keys. Generates set of files
18 * a. NHASH pointers to file b.
19 * b. lists of record numbers.
20 * c. record pointer items.
21 *
22 * these files are named xxx.ia, xxx.ib, xxx.ic;
23 * where xxx is taken from arg1.
24 * If the files exist they are updated.
25 */
26
27 FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd;
28 int nhash = 256;
29 int appflg = 1;
30 int keepkey = 0, pipein = 0;
31 char nma[100], nmb[100], nmc[100], com[100], nmd[100];
32 char tmpa[20], tmpb[20], tmpc[20];
33 char *remove = NULL;
34 int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
35 int i,j,k;
36 long keys;
37 int iflong =0;
38 char *sortdir;
39
40 sortdir = _PATH_USRTMP;
41 while (argv[1][0] == '-')
42 {
43 switch(argv[1][1])
44 {
45 case 'h': /* size of hash table */
46 nhash = atoi (argv[1]+2);
47 break;
48 case 'n': /* new, don't append */
49 appflg=0;
50 break;
51 case 'a': /* append to old file */
52 appflg=1;
53 break;
54 case 'v': /* verbose output */
55 chatty=1;
56 break;
57 case 'd': /* keep keys on file .id for check on searching */
58 keepkey=1;
59 break;
60 case 'p': /* pipe into sort (saves space, costs time)*/
61 pipein = 1;
62 break;
63 case 'i': /* input is on file, not stdin */
64 close(0);
65 if (open(argv[2], 0) != 0)
66 err("Can't read input %s", argv[2]);
67 if (argv[1][2]=='u') /* unlink */
68 remove = argv[2];
69 argc--;
70 argv++;
71 break;
72 }
73 argc--;
74 argv++;
75 }
76 strcpy (nma, argc >= 2 ? argv[1] : "Index");
77 strcpy (nmb, nma);
78 strcpy (nmc, nma);
79 strcpy (nmd, nma);
80 strcat (nma, ".ia");
81 strcat (nmb, ".ib");
82 strcat (nmc, ".ic");
83 strcat (nmd, ".id");
84
85 sprintf(tmpa, "junk%di", getpid());
86 if (pipein)
87 {
88 pipe(fp);
89 fr=fp[0];
90 fw=fp[1];
91 if ( (pfork=fork()) == 0)
92 {
93 close(fw);
94 close(0);
95 _assert(dup(fr)==0);
96 close(fr);
97 execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0);
98 _assert(0);
99 }
100 _assert(pfork!= -1);
101 close(fr);
102 fta = fopen(_PATH_DEVNULL, "w");
103 close(fta->_file);
104 fta->_file = fw;
105 }
106 else /* use tmp file */
107 {
108 fta = fopen(tmpa, "w");
109 _assert (fta != NULL);
110 }
111 fb = 0;
112 if (appflg )
113 {
114 if (fb = fopen(nmb, "r"))
115 {
116 sprintf(tmpb, "junk%dj", getpid());
117 ftb = fopen(tmpb, "w");
118 if (ftb==NULL)
119 err("Can't get scratch file %s",tmpb);
120 nhash = recopy(ftb, fb, fopen(nma, "r"));
121 fclose(ftb);
122 }
123 else
124 appflg=0;
125 }
126 fc = fopen(nmc, appflg ? "a" : "w");
127 fd = keepkey ? fopen(nmd, "w") : 0;
128 docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
129 fclose(stdin);
130 if (remove != NULL)
131 unlink(remove);
132 fclose(fta);
133 if (pipein)
134 {
135 pwait = wait(&status);
136 printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
137 _assert(pwait==pfork);
138 _assert(status==0);
139 }
140 else
141 {
142 sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
143 system(com);
144 }
145 if (appflg)
146 {
147 sprintf(tmpc, "junk%dk", getpid());
148 sprintf(com, "mv %s %s", tmpa, tmpc);
149 system(com);
150 sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
151 tmpb, tmpc, tmpa);
152 system(com);
153 }
154 fta = fopen(tmpa, "r");
155 fa = fopen(nma, "w");
156 fb = fopen(nmb, "w");
157 whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
158 fclose(fta);
159 # ifndef D1
160 unlink(tmpa);
161 # endif
162 if (appflg)
163 {
164 unlink(tmpb);
165 unlink(tmpc);
166 }
167 if (chatty)
168
169 printf ("%ld key occurrences, %d hashes, %d docs\n",
170 keys, hashes, docs);
171 }
172