xref: /original-bsd/old/refer/inv/inv1.c (revision 2ab53118)
1 /*-
2  * %sccs.include.proprietary.c%
3  */
4 
5 #ifndef lint
6 static char sccsid[] = "@(#)inv1.c	4.4 (Berkeley) 04/18/91";
7 #endif /* not lint */
8 
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "pathnames.h"
12 
main(argc,argv)13 main(argc, argv)
14 char *argv[];
15 {
16 	/* Make inverted file indexes.  Reads a stream from mkey which
17 	 * gives record pointer items and keys.  Generates set of files
18 	 *	a. NHASH pointers to file b.
19 	 *	b. lists of record numbers.
20 	 *	c. record pointer items.
21 	 *
22 	 *  these files are named xxx.ia, xxx.ib, xxx.ic;
23 	 *  where xxx is taken from arg1.
24 	 *  If the files exist they are updated.
25 	 */
26 
27 	FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd;
28 	int nhash = 256;
29 	int appflg = 1;
30 	int keepkey = 0, pipein = 0;
31 	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
32 	char tmpa[20], tmpb[20], tmpc[20];
33 	char *remove = NULL;
34 	int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
35 	int i,j,k;
36 	long keys;
37 	int iflong =0;
38 	char *sortdir;
39 
40 	sortdir = _PATH_USRTMP;
41 	while (argv[1][0] == '-')
42 	{
43 		switch(argv[1][1])
44 		{
45 		case 'h': /* size of hash table */
46 			nhash = atoi (argv[1]+2);
47 			break;
48 		case 'n': /* new, don't append */
49 			appflg=0;
50 			break;
51 		case 'a': /* append to old file */
52 			appflg=1;
53 			break;
54 		case 'v': /* verbose output */
55 			chatty=1;
56 			break;
57 		case 'd': /* keep keys on file .id for check on searching */
58 			keepkey=1;
59 			break;
60 		case 'p': /* pipe into sort (saves space, costs time)*/
61 			pipein = 1;
62 			break;
63 		case 'i': /* input is on file, not stdin */
64 			close(0);
65 			if (open(argv[2], 0) != 0)
66 				err("Can't read input %s", argv[2]);
67 			if (argv[1][2]=='u') /* unlink */
68 				remove = argv[2];
69 			argc--;
70 			argv++;
71 			break;
72 		}
73 		argc--;
74 		argv++;
75 	}
76 	strcpy (nma, argc >= 2 ? argv[1] : "Index");
77 	strcpy (nmb, nma);
78 	strcpy (nmc, nma);
79 	strcpy (nmd, nma);
80 	strcat (nma, ".ia");
81 	strcat (nmb, ".ib");
82 	strcat (nmc, ".ic");
83 	strcat (nmd, ".id");
84 
85 	sprintf(tmpa, "junk%di", getpid());
86 	if (pipein)
87 	{
88 		pipe(fp);
89 		fr=fp[0];
90 		fw=fp[1];
91 		if ( (pfork=fork()) == 0)
92 		{
93 			close(fw);
94 			close(0);
95 			_assert(dup(fr)==0);
96 			close(fr);
97 			execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0);
98 			_assert(0);
99 		}
100 		_assert(pfork!= -1);
101 		close(fr);
102 		fta = fopen(_PATH_DEVNULL, "w");
103 		close(fta->_file);
104 		fta->_file = fw;
105 	}
106 	else /* use tmp file */
107 	{
108 		fta = fopen(tmpa, "w");
109 		_assert (fta != NULL);
110 	}
111 	fb = 0;
112 	if (appflg )
113 	{
114 		if (fb = fopen(nmb, "r"))
115 		{
116 			sprintf(tmpb, "junk%dj", getpid());
117 			ftb = fopen(tmpb, "w");
118 			if (ftb==NULL)
119 				err("Can't get scratch file %s",tmpb);
120 			nhash = recopy(ftb, fb, fopen(nma, "r"));
121 			fclose(ftb);
122 		}
123 		else
124 			appflg=0;
125 	}
126 	fc = fopen(nmc,  appflg ? "a" : "w");
127 	fd = keepkey ? fopen(nmd, "w") : 0;
128 	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
129 	fclose(stdin);
130 	if (remove != NULL)
131 		unlink(remove);
132 	fclose(fta);
133 	if (pipein)
134 	{
135 		pwait = wait(&status);
136 		printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
137 		_assert(pwait==pfork);
138 		_assert(status==0);
139 	}
140 	else
141 	{
142 		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
143 		system(com);
144 	}
145 	if (appflg)
146 	{
147 		sprintf(tmpc, "junk%dk", getpid());
148 		sprintf(com, "mv %s %s", tmpa, tmpc);
149 		system(com);
150 		sprintf(com, "sort -T %s  -m %s %s -o %s", sortdir,
151 		tmpb, tmpc, tmpa);
152 		system(com);
153 	}
154 	fta = fopen(tmpa, "r");
155 	fa = fopen(nma, "w");
156 	fb = fopen(nmb, "w");
157 	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
158 	fclose(fta);
159 # ifndef D1
160 	unlink(tmpa);
161 # endif
162 	if (appflg)
163 	{
164 		unlink(tmpb);
165 		unlink(tmpc);
166 	}
167 	if (chatty)
168 
169 		printf ("%ld key occurrences,  %d hashes, %d docs\n",
170 		keys, hashes, docs);
171 }
172