1 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
2 /*	  All Rights Reserved  	*/
3 
4 
5 /*
6  * Copyright (c) 1980 Regents of the University of California.
7  * All rights reserved. The Berkeley software License Agreement
8  * specifies the terms and conditions for redistribution.
9  */
10 
11 /*
12  * Copyright (c) 1983, 1984 1985, 1986, 1987, 1988, Sun Microsystems, Inc.
13  * All Rights Reserved.
14  */
15 
16 /*	from OpenSolaris "inv1.c	1.5	05/06/02 SMI" 	*/
17 
18 /*
19  * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany
20  *
21  * Sccsid @(#)inv1.c	1.3 (gritter) 10/22/05
22  */
23 
24 #include <sys/types.h>
25 #include <sys/stat.h>
26 #include <fcntl.h>
27 #include <unistd.h>
28 #include <stdio.h>
29 #include <locale.h>
30 #include <assert.h>
31 #ifdef __FreeBSD__
32 #include <paths.h>
33 #endif
34 #include <stdlib.h>
35 #include <string.h>
36 #include "refer..c"
37 
/*
 * Make inverted file indexes.  Reads a stream from mkey (on stdin, or
 * from the file named after -i) which gives record pointer items and
 * keys.  Generates a set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * These files are named xxx.ia, xxx.ib, xxx.ic (and xxx.id with -d),
 * where xxx is taken from the first non-option argument, or "Index"
 * when none is given.  If the files exist they are updated unless -n
 * is given.
 *
 * Options:
 *	-hN	size of hash table (default 256)
 *	-n	new index, don't append
 *	-a	append to old index (default)
 *	-v	verbose: print key/hash/document counts when done
 *	-d	keep keys on file .id for check on searching
 *	-p	pipe keys straight into sort instead of using a scratch file
 *	-i F	read input from file F instead of stdin
 *	-iu F	as -i, and unlink F once it has been read
 *
 * Scratch files junk<pid>i, junk<pid>j and junk<pid>k are created in the
 * current directory and unlinked before returning.
 *
 * NOTE(review): err() is defined elsewhere and is assumed not to return;
 * an explicit "return 1" follows every call so that a failure can never
 * fall through to code that would use an invalid stream -- confirm.
 */
int
main(int argc, char **argv)
{
	FILE *fa, *fb, *fc, *fta, *ftb;
	FILE *fd = NULL;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;	/* input file to unlink after reading (-iu) */
	int chatty = 0, docs, hashes;
	long keys;
	int iflong = 0;
	const char *sortdir;

#ifdef __FreeBSD__
	sortdir = _PATH_VARTMP;
#else
	sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/var/tmp";
#endif
	while (argc>1 && argv[1][0] == '-')
	{
		switch(argv[1][1])
		{
		case 'h': /* size of hash table */
			nhash = atoi (argv[1]+2);
			break;
		case 'n': /* new, don't append */
			appflg=0;
			break;
		case 'a': /* append to old file */
			appflg=1;
			break;
		case 'v': /* verbose output */
			chatty=1;
			break;
		case 'd': /* keep keys on file .id for check on searching */
			keepkey=1;
			break;
		case 'p': /* pipe into sort (saves space, costs time)*/
			pipein = 1;
			break;
		case 'i': /* input is on file, not stdin */
			/* the file name is the next argument; make sure it exists */
			if (argc < 3)
			{
				err("No input file given after %s", argv[1]);
				return 1;
			}
			/* reopen descriptor 0 on the file so stdin reads from it */
			close(0);
			if (open(argv[2], O_RDONLY) != 0)
			{
				err("Can't read input %s", argv[2]);
				return 1;
			}
			if (argv[1][2]=='u') /* unlink */
				rmfile = argv[2];
			/* consume the file name in addition to the option */
			argc--;
			argv++;
			break;
		default: /* unknown options are silently ignored */
			break;
		}
		argc--;
		argv++;
	}
	n_strcpy (nma, argc >= 2 ? argv[1] : "Index", sizeof(nma));
	n_strcpy (nmb, nma, sizeof(nmb));
	n_strcpy (nmc, nma, sizeof(nmc));
	n_strcpy (nmd, nma, sizeof(nmd));
	n_strcat (nma, ".ia", sizeof(nma));
	n_strcat (nmb, ".ib", sizeof(nmb));
	n_strcat (nmc, ".ic", sizeof(nmc));
	n_strcat (nmd, ".id", sizeof(nmd));

	snprintf(tmpa, sizeof tmpa, "junk%di", (int)getpid());
	if (pipein)
	{
		snprintf(com, sizeof com, "sort -T %s -o %s", sortdir, tmpa);
		fta = popen(com, "w");
		if (fta == NULL)
		{
			err("Can't run %s", com);
			return 1;
		}
	}
	else /* use tmp file */
	{
		fta = fopen(tmpa, "w");
		/* checked explicitly: assert() disappears under NDEBUG */
		if (fta == NULL)
		{
			err("Can't get scratch file %s", tmpa);
			return 1;
		}
	}
	fb = NULL;
	if (appflg)
	{
		if ((fb = fopen(nmb, "r")))
		{
			snprintf(tmpb, sizeof tmpb, "junk%dj", (int)getpid());
			ftb = fopen(tmpb, "w");
			if (ftb==NULL)
			{
				err("Can't get scratch file %s",tmpb);
				return 1;
			}
			/*
			 * NOTE(review): the stream opened on nma here and the
			 * read stream fb are not closed in this function;
			 * presumably recopy() takes care of them -- confirm.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"), nhash);
			fclose(ftb);
		}
		else
			appflg=0;	/* no old index: fall back to creating one */
	}
	fc = fopen(nmc,  appflg ? "a" : "w");
	if (fc == NULL)
	{
		err("Can't open %s", nmc);
		return 1;
	}
	if (keepkey)
		fd = fopen(nmd, "w");
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	if (pipein)
	{
		/*
		 * A popen() stream must be closed with pclose() only;
		 * this also waits for sort to finish writing tmpa.
		 */
		pclose(fta);
	}
	else
	{
		fclose(fta);
		snprintf(com, sizeof com,
				"sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}
	if (appflg)
	{
		/* merge the old keys (tmpb) with the freshly sorted ones */
		snprintf(tmpc, sizeof tmpc, "junk%dk", (int)getpid());
		snprintf(com, sizeof com, "mv %s %s", tmpa, tmpc);
		system(com);
		snprintf(com, sizeof com, "sort -T %s  -m %s %s -o %s", sortdir,
		tmpb, tmpc, tmpa);
		system(com);
	}
	fta = fopen(tmpa, "r");
	if (fta == NULL)
	{
		err("Can't read scratch file %s", tmpa);
		return 1;
	}
	fa = fopen(nma, "w");
	if (fa == NULL)
	{
		err("Can't open %s", nma);
		return 1;
	}
	fb = fopen(nmb, "w");
	if (fb == NULL)
	{
		err("Can't open %s", nmb);
		return 1;
	}
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf ("%ld key occurrences,  %d hashes, %d docs\n",
		keys, hashes, docs);

	return 0;
}
187