xref: /original-bsd/old/refer/inv/inv1.c (revision 262b24ac)
/* SCCS identification string; left out when the file is run through lint. */
#if !defined(lint)
static char *sccsid = "@(#)inv1.c	4.2 (Berkeley) 03/04/86";
#endif
4 
5 #include <stdio.h>
6 #include <assert.h>
7 
/*
 * Report a fatal error through err() and make sure the program stops.
 * err() is defined elsewhere; whether it returns cannot be seen from this
 * file, so exit(1) is called explicitly afterwards.
 */
static void
inv_die(fmt, arg)
char *fmt, *arg;
{
	err(fmt, arg);
	exit(1);
}

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates a set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * These files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken
 * from the first non-option argument ("Index" when there is none).
 * If the files exist they are updated, unless -n is given.
 *
 * Options (each must be a separate argument):
 *	-hN	size of hash table (default 256)
 *	-n	new index, don't append
 *	-a	append to old index (the default)
 *	-v	verbose: print totals when done
 *	-d	keep keys on file xxx.id
 *	-p	pipe into sort instead of using a scratch file
 *	-i f	read input from file f instead of stdin;
 *		-iu additionally unlinks f once it has been read
 * Unrecognized options are silently ignored.
 *
 * Returns 0 on completion.  Failures end the program through err()
 * or _assert().
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmfile = NULL;	/* input file to unlink after reading (-iu) */
	char *base;		/* index base name */
	int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
	int newfd;
	long keys;
	int iflong = 0;
	char *sortdir;

	sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp";

	/* argc is checked first: argv[1] is null once the arguments run out */
	while (argc > 1 && argv[1][0] == '-')
	{
		switch(argv[1][1])
		{
		case 'h': /* size of hash table */
			nhash = atoi (argv[1]+2);
			if (nhash <= 0)
				inv_die("Bad hash table size %s", argv[1]+2);
			break;
		case 'n': /* new, don't append */
			appflg=0;
			break;
		case 'a': /* append to old file */
			appflg=1;
			break;
		case 'v': /* verbose output */
			chatty=1;
			break;
		case 'd': /* keep keys on file .id for check on searching */
			keepkey=1;
			break;
		case 'p': /* pipe into sort (saves space, costs time)*/
			pipein = 1;
			break;
		case 'i': /* input is on file, not stdin */
			if (argc < 3)
				inv_die("No input file given after %s", argv[1]);
			/* reopen descriptor 0 on the named file */
			close(0);
			if (open(argv[2], 0) != 0)
				inv_die("Can't read input %s", argv[2]);
			if (argv[1][2]=='u') /* unlink */
				rmfile = argv[2];
			/* skip the file name as well as the option */
			argc--;
			argv++;
			break;
		default: /* unknown options are ignored */
			break;
		}
		argc--;
		argv++;
	}

	/* the name buffers must also hold a 3 character suffix and the NUL */
	base = argc >= 2 ? argv[1] : "Index";
	if (strlen(base) + 4 > sizeof(nma))
		inv_die("Index name %s is too long", base);
	strcpy (nma, base);
	strcpy (nmb, nma);
	strcpy (nmc, nma);
	strcpy (nmd, nma);
	strcat (nma, ".ia");
	strcat (nmb, ".ib");
	strcat (nmc, ".ic");
	strcat (nmd, ".id");

	sprintf(tmpa, "junk%di", getpid());
	if (pipein)
	{
		if (pipe(fp) != 0)
			inv_die("Can't create pipe to %s", "sort");
		fr=fp[0];
		fw=fp[1];
		if ( (pfork=fork()) == 0)
		{
			/* child: make the read end of the pipe its stdin */
			close(fw);
			close(0);
			/* dup() is kept outside _assert so NDEBUG cannot remove it */
			newfd = dup(fr);
			_assert(newfd==0);
			close(fr);
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			_assert(0);
			/* never fall through into the parent's code */
			_exit(1);
		}
		_assert(pfork!= -1);
		close(fr);
		/* parent writes the keys down the pipe to sort */
		fta = fdopen(fw, "w");
		if (fta == NULL)
			inv_die("Can't write on pipe to %s", "sort");
	}
	else /* use tmp file */
	{
		fta = fopen(tmpa, "w");
		if (fta == NULL)
			inv_die("Can't get scratch file %s", tmpa);
	}
	fb = NULL;
	if (appflg )
	{
		if ((fb = fopen(nmb, "r")) != NULL)
		{
			sprintf(tmpb, "junk%dj", getpid());
			ftb = fopen(tmpb, "w");
			if (ftb==NULL)
				inv_die("Can't get scratch file %s",tmpb);
			/*
			 * NOTE(review): the old .ia stream is handed to recopy()
			 * unchecked, and neither it nor fb is closed here.
			 * Whether recopy() copes with a null stream or closes
			 * its inputs is not visible in this file -- confirm.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			fclose(ftb);
		}
		else
			appflg=0;	/* no old index: build a new one */
	}
	fc = fopen(nmc,  appflg ? "a" : "w");
	if (fc == NULL)
		inv_die("Can't write %s", nmc);
	fd = NULL;
	if (keepkey)
	{
		fd = fopen(nmd, "w");
		if (fd == NULL)
			inv_die("Can't write %s", nmd);
	}
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmfile != NULL)
		unlink(rmfile);
	fclose(fta);
	if (pipein)
	{
		/* closing fta sent EOF to sort; wait for it to finish */
		pwait = wait(&status);
		/* this diagnostic line is printed unconditionally, as before */
		printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
		_assert(pwait==pfork);
		_assert(status==0);
	}
	else
	{
		/* sort the scratch file in place */
		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}
	if (appflg)
	{
		/* merge the copy of the old index with the sorted new keys */
		sprintf(tmpc, "junk%dk", getpid());
		sprintf(com, "mv %s %s", tmpa, tmpc);
		system(com);
		sprintf(com, "sort -T %s  -m %s %s -o %s", sortdir,
		tmpb, tmpc, tmpa);
		system(com);
	}
	fta = fopen(tmpa, "r");
	if (fta == NULL)
		inv_die("Can't read scratch file %s", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		inv_die("Can't write %s", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		inv_die("Can't write %s", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf ("%ld key occurrences,  %d hashes, %d docs\n",
		keys, hashes, docs);
	/* fa, fb, fc and fd are left for exit to flush and close */
	return 0;
}
168