1 /*- 2 * %sccs.include.proprietary.c% 3 */ 4 5 #ifndef lint 6 static char sccsid[] = "@(#)inv1.c 4.4 (Berkeley) 04/18/91"; 7 #endif /* not lint */ 8 9 #include <stdio.h> 10 #include <assert.h> 11 #include "pathnames.h" 12 13 main(argc, argv) 14 char *argv[]; 15 { 16 /* Make inverted file indexes. Reads a stream from mkey which 17 * gives record pointer items and keys. Generates set of files 18 * a. NHASH pointers to file b. 19 * b. lists of record numbers. 20 * c. record pointer items. 21 * 22 * these files are named xxx.ia, xxx.ib, xxx.ic; 23 * where xxx is taken from arg1. 24 * If the files exist they are updated. 25 */ 26 27 FILE *fa, *fb, *fc, *fta, *ftb, *ftc, *fd; 28 int nhash = 256; 29 int appflg = 1; 30 int keepkey = 0, pipein = 0; 31 char nma[100], nmb[100], nmc[100], com[100], nmd[100]; 32 char tmpa[20], tmpb[20], tmpc[20]; 33 char *remove = NULL; 34 int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status; 35 int i,j,k; 36 long keys; 37 int iflong =0; 38 char *sortdir; 39 40 sortdir = _PATH_USRTMP; 41 while (argv[1][0] == '-') 42 { 43 switch(argv[1][1]) 44 { 45 case 'h': /* size of hash table */ 46 nhash = atoi (argv[1]+2); 47 break; 48 case 'n': /* new, don't append */ 49 appflg=0; 50 break; 51 case 'a': /* append to old file */ 52 appflg=1; 53 break; 54 case 'v': /* verbose output */ 55 chatty=1; 56 break; 57 case 'd': /* keep keys on file .id for check on searching */ 58 keepkey=1; 59 break; 60 case 'p': /* pipe into sort (saves space, costs time)*/ 61 pipein = 1; 62 break; 63 case 'i': /* input is on file, not stdin */ 64 close(0); 65 if (open(argv[2], 0) != 0) 66 err("Can't read input %s", argv[2]); 67 if (argv[1][2]=='u') /* unlink */ 68 remove = argv[2]; 69 argc--; 70 argv++; 71 break; 72 } 73 argc--; 74 argv++; 75 } 76 strcpy (nma, argc >= 2 ? argv[1] : "Index"); 77 strcpy (nmb, nma); 78 strcpy (nmc, nma); 79 strcpy (nmd, nma); 80 strcat (nma, ".ia"); 81 strcat (nmb, ".ib"); 82 strcat (nmc, ".ic"); 83 strcat (nmd, ".id"); 84 85 sprintf(tmpa, "junk%di", getpid()); 86 if (pipein) 87 { 88 pipe(fp); 89 fr=fp[0]; 90 fw=fp[1]; 91 if ( (pfork=fork()) == 0) 92 { 93 close(fw); 94 close(0); 95 _assert(dup(fr)==0); 96 close(fr); 97 execl(_PATH_SORT, "sort", "-T", sortdir, "-o", tmpa, 0); 98 _assert(0); 99 } 100 _assert(pfork!= -1); 101 close(fr); 102 fta = fopen(_PATH_DEVNULL, "w"); 103 close(fta->_file); 104 fta->_file = fw; 105 } 106 else /* use tmp file */ 107 { 108 fta = fopen(tmpa, "w"); 109 _assert (fta != NULL); 110 } 111 fb = 0; 112 if (appflg ) 113 { 114 if (fb = fopen(nmb, "r")) 115 { 116 sprintf(tmpb, "junk%dj", getpid()); 117 ftb = fopen(tmpb, "w"); 118 if (ftb==NULL) 119 err("Can't get scratch file %s",tmpb); 120 nhash = recopy(ftb, fb, fopen(nma, "r")); 121 fclose(ftb); 122 } 123 else 124 appflg=0; 125 } 126 fc = fopen(nmc, appflg ? "a" : "w"); 127 fd = keepkey ? fopen(nmd, "w") : 0; 128 docs = newkeys(fta, stdin, fc, nhash, fd, &iflong); 129 fclose(stdin); 130 if (remove != NULL) 131 unlink(remove); 132 fclose(fta); 133 if (pipein) 134 { 135 pwait = wait(&status); 136 printf("pfork %o pwait %o status %d\n",pfork,pwait,status); 137 _assert(pwait==pfork); 138 _assert(status==0); 139 } 140 else 141 { 142 sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa); 143 system(com); 144 } 145 if (appflg) 146 { 147 sprintf(tmpc, "junk%dk", getpid()); 148 sprintf(com, "mv %s %s", tmpa, tmpc); 149 system(com); 150 sprintf(com, "sort -T %s -m %s %s -o %s", sortdir, 151 tmpb, tmpc, tmpa); 152 system(com); 153 } 154 fta = fopen(tmpa, "r"); 155 fa = fopen(nma, "w"); 156 fb = fopen(nmb, "w"); 157 whash(fta, fa, fb, nhash, iflong, &keys, &hashes); 158 fclose(fta); 159 # ifndef D1 160 unlink(tmpa); 161 # endif 162 if (appflg) 163 { 164 unlink(tmpb); 165 unlink(tmpc); 166 } 167 if (chatty) 168 169 printf ("%ld key occurrences, %d hashes, %d docs\n", 170 keys, hashes, docs); 171 } 172