#ifndef lint
static char *sccsid = "@(#)inv1.c 4.2 (Berkeley) 03/04/86";
#endif

#include <stdio.h>
#include <assert.h>

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates a set of files:
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * These files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken
 * from the first non-option argument ("Index" if there is none).
 * If the files exist they are updated, unless -n is given.
 *
 * Options (each must be its own argument):
 *	-hN	size of hash table (default 256)
 *	-n	new index, don't append
 *	-a	append to old index (default)
 *	-v	verbose: print summary counts when done
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe keys into sort (saves space, costs time)
 *	-i f	input is on file f, not stdin; -iu also unlinks f afterwards
 *
 * Returns 0 on normal completion.
 */
main(argc, argv)
char *argv[];
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *rmname = NULL;	/* input file to unlink after reading (-iu) */
	int chatty = 0, docs, hashes, fp[2], fr, fw, pfork, pwait, status;
	long keys;
	int iflong = 0;
	char *sortdir;
	char *name;

	/* prefer /crp/tmp for sort scratch space when it is read/writable */
	sortdir = (access("/crp/tmp", 06)==0) ? "/crp/tmp" : "/usr/tmp";

	/* argc is checked first: with no arguments argv[1] is a null pointer */
	while (argc > 1 && argv[1][0] == '-')
	{
		switch(argv[1][1])
		{
		case 'h': /* size of hash table */
			nhash = atoi (argv[1]+2);
			break;
		case 'n': /* new, don't append */
			appflg=0;
			break;
		case 'a': /* append to old file */
			appflg=1;
			break;
		case 'v': /* verbose output */
			chatty=1;
			break;
		case 'd': /* keep keys on file .id for check on searching */
			keepkey=1;
			break;
		case 'p': /* pipe into sort (saves space, costs time)*/
			pipein = 1;
			break;
		case 'i': /* input is on file, not stdin */
			if (argc < 3)
			{
				err("No input file name after %s", argv[1]);
				exit(1);
			}
			/* reopen descriptor 0 on the named file */
			close(0);
			if (open(argv[2], 0) != 0)
				err("Can't read input %s", argv[2]);
			if (argv[1][2]=='u') /* unlink */
				rmname = argv[2];
			/* skip the file name operand as well */
			argc--;
			argv++;
			break;
		}
		argc--;
		argv++;
	}

	/* build the four output names, refusing a base that cannot fit */
	name = argc >= 2 ? argv[1] : "Index";
	if (strlen(name) + sizeof(".ia") > sizeof(nma))
	{
		err("Index name too long: %s", name);
		exit(1);
	}
	strcpy (nma, name);
	strcpy (nmb, nma);
	strcpy (nmc, nma);
	strcpy (nmd, nma);
	strcat (nma, ".ia");
	strcat (nmb, ".ib");
	strcat (nmc, ".ic");
	strcat (nmd, ".id");

	sprintf(tmpa, "junk%di", getpid());
	if (pipein)
	{
		pipe(fp);
		fr=fp[0];
		fw=fp[1];
		if ( (pfork=fork()) == 0)
		{
			/* child: read end of the pipe becomes stdin of sort */
			close(fw);
			close(0);
			_assert(dup(fr)==0);
			close(fr);
			/* the list must end in a null pointer, not an int 0 */
			execl("/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			execl("/usr/bin/sort", "sort", "-T", sortdir, "-o", tmpa, (char *)0);
			_assert(0);	/* reached only if both execs failed */
		}
		_assert(pfork!= -1);
		close(fr);
		/* wrap the write end in a stream without touching FILE internals */
		fta = fdopen(fw, "w");
		_assert (fta != NULL);
	}
	else /* use tmp file */
	{
		fta = fopen(tmpa, "w");
		_assert (fta != NULL);
	}

	fb = 0;
	if (appflg )
	{
		fb = fopen(nmb, "r");
		if (fb != NULL)
		{
			sprintf(tmpb, "junk%dj", getpid());
			ftb = fopen(tmpb, "w");
			if (ftb==NULL)
				err("Can't get scratch file %s",tmpb);
			fa = fopen(nma, "r");
			if (fa == NULL)
				err("Can't read %s", nma);
			/* recopy's return value replaces the hash table size */
			/* NOTE(review): fb and fa are not closed here; confirm */
			/* whether recopy closes them before adding fclose calls */
			nhash = recopy(ftb, fb, fa);
			fclose(ftb);
		}
		else
			appflg=0;	/* no old index: fall back to creating one */
	}

	fc = fopen(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		err("Can't write %s", nmc);
	fd = keepkey ? fopen(nmd, "w") : 0;
	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	fclose(stdin);
	if (rmname != NULL)
		unlink(rmname);
	fclose(fta);

	if (pipein)
	{
		/* sort sees end of input once fta is closed; collect it */
		pwait = wait(&status);
		printf("pfork %o pwait %o status %d\n",pfork,pwait,status);
		_assert(pwait==pfork);
		_assert(status==0);
	}
	else
	{
		/* sort the temp file in place */
		sprintf(com, "sort -T %s %s -o %s", sortdir, tmpa, tmpa);
		system(com);
	}

	if (appflg)
	{
		/* merge the recopied old keys (tmpb) with the new sorted keys */
		sprintf(tmpc, "junk%dk", getpid());
		sprintf(com, "mv %s %s", tmpa, tmpc);
		system(com);
		sprintf(com, "sort -T %s -m %s %s -o %s", sortdir,
		    tmpb, tmpc, tmpa);
		system(com);
	}

	fta = fopen(tmpa, "r");
	if (fta == NULL)
		err("Can't read %s", tmpa);
	fa = fopen(nma, "w");
	if (fa == NULL)
		err("Can't write %s", nma);
	fb = fopen(nmb, "w");
	if (fb == NULL)
		err("Can't write %s", nmb);
	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	fclose(fta);
# ifndef D1
	unlink(tmpa);
# endif
	if (appflg)
	{
		unlink(tmpb);
		unlink(tmpc);
	}
	if (chatty)
		printf ("%ld key occurrences, %d hashes, %d docs\n",
		    keys, hashes, docs);
	return 0;
}