1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1988 AT&T */ 23*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 24*7c478bd9Sstevel@tonic-gate 25*7c478bd9Sstevel@tonic-gate 26*7c478bd9Sstevel@tonic-gate /* 27*7c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 28*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
29*7c478bd9Sstevel@tonic-gate */ 30*7c478bd9Sstevel@tonic-gate 31*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 32*7c478bd9Sstevel@tonic-gate 33*7c478bd9Sstevel@tonic-gate #include <ctype.h> 34*7c478bd9Sstevel@tonic-gate #include <stdio.h> 35*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 36*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 37*7c478bd9Sstevel@tonic-gate #include <sys/byteorder.h> 38*7c478bd9Sstevel@tonic-gate #if SHARE 39*7c478bd9Sstevel@tonic-gate #include <sys/ipc.h> 40*7c478bd9Sstevel@tonic-gate #include <sys/shm.h> 41*7c478bd9Sstevel@tonic-gate #define ERR -1 42*7c478bd9Sstevel@tonic-gate #endif 43*7c478bd9Sstevel@tonic-gate #include "invlib.h" 44*7c478bd9Sstevel@tonic-gate #include "library.h" 45*7c478bd9Sstevel@tonic-gate 46*7c478bd9Sstevel@tonic-gate #define DEBUG 0 /* debugging code and realloc messages */ 47*7c478bd9Sstevel@tonic-gate #define BLOCKSIZE 2 * BUFSIZ /* logical block size */ 48*7c478bd9Sstevel@tonic-gate #define LINEMAX 1000 /* sorted posting line max size */ 49*7c478bd9Sstevel@tonic-gate #define POSTINC 10000 /* posting buffer size increment */ 50*7c478bd9Sstevel@tonic-gate #define SEP ' ' /* sorted posting field separator */ 51*7c478bd9Sstevel@tonic-gate #define SETINC 100 /* posting set size increment */ 52*7c478bd9Sstevel@tonic-gate #define STATS 0 /* print statistics */ 53*7c478bd9Sstevel@tonic-gate #define SUPERINC 10000 /* super index size increment */ 54*7c478bd9Sstevel@tonic-gate #define TERMMAX 512 /* term max size */ 55*7c478bd9Sstevel@tonic-gate #define VERSION 1 /* inverted index format version */ 56*7c478bd9Sstevel@tonic-gate #define ZIPFSIZE 200 /* zipf curve size */ 57*7c478bd9Sstevel@tonic-gate #define FREAD "r" /* fopen for reading */ 58*7c478bd9Sstevel@tonic-gate #define FREADP "r+" /* fopen for update */ 59*7c478bd9Sstevel@tonic-gate #define FWRITE "w" /* fopen truncate or create for writing */ 60*7c478bd9Sstevel@tonic-gate #define FWRITEP "w+" /* fopen truncate or create for update 
*/ 61*7c478bd9Sstevel@tonic-gate 62*7c478bd9Sstevel@tonic-gate extern char *argv0; /* command name (must be set in main function) */ 63*7c478bd9Sstevel@tonic-gate 64*7c478bd9Sstevel@tonic-gate int invbreak; 65*7c478bd9Sstevel@tonic-gate 66*7c478bd9Sstevel@tonic-gate #if STATS 67*7c478bd9Sstevel@tonic-gate int showzipf; /* show postings per term distribution */ 68*7c478bd9Sstevel@tonic-gate #endif 69*7c478bd9Sstevel@tonic-gate 70*7c478bd9Sstevel@tonic-gate static POSTING *item, *enditem, *item1 = NULL, *item2 = NULL; 71*7c478bd9Sstevel@tonic-gate static unsigned setsize1, setsize2; 72*7c478bd9Sstevel@tonic-gate static long numitems, totterm, zerolong; 73*7c478bd9Sstevel@tonic-gate static char *indexfile, *postingfile; 74*7c478bd9Sstevel@tonic-gate static FILE *outfile, *fpost; 75*7c478bd9Sstevel@tonic-gate static unsigned supersize = SUPERINC, supintsize; 76*7c478bd9Sstevel@tonic-gate static int numpost, numlogblk, amtused, nextpost, 77*7c478bd9Sstevel@tonic-gate lastinblk, numinvitems; 78*7c478bd9Sstevel@tonic-gate static POSTING *POST, *postptr; 79*7c478bd9Sstevel@tonic-gate static unsigned long *SUPINT, *supint, nextsupfing; 80*7c478bd9Sstevel@tonic-gate static char *SUPFING, *supfing; 81*7c478bd9Sstevel@tonic-gate static char thisterm[TERMMAX]; 82*7c478bd9Sstevel@tonic-gate static union { 83*7c478bd9Sstevel@tonic-gate long invblk[BLOCKSIZE / sizeof (long)]; 84*7c478bd9Sstevel@tonic-gate char chrblk[BLOCKSIZE]; 85*7c478bd9Sstevel@tonic-gate } logicalblk; 86*7c478bd9Sstevel@tonic-gate 87*7c478bd9Sstevel@tonic-gate #if DEBUG || STATS 88*7c478bd9Sstevel@tonic-gate static long totpost; 89*7c478bd9Sstevel@tonic-gate #endif 90*7c478bd9Sstevel@tonic-gate 91*7c478bd9Sstevel@tonic-gate #if STATS 92*7c478bd9Sstevel@tonic-gate static int zipf[ZIPFSIZE + 1]; 93*7c478bd9Sstevel@tonic-gate #endif 94*7c478bd9Sstevel@tonic-gate 95*7c478bd9Sstevel@tonic-gate static void invcannotalloc(size_t n); 96*7c478bd9Sstevel@tonic-gate static void invcannotopen(char *file); 
97*7c478bd9Sstevel@tonic-gate static void invcannotwrite(char *file); 98*7c478bd9Sstevel@tonic-gate static int invnewterm(void); 99*7c478bd9Sstevel@tonic-gate static int boolready(void); 100*7c478bd9Sstevel@tonic-gate 101*7c478bd9Sstevel@tonic-gate long 102*7c478bd9Sstevel@tonic-gate invmake(char *invname, char *invpost, FILE *infile) 103*7c478bd9Sstevel@tonic-gate { 104*7c478bd9Sstevel@tonic-gate unsigned char *s; 105*7c478bd9Sstevel@tonic-gate long num; 106*7c478bd9Sstevel@tonic-gate int i; 107*7c478bd9Sstevel@tonic-gate long fileindex; 108*7c478bd9Sstevel@tonic-gate unsigned postsize = POSTINC * sizeof (POSTING); 109*7c478bd9Sstevel@tonic-gate unsigned long *intptr; 110*7c478bd9Sstevel@tonic-gate char line[LINEMAX]; 111*7c478bd9Sstevel@tonic-gate long tlong; 112*7c478bd9Sstevel@tonic-gate PARAM param; 113*7c478bd9Sstevel@tonic-gate POSTING posting; 114*7c478bd9Sstevel@tonic-gate #if STATS 115*7c478bd9Sstevel@tonic-gate int j; 116*7c478bd9Sstevel@tonic-gate unsigned maxtermlen = 0; 117*7c478bd9Sstevel@tonic-gate #endif 118*7c478bd9Sstevel@tonic-gate /* output file */ 119*7c478bd9Sstevel@tonic-gate if ((outfile = vpfopen(invname, FWRITEP)) == NULL) { 120*7c478bd9Sstevel@tonic-gate invcannotopen(invname); 121*7c478bd9Sstevel@tonic-gate return (0); 122*7c478bd9Sstevel@tonic-gate } 123*7c478bd9Sstevel@tonic-gate indexfile = invname; 124*7c478bd9Sstevel@tonic-gate (void) fseek(outfile, (long)BUFSIZ, 0); 125*7c478bd9Sstevel@tonic-gate 126*7c478bd9Sstevel@tonic-gate /* posting file */ 127*7c478bd9Sstevel@tonic-gate if ((fpost = vpfopen(invpost, FWRITE)) == NULL) { 128*7c478bd9Sstevel@tonic-gate invcannotopen(invpost); 129*7c478bd9Sstevel@tonic-gate return (0); 130*7c478bd9Sstevel@tonic-gate } 131*7c478bd9Sstevel@tonic-gate postingfile = invpost; 132*7c478bd9Sstevel@tonic-gate nextpost = 0; 133*7c478bd9Sstevel@tonic-gate /* get space for the postings list */ 134*7c478bd9Sstevel@tonic-gate if ((POST = (POSTING *)malloc(postsize)) == NULL) { 135*7c478bd9Sstevel@tonic-gate 
invcannotalloc(postsize); 136*7c478bd9Sstevel@tonic-gate return (0); 137*7c478bd9Sstevel@tonic-gate } 138*7c478bd9Sstevel@tonic-gate postptr = POST; 139*7c478bd9Sstevel@tonic-gate /* get space for the superfinger (superindex) */ 140*7c478bd9Sstevel@tonic-gate if ((SUPFING = malloc(supersize)) == NULL) { 141*7c478bd9Sstevel@tonic-gate invcannotalloc(supersize); 142*7c478bd9Sstevel@tonic-gate return (0); 143*7c478bd9Sstevel@tonic-gate } 144*7c478bd9Sstevel@tonic-gate supfing = SUPFING; 145*7c478bd9Sstevel@tonic-gate supintsize = supersize / 40; 146*7c478bd9Sstevel@tonic-gate /* also for the superfinger index */ 147*7c478bd9Sstevel@tonic-gate if ((SUPINT = malloc(supintsize * sizeof (long))) == NULL) { 148*7c478bd9Sstevel@tonic-gate invcannotalloc(supintsize * sizeof (long)); 149*7c478bd9Sstevel@tonic-gate return (0); 150*7c478bd9Sstevel@tonic-gate } 151*7c478bd9Sstevel@tonic-gate supint = SUPINT; 152*7c478bd9Sstevel@tonic-gate supint++; /* leave first term open for a count */ 153*7c478bd9Sstevel@tonic-gate /* initialize using an empty term */ 154*7c478bd9Sstevel@tonic-gate (void) strcpy(thisterm, ""); 155*7c478bd9Sstevel@tonic-gate *supint++ = 0; 156*7c478bd9Sstevel@tonic-gate *supfing++ = ' '; 157*7c478bd9Sstevel@tonic-gate *supfing++ = '\0'; 158*7c478bd9Sstevel@tonic-gate nextsupfing = 2; 159*7c478bd9Sstevel@tonic-gate #if DEBUG || STATS 160*7c478bd9Sstevel@tonic-gate totpost = 0L; 161*7c478bd9Sstevel@tonic-gate #endif 162*7c478bd9Sstevel@tonic-gate totterm = 0L; 163*7c478bd9Sstevel@tonic-gate numpost = 1; 164*7c478bd9Sstevel@tonic-gate 165*7c478bd9Sstevel@tonic-gate /* 166*7c478bd9Sstevel@tonic-gate * set up as though a block had come and gone, i.e., set up for 167*7c478bd9Sstevel@tonic-gate * new block 168*7c478bd9Sstevel@tonic-gate */ 169*7c478bd9Sstevel@tonic-gate amtused = 16; /* leave no space - init 3 words + one for luck */ 170*7c478bd9Sstevel@tonic-gate numinvitems = 0; 171*7c478bd9Sstevel@tonic-gate numlogblk = 0; 172*7c478bd9Sstevel@tonic-gate lastinblk 
= BLOCKSIZE; 173*7c478bd9Sstevel@tonic-gate 174*7c478bd9Sstevel@tonic-gate /* now loop as long as more to read (till eof) */ 175*7c478bd9Sstevel@tonic-gate while (fgets(line, LINEMAX, infile) != NULL) { 176*7c478bd9Sstevel@tonic-gate #if DEBUG || STATS 177*7c478bd9Sstevel@tonic-gate ++totpost; 178*7c478bd9Sstevel@tonic-gate #endif 179*7c478bd9Sstevel@tonic-gate s = (unsigned char *) strchr(line, SEP); 180*7c478bd9Sstevel@tonic-gate if (s == NULL) /* where did this line come from ??? */ 181*7c478bd9Sstevel@tonic-gate continue; /* workaround: just skip it */ 182*7c478bd9Sstevel@tonic-gate *s = '\0'; 183*7c478bd9Sstevel@tonic-gate #if STATS 184*7c478bd9Sstevel@tonic-gate if ((i = strlen(line)) > maxtermlen) { 185*7c478bd9Sstevel@tonic-gate maxtermlen = i; 186*7c478bd9Sstevel@tonic-gate } 187*7c478bd9Sstevel@tonic-gate #endif 188*7c478bd9Sstevel@tonic-gate #if DEBUG 189*7c478bd9Sstevel@tonic-gate (void) printf("%ld: %s ", totpost, line); 190*7c478bd9Sstevel@tonic-gate (void) fflush(stdout); 191*7c478bd9Sstevel@tonic-gate #endif 192*7c478bd9Sstevel@tonic-gate if (strcmp(thisterm, line) == 0) { 193*7c478bd9Sstevel@tonic-gate if (postptr + 10 > POST + postsize / sizeof (POSTING)) { 194*7c478bd9Sstevel@tonic-gate i = postptr - POST; 195*7c478bd9Sstevel@tonic-gate postsize += POSTINC * sizeof (POSTING); 196*7c478bd9Sstevel@tonic-gate if ((POST = realloc(POST, postsize)) == NULL) { 197*7c478bd9Sstevel@tonic-gate invcannotalloc(postsize); 198*7c478bd9Sstevel@tonic-gate return (0); 199*7c478bd9Sstevel@tonic-gate } 200*7c478bd9Sstevel@tonic-gate postptr = i + POST; 201*7c478bd9Sstevel@tonic-gate #if DEBUG 202*7c478bd9Sstevel@tonic-gate (void) printf("reallocated post space to %u, " 203*7c478bd9Sstevel@tonic-gate "totpost=%ld\n", postsize, totpost); 204*7c478bd9Sstevel@tonic-gate #endif 205*7c478bd9Sstevel@tonic-gate } 206*7c478bd9Sstevel@tonic-gate numpost++; 207*7c478bd9Sstevel@tonic-gate } else { 208*7c478bd9Sstevel@tonic-gate /* have a new term */ 
209*7c478bd9Sstevel@tonic-gate if (!invnewterm()) { 210*7c478bd9Sstevel@tonic-gate return (0); 211*7c478bd9Sstevel@tonic-gate } 212*7c478bd9Sstevel@tonic-gate (void) strcpy(thisterm, line); 213*7c478bd9Sstevel@tonic-gate numpost = 1; 214*7c478bd9Sstevel@tonic-gate postptr = POST; 215*7c478bd9Sstevel@tonic-gate fileindex = 0; 216*7c478bd9Sstevel@tonic-gate } 217*7c478bd9Sstevel@tonic-gate /* get the new posting */ 218*7c478bd9Sstevel@tonic-gate num = *++s - '!'; 219*7c478bd9Sstevel@tonic-gate i = 1; 220*7c478bd9Sstevel@tonic-gate do { 221*7c478bd9Sstevel@tonic-gate num = BASE * num + *++s - '!'; 222*7c478bd9Sstevel@tonic-gate } while (++i < PRECISION); 223*7c478bd9Sstevel@tonic-gate posting.lineoffset = num; 224*7c478bd9Sstevel@tonic-gate while (++fileindex < nsrcoffset && num > srcoffset[fileindex]) { 225*7c478bd9Sstevel@tonic-gate ; 226*7c478bd9Sstevel@tonic-gate } 227*7c478bd9Sstevel@tonic-gate posting.fileindex = --fileindex; 228*7c478bd9Sstevel@tonic-gate posting.type = *++s; 229*7c478bd9Sstevel@tonic-gate num = *++s - '!'; 230*7c478bd9Sstevel@tonic-gate if (*s != '\n') { 231*7c478bd9Sstevel@tonic-gate num = *++s - '!'; 232*7c478bd9Sstevel@tonic-gate while (*++s != '\n') { 233*7c478bd9Sstevel@tonic-gate num = BASE * num + *s - '!'; 234*7c478bd9Sstevel@tonic-gate } 235*7c478bd9Sstevel@tonic-gate posting.fcnoffset = num; 236*7c478bd9Sstevel@tonic-gate } else { 237*7c478bd9Sstevel@tonic-gate posting.fcnoffset = 0; 238*7c478bd9Sstevel@tonic-gate } 239*7c478bd9Sstevel@tonic-gate *postptr++ = posting; 240*7c478bd9Sstevel@tonic-gate #if DEBUG 241*7c478bd9Sstevel@tonic-gate (void) printf("%ld %ld %ld %ld\n", posting.fileindex, 242*7c478bd9Sstevel@tonic-gate posting.fcnoffset, posting.lineoffset, posting.type); 243*7c478bd9Sstevel@tonic-gate (void) fflush(stdout); 244*7c478bd9Sstevel@tonic-gate #endif 245*7c478bd9Sstevel@tonic-gate } 246*7c478bd9Sstevel@tonic-gate if (!invnewterm()) { 247*7c478bd9Sstevel@tonic-gate return (0); 248*7c478bd9Sstevel@tonic-gate } 
249*7c478bd9Sstevel@tonic-gate /* now clean up final block */ 250*7c478bd9Sstevel@tonic-gate logicalblk.invblk[0] = numinvitems; 251*7c478bd9Sstevel@tonic-gate /* loops pointer around to start */ 252*7c478bd9Sstevel@tonic-gate logicalblk.invblk[1] = 0; 253*7c478bd9Sstevel@tonic-gate logicalblk.invblk[2] = numlogblk - 1; 254*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) { 255*7c478bd9Sstevel@tonic-gate goto cannotwrite; 256*7c478bd9Sstevel@tonic-gate } 257*7c478bd9Sstevel@tonic-gate numlogblk++; 258*7c478bd9Sstevel@tonic-gate /* write out block to save space. what in it doesn't matter */ 259*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) { 260*7c478bd9Sstevel@tonic-gate goto cannotwrite; 261*7c478bd9Sstevel@tonic-gate } 262*7c478bd9Sstevel@tonic-gate /* finish up the super finger */ 263*7c478bd9Sstevel@tonic-gate *SUPINT = numlogblk; 264*7c478bd9Sstevel@tonic-gate /* add to the offsets the size of the offset pointers */ 265*7c478bd9Sstevel@tonic-gate intptr = (SUPINT + 1); 266*7c478bd9Sstevel@tonic-gate i = (char *)supint - (char *)SUPINT; 267*7c478bd9Sstevel@tonic-gate while (intptr < supint) 268*7c478bd9Sstevel@tonic-gate *intptr++ += i; 269*7c478bd9Sstevel@tonic-gate /* write out the offsets (1 for the N at start) and the super finger */ 270*7c478bd9Sstevel@tonic-gate if (fwrite((char *)SUPINT, sizeof (*SUPINT), numlogblk + 1, 271*7c478bd9Sstevel@tonic-gate outfile) == 0 || 272*7c478bd9Sstevel@tonic-gate fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) { 273*7c478bd9Sstevel@tonic-gate goto cannotwrite; 274*7c478bd9Sstevel@tonic-gate } 275*7c478bd9Sstevel@tonic-gate /* save the size for reference later */ 276*7c478bd9Sstevel@tonic-gate nextsupfing = sizeof (long) + sizeof (long) * numlogblk + 277*7c478bd9Sstevel@tonic-gate (supfing - SUPFING); 278*7c478bd9Sstevel@tonic-gate /* 279*7c478bd9Sstevel@tonic-gate * make sure the file ends at a logical block boundary. 
This is 280*7c478bd9Sstevel@tonic-gate * necessary for invinsert to correctly create extended blocks 281*7c478bd9Sstevel@tonic-gate */ 282*7c478bd9Sstevel@tonic-gate i = nextsupfing % BLOCKSIZE; 283*7c478bd9Sstevel@tonic-gate /* write out junk to fill log blk */ 284*7c478bd9Sstevel@tonic-gate if (fwrite(SUPFING, BLOCKSIZE - i, 1, outfile) == 0 || 285*7c478bd9Sstevel@tonic-gate fflush(outfile) == EOF) { 286*7c478bd9Sstevel@tonic-gate /* rewind doesn't check for write failure */ 287*7c478bd9Sstevel@tonic-gate goto cannotwrite; 288*7c478bd9Sstevel@tonic-gate } 289*7c478bd9Sstevel@tonic-gate /* write the control area */ 290*7c478bd9Sstevel@tonic-gate rewind(outfile); 291*7c478bd9Sstevel@tonic-gate param.version = VERSION; 292*7c478bd9Sstevel@tonic-gate param.filestat = 0; 293*7c478bd9Sstevel@tonic-gate param.sizeblk = BLOCKSIZE; 294*7c478bd9Sstevel@tonic-gate param.startbyte = (numlogblk + 1) * BLOCKSIZE + BUFSIZ; 295*7c478bd9Sstevel@tonic-gate param.supsize = nextsupfing; 296*7c478bd9Sstevel@tonic-gate param.cntlsize = BUFSIZ; 297*7c478bd9Sstevel@tonic-gate param.share = 0; 298*7c478bd9Sstevel@tonic-gate if (fwrite((char *)¶m, sizeof (param), 1, outfile) == 0) { 299*7c478bd9Sstevel@tonic-gate goto cannotwrite; 300*7c478bd9Sstevel@tonic-gate } 301*7c478bd9Sstevel@tonic-gate for (i = 0; i < 10; i++) /* for future use */ 302*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&zerolong, sizeof (zerolong), 303*7c478bd9Sstevel@tonic-gate 1, outfile) == 0) { 304*7c478bd9Sstevel@tonic-gate goto cannotwrite; 305*7c478bd9Sstevel@tonic-gate } 306*7c478bd9Sstevel@tonic-gate 307*7c478bd9Sstevel@tonic-gate /* make first block loop backwards to last block */ 308*7c478bd9Sstevel@tonic-gate if (fflush(outfile) == EOF) { 309*7c478bd9Sstevel@tonic-gate /* fseek doesn't check for write failure */ 310*7c478bd9Sstevel@tonic-gate goto cannotwrite; 311*7c478bd9Sstevel@tonic-gate } 312*7c478bd9Sstevel@tonic-gate /* get to second word first block */ 313*7c478bd9Sstevel@tonic-gate (void) 
fseek(outfile, (long)BUFSIZ + 8, 0); 314*7c478bd9Sstevel@tonic-gate tlong = numlogblk - 1; 315*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&tlong, sizeof (tlong), 1, outfile) == 0 || 316*7c478bd9Sstevel@tonic-gate fclose(outfile) == EOF) { 317*7c478bd9Sstevel@tonic-gate cannotwrite: 318*7c478bd9Sstevel@tonic-gate invcannotwrite(invname); 319*7c478bd9Sstevel@tonic-gate return (0); 320*7c478bd9Sstevel@tonic-gate } 321*7c478bd9Sstevel@tonic-gate if (fclose(fpost) == EOF) { 322*7c478bd9Sstevel@tonic-gate invcannotwrite(postingfile); 323*7c478bd9Sstevel@tonic-gate return (0); 324*7c478bd9Sstevel@tonic-gate } 325*7c478bd9Sstevel@tonic-gate --totterm; /* don't count null term */ 326*7c478bd9Sstevel@tonic-gate #if STATS 327*7c478bd9Sstevel@tonic-gate (void) printf("logical blocks = %d, postings = %ld, terms = %ld, " 328*7c478bd9Sstevel@tonic-gate "max term length = %d\n", numlogblk, totpost, totterm, maxtermlen); 329*7c478bd9Sstevel@tonic-gate if (showzipf) { 330*7c478bd9Sstevel@tonic-gate (void) printf( 331*7c478bd9Sstevel@tonic-gate "\n************* ZIPF curve ****************\n"); 332*7c478bd9Sstevel@tonic-gate for (j = ZIPFSIZE; j > 1; j--) 333*7c478bd9Sstevel@tonic-gate if (zipf[j]) 334*7c478bd9Sstevel@tonic-gate break; 335*7c478bd9Sstevel@tonic-gate for (i = 1; i < j; ++i) { 336*7c478bd9Sstevel@tonic-gate (void) printf("%3d -%6d ", i, zipf[i]); 337*7c478bd9Sstevel@tonic-gate if (i % 6 == 0) (void) putchar('\n'); 338*7c478bd9Sstevel@tonic-gate } 339*7c478bd9Sstevel@tonic-gate (void) printf(">%d-%6d\n", ZIPFSIZE, zipf[0]); 340*7c478bd9Sstevel@tonic-gate } 341*7c478bd9Sstevel@tonic-gate #endif 342*7c478bd9Sstevel@tonic-gate /* free all malloc'd memory */ 343*7c478bd9Sstevel@tonic-gate free(POST); 344*7c478bd9Sstevel@tonic-gate free(SUPFING); 345*7c478bd9Sstevel@tonic-gate free(SUPINT); 346*7c478bd9Sstevel@tonic-gate return (totterm); 347*7c478bd9Sstevel@tonic-gate } 348*7c478bd9Sstevel@tonic-gate 349*7c478bd9Sstevel@tonic-gate /* add a term to the data base */ 
350*7c478bd9Sstevel@tonic-gate 351*7c478bd9Sstevel@tonic-gate static int 352*7c478bd9Sstevel@tonic-gate invnewterm(void) 353*7c478bd9Sstevel@tonic-gate { 354*7c478bd9Sstevel@tonic-gate int backupflag, i, j, maxback, holditems, gooditems, howfar; 355*7c478bd9Sstevel@tonic-gate int len, numwilluse, wdlen; 356*7c478bd9Sstevel@tonic-gate char *tptr, *tptr2, *tptr3; 357*7c478bd9Sstevel@tonic-gate union { 358*7c478bd9Sstevel@tonic-gate unsigned long packword[2]; 359*7c478bd9Sstevel@tonic-gate ENTRY e; 360*7c478bd9Sstevel@tonic-gate } iteminfo; 361*7c478bd9Sstevel@tonic-gate 362*7c478bd9Sstevel@tonic-gate totterm++; 363*7c478bd9Sstevel@tonic-gate #if STATS 364*7c478bd9Sstevel@tonic-gate /* keep zipfian info on the distribution */ 365*7c478bd9Sstevel@tonic-gate if (numpost <= ZIPFSIZE) 366*7c478bd9Sstevel@tonic-gate zipf[numpost]++; 367*7c478bd9Sstevel@tonic-gate else 368*7c478bd9Sstevel@tonic-gate zipf[0]++; 369*7c478bd9Sstevel@tonic-gate #endif 370*7c478bd9Sstevel@tonic-gate len = strlen(thisterm); 371*7c478bd9Sstevel@tonic-gate wdlen = (len + (sizeof (long) - 1)) / sizeof (long); 372*7c478bd9Sstevel@tonic-gate numwilluse = (wdlen + 3) * sizeof (long); 373*7c478bd9Sstevel@tonic-gate /* new block if at least 1 item in block */ 374*7c478bd9Sstevel@tonic-gate if (numinvitems && numwilluse + amtused > BLOCKSIZE) { 375*7c478bd9Sstevel@tonic-gate /* set up new block */ 376*7c478bd9Sstevel@tonic-gate if (supfing + 500 > SUPFING + supersize) { 377*7c478bd9Sstevel@tonic-gate i = supfing - SUPFING; 378*7c478bd9Sstevel@tonic-gate supersize += 20000; 379*7c478bd9Sstevel@tonic-gate if ((SUPFING = realloc(SUPFING, supersize)) == NULL) { 380*7c478bd9Sstevel@tonic-gate invcannotalloc(supersize); 381*7c478bd9Sstevel@tonic-gate return (0); 382*7c478bd9Sstevel@tonic-gate } 383*7c478bd9Sstevel@tonic-gate supfing = i + SUPFING; 384*7c478bd9Sstevel@tonic-gate #if DEBUG 385*7c478bd9Sstevel@tonic-gate (void) printf("reallocated superfinger space to %d, " 386*7c478bd9Sstevel@tonic-gate 
"totpost=%ld\n", supersize, totpost); 387*7c478bd9Sstevel@tonic-gate #endif 388*7c478bd9Sstevel@tonic-gate } 389*7c478bd9Sstevel@tonic-gate /* check that room for the offset as well */ 390*7c478bd9Sstevel@tonic-gate if ((numlogblk + 10) > supintsize) { 391*7c478bd9Sstevel@tonic-gate i = supint - SUPINT; 392*7c478bd9Sstevel@tonic-gate supintsize += SUPERINC; 393*7c478bd9Sstevel@tonic-gate if ((SUPINT = realloc((char *)SUPINT, 394*7c478bd9Sstevel@tonic-gate supintsize * sizeof (long))) == NULL) { 395*7c478bd9Sstevel@tonic-gate invcannotalloc(supintsize * sizeof (long)); 396*7c478bd9Sstevel@tonic-gate return (0); 397*7c478bd9Sstevel@tonic-gate } 398*7c478bd9Sstevel@tonic-gate supint = i + SUPINT; 399*7c478bd9Sstevel@tonic-gate #if DEBUG 400*7c478bd9Sstevel@tonic-gate (void) printf("reallocated superfinger offset to %d, " 401*7c478bd9Sstevel@tonic-gate "totpost = %ld\n", supintsize * sizeof (long), 402*7c478bd9Sstevel@tonic-gate totpost); 403*7c478bd9Sstevel@tonic-gate #endif 404*7c478bd9Sstevel@tonic-gate } 405*7c478bd9Sstevel@tonic-gate /* See if backup is efficatious */ 406*7c478bd9Sstevel@tonic-gate backupflag = 0; 407*7c478bd9Sstevel@tonic-gate maxback = strlen(thisterm) / 10; 408*7c478bd9Sstevel@tonic-gate holditems = numinvitems; 409*7c478bd9Sstevel@tonic-gate if (maxback > numinvitems) 410*7c478bd9Sstevel@tonic-gate maxback = numinvitems - 2; 411*7c478bd9Sstevel@tonic-gate howfar = 0; 412*7c478bd9Sstevel@tonic-gate while (--maxback > 0) { 413*7c478bd9Sstevel@tonic-gate howfar++; 414*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] = 415*7c478bd9Sstevel@tonic-gate logicalblk.invblk[--holditems * 2 + 416*7c478bd9Sstevel@tonic-gate (sizeof (long) - 1)]; 417*7c478bd9Sstevel@tonic-gate if ((i = iteminfo.e.size / 10) < maxback) { 418*7c478bd9Sstevel@tonic-gate maxback = i; 419*7c478bd9Sstevel@tonic-gate backupflag = howfar; 420*7c478bd9Sstevel@tonic-gate gooditems = holditems; 421*7c478bd9Sstevel@tonic-gate tptr2 = logicalblk.chrblk + iteminfo.e.offset; 
422*7c478bd9Sstevel@tonic-gate } 423*7c478bd9Sstevel@tonic-gate } 424*7c478bd9Sstevel@tonic-gate /* see if backup will occur */ 425*7c478bd9Sstevel@tonic-gate if (backupflag) { 426*7c478bd9Sstevel@tonic-gate numinvitems = gooditems; 427*7c478bd9Sstevel@tonic-gate } 428*7c478bd9Sstevel@tonic-gate logicalblk.invblk[0] = numinvitems; 429*7c478bd9Sstevel@tonic-gate /* set forward pointer pointing to next */ 430*7c478bd9Sstevel@tonic-gate logicalblk.invblk[1] = numlogblk + 1; 431*7c478bd9Sstevel@tonic-gate /* set back pointer to last block */ 432*7c478bd9Sstevel@tonic-gate logicalblk.invblk[2] = numlogblk - 1; 433*7c478bd9Sstevel@tonic-gate if (fwrite((char *)logicalblk.chrblk, 1, 434*7c478bd9Sstevel@tonic-gate BLOCKSIZE, outfile) == 0) { 435*7c478bd9Sstevel@tonic-gate invcannotwrite(indexfile); 436*7c478bd9Sstevel@tonic-gate return (0); 437*7c478bd9Sstevel@tonic-gate } 438*7c478bd9Sstevel@tonic-gate amtused = 16; 439*7c478bd9Sstevel@tonic-gate numlogblk++; 440*7c478bd9Sstevel@tonic-gate /* check if had to back up, if so do it */ 441*7c478bd9Sstevel@tonic-gate if (backupflag) { 442*7c478bd9Sstevel@tonic-gate /* find out where the end of the new block is */ 443*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] = 444*7c478bd9Sstevel@tonic-gate logicalblk.invblk[numinvitems * 2 + 1]; 445*7c478bd9Sstevel@tonic-gate tptr3 = logicalblk.chrblk + iteminfo.e.offset; 446*7c478bd9Sstevel@tonic-gate /* move the index for this block */ 447*7c478bd9Sstevel@tonic-gate for (i = 3; i <= (backupflag * 2 + 2); i++) { 448*7c478bd9Sstevel@tonic-gate logicalblk.invblk[i] = 449*7c478bd9Sstevel@tonic-gate logicalblk.invblk[numinvitems * 2+i]; 450*7c478bd9Sstevel@tonic-gate } 451*7c478bd9Sstevel@tonic-gate /* move the word into the super index */ 452*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] = logicalblk.invblk[3]; 453*7c478bd9Sstevel@tonic-gate iteminfo.packword[1] = logicalblk.invblk[4]; 454*7c478bd9Sstevel@tonic-gate tptr2 = logicalblk.chrblk + iteminfo.e.offset; 
455*7c478bd9Sstevel@tonic-gate (void) strncpy(supfing, tptr2, (int)iteminfo.e.size); 456*7c478bd9Sstevel@tonic-gate *(supfing + iteminfo.e.size) = '\0'; 457*7c478bd9Sstevel@tonic-gate #if DEBUG 458*7c478bd9Sstevel@tonic-gate (void) printf("backup %d at term=%s to term=%s\n", 459*7c478bd9Sstevel@tonic-gate backupflag, thisterm, supfing); 460*7c478bd9Sstevel@tonic-gate #endif 461*7c478bd9Sstevel@tonic-gate *supint++ = nextsupfing; 462*7c478bd9Sstevel@tonic-gate nextsupfing += strlen(supfing) + 1; 463*7c478bd9Sstevel@tonic-gate supfing += strlen(supfing) + 1; 464*7c478bd9Sstevel@tonic-gate /* now fix up the logical block */ 465*7c478bd9Sstevel@tonic-gate tptr = logicalblk.chrblk + lastinblk; 466*7c478bd9Sstevel@tonic-gate lastinblk = BLOCKSIZE; 467*7c478bd9Sstevel@tonic-gate tptr2 = logicalblk.chrblk + lastinblk; 468*7c478bd9Sstevel@tonic-gate j = tptr3 - tptr; 469*7c478bd9Sstevel@tonic-gate while (tptr3 > tptr) 470*7c478bd9Sstevel@tonic-gate *--tptr2 = *--tptr3; 471*7c478bd9Sstevel@tonic-gate lastinblk -= j; 472*7c478bd9Sstevel@tonic-gate amtused += (8 * backupflag + j); 473*7c478bd9Sstevel@tonic-gate for (i = 3; i < (backupflag * 2 + 2); i += 2) { 474*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] = logicalblk.invblk[i]; 475*7c478bd9Sstevel@tonic-gate iteminfo.e.offset += (tptr2 - tptr3); 476*7c478bd9Sstevel@tonic-gate logicalblk.invblk[i] = iteminfo.packword[0]; 477*7c478bd9Sstevel@tonic-gate } 478*7c478bd9Sstevel@tonic-gate numinvitems = backupflag; 479*7c478bd9Sstevel@tonic-gate } else { /* no backup needed */ 480*7c478bd9Sstevel@tonic-gate numinvitems = 0; 481*7c478bd9Sstevel@tonic-gate lastinblk = BLOCKSIZE; 482*7c478bd9Sstevel@tonic-gate /* add new term to superindex */ 483*7c478bd9Sstevel@tonic-gate (void) strcpy(supfing, thisterm); 484*7c478bd9Sstevel@tonic-gate supfing += strlen(thisterm) + 1; 485*7c478bd9Sstevel@tonic-gate *supint++ = nextsupfing; 486*7c478bd9Sstevel@tonic-gate nextsupfing += strlen(thisterm) + 1; 487*7c478bd9Sstevel@tonic-gate } 
488*7c478bd9Sstevel@tonic-gate } 489*7c478bd9Sstevel@tonic-gate lastinblk -= (numwilluse - 8); 490*7c478bd9Sstevel@tonic-gate iteminfo.e.offset = lastinblk; 491*7c478bd9Sstevel@tonic-gate iteminfo.e.size = (char)len; 492*7c478bd9Sstevel@tonic-gate iteminfo.e.space = 0; 493*7c478bd9Sstevel@tonic-gate iteminfo.e.post = numpost; 494*7c478bd9Sstevel@tonic-gate (void) strncpy(logicalblk.chrblk + lastinblk, thisterm, len); 495*7c478bd9Sstevel@tonic-gate amtused += numwilluse; 496*7c478bd9Sstevel@tonic-gate logicalblk.invblk[(lastinblk/sizeof (long))+wdlen] = nextpost; 497*7c478bd9Sstevel@tonic-gate if ((i = postptr - POST) > 0) { 498*7c478bd9Sstevel@tonic-gate if (fwrite((char *)POST, sizeof (POSTING), i, fpost) == 0) { 499*7c478bd9Sstevel@tonic-gate invcannotwrite(postingfile); 500*7c478bd9Sstevel@tonic-gate return (0); 501*7c478bd9Sstevel@tonic-gate } 502*7c478bd9Sstevel@tonic-gate nextpost += i * sizeof (POSTING); 503*7c478bd9Sstevel@tonic-gate } 504*7c478bd9Sstevel@tonic-gate logicalblk.invblk[3+2*numinvitems++] = iteminfo.packword[0]; 505*7c478bd9Sstevel@tonic-gate logicalblk.invblk[2+2*numinvitems] = iteminfo.packword[1]; 506*7c478bd9Sstevel@tonic-gate return (1); 507*7c478bd9Sstevel@tonic-gate } 508*7c478bd9Sstevel@tonic-gate 509*7c478bd9Sstevel@tonic-gate static void 510*7c478bd9Sstevel@tonic-gate swap_ints(void *p, size_t sz) 511*7c478bd9Sstevel@tonic-gate { 512*7c478bd9Sstevel@tonic-gate uint32_t *s; 513*7c478bd9Sstevel@tonic-gate uint32_t *e = (uint32_t *)p + (sz / sizeof (uint32_t)); 514*7c478bd9Sstevel@tonic-gate 515*7c478bd9Sstevel@tonic-gate for (s = p; s < e; s++) 516*7c478bd9Sstevel@tonic-gate *s = BSWAP_32(*s); 517*7c478bd9Sstevel@tonic-gate } 518*7c478bd9Sstevel@tonic-gate 519*7c478bd9Sstevel@tonic-gate static void 520*7c478bd9Sstevel@tonic-gate write_param(INVCONTROL *invcntl) 521*7c478bd9Sstevel@tonic-gate { 522*7c478bd9Sstevel@tonic-gate if (invcntl->swap) 523*7c478bd9Sstevel@tonic-gate swap_ints(&invcntl->param, sizeof (invcntl->param)); 
524*7c478bd9Sstevel@tonic-gate 525*7c478bd9Sstevel@tonic-gate rewind(invcntl->invfile); 526*7c478bd9Sstevel@tonic-gate (void) fwrite((char *)&invcntl->param, sizeof (invcntl->param), 1, 527*7c478bd9Sstevel@tonic-gate invcntl->invfile); 528*7c478bd9Sstevel@tonic-gate 529*7c478bd9Sstevel@tonic-gate if (invcntl->swap) 530*7c478bd9Sstevel@tonic-gate swap_ints(&invcntl->param, sizeof (invcntl->param)); 531*7c478bd9Sstevel@tonic-gate }

/*
 * read_superfinger reads param.supsize bytes of superfinger from offset
 * param.startbyte of the inverted file into invcntl->iindex, which must
 * already point at a buffer of at least that size.
 *
 * The result of the read is not checked.  When invcntl->swap is set the
 * leading count and the offsets after it are byte-swapped in place; the
 * string table that follows is left as read.
 */
static void
read_superfinger(INVCONTROL *invcntl)
{
	size_t count;

	(void) fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
	(void) fread(invcntl->iindex, (int)invcntl->param.supsize,
	    1, invcntl->invfile);

	if (invcntl->swap) {
		/*
		 * The superfinger consists of a count, followed by
		 * count offsets, followed by a string table (which
		 * the offsets reference).
		 *
		 * We need to swap the count and the offsets.
		 */
		count = 1 + BSWAP_32(*(uint32_t *)invcntl->iindex);
		swap_ints(invcntl->iindex, count * sizeof (unsigned long));
	}
}

/*
 * read_logblock makes logical block `block' the contents of
 * invcntl->logblk and records its number in invcntl->numblk.
 *
 * Nothing is read when `block' is already the current block, unless
 * param.filestat is INVBUSY or higher, in which case the block is always
 * fetched again.  The result of the read is not checked.  When
 * invcntl->swap is set the three header words, every ENTRY, and the
 * posting offset stored after each term string are byte-swapped in place.
 */
static void
read_logblock(INVCONTROL *invcntl, int block)
{
	/* note always fetch it if the file is busy */
	if ((block != invcntl->numblk) ||
	    (invcntl->param.filestat >= INVBUSY)) {
		(void) fseek(invcntl->invfile,
		    (block * invcntl->param.sizeblk) + invcntl->param.cntlsize,
		    SEEK_SET);
		invcntl->numblk = block;
		(void) fread(invcntl->logblk, (int)invcntl->param.sizeblk,
		    1, invcntl->invfile);

		if (invcntl->swap) {
			size_t count;
			ENTRY *ecur, *eend;
			uint32_t *postptr;

			/*
			 * A logblock consists of a count, a next block id,
			 * and a previous block id, followed by count
			 * ENTRYs, followed by alternating strings and
			 * offsets.
			 */
			swap_ints(invcntl->logblk, 3 * sizeof (unsigned long));

			count = *(uint32_t *)invcntl->logblk;

			ecur = (ENTRY *)((uint32_t *)invcntl->logblk + 3);
			eend = ecur + count;

			for (; ecur < eend; ecur++) {
				ecur->offset = BSWAP_16(ecur->offset);
				ecur->post = BSWAP_32(ecur->post);

				/*
				 * After the string is the posting offset.
				 */
				postptr = (uint32_t *)(invcntl->logblk +
				    ecur->offset +
				    P2ROUNDUP(ecur->size, sizeof (long)));

				*postptr = BSWAP_32(*postptr);
			}
		}
	}
}

/*
 * read_next_posting reads one POSTING from the current position of
 * invcntl->postfile into *posting, byte-swapping its lineoffset,
 * fcnoffset and fileindex fields when invcntl->swap is set.  The result
 * of the read is not checked.
 */
void
read_next_posting(INVCONTROL *invcntl, POSTING *posting)
{
	(void) fread((char *)posting, sizeof (*posting), 1, invcntl->postfile);
	if (invcntl->swap) {
		posting->lineoffset = BSWAP_32(posting->lineoffset);
		posting->fcnoffset = BSWAP_32(posting->fcnoffset);
		/*
		 * fileindex is a 24-bit field, so shift it before swapping
		 */
		posting->fileindex = BSWAP_32(posting->fileindex << 8);
	}
}

/*
 * invopen opens the inverted file `invname' and the postings file
 * `invpost', reads and validates the control block, allocates the logical
 * block buffer, loads (or, with SHARE, attaches to) the superfinger and
 * resets the boolean working sets.
 *
 * stat == 0 opens both files with FREAD and refuses a file whose status is
 * INVALONE; any other value opens them with FREADP.  The in-core
 * param.filestat is set to `stat', and the control block is written back
 * when that raises the status found in the file.
 *
 * Returns 1 on success.  On failure a message is printed on stderr,
 * everything opened or allocated here is released, and -1 is returned.
 */
int
invopen(INVCONTROL *invcntl, char *invname, char *invpost, int stat)
{
	int	read_index;
	int	changed;
#if SHARE
	int	shared = 0;	/* iindex is an attached shared segment */
#endif

	if ((invcntl->invfile = vpfopen(invname,
	    ((stat == 0) ? FREAD : FREADP))) == NULL) {
		invcannotopen(invname);
		return (-1);
	}
	if (fread((char *)&invcntl->param, sizeof (invcntl->param), 1,
	    invcntl->invfile) == 0) {
		(void) fprintf(stderr, "%s: empty inverted file\n", argv0);
		goto closeinv;
	}
	/* a byte-swapped VERSION means the index has the other byte order */
	if (invcntl->param.version != VERSION &&
	    BSWAP_32(invcntl->param.version) != VERSION) {
		(void) fprintf(stderr,
		    "%s: cannot read old index format; use -U option to "
		    "force database to rebuild\n", argv0);
		goto closeinv;
	}
	invcntl->swap = (invcntl->param.version != VERSION);
	if (invcntl->swap)
		swap_ints(&invcntl->param, sizeof (invcntl->param));

	if (stat == 0 && invcntl->param.filestat == INVALONE) {
		(void) fprintf(stderr, "%s: inverted file is locked\n", argv0);
		goto closeinv;
	}
	if ((invcntl->postfile = vpfopen(invpost,
	    ((stat == 0) ? FREAD : FREADP))) == NULL) {
		invcannotopen(invpost);
		goto closeinv;
	}
	/* allocate core for a logical block */
	if ((invcntl->logblk = malloc(invcntl->param.sizeblk)) == NULL) {
		invcannotalloc((unsigned)invcntl->param.sizeblk);
		goto closeboth;
	}
	/* allocate for and read in superfinger */
	read_index = 1;
	invcntl->iindex = NULL;
#if SHARE
	if (invcntl->param.share == 1) {
		key_t ftok(), shm_key;
		struct shmid_ds shm_buf;
		char *shmat();
		int shm_id;

		/* see if the shared segment exists */
		shm_key = ftok(invname, 2);
		shm_id = shmget(shm_key, 0, 0);
		/*
		 * Failure simply means (hopefully) that segment doesn't
		 * exist
		 */
		if (shm_id == -1) {
			/*
			 * Have to give general write permission due to Amdahl
			 * not having protected segments
			 */
			shm_id = shmget(shm_key,
			    invcntl->param.supsize + sizeof (long),
			    IPC_CREAT | 0666);
			if (shm_id == -1)
				perror("Could not create shared "
				    "memory segment");
		} else
			read_index = 0;

		if (shm_id != -1) {
			invcntl->iindex = shmat(shm_id, 0,
			    ((read_index) ? 0 : SHM_RDONLY));
			if (invcntl->iindex == (char *)ERR) {
				(void) fprintf(stderr,
				    "%s: shared memory link failed\n", argv0);
				invcntl->iindex = NULL;
				read_index = 1;
			} else {
				shared = 1;
			}
		}
	}
#endif
	if (invcntl->iindex == NULL)
		invcntl->iindex = malloc(invcntl->param.supsize + 16);
	if (invcntl->iindex == NULL) {
		invcannotalloc((unsigned)invcntl->param.supsize);
		free(invcntl->logblk);
		goto closeboth;
	}
	if (read_index) {
		read_superfinger(invcntl);
	}
	invcntl->numblk = -1;
	if (boolready() == -1) {
		/* release the buffers set up above before closing the files */
#if SHARE
		if (shared)
			(void) shmdt(invcntl->iindex);
		else
#endif
			free(invcntl->iindex);
		invcntl->iindex = NULL;
		free(invcntl->logblk);
		invcntl->logblk = NULL;
closeboth:
		(void) fclose(invcntl->postfile);
closeinv:
		(void) fclose(invcntl->invfile);
		return (-1);
	}
	/*
	 * write back out the control block if anything changed; the test
	 * has to be made against the status read from the file, i.e. before
	 * it is overwritten with the caller's value
	 */
	changed = (stat > invcntl->param.filestat);
	invcntl->param.filestat = stat;
	if (changed)
		write_param(invcntl);
	return (1);
}

/*
 * invclose must be called to wrap things up and deallocate core.
 *
 * If the in-core file status is above 0 it is reset to 0 and the control
 * block is written back.  Both files are then closed, the superfinger is
 * detached (SHARE with param.share > 0) or freed, and the logical block
 * buffer is freed.
 */
void
invclose(INVCONTROL *invcntl)
{
	/* write out the control block in case anything changed */
	if (invcntl->param.filestat > 0) {
		invcntl->param.filestat = 0;
		write_param(invcntl);
	}
	(void) fclose(invcntl->invfile);
	(void) fclose(invcntl->postfile);
#if SHARE
	if (invcntl->param.share > 0) {
		shmdt(invcntl->iindex);
		invcntl->iindex = NULL;
	}
#endif
	if (invcntl->iindex != NULL)
		free(invcntl->iindex);
	free(invcntl->logblk);
}

/*
 * invstep steps the inverted file forward one item.
 *
 * keypnt is advanced within the current logical block while entries
 * remain (the first word of the block is its entry count); otherwise the
 * block named by the second header word is read and keypnt restarts at 0.
 */
void
invstep(INVCONTROL *invcntl)
{
	if (invcntl->keypnt < (*(int *)invcntl->logblk - 1)) {
		invcntl->keypnt++;
		return;
	}

	/* move forward a block else wrap */
	read_logblock(invcntl, *(int *)(invcntl->logblk + sizeof (long)));

	invcntl->keypnt = 0;
}
761*7c478bd9Sstevel@tonic-gate 762*7c478bd9Sstevel@tonic-gate /* invforward moves forward one term in the inverted file */ 763*7c478bd9Sstevel@tonic-gate int 764*7c478bd9Sstevel@tonic-gate invforward(INVCONTROL *invcntl) 765*7c478bd9Sstevel@tonic-gate { 766*7c478bd9Sstevel@tonic-gate invstep(invcntl); 767*7c478bd9Sstevel@tonic-gate /* skip things with 0 postings */ 768*7c478bd9Sstevel@tonic-gate while (((ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt)->post == 0) { 769*7c478bd9Sstevel@tonic-gate invstep(invcntl); 770*7c478bd9Sstevel@tonic-gate } 771*7c478bd9Sstevel@tonic-gate /* Check for having wrapped - reached start of inverted file! */ 772*7c478bd9Sstevel@tonic-gate if ((invcntl->numblk == 0) && (invcntl->keypnt == 0)) 773*7c478bd9Sstevel@tonic-gate return (0); 774*7c478bd9Sstevel@tonic-gate return (1); 775*7c478bd9Sstevel@tonic-gate } 776*7c478bd9Sstevel@tonic-gate 777*7c478bd9Sstevel@tonic-gate /* invterm gets the present term from the present logical block */ 778*7c478bd9Sstevel@tonic-gate int 779*7c478bd9Sstevel@tonic-gate invterm(INVCONTROL *invcntl, char *term) 780*7c478bd9Sstevel@tonic-gate { 781*7c478bd9Sstevel@tonic-gate ENTRY * entryptr; 782*7c478bd9Sstevel@tonic-gate 783*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt; 784*7c478bd9Sstevel@tonic-gate (void) strncpy(term, entryptr->offset + invcntl->logblk, 785*7c478bd9Sstevel@tonic-gate (int)entryptr->size); 786*7c478bd9Sstevel@tonic-gate *(term + entryptr->size) = '\0'; 787*7c478bd9Sstevel@tonic-gate return (entryptr->post); 788*7c478bd9Sstevel@tonic-gate } 789*7c478bd9Sstevel@tonic-gate 790*7c478bd9Sstevel@tonic-gate /* invfind searches for an individual item in the inverted file */ 791*7c478bd9Sstevel@tonic-gate long 792*7c478bd9Sstevel@tonic-gate invfind(INVCONTROL *invcntl, char *searchterm) 793*7c478bd9Sstevel@tonic-gate { 794*7c478bd9Sstevel@tonic-gate int imid, ilow, ihigh; 795*7c478bd9Sstevel@tonic-gate long num; 796*7c478bd9Sstevel@tonic-gate int i; 
797*7c478bd9Sstevel@tonic-gate unsigned long *intptr, *intptr2; 798*7c478bd9Sstevel@tonic-gate ENTRY *entryptr; 799*7c478bd9Sstevel@tonic-gate 800*7c478bd9Sstevel@tonic-gate /* make sure it is initialized via invready */ 801*7c478bd9Sstevel@tonic-gate if (invcntl->invfile == 0) 802*7c478bd9Sstevel@tonic-gate return (-1L); 803*7c478bd9Sstevel@tonic-gate 804*7c478bd9Sstevel@tonic-gate /* now search for the appropriate finger block */ 805*7c478bd9Sstevel@tonic-gate intptr = (unsigned long *)invcntl->iindex; 806*7c478bd9Sstevel@tonic-gate 807*7c478bd9Sstevel@tonic-gate ilow = 0; 808*7c478bd9Sstevel@tonic-gate ihigh = *intptr++ - 1; 809*7c478bd9Sstevel@tonic-gate while (ilow <= ihigh) { 810*7c478bd9Sstevel@tonic-gate imid = (ilow + ihigh) / 2; 811*7c478bd9Sstevel@tonic-gate intptr2 = intptr + imid; 812*7c478bd9Sstevel@tonic-gate i = strcmp(searchterm, (invcntl->iindex + *intptr2)); 813*7c478bd9Sstevel@tonic-gate if (i < 0) 814*7c478bd9Sstevel@tonic-gate ihigh = imid - 1; 815*7c478bd9Sstevel@tonic-gate else if (i > 0) 816*7c478bd9Sstevel@tonic-gate ilow = ++imid; 817*7c478bd9Sstevel@tonic-gate else { 818*7c478bd9Sstevel@tonic-gate ilow = imid + 1; 819*7c478bd9Sstevel@tonic-gate break; 820*7c478bd9Sstevel@tonic-gate } 821*7c478bd9Sstevel@tonic-gate } 822*7c478bd9Sstevel@tonic-gate /* be careful about case where searchterm is after last in this block */ 823*7c478bd9Sstevel@tonic-gate imid = (ilow) ? ilow - 1 : 0; 824*7c478bd9Sstevel@tonic-gate 825*7c478bd9Sstevel@tonic-gate /* fetch the appropriate logical block if not in core */ 826*7c478bd9Sstevel@tonic-gate read_logblock(invcntl, imid); 827*7c478bd9Sstevel@tonic-gate 828*7c478bd9Sstevel@tonic-gate srch_ext: 829*7c478bd9Sstevel@tonic-gate /* now find the term in this block. 
tricky this */ 830*7c478bd9Sstevel@tonic-gate intptr = (unsigned long *)invcntl->logblk; 831*7c478bd9Sstevel@tonic-gate 832*7c478bd9Sstevel@tonic-gate ilow = 0; 833*7c478bd9Sstevel@tonic-gate ihigh = *intptr - 1; 834*7c478bd9Sstevel@tonic-gate intptr += 3; 835*7c478bd9Sstevel@tonic-gate num = 0; 836*7c478bd9Sstevel@tonic-gate while (ilow <= ihigh) { 837*7c478bd9Sstevel@tonic-gate imid = (ilow + ihigh) / 2; 838*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *)intptr + imid; 839*7c478bd9Sstevel@tonic-gate i = strncmp(searchterm, (invcntl->logblk + entryptr->offset), 840*7c478bd9Sstevel@tonic-gate (int)entryptr->size); 841*7c478bd9Sstevel@tonic-gate if (i == 0) 842*7c478bd9Sstevel@tonic-gate i = strlen(searchterm) - entryptr->size; 843*7c478bd9Sstevel@tonic-gate if (i < 0) 844*7c478bd9Sstevel@tonic-gate ihigh = imid - 1; 845*7c478bd9Sstevel@tonic-gate else if (i > 0) 846*7c478bd9Sstevel@tonic-gate ilow = ++imid; 847*7c478bd9Sstevel@tonic-gate else { 848*7c478bd9Sstevel@tonic-gate num = entryptr->post; 849*7c478bd9Sstevel@tonic-gate break; 850*7c478bd9Sstevel@tonic-gate } 851*7c478bd9Sstevel@tonic-gate } 852*7c478bd9Sstevel@tonic-gate /* be careful about case where searchterm is after last in this block */ 853*7c478bd9Sstevel@tonic-gate if (imid >= *(long *)invcntl->logblk) { 854*7c478bd9Sstevel@tonic-gate invcntl->keypnt = *(long *)invcntl->logblk; 855*7c478bd9Sstevel@tonic-gate invstep(invcntl); 856*7c478bd9Sstevel@tonic-gate /* note if this happens the term could be in extended block */ 857*7c478bd9Sstevel@tonic-gate if (invcntl->param.startbyte < 858*7c478bd9Sstevel@tonic-gate invcntl->numblk * invcntl->param.sizeblk) 859*7c478bd9Sstevel@tonic-gate goto srch_ext; 860*7c478bd9Sstevel@tonic-gate } else 861*7c478bd9Sstevel@tonic-gate invcntl->keypnt = imid; 862*7c478bd9Sstevel@tonic-gate return (num); 863*7c478bd9Sstevel@tonic-gate } 864*7c478bd9Sstevel@tonic-gate 865*7c478bd9Sstevel@tonic-gate #if DEBUG 866*7c478bd9Sstevel@tonic-gate 867*7c478bd9Sstevel@tonic-gate /* 
invdump dumps the block the term parameter is in */ 868*7c478bd9Sstevel@tonic-gate void 869*7c478bd9Sstevel@tonic-gate invdump(INVCONTROL *invcntl, char *term) 870*7c478bd9Sstevel@tonic-gate { 871*7c478bd9Sstevel@tonic-gate long i, j, n, *longptr; 872*7c478bd9Sstevel@tonic-gate ENTRY * entryptr; 873*7c478bd9Sstevel@tonic-gate char temp[512], *ptr; 874*7c478bd9Sstevel@tonic-gate 875*7c478bd9Sstevel@tonic-gate /* dump superindex if term is "-" */ 876*7c478bd9Sstevel@tonic-gate if (*term == '-') { 877*7c478bd9Sstevel@tonic-gate j = atoi(term + 1); 878*7c478bd9Sstevel@tonic-gate longptr = (long *)invcntl->iindex; 879*7c478bd9Sstevel@tonic-gate n = *longptr++; 880*7c478bd9Sstevel@tonic-gate (void) printf("Superindex dump, num blocks=%ld\n", n); 881*7c478bd9Sstevel@tonic-gate longptr += j; 882*7c478bd9Sstevel@tonic-gate while ((longptr <= ((long *)invcntl->iindex) + n) && 883*7c478bd9Sstevel@tonic-gate invbreak == 0) { 884*7c478bd9Sstevel@tonic-gate (void) printf("%2ld %6ld %s\n", j++, *longptr, 885*7c478bd9Sstevel@tonic-gate invcntl->iindex + *longptr); 886*7c478bd9Sstevel@tonic-gate longptr++; 887*7c478bd9Sstevel@tonic-gate } 888*7c478bd9Sstevel@tonic-gate return; 889*7c478bd9Sstevel@tonic-gate } else if (*term == '#') { 890*7c478bd9Sstevel@tonic-gate j = atoi(term + 1); 891*7c478bd9Sstevel@tonic-gate /* fetch the appropriate logical block */ 892*7c478bd9Sstevel@tonic-gate read_logblock(invcntl, j); 893*7c478bd9Sstevel@tonic-gate } else 894*7c478bd9Sstevel@tonic-gate i = abs((int)invfind(invcntl, term)); 895*7c478bd9Sstevel@tonic-gate longptr = (long *)invcntl->logblk; 896*7c478bd9Sstevel@tonic-gate n = *longptr++; 897*7c478bd9Sstevel@tonic-gate (void) printf("Entry term to invdump=%s, postings=%ld, " 898*7c478bd9Sstevel@tonic-gate "forward ptr=%ld, back ptr=%ld\n", term, i, *(longptr), 899*7c478bd9Sstevel@tonic-gate *(longptr + 1)); 900*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *)(invcntl->logblk + 12); 901*7c478bd9Sstevel@tonic-gate (void) printf("%ld terms in 
this block, block=%ld\n", n, 902*7c478bd9Sstevel@tonic-gate invcntl->numblk); 903*7c478bd9Sstevel@tonic-gate (void) printf("\tterm\t\t\tposts\tsize\toffset\tspace\t1st word\n"); 904*7c478bd9Sstevel@tonic-gate for (j = 0; j < n && invbreak == 0; j++) { 905*7c478bd9Sstevel@tonic-gate ptr = invcntl->logblk + entryptr->offset; 906*7c478bd9Sstevel@tonic-gate (void) strncpy(temp, ptr, (int)entryptr->size); 907*7c478bd9Sstevel@tonic-gate temp[entryptr->size] = '\0'; 908*7c478bd9Sstevel@tonic-gate ptr += (sizeof (long) * 909*7c478bd9Sstevel@tonic-gate (long)((entryptr->size + 910*7c478bd9Sstevel@tonic-gate (sizeof (long) - 1)) / sizeof (long))); 911*7c478bd9Sstevel@tonic-gate (void) printf("%2ld %-24s\t%5ld\t%3d\t%d\t%d\t%ld\n", j, temp, 912*7c478bd9Sstevel@tonic-gate entryptr->post, entryptr->size, entryptr->offset, 913*7c478bd9Sstevel@tonic-gate entryptr->space, *(long *)ptr); 914*7c478bd9Sstevel@tonic-gate entryptr++; 915*7c478bd9Sstevel@tonic-gate } 916*7c478bd9Sstevel@tonic-gate } 917*7c478bd9Sstevel@tonic-gate #endif 918*7c478bd9Sstevel@tonic-gate 919*7c478bd9Sstevel@tonic-gate static int 920*7c478bd9Sstevel@tonic-gate boolready(void) 921*7c478bd9Sstevel@tonic-gate { 922*7c478bd9Sstevel@tonic-gate numitems = 0; 923*7c478bd9Sstevel@tonic-gate if (item1 != NULL) 924*7c478bd9Sstevel@tonic-gate free(item1); 925*7c478bd9Sstevel@tonic-gate setsize1 = SETINC; 926*7c478bd9Sstevel@tonic-gate if ((item1 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) { 927*7c478bd9Sstevel@tonic-gate invcannotalloc(SETINC); 928*7c478bd9Sstevel@tonic-gate return (-1); 929*7c478bd9Sstevel@tonic-gate } 930*7c478bd9Sstevel@tonic-gate if (item2 != NULL) 931*7c478bd9Sstevel@tonic-gate free(item2); 932*7c478bd9Sstevel@tonic-gate setsize2 = SETINC; 933*7c478bd9Sstevel@tonic-gate if ((item2 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) { 934*7c478bd9Sstevel@tonic-gate invcannotalloc(SETINC); 935*7c478bd9Sstevel@tonic-gate return (-1); 936*7c478bd9Sstevel@tonic-gate } 
937*7c478bd9Sstevel@tonic-gate item = item1; 938*7c478bd9Sstevel@tonic-gate enditem = item; 939*7c478bd9Sstevel@tonic-gate return (0); 940*7c478bd9Sstevel@tonic-gate } 941*7c478bd9Sstevel@tonic-gate 942*7c478bd9Sstevel@tonic-gate void 943*7c478bd9Sstevel@tonic-gate boolclear(void) 944*7c478bd9Sstevel@tonic-gate { 945*7c478bd9Sstevel@tonic-gate numitems = 0; 946*7c478bd9Sstevel@tonic-gate item = item1; 947*7c478bd9Sstevel@tonic-gate enditem = item; 948*7c478bd9Sstevel@tonic-gate } 949*7c478bd9Sstevel@tonic-gate 950*7c478bd9Sstevel@tonic-gate POSTING * 951*7c478bd9Sstevel@tonic-gate boolfile(INVCONTROL *invcntl, long *num, int bool) 952*7c478bd9Sstevel@tonic-gate { 953*7c478bd9Sstevel@tonic-gate ENTRY *entryptr; 954*7c478bd9Sstevel@tonic-gate FILE *file; 955*7c478bd9Sstevel@tonic-gate char *ptr; 956*7c478bd9Sstevel@tonic-gate unsigned long *ptr2; 957*7c478bd9Sstevel@tonic-gate POSTING *newitem; 958*7c478bd9Sstevel@tonic-gate POSTING posting; 959*7c478bd9Sstevel@tonic-gate unsigned u; 960*7c478bd9Sstevel@tonic-gate POSTING *newsetp, *set1p; 961*7c478bd9Sstevel@tonic-gate long newsetc, set1c, set2c; 962*7c478bd9Sstevel@tonic-gate 963*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *) (invcntl->logblk + 12) + invcntl->keypnt; 964*7c478bd9Sstevel@tonic-gate ptr = invcntl->logblk + entryptr->offset; 965*7c478bd9Sstevel@tonic-gate ptr2 = ((unsigned long *)ptr) + 966*7c478bd9Sstevel@tonic-gate (entryptr->size + (sizeof (long) - 1)) / sizeof (long); 967*7c478bd9Sstevel@tonic-gate *num = entryptr->post; 968*7c478bd9Sstevel@tonic-gate switch (bool) { 969*7c478bd9Sstevel@tonic-gate case OR: 970*7c478bd9Sstevel@tonic-gate case NOT: 971*7c478bd9Sstevel@tonic-gate if (*num == 0) { 972*7c478bd9Sstevel@tonic-gate *num = numitems; 973*7c478bd9Sstevel@tonic-gate return (item); 974*7c478bd9Sstevel@tonic-gate } 975*7c478bd9Sstevel@tonic-gate } 976*7c478bd9Sstevel@tonic-gate /* make room for the new set */ 977*7c478bd9Sstevel@tonic-gate u = 0; 978*7c478bd9Sstevel@tonic-gate switch (bool) { 
979*7c478bd9Sstevel@tonic-gate case AND: 980*7c478bd9Sstevel@tonic-gate case NOT: 981*7c478bd9Sstevel@tonic-gate newsetp = set1p = item; 982*7c478bd9Sstevel@tonic-gate break; 983*7c478bd9Sstevel@tonic-gate 984*7c478bd9Sstevel@tonic-gate case OR: 985*7c478bd9Sstevel@tonic-gate u = enditem - item; 986*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */ 987*7c478bd9Sstevel@tonic-gate case REVERSENOT: 988*7c478bd9Sstevel@tonic-gate u += *num; 989*7c478bd9Sstevel@tonic-gate if (item == item2) { 990*7c478bd9Sstevel@tonic-gate if (u > setsize1) { 991*7c478bd9Sstevel@tonic-gate u += SETINC; 992*7c478bd9Sstevel@tonic-gate if ((item1 = (POSTING *) realloc(item1, 993*7c478bd9Sstevel@tonic-gate u * sizeof (POSTING))) == NULL) { 994*7c478bd9Sstevel@tonic-gate goto cannotalloc; 995*7c478bd9Sstevel@tonic-gate } 996*7c478bd9Sstevel@tonic-gate setsize1 = u; 997*7c478bd9Sstevel@tonic-gate } 998*7c478bd9Sstevel@tonic-gate newitem = item1; 999*7c478bd9Sstevel@tonic-gate } else { 1000*7c478bd9Sstevel@tonic-gate if (u > setsize2) { 1001*7c478bd9Sstevel@tonic-gate u += SETINC; 1002*7c478bd9Sstevel@tonic-gate if ((item2 = (POSTING *)realloc(item2, 1003*7c478bd9Sstevel@tonic-gate u * sizeof (POSTING))) == NULL) { 1004*7c478bd9Sstevel@tonic-gate cannotalloc: 1005*7c478bd9Sstevel@tonic-gate invcannotalloc(u * sizeof (POSTING)); 1006*7c478bd9Sstevel@tonic-gate (void) boolready(); 1007*7c478bd9Sstevel@tonic-gate *num = -1; 1008*7c478bd9Sstevel@tonic-gate return (NULL); 1009*7c478bd9Sstevel@tonic-gate } 1010*7c478bd9Sstevel@tonic-gate setsize2 = u; 1011*7c478bd9Sstevel@tonic-gate } 1012*7c478bd9Sstevel@tonic-gate newitem = item2; 1013*7c478bd9Sstevel@tonic-gate } 1014*7c478bd9Sstevel@tonic-gate set1p = item; 1015*7c478bd9Sstevel@tonic-gate newsetp = newitem; 1016*7c478bd9Sstevel@tonic-gate } 1017*7c478bd9Sstevel@tonic-gate file = invcntl->postfile; 1018*7c478bd9Sstevel@tonic-gate (void) fseek(file, (long)*ptr2, SEEK_SET); 1019*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 
1020*7c478bd9Sstevel@tonic-gate newsetc = 0; 1021*7c478bd9Sstevel@tonic-gate switch (bool) { 1022*7c478bd9Sstevel@tonic-gate case OR: 1023*7c478bd9Sstevel@tonic-gate /* while something in both sets */ 1024*7c478bd9Sstevel@tonic-gate set1p = item; 1025*7c478bd9Sstevel@tonic-gate newsetp = newitem; 1026*7c478bd9Sstevel@tonic-gate for (set1c = 0, set2c = 0; 1027*7c478bd9Sstevel@tonic-gate set1c < numitems && set2c < *num; newsetc++) { 1028*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) { 1029*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1030*7c478bd9Sstevel@tonic-gate set1c++; 1031*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) { 1032*7c478bd9Sstevel@tonic-gate *newsetp++ = posting; 1033*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1034*7c478bd9Sstevel@tonic-gate set2c++; 1035*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) { 1036*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1037*7c478bd9Sstevel@tonic-gate set1c++; 1038*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) { 1039*7c478bd9Sstevel@tonic-gate *newsetp++ = posting; 1040*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1041*7c478bd9Sstevel@tonic-gate set2c++; 1042*7c478bd9Sstevel@tonic-gate } else { /* identical postings */ 1043*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1044*7c478bd9Sstevel@tonic-gate set1c++; 1045*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1046*7c478bd9Sstevel@tonic-gate set2c++; 1047*7c478bd9Sstevel@tonic-gate } 1048*7c478bd9Sstevel@tonic-gate } 1049*7c478bd9Sstevel@tonic-gate /* find out what ran out and move the rest in */ 1050*7c478bd9Sstevel@tonic-gate if (set1c < numitems) { 1051*7c478bd9Sstevel@tonic-gate newsetc += numitems - set1c; 1052*7c478bd9Sstevel@tonic-gate while (set1c++ < numitems) { 1053*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1054*7c478bd9Sstevel@tonic-gate } 1055*7c478bd9Sstevel@tonic-gate } else { 
1056*7c478bd9Sstevel@tonic-gate while (set2c++ < *num) { 1057*7c478bd9Sstevel@tonic-gate *newsetp++ = posting; 1058*7c478bd9Sstevel@tonic-gate newsetc++; 1059*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1060*7c478bd9Sstevel@tonic-gate } 1061*7c478bd9Sstevel@tonic-gate } 1062*7c478bd9Sstevel@tonic-gate item = newitem; 1063*7c478bd9Sstevel@tonic-gate break; /* end of OR */ 1064*7c478bd9Sstevel@tonic-gate #if 0 1065*7c478bd9Sstevel@tonic-gate case AND: 1066*7c478bd9Sstevel@tonic-gate set1c = 0; 1067*7c478bd9Sstevel@tonic-gate set2c = 0; 1068*7c478bd9Sstevel@tonic-gate while (set1c < numitems && set2c < *num) { 1069*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) { 1070*7c478bd9Sstevel@tonic-gate set1p++; 1071*7c478bd9Sstevel@tonic-gate set1c++; 1072*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) { 1073*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1074*7c478bd9Sstevel@tonic-gate set2c++; 1075*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) { 1076*7c478bd9Sstevel@tonic-gate *set1p++; 1077*7c478bd9Sstevel@tonic-gate set1c++; 1078*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) { 1079*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1080*7c478bd9Sstevel@tonic-gate set2c++; 1081*7c478bd9Sstevel@tonic-gate } else { /* identical postings */ 1082*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1083*7c478bd9Sstevel@tonic-gate newsetc++; 1084*7c478bd9Sstevel@tonic-gate set1c++; 1085*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1086*7c478bd9Sstevel@tonic-gate set2c++; 1087*7c478bd9Sstevel@tonic-gate } 1088*7c478bd9Sstevel@tonic-gate } 1089*7c478bd9Sstevel@tonic-gate break; /* end of AND */ 1090*7c478bd9Sstevel@tonic-gate 1091*7c478bd9Sstevel@tonic-gate case NOT: 1092*7c478bd9Sstevel@tonic-gate set1c = 0; 1093*7c478bd9Sstevel@tonic-gate set2c = 0; 1094*7c478bd9Sstevel@tonic-gate while (set1c < numitems && set2c < 
*num) { 1095*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) { 1096*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1097*7c478bd9Sstevel@tonic-gate newsetc++; 1098*7c478bd9Sstevel@tonic-gate set1c++; 1099*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) { 1100*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1101*7c478bd9Sstevel@tonic-gate set2c++; 1102*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) { 1103*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1104*7c478bd9Sstevel@tonic-gate newsetc++; 1105*7c478bd9Sstevel@tonic-gate set1c++; 1106*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) { 1107*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1108*7c478bd9Sstevel@tonic-gate set2c++; 1109*7c478bd9Sstevel@tonic-gate } else { /* identical postings */ 1110*7c478bd9Sstevel@tonic-gate set1c++; 1111*7c478bd9Sstevel@tonic-gate set1p++; 1112*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1113*7c478bd9Sstevel@tonic-gate set2c++; 1114*7c478bd9Sstevel@tonic-gate } 1115*7c478bd9Sstevel@tonic-gate } 1116*7c478bd9Sstevel@tonic-gate newsetc += numitems - set1c; 1117*7c478bd9Sstevel@tonic-gate while (set1c++ < numitems) { 1118*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++; 1119*7c478bd9Sstevel@tonic-gate } 1120*7c478bd9Sstevel@tonic-gate break; /* end of NOT */ 1121*7c478bd9Sstevel@tonic-gate 1122*7c478bd9Sstevel@tonic-gate case REVERSENOT: /* core NOT incoming set */ 1123*7c478bd9Sstevel@tonic-gate set1c = 0; 1124*7c478bd9Sstevel@tonic-gate set2c = 0; 1125*7c478bd9Sstevel@tonic-gate while (set1c < numitems && set2c < *num) { 1126*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) { 1127*7c478bd9Sstevel@tonic-gate set1p++; 1128*7c478bd9Sstevel@tonic-gate set1c++; 1129*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) { 1130*7c478bd9Sstevel@tonic-gate *newsetp++ = posting; 
1131*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1132*7c478bd9Sstevel@tonic-gate set2c++; 1133*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) { 1134*7c478bd9Sstevel@tonic-gate set1p++; 1135*7c478bd9Sstevel@tonic-gate set1c++; 1136*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) { 1137*7c478bd9Sstevel@tonic-gate *newsetp++ = posting; 1138*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1139*7c478bd9Sstevel@tonic-gate set2c++; 1140*7c478bd9Sstevel@tonic-gate } else { /* identical postings */ 1141*7c478bd9Sstevel@tonic-gate set1c++; 1142*7c478bd9Sstevel@tonic-gate set1p++; 1143*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1144*7c478bd9Sstevel@tonic-gate set2c++; 1145*7c478bd9Sstevel@tonic-gate } 1146*7c478bd9Sstevel@tonic-gate } 1147*7c478bd9Sstevel@tonic-gate while (set2c++ < *num) { 1148*7c478bd9Sstevel@tonic-gate *newsetp++ = posting; 1149*7c478bd9Sstevel@tonic-gate newsetc++; 1150*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting); 1151*7c478bd9Sstevel@tonic-gate } 1152*7c478bd9Sstevel@tonic-gate item = newitem; 1153*7c478bd9Sstevel@tonic-gate break; /* end of REVERSENOT */ 1154*7c478bd9Sstevel@tonic-gate #endif 1155*7c478bd9Sstevel@tonic-gate } 1156*7c478bd9Sstevel@tonic-gate numitems = newsetc; 1157*7c478bd9Sstevel@tonic-gate *num = newsetc; 1158*7c478bd9Sstevel@tonic-gate enditem = (POSTING *)newsetp; 1159*7c478bd9Sstevel@tonic-gate return ((POSTING *)item); 1160*7c478bd9Sstevel@tonic-gate } 1161*7c478bd9Sstevel@tonic-gate 1162*7c478bd9Sstevel@tonic-gate #if 0 1163*7c478bd9Sstevel@tonic-gate POSTING * 1164*7c478bd9Sstevel@tonic-gate boolsave(int clear) 1165*7c478bd9Sstevel@tonic-gate { 1166*7c478bd9Sstevel@tonic-gate int i; 1167*7c478bd9Sstevel@tonic-gate POSTING *ptr; 1168*7c478bd9Sstevel@tonic-gate POSTING *oldstuff, *newstuff; 1169*7c478bd9Sstevel@tonic-gate 1170*7c478bd9Sstevel@tonic-gate if (numitems == 0) { 1171*7c478bd9Sstevel@tonic-gate if (clear) 
1172*7c478bd9Sstevel@tonic-gate boolclear(); 1173*7c478bd9Sstevel@tonic-gate return (NULL); 1174*7c478bd9Sstevel@tonic-gate } 1175*7c478bd9Sstevel@tonic-gate /* 1176*7c478bd9Sstevel@tonic-gate * if clear then give them what we have and use (void) 1177*7c478bd9Sstevel@tonic-gate * boolready to realloc 1178*7c478bd9Sstevel@tonic-gate */ 1179*7c478bd9Sstevel@tonic-gate if (clear) { 1180*7c478bd9Sstevel@tonic-gate ptr = item; 1181*7c478bd9Sstevel@tonic-gate /* free up the space we didn't give them */ 1182*7c478bd9Sstevel@tonic-gate if (item == item1) 1183*7c478bd9Sstevel@tonic-gate item1 = NULL; 1184*7c478bd9Sstevel@tonic-gate else 1185*7c478bd9Sstevel@tonic-gate item2 = NULL; 1186*7c478bd9Sstevel@tonic-gate (void) boolready(); 1187*7c478bd9Sstevel@tonic-gate return (ptr); 1188*7c478bd9Sstevel@tonic-gate } 1189*7c478bd9Sstevel@tonic-gate i = (enditem - item) * sizeof (POSTING) + 100; 1190*7c478bd9Sstevel@tonic-gate if ((ptr = (POSTING *)malloc(i))r == NULL) { 1191*7c478bd9Sstevel@tonic-gate invcannotalloc(i); 1192*7c478bd9Sstevel@tonic-gate return (ptr); 1193*7c478bd9Sstevel@tonic-gate } 1194*7c478bd9Sstevel@tonic-gate /* move present set into place */ 1195*7c478bd9Sstevel@tonic-gate oldstuff = item; 1196*7c478bd9Sstevel@tonic-gate newstuff = ptr; 1197*7c478bd9Sstevel@tonic-gate while (oldstuff < enditem) 1198*7c478bd9Sstevel@tonic-gate *newstuff++ = *oldstuff++; 1199*7c478bd9Sstevel@tonic-gate return (ptr); 1200*7c478bd9Sstevel@tonic-gate } 1201*7c478bd9Sstevel@tonic-gate #endif 1202*7c478bd9Sstevel@tonic-gate 1203*7c478bd9Sstevel@tonic-gate static void 1204*7c478bd9Sstevel@tonic-gate invcannotalloc(size_t n) 1205*7c478bd9Sstevel@tonic-gate { 1206*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: cannot allocate %u bytes\n", argv0, n); 1207*7c478bd9Sstevel@tonic-gate } 1208*7c478bd9Sstevel@tonic-gate 1209*7c478bd9Sstevel@tonic-gate static void 1210*7c478bd9Sstevel@tonic-gate invcannotopen(char *file) 1211*7c478bd9Sstevel@tonic-gate { 
1212*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: cannot open file %s\n", argv0, file); 1213*7c478bd9Sstevel@tonic-gate } 1214*7c478bd9Sstevel@tonic-gate 1215*7c478bd9Sstevel@tonic-gate static void 1216*7c478bd9Sstevel@tonic-gate invcannotwrite(char *file) 1217*7c478bd9Sstevel@tonic-gate { 1218*7c478bd9Sstevel@tonic-gate (void) perror(argv0); /* must be first to preserve errno */ 1219*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: write to file %s failed\n", argv0, file); 1220*7c478bd9Sstevel@tonic-gate } 1221