1 #ifndef __GCDB_H 2 #define __GCDB_H 3 #include <stdlib.h> 4 #include <stddef.h> 5 #include <fcntl.h> 6 #include <sys/stat.h> 7 #include "GBase.h" 8 9 #if defined(__WIN32__) || defined(WIN32) 10 #define PROT_READ 1 11 #define PROT_WRITE 2 12 #define PROT_READWRITE 3 13 #define MAP_SHARED 1 14 #define MAP_PRIVATE 2 15 #define F_OK 0 16 #define R_OK 4 17 #define W_OK 2 18 #define RW_OK 6 19 20 #if !defined(MAP_FAILED) 21 #define MAP_FAILED ((void *) -1) 22 #endif 23 void *mmap(char *,size_t,int,int,int,off_t); 24 int munmap(void *,size_t); 25 #else 26 #include <sys/mman.h> 27 #endif 28 29 #define MAX_UINT 0xFFFFFFFFUL 30 31 32 //===================================================== 33 //------------- buffer stuff ------------------- 34 //===================================================== 35 #define GCDBUFFER_INSIZE 8192 36 #define GCDBUFFER_OUTSIZE 8192 37 38 39 typedef int (*opfunc)(int, char*, size_t); 40 41 //typedef unsigned long gcdb_seek_pos; 42 typedef off_t gcdb_seek_pos; 43 typedef unsigned int (*uint_conv)(void*); //uint conversion function pointer 44 typedef off_t (*offt_conv)(void*); //uint conversion function pointer 45 46 47 //conversion function --> to platform independent uint 48 extern uint_conv gcvt_uint; 49 extern offt_conv gcvt_offt; 50 51 int endian_test(void); 52 unsigned int uint32_sun(void* x86int); 53 unsigned int uint32_x86(void* x86int); 54 //for file offsets: off_t runtime conversions: 55 off_t offt_sun(void* offt); 56 off_t offt_x86(void* offt); 57 58 59 class GCDBuffer { 60 public: 61 char *x; 62 unsigned int p; 63 unsigned int n; 64 int fd; 65 opfunc op; 66 //methods: GCDBuffer()67 GCDBuffer() { 68 x=NULL; 69 fd=0; 70 op=NULL; 71 n=0; 72 //check endianness 73 gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86; 74 gcvt_offt=(endian_test())? &offt_sun : &offt_x86; 75 } GCDBuffer(opfunc aop,int afd,char * buf,unsigned int len)76 GCDBuffer(opfunc aop,int afd,char *buf,unsigned int len) { 77 //check endianness 78 gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86; 79 gcvt_offt=(endian_test())? &offt_sun : &offt_x86; 80 init(aop, afd, buf, len); 81 } init(opfunc aop,int afd,char * buf,unsigned int len)82 void init(opfunc aop,int afd,char *buf,unsigned int len) { 83 x=buf; 84 fd=afd; 85 op=aop; 86 p=0; 87 n=len; 88 } 89 int flush(); 90 int write_all(char* buf, unsigned int pt); 91 int put(char* buf,unsigned int len); 92 int putalign(char* buf,unsigned int len); 93 int putflush(char* buf,unsigned int len); 94 int puts(char *buf); 95 int putsalign(char *buf); 96 int putsflush(char *buf); 97 int oneRead(char* buf, unsigned int len); 98 int getthis(char* buf,unsigned int len); 99 int get(char* buf,unsigned int len); 100 int bget(char* buf,unsigned int len); 101 int feed(); 102 char *peek(); 103 void seek(unsigned int len); 104 int copy(GCDBuffer* bin); 105 }; 106 107 108 //===================================================== 109 //------------- cdb utils ------------------- 110 //===================================================== 111 #ifndef __WIN32__ 112 #ifdef __DragonFly__ 113 #include <errno.h> 114 #else 115 extern int errno; 116 #endif 117 #endif 118 extern int error_intr; 119 extern int error_nomem; 120 extern int error_proto; 121 122 //additional data to be appended to the cdb file: 123 #define CDBMSK_OPT_MULTI 0x00000001 124 #define CDBMSK_OPT_C 0x00000002 125 #define CDBMSK_OPT_CADD 0x00000004 126 #define CDBMSK_OPT_COMPRESS 0x00000008 127 //creates a compressed version of the database 128 //uses plenty of unions for ensuring compatibility with 129 // the old 'CIDX' info structure 130 131 //damn, sun and 64bit machines 132 // align this to 64bit -- so sizeof() is misled! 133 #pragma pack(4) 134 // I wish, but stupid gcc 2.95.3 alpha-decosf version does not 135 // recognize this pragma directive !!? 136 // 137 struct cdbInfo { 138 uint32 num_keys; 139 union { 140 uint32 num_records; 141 char oldtag[4]; // 'CIDX' for old tag style 142 }; 143 // data file size -- used to be uint32, now it could be 64bit 144 union { 145 off_t dbsize; 146 uint32 oldnum[2]; //num_keys, num_records 147 }; 148 union { 149 uint32 idxflags; 150 uint32 old_dbsize; 151 }; 152 union { 153 int dbnamelen; 154 int old_idxflags; 155 }; 156 // -- the actual db name precedes this fixed-size record 157 union { 158 char tag[4]; //'CDBX' for new files with LFS 159 uint32 old_dbnamelen; 160 }; 161 }; 162 #pragma pack() 163 164 extern int cdbInfoSIZE; 165 166 void uint32_pack(char *,uint32); 167 void uint32_pack_big(char *,uint32); 168 void uint32_unpack(char *,uint32 *); 169 void uint32_unpack_big(char *,uint32 *); 170 171 //===================================================== 172 //------------- cdb index ------------------- 173 //===================================================== 174 175 #define CDB_HPLIST 1000 176 177 struct cdb_hp { uint32 h; uint32 p; } ; 178 179 struct cdb_hplist { 180 struct cdb_hp hp[CDB_HPLIST]; 181 struct cdb_hplist *next; 182 int num; 183 }; 184 185 //the index file should always be smaller than 4GB ! 186 187 class GCdbWrite { 188 GCDBuffer* cdbuf; 189 char bspace[8192]; 190 char fname[1024]; 191 char final[2048]; 192 uint32 count[256]; 193 uint32 start[256]; 194 struct cdb_hplist *head; 195 struct cdb_hp *split; /* includes space for hash */ 196 struct cdb_hp *hash; 197 uint32 numentries; 198 uint32 pos; //file position 199 int posplus(uint32 len); 200 int fd; //file descriptor 201 public: 202 //methods: 203 GCdbWrite(int afd); //was: init 204 GCdbWrite(char* fname); 205 ~GCdbWrite(); 206 int addbegin(unsigned int keylen,unsigned int datalen); 207 int addend(unsigned int keylen,unsigned int datalen,uint32 h); 208 int addrec(const char *key,unsigned int keylen,char *data,unsigned int datalen); 209 int add(const char *key, char *data, unsigned int datalen); getNumEntries()210 int getNumEntries() { return numentries; } 211 int finish(); 212 int close(); getfd()213 int getfd() { return fd; } getfile()214 char* getfile() { return fname; } 215 }; 216 217 218 //===================================================== 219 //------------- cdb ------------------- 220 //===================================================== 221 222 #define CDB_HASHSTART 5381 223 224 uint32 cdb_hashadd(uint32,unsigned char); 225 uint32 cdb_hash(const char *,unsigned int); 226 227 class GCdbRead { 228 uint32 size; // initialized if map is nonzero 229 uint32 loop; // number of hash slots searched under this key 230 uint32 khash; // initialized if loop is nonzero 231 uint32 kpos; // initialized if loop is nonzero 232 uint32 hpos; // initialized if loop is nonzero 233 uint32 hslots; // initialized if loop is nonzero 234 uint32 dpos; // initialized if cdb_findnext() returns 1 235 uint32 dlen; // initialized if cdb_findnext() returns 1 236 char fname[1024]; 237 char *map; // 0 if no map is available 238 int fd; 239 public: 240 //methods: 241 GCdbRead(int fd); //was cdb_init 242 GCdbRead(char* afname); //was cdb_init 243 ~GCdbRead(); //was cdb_free 244 int read(char *,unsigned int,uint32); 245 int match(const char *key, unsigned int len, uint32 pos); findstart()246 void findstart() { loop =0; } 247 int findnext(const char *key,unsigned int len); 248 int find(const char *key); datapos()249 int datapos() { return dpos; } datalen()250 int datalen() { return dlen; } getfd()251 int getfd() { return fd; } getfile()252 char* getfile() { return fname; } 253 }; 254 255 class GReadBuf { 256 protected: 257 FILE* f; 258 uchar* buf; 259 int buflen; 260 int bufused; // 261 int bufpos; 262 off_t fpos; 263 bool eof; 264 bool eob; 265 266 int refill(bool repos=false) { 267 //refill the buffer----------- 268 if (repos && bufpos==0) return 0; //no need to repos 269 if (eof) return 0; 270 int fr=0; 271 if (repos && bufpos<bufused) { 272 int kept=bufused-bufpos; 273 memmove((void*)buf, (void*)(buf+bufpos),kept); 274 fr=(int)fread((void *)(buf+kept), 1, buflen-kept, f); 275 if (fr<buflen-kept) eof=true; 276 buf[kept+fr]='\0'; 277 bufused=kept+fr; 278 } 279 else { 280 fr=(int)fread((void *)buf, 1, buflen, f); 281 if (fr<buflen) eof=true; 282 buf[fr]='\0'; //only for text record parsers 283 bufused=fr; 284 } 285 if (feof(f)) eof=true; 286 if (ferror(f)) { 287 GMessage("GReadBuf::refill - error at fread!\n"); 288 eof=true; 289 } 290 bufpos=0; 291 fpos+=fr; //bytes read from file so far 292 return fr; 293 } 294 public: 295 GReadBuf(FILE* fin, int bsize=4096) { 296 f=fin; 297 buflen=bsize; 298 GMALLOC(buf,buflen+1); 299 bufpos=0; //current pointer for get function 300 bufused=0; 301 fpos=0; 302 eof=false; 303 eob=false; 304 refill(); 305 } ~GReadBuf()306 ~GReadBuf() { GFREE(buf); } 307 308 //reads len chars from stream into the outbuf 309 //updates bufpos 310 //->returns the number of bytes read get(uchar * outbuf,int len)311 int get(uchar *outbuf, int len) { 312 if (eob) return 0; 313 int rd=0; //bytes read 314 while (!eob && rd<len) { 315 int to_read=GMIN((bufused-bufpos),(len-rd)); 316 memcpy((void*)(outbuf+rd),(void*)(buf+bufpos), to_read); 317 bufpos+=to_read; 318 rd+=to_read; 319 if (bufpos>=bufused) { 320 if (eof) eob=true; 321 else refill(); 322 } 323 }//while 324 return rd; 325 } 326 getStr(uchar * outbuf,int len)327 uchar* getStr(uchar *outbuf, int len) { 328 int rd=get(outbuf,len); 329 if (rd==0) return NULL; 330 else { 331 outbuf[rd]='\0'; 332 return outbuf; 333 } 334 } 335 336 // getc equivalent getch()337 int getch() { 338 if (eob) return -1; 339 int ch=(int)(uchar)buf[bufpos]; 340 bufpos++; 341 if (bufpos>=bufused) { 342 if (eof) eob=true; 343 else refill(); 344 } 345 return ch; 346 } 347 348 //--- isEof()349 bool isEof() { return eob; } ended()350 bool ended() { return eob; } getPos()351 off_t getPos() { 352 //returns the virtual file position 353 // = the actual file offset of the byte at bufpos 354 return fpos-(bufused-bufpos); 355 } 356 //skip into the stream the specified number of bytes skip(int skiplen)357 int skip(int skiplen) { 358 if (eob) return 0; 359 int r=0; //the actual number of bytes skipped 360 while (skiplen && !eob) { 361 int dif=GMIN(bufused-bufpos,skiplen); 362 skiplen-=dif; 363 bufpos+=dif; 364 r+=dif; 365 if (bufpos>=bufused) { 366 if (eof) { eob=true; return r; } 367 refill(); 368 } 369 } 370 return r; 371 } 372 //look ahead without updating the read pointer (bufpos) 373 //Cannot peek more than buflen! peek(uchar * outbuf,int len)374 int peek(uchar* outbuf, int len) { 375 if (eob) return -1; 376 //if (eob || len>buflen) return -1; 377 if (len>bufused-bufpos) refill(true); 378 int mlen=GMIN((bufused-bufpos),len); 379 memcpy((void*)outbuf, (void*)(buf+bufpos), mlen); 380 return mlen; 381 } 382 peekStr(uchar * outbuf,int len)383 uchar* peekStr(uchar* outbuf, int len) { 384 int rd=peek(outbuf,len); 385 if (rd>0) { outbuf[rd]='\0'; return outbuf; } 386 else return NULL; 387 } 388 //looks ahead to check if what follows matches 389 int peekCmp(char* cmpstr, int cmplen=0) { 390 if (eob) //GError("GReadBuf::peekcmp error: eob!\n"); 391 return -2; 392 if (!cmplen) cmplen=strlen(cmpstr); 393 if (cmplen>bufused-bufpos) { 394 refill(true); 395 if (cmplen>bufused-bufpos) return -2; 396 } 397 //use memcmp 398 return memcmp((void*)(buf+bufpos), cmpstr, cmplen); 399 } 400 401 }; 402 403 //circular line buffer, with read-ahead (peeking) capability 404 class GReadBufLine { 405 protected: 406 struct BufLine { 407 off_t fpos; 408 int len; 409 char* chars; 410 }; 411 int bufcap; //total number of lines in the buf array 412 int bufidx; // the "current line" index in buf array 413 bool isEOF; 414 int lno; 415 FILE* file; 416 off_t filepos; //current file/stream offset for the first char of buf[bufidx] 417 BufLine* buf; //array of bufferred lines 418 char* readline(int idx);//read line from file into the buffer 419 int fillbuf(); 420 bool isEOB; 421 public: 422 const char* line(); //gets current line and advances the "current line" pointer 423 //use putLine() to revert/undo this advancement 424 off_t fpos(); //gets current line's byte offset in the file 425 // does NOT advance the "current line" pointer 426 int len(); //gets current line's length 427 // does NOT advance the "current line" pointer isEof()428 bool isEof() { return isEOB; } eof()429 bool eof() { return isEOB; } getfpos()430 off_t getfpos() { return fpos(); } getline()431 const char* getline() { return line(); } getLine()432 const char* getLine() { return line(); } getLen()433 int getLen() { return len(); } linenumber()434 int linenumber() { return lno; } lineno()435 int lineno() { return lno; } getLineNo()436 int getLineNo() { return lno; } 437 void putLine(); 438 GReadBufLine(FILE* stream, int bcap=20) { 439 if (bcap<2) bcap=2; //at least 1 prev line is needed for putLine() 440 bufcap=bcap; 441 bufidx=-1; 442 isEOB=false; 443 isEOF=false; 444 lno=0; 445 GMALLOC(buf, bufcap * sizeof(BufLine)); 446 for (int i=0;i<bufcap;i++) { 447 buf[i].chars=NULL; 448 buf[i].fpos=-1; 449 buf[i].len=0; 450 } 451 file=stream; 452 fillbuf(); 453 } ~GReadBufLine()454 ~GReadBufLine() { 455 for (int i=0;i<bufcap;i++) { 456 GFREE(buf[i].chars); 457 } 458 GFREE(buf); 459 } 460 }; 461 462 #endif 463