1 #ifndef __GCDB_H
2 #define __GCDB_H
3 #include <stdlib.h>
4 #include <stddef.h>
5 #include <fcntl.h>
6 #include <sys/stat.h>
7 #include "GBase.h"
8 
#if defined(__WIN32__) || defined(WIN32)
  /* No <sys/mman.h> on this platform: provide the constants and the
     mmap()/munmap() prototypes here. The functions themselves are not
     defined in this header. */

  /* memory protection flags */
  #define PROT_READ  1
  #define PROT_WRITE  2
  #define PROT_READWRITE  3

  /* mapping type flags */
  #define MAP_SHARED  1
  #define MAP_PRIVATE  2

  /* access-mode constants */
  #define F_OK 0
  #define R_OK 4
  #define W_OK 2
  #define RW_OK 6

  /* value returned by mmap() on failure */
  #if !defined(MAP_FAILED)
  #define MAP_FAILED      ((void *) -1)
  #endif

  void *mmap(char *addr, size_t len, int prot, int flags, int fd, off_t offset);
  int   munmap(void *addr, size_t len);
#else
  /* everywhere else the system header supplies all of the above */
  #include <sys/mman.h>
#endif
28 
29 #define MAX_UINT 0xFFFFFFFFUL
30 
31 
32 //=====================================================
33 //-------------     buffer stuff    -------------------
34 //=====================================================
35 #define GCDBUFFER_INSIZE 8192
36 #define GCDBUFFER_OUTSIZE 8192
37 
38 
// Pointer to an I/O function taking (int, char*, size_t) and returning int;
// this is the type of GCDBuffer::op.
typedef int (*opfunc)(int, char*, size_t);

// File position type (an earlier version used unsigned long).
typedef off_t gcdb_seek_pos;

// Conversion function pointers: each takes the address of a stored value.
typedef unsigned int (*uint_conv)(void*); // unsigned int conversion function pointer
typedef off_t (*offt_conv)(void*);        // off_t conversion function pointer


// Active conversion functions --> to platform independent values.
// Both are assigned by the GCDBuffer constructors from endian_test().
extern uint_conv gcvt_uint;
extern offt_conv gcvt_offt;

// Endianness probe: a nonzero result selects the *_sun converters,
// zero selects the *_x86 ones.
int endian_test(void);

// 32-bit unsigned integer conversions
unsigned int uint32_sun(void* x86int);
unsigned int uint32_x86(void* x86int);

// for file offsets: off_t runtime conversions
off_t offt_sun(void* offt);
off_t offt_x86(void* offt);
57 
58 
59 class GCDBuffer {
60  public:
61   char *x;
62   unsigned int p;
63   unsigned int n;
64   int fd;
65   opfunc op;
66 //methods:
GCDBuffer()67   GCDBuffer() {
68     x=NULL;
69     fd=0;
70     op=NULL;
71     n=0;
72     //check endianness
73     gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
74     gcvt_offt=(endian_test())? &offt_sun : &offt_x86;
75     }
GCDBuffer(opfunc aop,int afd,char * buf,unsigned int len)76   GCDBuffer(opfunc aop,int afd,char *buf,unsigned int len) {
77     //check endianness
78     gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
79     gcvt_offt=(endian_test())? &offt_sun : &offt_x86;
80     init(aop, afd, buf, len);
81     }
init(opfunc aop,int afd,char * buf,unsigned int len)82   void init(opfunc aop,int afd,char *buf,unsigned int len) {
83      x=buf;
84      fd=afd;
85      op=aop;
86      p=0;
87      n=len;
88      }
89   int  flush();
90   int  write_all(char* buf, unsigned int pt);
91   int  put(char* buf,unsigned int len);
92   int  putalign(char* buf,unsigned int len);
93   int  putflush(char* buf,unsigned int len);
94   int  puts(char *buf);
95   int  putsalign(char *buf);
96   int  putsflush(char *buf);
97   int  oneRead(char* buf, unsigned int len);
98   int  getthis(char* buf,unsigned int len);
99   int  get(char* buf,unsigned int len);
100   int  bget(char* buf,unsigned int len);
101   int  feed();
102   char *peek();
103   void seek(unsigned int len);
104   int copy(GCDBuffer* bin);
105 };
106 
107 
108 //=====================================================
109 //-------------     cdb utils       -------------------
110 //=====================================================
// errno has to come from <errno.h>: it is specified as a macro expanding to a
// modifiable lvalue (typically thread-local), so declaring it by hand with
// `extern int errno;` is not portable. The header exists on every supported
// platform, so no per-platform #ifdef is needed.
#include <errno.h>
// Project error codes; defined elsewhere.
extern int error_intr;
extern int error_nomem;
extern int error_proto;
121 
122 //additional data to be appended to the cdb file:
123 #define CDBMSK_OPT_MULTI    0x00000001
124 #define CDBMSK_OPT_C        0x00000002
125 #define CDBMSK_OPT_CADD     0x00000004
126 #define CDBMSK_OPT_COMPRESS 0x00000008
127 //creates a compressed version of the database
128 //uses plenty of unions for ensuring compatibility with
129 // the old 'CIDX' info structure
130 
131 //damn, sun and 64bit machines
132 // align this to 64bit -- so sizeof() is misled!
133 #pragma pack(4)
134 // I wish, but stupid gcc 2.95.3 alpha-decosf version does not
135 // recognize this pragma directive !!?
136 //
137 struct cdbInfo {
138     uint32 num_keys;
139     union {
140      uint32 num_records;
141      char oldtag[4]; // 'CIDX' for old tag style
142      };
143     // data file size -- used to be  uint32, now it could be 64bit
144     union {
145      off_t dbsize;
146      uint32 oldnum[2]; //num_keys, num_records
147      };
148     union {
149      uint32 idxflags;
150      uint32 old_dbsize;
151      };
152     union {
153      int dbnamelen;
154      int old_idxflags;
155      };
156       // -- the actual db name precedes this fixed-size record
157     union {
158      char tag[4]; //'CDBX' for new files with LFS
159      uint32 old_dbnamelen;
160      };
161    };
162 #pragma pack()
163 
164 extern int cdbInfoSIZE;
165 
166 void uint32_pack(char *,uint32);
167 void uint32_pack_big(char *,uint32);
168 void uint32_unpack(char *,uint32 *);
169 void uint32_unpack_big(char *,uint32 *);
170 
171 //=====================================================
172 //-------------     cdb index       -------------------
173 //=====================================================
174 
175 #define CDB_HPLIST 1000
176 
177 struct cdb_hp { uint32 h; uint32 p; } ;
178 
179 struct cdb_hplist {
180   struct cdb_hp hp[CDB_HPLIST];
181   struct cdb_hplist *next;
182   int num;
183   };
184 
185 //the index file should always be smaller than 4GB !
186 
187 class GCdbWrite {
188    GCDBuffer* cdbuf;
189    char bspace[8192];
190    char fname[1024];
191    char final[2048];
192    uint32 count[256];
193    uint32 start[256];
194    struct cdb_hplist *head;
195    struct cdb_hp *split; /* includes space for hash */
196    struct cdb_hp *hash;
197    uint32 numentries;
198    uint32 pos; //file position
199    int posplus(uint32 len);
200    int fd; //file descriptor
201   public:
202   //methods:
203    GCdbWrite(int afd); //was: init
204    GCdbWrite(char* fname);
205    ~GCdbWrite();
206    int addbegin(unsigned int keylen,unsigned int datalen);
207    int addend(unsigned int keylen,unsigned int datalen,uint32 h);
208    int addrec(const char *key,unsigned int keylen,char *data,unsigned int datalen);
209    int add(const char *key, char *data, unsigned int datalen);
getNumEntries()210    int getNumEntries() { return numentries; }
211    int finish();
212    int close();
getfd()213    int getfd() { return fd; }
getfile()214    char* getfile() { return fname; }
215 };
216 
217 
218 //=====================================================
219 //-------------        cdb          -------------------
220 //=====================================================
221 
222 #define CDB_HASHSTART 5381
223 
224 uint32 cdb_hashadd(uint32,unsigned char);
225 uint32 cdb_hash(const char *,unsigned int);
226 
227 class GCdbRead {
228   uint32 size; // initialized if map is nonzero
229   uint32 loop; // number of hash slots searched under this key
230   uint32 khash; // initialized if loop is nonzero
231   uint32 kpos; // initialized if loop is nonzero
232   uint32 hpos; // initialized if loop is nonzero
233   uint32 hslots; // initialized if loop is nonzero
234   uint32 dpos; // initialized if cdb_findnext() returns 1
235   uint32 dlen; // initialized if cdb_findnext() returns 1
236   char fname[1024];
237   char *map; // 0 if no map is available
238   int fd;
239  public:
240 //methods:
241   GCdbRead(int fd); //was cdb_init
242   GCdbRead(char* afname); //was cdb_init
243   ~GCdbRead(); //was cdb_free
244   int read(char *,unsigned int,uint32);
245   int match(const char *key, unsigned int len, uint32 pos);
findstart()246   void findstart() { loop =0; }
247   int findnext(const char *key,unsigned int len);
248   int find(const char *key);
datapos()249   int datapos() { return dpos; }
datalen()250   int datalen() { return dlen; }
getfd()251   int getfd() { return fd; }
getfile()252   char* getfile() { return fname; }
253 };
254 
255 class GReadBuf {
256  protected:
257   FILE* f;
258   uchar* buf;
259   int buflen;
260   int bufused; //
261   int bufpos;
262   off_t fpos;
263   bool eof;
264   bool eob;
265 
266   int refill(bool repos=false) {
267    //refill the buffer-----------
268    if (repos && bufpos==0) return 0; //no need to repos
269    if (eof) return 0;
270    int fr=0;
271    if (repos && bufpos<bufused) {
272       int kept=bufused-bufpos;
273       memmove((void*)buf, (void*)(buf+bufpos),kept);
274       fr=(int)fread((void *)(buf+kept), 1, buflen-kept, f);
275       if (fr<buflen-kept) eof=true;
276       buf[kept+fr]='\0';
277       bufused=kept+fr;
278       }
279      else {
280       fr=(int)fread((void *)buf, 1, buflen, f);
281       if (fr<buflen) eof=true;
282       buf[fr]='\0'; //only for text record parsers
283       bufused=fr;
284       }
285    if (feof(f)) eof=true;
286    if (ferror(f)) {
287      GMessage("GReadBuf::refill - error at fread!\n");
288      eof=true;
289      }
290    bufpos=0;
291    fpos+=fr; //bytes read from file so far
292    return fr;
293    }
294  public:
295   GReadBuf(FILE* fin, int bsize=4096) {
296     f=fin;
297     buflen=bsize;
298     GMALLOC(buf,buflen+1);
299     bufpos=0; //current pointer for get function
300     bufused=0;
301     fpos=0;
302     eof=false;
303     eob=false;
304     refill();
305     }
~GReadBuf()306   ~GReadBuf() { GFREE(buf); }
307 
308   //reads len chars from stream into the outbuf
309   //updates bufpos
310   //->returns the number of bytes read
get(uchar * outbuf,int len)311   int get(uchar *outbuf, int len) {
312     if (eob) return 0;
313     int rd=0; //bytes read
314     while (!eob && rd<len) {
315       int to_read=GMIN((bufused-bufpos),(len-rd));
316       memcpy((void*)(outbuf+rd),(void*)(buf+bufpos), to_read);
317       bufpos+=to_read;
318       rd+=to_read;
319       if (bufpos>=bufused) {
320         if (eof) eob=true;
321            else refill();
322         }
323       }//while
324     return rd;
325     }
326 
getStr(uchar * outbuf,int len)327   uchar* getStr(uchar *outbuf, int len) {
328     int rd=get(outbuf,len);
329     if (rd==0) return NULL;
330       else {
331        outbuf[rd]='\0';
332        return outbuf;
333        }
334     }
335 
336   // getc equivalent
getch()337   int getch() {
338     if (eob) return -1;
339     int ch=(int)(uchar)buf[bufpos];
340     bufpos++;
341     if (bufpos>=bufused) {
342         if (eof) eob=true;
343            else refill();
344         }
345     return ch;
346     }
347 
348   //---
isEof()349   bool isEof() { return eob; }
ended()350   bool ended() { return eob; }
getPos()351   off_t getPos() {
352   //returns the virtual file position
353   // = the actual file offset of the byte at bufpos
354     return fpos-(bufused-bufpos);
355     }
356   //skip into the stream the specified number of bytes
skip(int skiplen)357   int skip(int skiplen) {
358    if (eob) return 0;
359    int r=0; //the actual number of bytes skipped
360    while (skiplen && !eob) {
361      int dif=GMIN(bufused-bufpos,skiplen);
362      skiplen-=dif;
363      bufpos+=dif;
364      r+=dif;
365      if (bufpos>=bufused) {
366        if (eof) { eob=true; return r; }
367        refill();
368        }
369      }
370     return r;
371    }
372   //look ahead without updating the read pointer (bufpos)
373   //Cannot peek more than buflen!
peek(uchar * outbuf,int len)374   int peek(uchar* outbuf, int len) {
375     if (eob) return -1;
376     //if (eob || len>buflen) return -1;
377     if (len>bufused-bufpos) refill(true);
378     int mlen=GMIN((bufused-bufpos),len);
379     memcpy((void*)outbuf, (void*)(buf+bufpos), mlen);
380     return mlen;
381     }
382 
peekStr(uchar * outbuf,int len)383   uchar* peekStr(uchar* outbuf, int len) {
384    int rd=peek(outbuf,len);
385    if (rd>0) { outbuf[rd]='\0'; return outbuf; }
386         else return NULL;
387    }
388   //looks ahead to check if what follows matches
389   int peekCmp(char* cmpstr, int cmplen=0) {
390     if (eob) //GError("GReadBuf::peekcmp error: eob!\n");
391          return -2;
392     if (!cmplen) cmplen=strlen(cmpstr);
393     if (cmplen>bufused-bufpos) {
394        refill(true);
395        if (cmplen>bufused-bufpos) return -2;
396        }
397     //use memcmp
398     return memcmp((void*)(buf+bufpos), cmpstr, cmplen);
399     }
400 
401 };
402 
403 //circular line buffer, with read-ahead (peeking) capability
404 class GReadBufLine {
405   protected:
406     struct BufLine {
407         off_t fpos;
408         int len;
409         char* chars;
410         };
411     int bufcap; //total number of lines in the buf array
412     int bufidx; // the "current line" index in buf array
413     bool isEOF;
414     int lno;
415     FILE* file;
416     off_t filepos; //current file/stream offset for the first char of buf[bufidx]
417     BufLine* buf; //array of bufferred lines
418     char* readline(int idx);//read line from file into the buffer
419     int fillbuf();
420     bool isEOB;
421   public:
422     const char* line(); //gets current line and advances the "current line" pointer
423                      //use putLine() to revert/undo this advancement
424     off_t fpos(); //gets current line's byte offset in the file
425                         // does NOT advance the "current line" pointer
426     int   len();  //gets current line's length
427                        // does NOT advance the "current line" pointer
isEof()428     bool isEof() { return isEOB; }
eof()429     bool eof() { return isEOB; }
getfpos()430     off_t getfpos() { return fpos(); }
getline()431     const char* getline() { return line();  }
getLine()432     const char* getLine() { return line();  }
getLen()433     int getLen() { return len();  }
linenumber()434     int linenumber() { return lno; }
lineno()435     int lineno() { return lno; }
getLineNo()436     int getLineNo()  { return lno; }
437     void putLine();
438     GReadBufLine(FILE* stream, int bcap=20) {
439       if (bcap<2) bcap=2; //at least 1 prev line is needed for putLine()
440       bufcap=bcap;
441       bufidx=-1;
442       isEOB=false;
443       isEOF=false;
444       lno=0;
445       GMALLOC(buf, bufcap * sizeof(BufLine));
446       for (int i=0;i<bufcap;i++) {
447           buf[i].chars=NULL;
448           buf[i].fpos=-1;
449           buf[i].len=0;
450           }
451       file=stream;
452       fillbuf();
453       }
~GReadBufLine()454     ~GReadBufLine() {
455       for (int i=0;i<bufcap;i++) {
456           GFREE(buf[i].chars);
457           }
458       GFREE(buf);
459       }
460 };
461 
462 #endif
463