1 /*
2  * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
3  * And Contributors.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * 3. All modifications to the source code must be clearly marked as
17  *    such.  Binary redistributions based on modified source code
18  *    must be clearly marked as modified versions in the documentation
19  *    and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
25  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 #ifndef _MYTHES_HXX_
36 #define _MYTHES_HXX_
37 
// some maximum sizes for buffers
// NOTE(review): judging by the names, MAX_WD_LEN bounds a single word and
// MAX_LN_LEN bounds a single text line; the buffers that use these limits
// are not declared in this header -- confirm against the implementation.
#define MAX_WD_LEN 200
#define MAX_LN_LEN 16384
41 
42 
/*
 * One meaning of a word: a definition, the number of synonyms that
 * belong to that meaning, and the synonym list itself.
 */
struct mentry
{
    char *defn;     /* definition for this meaning */
    int count;      /* count of synonyms */
    char **psyns;   /* synonym list */
};
50 
51 class MyThes
52 {
53 
54     int  nw;                  /* number of entries in thesaurus */
55     char**  list;               /* stores word list */
56     unsigned int* offst;              /* stores offset list */
57     char *  encoding;           /* stores text encoding; */
58 
59     FILE  *pdfile;
60 
61     // disallow copy-constructor and assignment-operator for now
62     MyThes();
63     MyThes(const MyThes &);
64     MyThes & operator = (const MyThes &);
65 
66     char *mystrdup(const char * p);
67     int mystr_indexOfChar(const char * d, int c);
68     void mychomp(char * s);
69 
70 
71 public:
72     MyThes(const char* idxpath, const char* datpath);
73     ~MyThes();
74 
75     // lookup text in index and return number of meanings
76     // each meaning entry has a defintion, synonym count and pointer
77     // when complete return the *original* meaning entry and count via
78     // CleanUpAfterLookup to properly handle memory deallocation
79 
80     int Lookup(const char * pText, int len, mentry** pme);
81 
82     void CleanUpAfterLookup(mentry** pme, int nmean);
83 
84     char* get_th_encoding();
85 
86 private:
87     // Open index and dat files and load list array
88     int thInitialize (const char* indxpath, const char* datpath);
89 
90     // internal close and cleanup dat and idx files
91     int thCleanup ();
92 
93     // read a text line (\n terminated) stripping off line terminator
94     int readLine(FILE * pf, char * buf, int nc);
95 
96     // binary search on null terminated character strings
97     int binsearch(char * wrd, char* list[], int nlst);
98 
99 };
100 
101 #endif
102