/*
 * Copyright 2003 Kevin B. Hendricks, Stratford, Ontario, Canada
 * And Contributors. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. All modifications to the source code must be clearly marked as
 *    such. Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#ifndef _MYTHES_HXX_
#define _MYTHES_HXX_

// FILE is used for the pdfile member below; include its declaration here so
// this header compiles no matter what the including file pulled in before it.
#include <stdio.h>

// some maximum sizes for buffers
#define MAX_WD_LEN 200
#define MAX_LN_LEN 16384


// a meaning with definition, count of synonyms and synonym list
struct mentry
{
  char*  defn;    /* definition text of this meaning */
  int    count;   /* count of synonyms */
  char** psyns;   /* synonym list */
};


// Thesaurus reader working on an index file and a data file.
class MyThes
{

  int            nw;        /* number of entries in thesaurus */
  char**         list;      /* stores word list */
  unsigned int*  offst;     /* stores offset list */
  char *         encoding;  /* stores text encoding; */

  FILE *         pdfile;    /* NOTE(review): presumably the open data (dat) file - confirm in the implementation */

  // disallow copy-constructor and assignment-operator for now
  // (these, and the default constructor, are declared in the private part of
  // the class, so code outside the class cannot call them)
  MyThes();
  MyThes(const MyThes &);
  MyThes & operator = (const MyThes &);

  // private string helpers; their behaviour is defined in the implementation
  // file, which is not visible from this header
  char *mystrdup(const char * p);
  int   mystr_indexOfChar(const char * d, int c);
  void  mychomp(char * s);


public:
  // construct from the paths of the index file and the data file
  MyThes(const char* idxpath, const char* datpath);
  ~MyThes();

  // lookup text in index and return number of meanings
  // each meaning entry has a definition, synonym count and pointer
  // when complete return the *original* meaning entry and count via
  // CleanUpAfterLookup to properly handle memory deallocation
  // NOTE(review): len is presumably the length of pText - confirm in the
  // implementation

  int Lookup(const char * pText, int len, mentry** pme);

  // hand back the meaning entries and the count obtained from Lookup so the
  // memory can be deallocated properly
  void CleanUpAfterLookup(mentry** pme, int nmean);

  // NOTE(review): presumably returns the stored text encoding - confirm in
  // the implementation
  char* get_th_encoding();

private:
  // Open index and dat files and load list array
  int thInitialize (const char* indxpath, const char* datpath);

  // internal close and cleanup dat and idx files
  int thCleanup ();

  // read a text line (\n terminated) stripping off line terminator
  int readLine(FILE * pf, char * buf, int nc);

  // binary search on null terminated character strings
  int binsearch(char * wrd, char* list[], int nlst);

};

#endif