1 /** \file tinydict.h
2     \brief .dict dictionary file support interface
3 
4     Lightweight implementation of .dict support, written from scratch.
5 
6     (c) Vadim Lopatin, 2009
7 
8     This source code is distributed under the terms of
9     GNU General Public License.
10 
11     See LICENSE file for details.
12 
13 
14 
15 	usage:
16 
17 		init:
18 			// create TinyDictionaryList object
19 			TinyDictionaryList dicts;
20 			// register dictionaries using
21 			dicts.add( "/dir1/dict1.index", "/dir1/dict1.dict.dz" );
22 			dicts.add( "/dir1/dict2.index", "/dir1/dict2.dict.dz" );
23 
24 	    search:
25 			// container for results
26 			TinyDictResultList results;
27 		    dicts.find(results, "word", 0 ); // find exact match
28 
29 		process results:
30 			// for each source dictionary that matches pattern
31 			for ( int d = 0; d<results.length(); d++ ) {
32 				TinyDictWordList * words = results.get(d);
33 				printf("dict: %s\n", words->getDictionaryName() );
34 				// for each found word
35 				for ( int i=0; i<words->length(); i++ ) {
36 					TinyDictWord * word = words->get(i);
37 					printf("word: %s\n", word->getWord() );
38 					printf("article: %s\n", words->getArticle( i ) );
39 				}
40 			}
41 */
42 
43 #ifndef TINYDICT_H_INCLUDED
44 #define TINYDICT_H_INCLUDED
45 
46 #include <stdlib.h>
47 #include <stdio.h>
48 #include <string.h>
49 #include <zlib.h>
50 
/// Forward declaration: dictionary (declared later in this header).
class TinyDictionary;
/// Forward declaration: dictionary index file.
class TinyDictIndexFile;
/// Forward declaration: dictionary data file.
class TinyDictDataFile;
57 
/// Word entry of index file.
///
/// Holds a private heap copy of the word text: it is allocated with strdup()
/// in the constructor and released with free() in the destructor.
/// Objects can only be created through the read() factory, and they cannot
/// be copied (a copy would share and later double-free the word buffer).
class TinyDictWord
{
    unsigned index;     // entry number; presumably the value passed to read() -- confirm in implementation
    unsigned indexpos;  // presumably position of the entry inside the index file -- confirm in implementation
    unsigned start;     // presumably start offset of the article in the data file -- confirm in implementation
    unsigned size;      // presumably size of the article in the data file -- confirm in implementation
    char * word;        // owned copy of the word text, may be NULL

    /// Private constructor: stores the numeric fields and duplicates _word.
    /// A NULL _word is kept as NULL instead of being handed to strdup(),
    /// whose behaviour for a NULL argument is undefined.
    TinyDictWord( unsigned _index, unsigned _indexpos, unsigned _start, unsigned _size, const char * _word )
    : index(_index)
    , indexpos(_indexpos)
    , start(_start)
    , size(_size)
    , word( _word ? strdup(_word) : NULL )
    {
    }

    // Copying is disabled: the implicit copy operations would duplicate the
    // raw `word` pointer and both objects would free() it.
    // Declared private and intentionally left undefined.
    TinyDictWord( const TinyDictWord & );
    TinyDictWord & operator = ( const TinyDictWord & );
public:
    /// factory - reading from index file
    static TinyDictWord * read( FILE * f, unsigned index );

    // getters
    /// returns the stored indexpos value
    unsigned getIndexPos() const { return indexpos; }
    /// returns the stored index value
    unsigned getIndex() const { return index; }
    /// returns the stored start value
    unsigned getStart() const { return start; }
    /// returns the stored size value
    unsigned getSize() const { return size; }
    /// returns the word text; the pointer is owned by this object
    /// and stays valid only while the object is alive
    const char * getWord() const { return word; }

    /// compares this entry with str (exact ordering rules are defined in the implementation file)
    int compare( const char * str ) const;
    /// checks whether this entry matches str; `exact` selects the matching mode
    /// (details are defined in the implementation file)
    bool match( const char * str, bool exact ) const;

    /// releases the owned word copy; free(NULL) is a no-op, so no check is needed
    ~TinyDictWord() { free( word ); }
};
90 
91 /// word entry list
92 class TinyDictWordList
93 {
94 	TinyDictionary * dict;
95     TinyDictWord ** list;
96     int size;
97     int count;
98 public:
99 
100 	// article access functions
101 	/// set dictonary pointer list belongs to
setDict(TinyDictionary * p)102 	void setDict( TinyDictionary * p ) { dict = p; }
103 	/// returns word list's dictionary name
104 	const char * getDictionaryName();
105 	/// returns article for word by index
106 	const char * getArticle( int index );
107 
108 	// search functions
109 	/// searches list position by prefix
110     int find( const char * prefix );
111 
112 	// word list functions
113 	/// returns number of words in list
length()114     int length() { return count; }
115 	/// get item by index
get(int index)116     TinyDictWord * get( int index ) { return list[index]; }
117 	/// add word to list
118     void add( TinyDictWord * word );
119 	/// clear list
120     void clear();
121 
122 	/// empty list constructor
123     TinyDictWordList();
124 	/// destructor
125     ~TinyDictWordList();
126 };
127 
/// Search option: exact match of the whole word (this is the default mode).
#define TINY_DICT_OPTION_EXACT_MATCH 0
/// Search option: find words starting with the specified pattern.
#define TINY_DICT_OPTION_STARTS_WITH 1
132 
133 class TinyDictionary
134 {
135 	char * name;
136 	TinyDictDataFile * data;
137 	TinyDictIndexFile * index;
138 public:
139 	/// searches dictionary for specified word, caller is responsible for deleting of returned object
140     TinyDictWordList * find( const char * prefix, int options = 0 );
141 	/// returns short dictionary name
142 	const char * getDictionaryName();
143 	/// get dictionary data pointer
getData()144 	TinyDictDataFile * getData() { return data; }
145 	/// get dictionary index pointer
146 	//TinyDictIndexFile * getIndex() { return index; }
147 	/// minimize memory usage
148 	void compact();
149 	/// open dictonary from files
150 	bool open( const char * indexfile, const char * datafile );
151 	/// empty dictinary constructor
152 	TinyDictionary();
153 	/// destructor
154 	~TinyDictionary();
155 };
156 
157 /// dictionary search result list
158 class TinyDictResultList
159 {
160     TinyDictWordList ** list;
161     int size;
162     int count;
163 public:
164 
165 	// word list functions
166 	/// returns number of words in list
length()167     int length() { return count; }
168 	/// get item by index
get(int index)169     TinyDictWordList * get( int index ) { return list[index]; }
170 	/// remove all dictionaries from list
171 	void clear();
172 	/// create empty list
173 	TinyDictResultList();
174 	/// destructor
175 	~TinyDictResultList();
176 	/// add item to list
177 	void add( TinyDictWordList * p );
178 };
179 
180 
181 /// dictionary list
182 class TinyDictionaryList
183 {
184     TinyDictionary ** list;
185     int size;
186     int count;
187 public:
188 	/// search all dictionaries in list for specified pattern
189 	bool find( TinyDictResultList & result, const char * prefix, int options = 0 );
190 
191 	// word list functions
192 	/// returns number of words in list
length()193     int length() { return count; }
194 	/// get item by index
get(int index)195     TinyDictionary * get( int index ) { return list[index]; }
196 	/// remove all dictionaries from list
197 	void clear();
198 	/// create empty list
199 	TinyDictionaryList();
200 	/// destructor
201 	~TinyDictionaryList();
202 	/// try to open dictionary and add it to list
203 	bool add( const char * indexfile, const char * datafile );
204 };
205 
206 #endif //TINYDICT_H_INCLUDED
207