1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * lookup.c - see if a word appears in the dictionary
4  *
5  * Pace Willisson, 1983
6  *
7  * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All modifications to the source code must be clearly marked as
20  *    such.  Binary redistributions based on modified source code
21  *    must be clearly marked as modified versions in the documentation
22  *    and/or other materials provided with the distribution.
23  * 4. All advertising materials mentioning features or use of this software
24  *    must display the following acknowledgment:
25  *      This product includes software developed by Geoff Kuenning and
26  *      other unpaid contributors.
27  * 5. The name of Geoff Kuenning may not be used to endorse or promote
28  *    products derived from this software without specific prior
29  *    written permission.
30  *
31  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41  * SUCH DAMAGE.
42  */
43 
44 /*
45  * $Log$
46  * Revision 1.7  2003/09/25 02:44:48  dom
47  * bug 5813
48  *
49  * Revision 1.6  2003/08/26 13:20:40  dom
50  * ispell crasher fix, implement enchant_dictionary_release
51  *
52  * Revision 1.5  2003/08/26 13:08:03  uwog
53  * Fix segfault when the requested dictionary couldn't be found.
54  *
55  * Revision 1.4  2003/08/14 16:27:36  dom
56  * update some documentation
57  *
58  * Revision 1.3  2003/07/28 20:40:27  dom
59  * fix up the license clause, further win32-registry proof some directory getting functions
60  *
61  * Revision 1.2  2003/07/16 22:52:47  dom
62  * LGPL + exception license
63  *
64  * Revision 1.1  2003/07/15 01:15:07  dom
65  * ispell enchant backend
66  *
67  * Revision 1.3  2003/01/29 05:50:12  hippietrail
68  *
69  * Fixed my mess in EncodingManager.
70  * Changed many C casts to C++ casts.
71  *
72  * Revision 1.2  2003/01/25 03:16:05  hippietrail
73  *
74  * An UT_ICONV_INVALID fix which escaped the last commit.
75  *
76  * Revision 1.1  2003/01/24 05:52:34  hippietrail
77  *
78  * Refactored ispell code. Old ispell global variables had been put into
79  * an allocated structure, a pointer to which was passed to many functions.
80  * I have now made all such functions and variables private members of the
81  * ISpellChecker class. It was C OO, now it's C++ OO.
82  *
83  * I've fixed the makefiles and tested compilation but am unable to test
84  * operation. Please back out my changes if they cause problems which
85  * are not obvious or easy to fix.
86  *
87  * Revision 1.12  2003/01/06 18:48:39  dom
88  * ispell cleanup, start of using new 'add' save features
89  *
90  * Revision 1.11  2002/09/19 05:31:17  hippietrail
91  *
92  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
93  * K&R function declarations removed, converted to Doxygen style comments
94  * where possible.  No code has been changed (I hope).  Compiles for me but
95  * unable to test.
96  *
97  * Revision 1.10  2002/09/17 03:03:30  hippietrail
98  *
99  * After seeking permission on the developer list I've reformatted all the
100  * spelling source which seemed to have parts which used 2, 3, 4, and 8
101  * spaces for tabs.  It should all look good with our standard 4-space
102  * tabs now.
103  * I've concentrated just on indentation in the actual code.  More prettying
104  * could be done.
105  * * NO code changes were made *
106  *
107  * Revision 1.9  2002/09/13 17:20:13  mpritchett
108  * Fix more warnings for Linux build
109  *
110  * Revision 1.8  2002/05/03 09:49:43  fjfranklin
111  * o hash downloader update (Gabriel Gerhardsson)
112  * - Comment out the "Can't open <dictionary>" printf.
113  * - Make the progressbar more clean at the begining of the download.
114  * - Add support for tarballs that doesn't have the full path included
115  * - Fix copyright headers on the newly added files (*HashDownloader.*)
116  *
117  * Revision 1.7  2001/08/27 19:06:30  dom
118  * Lots of compilation fixes
119  *
120  * Revision 1.6  2001/08/10 18:32:40  dom
121  * Spelling and iconv updates. god, i hate iconv
122  *
123  * Revision 1.5  2001/08/10 09:57:49  hub
124  * Patch by sobomax@FreeBSD.org
125  * #include "iconv.h" directive is missed from src/other/spell/xp/lookup.c and
126  * src/wp/impexp/xp/ie_imp_RTF.cpp.
127  * See bug 1823
128  *
129  * Revision 1.4  2001/07/18 17:46:01  dom
130  * Module changes, and fix compiler warnings
131  *
132  * Revision 1.3  2001/06/12 21:32:49  dom
133  * More ispell work...
134  *
135  * Revision 1.2  2001/05/12 16:05:42  thomasf
136  * Big pseudo changes to ispell to make it pass around a structure rather
137  * than rely on all sorts of gloabals willy nilly here and there.  Also
138  * fixed our spelling class to work with accepting suggestions once more.
139  * This code is dirty, gross and ugly (not to mention still not supporting
140  * multiple hash sized just yet) but it works on my machine and will no
141  * doubt break other machines.
142  *
143  * Revision 1.1  2001/04/15 16:01:24  tomas_f
144  * moving to spell/xp
145  *
146  * Revision 1.7  1999/09/29 23:33:32  justin
147  * Updates to the underlying ispell-based code to support suggested corrections.
148  *
149  * Revision 1.6  1999/04/13 17:12:51  jeff
150  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
151  * Fixed crash on Win32 with the new code.
152  *
153  * Revision 1.5  1999/01/07 01:07:48  paul
154  * Fixed spell leaks.
155  *
156  * Revision 1.5  1999/01/07 01:07:48  paul
157  * Fixed spell leaks.
158  *
159  * Revision 1.4  1998/12/29 14:55:33  eric
160  *
161  * I've doctored the ispell code pretty extensively here.  It is now
162  * warning-free on Win32.  It also *works* on Win32 now, since I
163  * replaced all the I/O calls with ANSI standard ones.
164  *
165  * Revision 1.3  1998/12/28 23:11:30  eric
166  *
167  * modified spell code and integration to build on Windows.
168  * This is still a hack.
169  *
170  * Actually, it doesn't yet WORK on Windows.  It just builds.
171  * SpellCheckInit is failing for some reason.
172  *
173  * Revision 1.2  1998/12/28 22:16:22  eric
174  *
175  * These changes begin to incorporate the spell checker into AbiWord.  Most
176  * of this is a hack.
177  *
178  * 1.  added other/spell to the -I list in config/abi_defs
179  * 2.  replaced other/spell/Makefile with one which is more like
180  * 	our build system.
181  * 3.  added other/spell to other/Makefile so that the build will now
182  * 	dive down and build the spell check library.
183  * 4.  added the AbiSpell library to the Makefiles in wp/main
184  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
185  * 	This call is a HACK and should be replaced with something
186  * 	proper later.
187  * 6.  added code to fv_View.cpp as follows:
188  * 	whenever you double-click on a word, the spell checker
189  * 	verifies that word and prints its status to stdout.
190  *
191  * Caveats:
192  * 1.  This will break the Windows build.  I'm going to work on fixing it
193  * 	now.
194  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
195  * 	The dictionary location is currently hard-coded.  This will be
196  * 	fixed as well.
197  *
198  * Anyway, such as it is, it works.
199  *
200  * Revision 1.1  1998/12/28 18:04:43  davet
201  * Spell checker code stripped from ispell.  At this point, there are
202  * two external routines...  the Init routine, and a check-a-word routine
203  * which returns a boolean value, and takes a 16 bit char string.
204  * The code resembles the ispell code as much as possible still.
205  *
206  * Revision 1.42  1995/01/08  23:23:42  geoff
207  * Support MSDOS_BINARY_OPEN when opening the hash file to read it in.
208  *
209  * Revision 1.41  1994/01/25  07:11:51  geoff
210  * Get rid of all old RCS log lines in preparation for the 3.1 release.
211  *
212  */
213 
214 #include <stdlib.h>
215 #include <string.h>
216 #include <ctype.h>
217 
218 #include "enchant-provider.h"
219 #include "ispell_checker.h"
220 #include "msgs.h"
221 
222 #define G_ICONV_INVALID (GIConv)-1
223 
g_iconv_is_valid(GIConv i)224 static bool g_iconv_is_valid(GIConv i)
225 {
226   return (i != G_ICONV_INVALID);
227 }
228 
229 #ifdef INDEXDUMP
230 static void	dumpindex P ((struct flagptr * indexp, int depth));
231 #endif /* INDEXDUMP */
232 
/*
 * Global with external linkage, initialised to 64.
 * NOTE(review): the name suggests the width in bits of the affix-flag mask;
 * nothing in this file reads it, so confirm against its users.
 */
int gnMaskBits = 64;
234 
235 /*!
236  * \param hashname name of the hash file (dictionary)
237  *
238  * \return
239  */
linit(char * hashname)240 int ISpellChecker::linit (char *hashname)
241 {
242 	FILE*	fpHash;
243 
244     register int	i;
245     register struct dent * dp;
246     struct flagent *	entry;
247     struct flagptr *	ind;
248     int			nextchar, x;
249     int			viazero;
250     register ichar_t *	cp;
251 
252     if ((fpHash = enchant_fopen (hashname, "rb")) == NULL)
253 	{
254 		return (-1);
255 	}
256 
257     m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash);
258     if (m_hashsize < static_cast<int>(sizeof(m_hashheader)))
259 	{
260 		if (m_hashsize < 0)
261 			fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
262 		else if (m_hashsize == 0)
263 			fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
264 		else
265 			fprintf (stderr,
266 			  LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize,
267 				static_cast<int>(sizeof m_hashheader)));
268 		return (-1);
269 	}
270     else if (m_hashheader.magic != MAGIC)
271 	{
272 		fprintf (stderr,
273 		  LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC),
274 			static_cast<unsigned int>(m_hashheader.magic)));
275 		return (-1);
276 	}
277     else if (m_hashheader.magic2 != MAGIC)
278 	{
279 		fprintf (stderr,
280 		  LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC),
281 			static_cast<unsigned int>(m_hashheader.magic2)));
282 		return (-1);
283 	}
284 /*  else if (hashheader.compileoptions != COMPILEOPTIONS*/
285     else if ( 1 != 1
286       ||  m_hashheader.maxstringchars != MAXSTRINGCHARS
287       ||  m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
288 	{
289 		fprintf (stderr,
290 		  LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions),
291 			m_hashheader.maxstringchars, m_hashheader.maxstringcharlen,
292 			static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
293 		return (-1);
294 	}
295 
296 	{
297 		m_hashtbl =
298 		 (struct dent *)
299 			calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent));
300 		m_hashsize = m_hashheader.tblsize;
301 		m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize)));
302 	}
303     m_numsflags = m_hashheader.stblsize;
304     m_numpflags = m_hashheader.ptblsize;
305     m_sflaglist = (struct flagent *)
306       malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent));
307     if (m_hashtbl == NULL  ||  m_hashstrings == NULL  ||  m_sflaglist == NULL)
308 	{
309 		fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
310 		return (-1);
311 	}
312     m_pflaglist = m_sflaglist + m_numsflags;
313 
314 	{
315 		if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash)
316 			!= static_cast<size_t>(m_hashheader.stringsize) )
317 	    {
318 		    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
319 			fprintf (stderr, "stringsize err\n" );
320 	    	return (-1);
321 	    }
322 		if ( m_hashheader.compileoptions & 0x04 )
323 		{
324 			if(  fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
325 		    	!= (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent))))
326 		    {
327 			    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
328 		    	return (-1);
329 		    }
330 		}
331 		else
332 		{
333 			for( x=0; x<m_hashheader.tblsize; x++ )
334 			{
335 				if(  fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash)
336 			    	!= 1)
337 			    {
338 				    fprintf (stderr, LOOKUP_C_BAD_FORMAT);
339 			    	return (-1);
340 			    }
341 			}	/*for*/
342 		}	/*else*/
343 	}
344     if (fread (reinterpret_cast<char *>(m_sflaglist), 1,
345 	static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash)
346       != (m_numsflags + m_numpflags) * sizeof (struct flagent))
347 	{
348 		fprintf (stderr, LOOKUP_C_BAD_FORMAT);
349 		return (-1);
350 	}
351     fclose (fpHash);
352 
353 	{
354 		for (i = m_hashsize, dp = m_hashtbl;  --i >= 0;  dp++)
355 		{
356 			if (dp->word == (char *) -1)
357 				dp->word = NULL;
358 			else
359 				dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ];
360 			if (dp->next == (struct dent *) -1)
361 				dp->next = NULL;
362 			else
363 				dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ];
364 	    }
365 	}
366 
367     for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
368 	{
369 		if (entry->stripl)
370 			entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]);
371 		else
372 			entry->strip = NULL;
373 		if (entry->affl)
374 			entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]);
375 		else
376 			entry->affix = NULL;
377 	}
378     /*
379     ** Warning - 'entry' and 'i' are reset in the body of the loop
380     ** below.  Don't try to optimize it by (e.g.) moving the decrement
381     ** of i into the loop condition.
382     */
383     for (i = m_numsflags, entry = m_sflaglist;  i > 0;  i--, entry++)
384 	{
385 		if (entry->affl == 0)
386 		{
387 			cp = NULL;
388 			ind = &m_sflagindex[0];
389 			viazero = 1;
390 		}
391 		else
392 		{
393 			cp = entry->affix + entry->affl - 1;
394 			ind = &m_sflagindex[*cp];
395 			viazero = 0;
396 			while (ind->numents == 0  &&  ind->pu.fp != NULL)
397 			{
398 				if (cp == entry->affix)
399 				{
400 					ind = &ind->pu.fp[0];
401 					viazero = 1;
402 				}
403 				else
404 				{
405 					ind = &ind->pu.fp[*--cp];
406 					viazero = 0;
407 				}
408 			}
409 		}
410 		if (ind->numents == 0)
411 			ind->pu.ent = entry;
412 		ind->numents++;
413 		/*
414 		** If this index entry has more than MAXSEARCH flags in
415 		** it, we will split it into subentries to reduce the
416 		** searching.  However, the split doesn't make sense in
417 		** two cases:  (a) if we are already at the end of the
418 		** current affix, or (b) if all the entries in the list
419 		** have identical affixes.  Since the list is sorted, (b)
420 		** is true if the first and last affixes in the list
421 		** are identical.
422 		*/
423 		if (!viazero  &&  ind->numents >= MAXSEARCH
424 		  &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
425 		{
426 			/* Sneaky trick:  back up and reprocess */
427 			entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
428 			i = m_numsflags - (entry - m_sflaglist);
429 			ind->pu.fp =
430 			  (struct flagptr *)
431 			calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars),
432 			  sizeof (struct flagptr));
433 			if (ind->pu.fp == NULL)
434 			{
435 				fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
436 				return (-1);
437 			}
438 			ind->numents = 0;
439 		}
440 	}
441     /*
442     ** Warning - 'entry' and 'i' are reset in the body of the loop
443     ** below.  Don't try to optimize it by (e.g.) moving the decrement
444     ** of i into the loop condition.
445     */
446     for (i = m_numpflags, entry = m_pflaglist;  i > 0;  i--, entry++)
447 	{
448 		if (entry->affl == 0)
449 	    {
450 			cp = NULL;
451 			ind = &m_pflagindex[0];
452 			viazero = 1;
453 	    }
454 		else
455 		{
456 			cp = entry->affix;
457 			ind = &m_pflagindex[*cp++];
458 			viazero = 0;
459 			while (ind->numents == 0  &&  ind->pu.fp != NULL)
460 			{
461 				if (*cp == 0)
462 				{
463 					ind = &ind->pu.fp[0];
464 					viazero = 1;
465 				}
466 				else
467 				{
468 					ind = &ind->pu.fp[*cp++];
469 					viazero = 0;
470 				}
471 			}
472 		}
473 		if (ind->numents == 0)
474 			ind->pu.ent = entry;
475 		ind->numents++;
476 		/*
477 		** If this index entry has more than MAXSEARCH flags in
478 		** it, we will split it into subentries to reduce the
479 		** searching.  However, the split doesn't make sense in
480 		** two cases:  (a) if we are already at the end of the
481 		** current affix, or (b) if all the entries in the list
482 		** have identical affixes.  Since the list is sorted, (b)
483 		** is true if the first and last affixes in the list
484 		** are identical.
485 		*/
486 		if (!viazero  &&  ind->numents >= MAXSEARCH
487 		  &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
488 		{
489 			/* Sneaky trick:  back up and reprocess */
490 			entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
491 			i = m_numpflags - (entry - m_pflaglist);
492 			ind->pu.fp =
493 			  static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars,
494 				sizeof (struct flagptr)));
495 			if (ind->pu.fp == NULL)
496 			{
497 				fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
498 				return (-1);
499 			}
500 			ind->numents = 0;
501 		}
502 	}
503 #ifdef INDEXDUMP
504     fprintf (stderr, "Prefix index table:\n");
505     dumpindex (m_pflagindex, 0);
506     fprintf (stderr, "Suffix index table:\n");
507     dumpindex (m_sflagindex, 0);
508 #endif
509     if (m_hashheader.nstrchartype == 0)
510 		m_chartypes = NULL;
511     else
512 	{
513 		m_chartypes = (struct strchartype *)
514 		  malloc (m_hashheader.nstrchartype * sizeof (struct strchartype));
515 		if (m_chartypes == NULL)
516 		{
517 			fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
518 			return (-1);
519 		}
520 		for (i = 0, nextchar = m_hashheader.strtypestart;
521 		  i < m_hashheader.nstrchartype;
522 		  i++)
523 		{
524 			m_chartypes[i].name = &m_hashstrings[nextchar];
525 			nextchar += strlen (m_chartypes[i].name) + 1;
526 			m_chartypes[i].deformatter = &m_hashstrings[nextchar];
527 			nextchar += strlen (m_chartypes[i].deformatter) + 1;
528 			m_chartypes[i].suffixes = &m_hashstrings[nextchar];
529 			while (m_hashstrings[nextchar] != '\0')
530 				nextchar += strlen (&m_hashstrings[nextchar]) + 1;
531 			nextchar++;
532 		}
533 	}
534 
535     initckch(NULL);
536 
537     return (0);
538 }
539 
#ifndef FREEP
/*
 * Release a heap pointer.  The argument is evaluated exactly once, so
 * expressions with side effects (e.g. FREEP(a[i++])) are safe; free()
 * itself ignores NULL, so no separate guard is needed.
 */
#define FREEP(p)	do { free (p); } while (0)
#endif
543 
544 /*!
545  * \param wchars Characters in -w option, if any
546  */
initckch(char * wchars)547 void ISpellChecker::initckch (char *wchars)
548 {
549 	register ichar_t    c;
550 	char                num[4];
551 
552 	for (c = 0; c < static_cast<ichar_t>(SET_SIZE+ m_hashheader.nstrchars); ++c)
553     {
554 		if (iswordch (c))
555 		{
556 			if (!mylower (c))
557 			{
558 				m_Try[m_Trynum] = c;
559 				++m_Trynum;
560 			}
561 		}
562 		else if (isboundarych (c))
563 		{
564 			m_Try[m_Trynum] = c;
565 			++m_Trynum;
566 		}
567 	}
568 	if (wchars != NULL)
569     {
570 		while (m_Trynum < SET_SIZE  &&  *wchars != '\0')
571 		{
572 			if (*wchars != 'n'  &&  *wchars != '\\')
573 			{
574 				c = *wchars;
575 				++wchars;
576 			}
577 			else
578 			{
579 			    ++wchars;
580 			    num[0] = '\0';
581 			    num[1] = '\0';
582 			    num[2] = '\0';
583 			    num[3] = '\0';
584 			    if (isdigit (wchars[0]))
585 				{
586 				    num[0] = wchars[0];
587 				    if (isdigit (wchars[1]))
588 				    {
589 						num[1] = wchars[1];
590 						if (isdigit (wchars[2]))
591 							num[2] = wchars[2];
592 					}
593 				}
594 				if (wchars[-1] == 'n')
595 				{
596 				    wchars += strlen (num);
597 				    c = atoi (num);
598 				}
599 				else
600 				{
601 				    wchars += strlen (num);
602 				    c = 0;
603 				    if (num[0])
604 						c = num[0] - '0';
605 				    if (num[1])
606 				    {
607 						c <<= 3;
608 						c += num[1] - '0';
609 					}
610 					if (num[2])
611 					{
612 						c <<= 3;
613 						c += num[2] - '0';
614 					}
615 				}
616 			}
617 /*	    	c &= NOPARITY;*/
618 			if (!m_hashheader.wordchars[c])
619 			{
620 				m_hashheader.wordchars[c] = 1;
621 				m_hashheader.sortorder[c] = m_hashheader.sortval++;
622 				m_Try[m_Trynum] = c;
623 				++m_Trynum;
624 			}
625 		}
626     }
627 }
628 
629 /*
630  * \param indexp
631  */
clearindex(struct flagptr * indexp)632 void ISpellChecker::clearindex (struct flagptr *indexp)
633 {
634     register int		i;
635     for (i = 0;  i < SET_SIZE + m_hashheader.nstrchars;  i++, indexp++)
636 	{
637 		if (indexp->numents == 0 && indexp->pu.fp != NULL)
638 		{
639 		    clearindex(indexp->pu.fp);
640 			free(indexp->pu.fp);
641 		}
642 	}
643 }
644 
#ifdef INDEXDUMP
/*
 * Debug helper (only built when INDEXDUMP is defined): print an affix index
 * table to stderr, one slot per line, indented by the nesting depth.  Slots
 * that lead to a nested table are printed and then descended into; slots
 * that hold entries are printed with their entry count followed by one line
 * per entry.
 *
 * NOTE(review): this function still refers to `hashheader`, `sflaglist`,
 * `ichartostr` and `ichartosstr` as free names.  The rest of this file uses
 * the members m_hashheader and m_sflaglist instead, so this block most
 * likely no longer builds with INDEXDUMP defined; making it work needs
 * access to the checker instance -- confirm before enabling.
 *
 * \param indexp first slot of the index table to dump
 * \param depth  current nesting depth, used as the indentation width
 */
static void dumpindex (struct flagptr *indexp, int depth)
{
	int		i;
	int		j;
	int		k;
	char		stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

	for (i = 0;  i < SET_SIZE + hashheader.nstrchars;  i++, indexp++)
	{
		if (indexp->numents == 0  &&  indexp->pu.fp != NULL)
		{
			for (j = depth;  --j >= 0;  )
				putc (' ', stderr);
			if (i >= ' '  &&  i <= '~')
				putc (i, stderr);
			else
				fprintf (stderr, "0x%x", i);
			putc ('\n', stderr);
			dumpindex (indexp->pu.fp, depth + 1);
		}
		else if (indexp->numents)
		{
			for (j = depth;  --j >= 0;  )
				putc (' ', stderr);
			if (i >= ' '  &&  i <= '~')
				putc (i, stderr);
			else
				fprintf (stderr, "0x%x", i);
			fprintf (stderr, " -> %d entries\n", indexp->numents);
			for (k = 0;  k < indexp->numents;  k++)
			{
				for (j = depth;  --j >= 0;  )
					putc (' ', stderr);
				/* The pointer difference is cast to match "%d". */
				if (indexp->pu.ent[k].stripl)
				{
					ichartostr (stripbuf, indexp->pu.ent[k].strip,
					  sizeof stripbuf, 1);
					fprintf (stderr, "     entry %d (-%s,%s)\n",
					  static_cast<int>(&indexp->pu.ent[k] - sflaglist),
					  stripbuf,
					  indexp->pu.ent[k].affl
						? ichartosstr (indexp->pu.ent[k].affix, 1) : "-");
				}
				else
					fprintf (stderr, "     entry %d (%s)\n",
					  static_cast<int>(&indexp->pu.ent[k] - sflaglist),
					  ichartosstr (indexp->pu.ent[k].affix, 1));
			}
		}
	}
}
#endif
700 
701 /* n is length of s */
702 
703 /*
704  * \param s
705  * \param dotree
706  *
707  * \return
708  */
ispell_lookup(ichar_t * s,int dotree)709 struct dent * ISpellChecker::ispell_lookup (ichar_t *s, int dotree)
710 {
711     register struct dent *	dp;
712     register char *		s1;
713     char			schar[INPUTWORDLEN + MAXAFFIXLEN];
714 
715     dp = &m_hashtbl[hash (s, m_hashsize)];
716     if (ichartostr (schar, s, sizeof schar, 1))
717 		fprintf (stderr, WORD_TOO_LONG (schar));
718     for (  ;  dp != NULL;  dp = dp->next)
719 	{
720 		/* quick strcmp, but only for equality */
721 		s1 = dp->word;
722 		if (s1  &&  s1[0] == schar[0]  &&  strcmp (s1 + 1, schar + 1) == 0)
723 			return dp;
724 #ifndef NO_CAPITALIZATION_SUPPORT
725 		while (dp->flagfield & MOREVARIANTS)	/* Skip variations */
726 			dp = dp->next;
727 #endif
728 	}
729 	return NULL;
730 }
731 
alloc_ispell_struct()732 void ISpellChecker::alloc_ispell_struct()
733 {
734 	m_translate_in =
735 	m_translate_out = G_ICONV_INVALID;
736 }
737 
free_ispell_struct()738 void ISpellChecker::free_ispell_struct()
739 {
740 	if (g_iconv_is_valid(m_translate_in))
741 		g_iconv_close (m_translate_in);
742 	if (g_iconv_is_valid(m_translate_out))
743 		g_iconv_close (m_translate_out);
744 }
745