1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 #ifndef ISPELL_H
3 #define ISPELL_H
4 
5 #include <sys/types.h>
6 
7 /*
8  * $Id: ispell.h 28601 2010-01-11 12:40:00Z dom $
9  */
10 
11 /*
12  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
13  * All rights reserved.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  *
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions and the following disclaimer.
21  * 2. Redistributions in binary form must reproduce the above copyright
22  *    notice, this list of conditions and the following disclaimer in the
23  *    documentation and/or other materials provided with the distribution.
24  * 3. All modifications to the source code must be clearly marked as
25  *    such.  Binary redistributions based on modified source code
26  *    must be clearly marked as modified versions in the documentation
27  *    and/or other materials provided with the distribution.
28  * 4. All advertising materials mentioning features or use of this software
29  *    must display the following acknowledgment:
30  *      This product includes software developed by Geoff Kuenning and
31  *      other unpaid contributors.
32  * 5. The name of Geoff Kuenning may not be used to endorse or promote
33  *    products derived from this software without specific prior
34  *    written permission.
35  *
36  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
37  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
39  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
40  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
42  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
44  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
45  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46  * SUCH DAMAGE.
47  */
48 
49 /*
50  * $Log$
51  * Revision 1.4  2003/08/14 17:51:27  dom
52  * update license - exception clause should be Lesser GPL
53  *
54  * Revision 1.3  2003/07/28 20:40:26  dom
55  * fix up the license clause, further win32-registry proof some directory getting functions
56  *
57  * Revision 1.2  2003/07/16 22:52:40  dom
58  * LGPL + exception license
59  *
60  * Revision 1.1  2003/07/15 01:15:06  dom
61  * ispell enchant backend
62  *
63  * Revision 1.10  2003/01/24 05:52:33  hippietrail
64  *
65  * Refactored ispell code. Old ispell global variables had been put into
66  * an allocated structure, a pointer to which was passed to many functions.
67  * I have now made all such functions and variables private members of the
68  * ISpellChecker class. It was C OO, now it's C++ OO.
69  *
70  * I've fixed the makefiles and tested compilation but am unable to test
71  * operation. Please back out my changes if they cause problems which
72  * are not obvious or easy to fix.
73  *
74  * Revision 1.9  2002/09/19 05:31:15  hippietrail
75  *
76  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
77  * K&R function declarations removed, converted to Doxygen style comments
78  * where possible.  No code has been changed (I hope).  Compiles for me but
79  * unable to test.
80  *
81  * Revision 1.8  2002/09/17 03:03:29  hippietrail
82  *
83  * After seeking permission on the developer list I've reformatted all the
84  * spelling source which seemed to have parts which used 2, 3, 4, and 8
85  * spaces for tabs.  It should all look good with our standard 4-space
86  * tabs now.
87  * I've concentrated just on indentation in the actual code.  More prettying
88  * could be done.
89  * * NO code changes were made *
90  *
91  * Revision 1.7  2002/03/22 14:31:57  dom
92  * fix mg's compile problem
93  *
94  * Revision 1.6  2002/03/05 16:55:52  dom
95  * compound word support, tested against swedish
96  *
97  * Revision 1.5  2001/08/10 18:32:40  dom
98  * Spelling and iconv updates. god, i hate iconv
99  *
100  * Revision 1.4  2001/06/26 16:33:27  dom
101  * 128 StringChars and some other stuff
102  *
103  * Revision 1.3  2001/05/12 16:05:42  thomasf
104  * Big pseudo changes to ispell to make it pass around a structure rather
 * than rely on all sorts of globals willy nilly here and there.  Also
106  * fixed our spelling class to work with accepting suggestions once more.
107  * This code is dirty, gross and ugly (not to mention still not supporting
108  * multiple hash sized just yet) but it works on my machine and will no
109  * doubt break other machines.
110  *
111  * Revision 1.2  2001/04/18 00:59:36  thomasf
112  * Removed the duplicate declarations of variables that was causing build
113  * to bail.  This new ispell stuff is a total mess.
114  *
115  * Revision 1.1  2001/04/15 16:01:24  tomas_f
116  * moving to spell/xp
117  *
118  * Revision 1.13  2001/04/13 12:33:12  tamlin
119  * ispell can now be used from C++
120  *
121  * Revision 1.12  2001/03/25 01:30:02  tomb
122  * 1. Fixed ispell #define problems on Win32
123  * 2. Changed the way that togglable toolbars are tracked so that Full
124  * Screen mode works right on Windows
125  * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h
126  * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded
127  * 5. Added #define for Auto Save checkbox (though I haven't updated the
128  * Prefs dialog yet)
129  *
130  * Revision 1.11  2001/03/24 23:28:41  dom
131  * Make C++ aware and watch out for VOID on Win32
132  *
133  * Revision 1.10  1999/12/21 18:46:29  sterwill
134  * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se>
135  *
136  * Revision 1.9  1999/10/20 03:19:35  paul
137  * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary.  It ain't pretty, but at least we don't crash there any more.
138  *
139  * Revision 1.8  1999/09/29 23:33:32  justin
140  * Updates to the underlying ispell-based code to support suggested corrections.
141  *
142  * Revision 1.7  1999/04/13 17:12:51  jeff
143  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
144  * Fixed crash on Win32 with the new code.
145  *
146  * Revision 1.6  1999/01/07 05:14:22  sterwill
147  * So it builds on Unix... it might break win32 in ispell, since ut_types
148  * is no longer included.  This is a temporary solution to a larger problem
149  * of including C++ headers in C source files.
150  *
151  * Revision 1.6  1999/01/07 05:14:22  sterwill
152  * So it builds on Unix... it might break win32 in ispell, since ut_types
153  * is no longer included.  This is a temporary solution to a larger problem
154  * of including C++ headers in C source files.
155  *
156  * Revision 1.5  1999/01/07 05:02:25  sterwill
157  * Checking in half-broken to avoid tree lossage
158  *
159  * Revision 1.4  1999/01/07 01:07:48  paul
160  * Fixed spell leaks.
161  *
162  * Revision 1.3  1998/12/29 15:03:54  eric
163  *
164  * minor fix to ispell.h to get things to compile on Linux again.
165  *
166  * Revision 1.2  1998/12/29 14:55:33  eric
167  *
168  * I've doctored the ispell code pretty extensively here.  It is now
169  * warning-free on Win32.  It also *works* on Win32 now, since I
170  * replaced all the I/O calls with ANSI standard ones.
171  *
172  * Revision 1.1  1998/12/28 18:04:43  davet
173  * Spell checker code stripped from ispell.  At this point, there are
174  * two external routines...  the Init routine, and a check-a-word routine
175  * which returns a boolean value, and takes a 16 bit char string.
176  * The code resembles the ispell code as much as possible still.
177  *
178  * Revision 1.68  1995/03/06  02:42:41  geoff
179  * Be vastly more paranoid about parenthesizing macro arguments.  This
180  * fixes a bug in defmt.c where a complex argument was passed to
181  * isstringch.
182  *
183  * Revision 1.67  1995/01/03  19:24:12  geoff
184  * Get rid of a non-global declaration.
185  *
186  * Revision 1.66  1994/12/27  23:08:49  geoff
187  * Fix a lot of subtly bad assumptions about the widths of ints and longs
188  * which only show up on 64-bit machines like the Cray and the DEC Alpha.
189  *
190  * Revision 1.65  1994/11/02  06:56:10  geoff
191  * Remove the anyword feature, which I've decided is a bad idea.
192  *
193  * Revision 1.64  1994/10/25  05:46:18  geoff
194  * Add the FF_ANYWORD flag for defining an affix that will apply to any
195  * word, even if not explicitly specified.  (Good for French.)
196  *
197  * Revision 1.63  1994/09/16  04:48:28  geoff
198  * Make stringdups and laststringch unsigned ints, and dupnos a plain
199  * int, so that we can handle more than 128 stringchars and stringchar
200  * types.
201  *
202  * Revision 1.62  1994/09/01  06:06:39  geoff
203  * Change erasechar/killchar to uerasechar/ukillchar to avoid
204  * shared-library problems on HP systems.
205  *
206  * Revision 1.61  1994/08/31  05:58:35  geoff
207  * Add contextoffset, used in -a mode to handle extremely long lines.
208  *
209  * Revision 1.60  1994/05/17  06:44:15  geoff
210  * Add support for controlled compound formation and the COMPOUNDONLY
211  * option to affix flags.
212  *
213  * Revision 1.59  1994/03/15  06:25:16  geoff
214  * Change deftflag's initialization so we can tell if -t/-n appeared.
215  *
216  * Revision 1.58  1994/02/07  05:53:28  geoff
 * Add typecasts to the 7-bit versions of ichar* routines
218  *
219  * Revision 1.57  1994/01/25  07:11:48  geoff
220  * Get rid of all old RCS log lines in preparation for the 3.1 release.
221  *
222  */
223 
224 #include <stdio.h>
225 /*  #include "ut_types.h" */
226 
227 #include "ispell_def.h"
228 
229 #ifdef __cplusplus
230 extern "C" {
231 #endif /* c++ */
232 
/*
** Upper bound on how much longer a word can get once affixes are added.
** Only supplied here when nothing earlier has defined it.
*/
#if !defined(MAXAFFIXLEN)
#define MAXAFFIXLEN 20
#endif /* MAXAFFIXLEN */
237 
238 /*
239 ** Number of mask bits (affix flags) supported.  Must be 32, 64, 128, or
240 ** 256.  If MASKBITS is 32 or 64, there are really only 26 or 58 flags
241 ** available, respectively.  If it is 32, the flags are named with the
242 ** 26 English uppercase letters;  lowercase will be converted to uppercase.
243 ** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII
244 ** order, including the 6 special characters from 'Z' to 'a': "[\]^_`".
245 ** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters,
246 ** respectively, are theoretically available, though a few (newline, slash,
247 ** null byte) are pretty hard to actually use successfully.
248 **
249 ** Note that a number of non-English affix files depend on having a
250 ** larger value for MASKBITS.  See the affix files for more
251 ** information.
252 */
253 
#if !defined(MASKBITS)
#define MASKBITS	64	/* default when the build does not choose one */
#endif /* MASKBITS */

/* Declared here, defined outside this header; MASKSIZE divides by it. */
extern int		gnMaskBits;
259 
260 /*
261 ** C type to use for masks.  This should be a type that the processor
262 ** accesses efficiently.
263 **
264 ** MASKTYPE_WIDTH must correctly reflect the number of bits in a
265 ** MASKTYPE.  Unfortunately, it is also required to be a constant at
266 ** preprocessor time, which means you can't use the sizeof operator to
267 ** define it.
268 **
269 ** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get
270 ** division-by-zero errors!
271 */
/*
** Defaults for the mask word type and its width in bits.
** NOTE(review): "long" is wider than 32 bits on LP64 platforms, so the two
** defaults may disagree there -- confirm against the hash-file reader
** before changing either one.
*/
#if !defined(MASKTYPE)
#define MASKTYPE	long
#endif /* MASKTYPE */
#if !defined(MASKTYPE_WIDTH)
#define MASKTYPE_WIDTH	32
#endif /* MASKTYPE_WIDTH */

  /* (original note) program: this should be coded now in init */

/* MASKBITS is raised to MASKTYPE_WIDTH whenever it would be smaller. */
#if MASKTYPE_WIDTH > MASKBITS
#undef MASKBITS
#define MASKBITS	MASKTYPE_WIDTH
#endif /* MASKTYPE_WIDTH > MASKBITS */
285 
/*
** Hash table fullness limit, expressed as a percentage.  Larger numbers
** trade space for time.
*/
#if !defined(MAXPCT)
#define MAXPCT	70		/* table is expanded once it is 70% full */
#endif /* MAXPCT */
293 
294 /*
295 ** Maximum number of "string" characters that can be defined in a
296 ** language (affix) file.  Don't forget that an upper/lower string
297 ** character counts as two!
298 */
#if !defined(MAXSTRINGCHARS)
#define MAXSTRINGCHARS 128	/* default; an upper/lower pair uses two */
#endif /* MAXSTRINGCHARS */
302 
303 /*
304 ** Maximum length of a "string" character.  The default is appropriate for
305 ** nroff-style characters starting with a backslash.
306 */
#if !defined(MAXSTRINGCHARLEN)
#define MAXSTRINGCHARLEN 10	/* default length limit of one string char */
#endif /* MAXSTRINGCHARLEN */
310 
311 /*
312 ** Maximum number of "hits" expected on a word.  This is basically the
313 ** number of different ways different affixes can produce the same word.
314 ** For example, with "english.aff", "brothers" can be produced 3 ways:
315 ** "brothers," "brother+s", or "broth+ers".  If this is too low, no major
316 ** harm will be done, but ispell may occasionally forget a capitalization.
317 */
#if !defined(MAX_HITS)
#define MAX_HITS	10	/* default number of hits kept per word */
#endif /* MAX_HITS */
321 
322 /*
323 ** Maximum number of capitalization variations expected in any word.
324 ** Besides the obvious all-lower, all-upper, and capitalized versions,
325 ** this includes followcase variants.  If this is too low, no real
326 ** harm will be done, but ispell may occasionally fail to suggest a
327 ** correct capitalization.
328 */
#if !defined(MAX_CAPS)
#define MAX_CAPS	10	/* default number of capitalization variants */
#endif /* MAX_CAPS */
332 
/*
** File-name buffer size, used only when MAXPATHLEN has not already been
** defined (for example by sys/param.h).
*/
#if !defined(MAXPATHLEN)
#define MAXPATHLEN 512
#endif /* MAXPATHLEN */
337 
338 /*
339 ** Maximum language-table search size.  Smaller numbers make ispell
340 ** run faster, at the expense of more memory (the lowest reasonable value
341 ** is 2).  If a given character appears in a significant position in
342 ** more than MAXSEARCH suffixes, it will be given its own index table.
343 ** If you change this, define INDEXDUMP in lookup.c to be sure your
344 ** index table looks reasonable.
345 */
#if !defined(MAXSEARCH)
#define MAXSEARCH 4	/* default language-table search size */
#endif /* MAXSEARCH */
349 
/*
** Prototype compatibility helpers.
**
** P(x) expands to its argument when the compiler announces ANSI C
** (__STDC__) or C++, and to an empty parameter list "()" otherwise.
** VOID is only supplied when nothing earlier has defined it: "void" for
** ANSI C / C++, "char" for older compilers.  In the pre-ANSI case the
** "const" keyword is additionally defined away.
*/
#if !defined(__STDC__) && !defined(__cplusplus)
#define P(x)	()
#ifndef VOID
#define VOID	char
#endif /* VOID */
#define const
#else /* __STDC__ || __cplusplus */
#define P(x)	x
#ifndef VOID
#define VOID	void
#endif /* VOID */
#endif /* !__STDC__ && !__cplusplus */
362 
/* Character-set size: 128 when NO8BIT is defined, 256 otherwise. */
#ifndef NO8BIT
#define SET_SIZE	256
#else /* NO8BIT */
#define SET_SIZE	128
#endif /* NO8BIT */
368 
/* gnMaskBits divided by the width of one MASKTYPE word. */
#define MASKSIZE	(gnMaskBits / MASKTYPE_WIDTH)

/*
** TSTMASKBIT(mask, bit) is nonzero when bit number "bit" is set in the
** MASKTYPE array "mask": the word is selected with bit / MASKTYPE_WIDTH
** and the position inside it with bit & (MASKTYPE_WIDTH - 1).  Under lint
** only a function-style declaration is provided instead of the macro.
*/
#ifndef lint
#define TSTMASKBIT(mask, bit) \
	((mask)[(bit) / MASKTYPE_WIDTH] \
	  & ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1))))
#else /* lint */
extern int	TSTMASKBIT P ((MASKTYPE * mask, int bit));
#endif /* lint */
379 
/*
** Up to 64 mask bits: FLAGBASE is six below MASKTYPE_WIDTH.
** More than 64 mask bits: FULLMASKSET is defined and FLAGBASE is 0.
*/
#if MASKBITS <= 64
#define FLAGBASE	((MASKTYPE_WIDTH) - 6)
#else /* MASKBITS > 64 */
#define FULLMASKSET
#define FLAGBASE	0
#endif /* MASKBITS <= 64 */
393 
/*
** Data type for internal word storage.  If necessary, we use shorts rather
** than chars so that string characters can be encoded as a single unit.
**
** ICHAR_IS_CHAR is defined (except under lint) when every ordinary
** character plus every string character fits in 256 values.
*/
#if (SET_SIZE + MAXSTRINGCHARS) <= 256
#ifndef lint
#define ICHAR_IS_CHAR
#endif /* lint */
#endif

#ifdef ICHAR_IS_CHAR
typedef unsigned char	ichar_t;	/* Internal character */
/*
 * With a one-byte ichar_t the helpers map straight onto the C string
 * functions; users of these macros need <string.h> in scope.
 */
#define icharlen(s)	strlen ((char *) (s))
#define icharcpy(a, b)	strcpy ((char *) (a), (char *) (b))
#define icharcmp(a, b)	strcmp ((char *) (a), (char *) (b))
#define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n))
#define chartoichar(x)	((ichar_t) (x))
#else /* ICHAR_IS_CHAR */
typedef unsigned short	ichar_t;	/* Internal character */
/* Go through unsigned char first so a negative char does not sign-extend. */
#define chartoichar(x)	((ichar_t) (unsigned char) (x))

/*
 * Function counterparts of the macros above for the two-byte ichar_t;
 * only declared here, implemented outside this header.
 */
ichar_t* icharcpy (ichar_t* out, ichar_t* in);
int icharlen (ichar_t* in);
int icharcmp (ichar_t* s1, ichar_t* s2);
int icharncmp (ichar_t* s1, ichar_t* s2, int n);

#endif /* ICHAR_IS_CHAR */

/*
 * Structure used to record data about successful lookups; these values
 * are used in the ins_root_cap routine to produce correct capitalizations.
 *
 * Defined for both ichar_t widths: it does not depend on ichar_t, and
 * leaving it inside the conditional above made it an incomplete type
 * whenever ICHAR_IS_CHAR was defined.
 */
struct success
{
    struct dent *		dictent;	/* Header of dict entry chain for wd */
    struct flagent *	prefix;		/* Prefix flag used, or NULL */
    struct flagent *	suffix;		/* Suffix flag used, or NULL */
};
432 
433 struct dent
434 {
435     struct dent *	next;
436     char *			word;
437     MASKTYPE		mask[2];
438 #ifdef FULLMASKSET
439     char			flags;
440 #endif
441 };
442 
443 /*
** Flags in the dictionary entry.  If FULLMASKSET is undefined, these are
445 ** stored in the highest bits of the last longword of the mask field.  If
446 ** FULLMASKSET is defined, they are stored in the extra "flags" field.
447 #ifndef NO_CAPITALIZATION_SUPPORT
448 **
449 ** If a word has only one capitalization form, and that form is not
450 ** FOLLOWCASE, it will have exactly one entry in the dictionary.  The
451 ** legal capitalizations will be indicated by the 2-bit capitalization
452 ** field, as follows:
453 **
454 **	ALLCAPS		The word must appear in all capitals.
455 **	CAPITALIZED	The word must be capitalized (e.g., London).
456 **			It will also be accepted in all capitals.
457 **	ANYCASE		The word may appear in lowercase, capitalized,
458 **			or all-capitals.
459 **
460 ** Regardless of the capitalization flags, the "word" field of the entry
461 ** will point to an all-uppercase copy of the word.  This is to simplify
462 ** the large portion of the code that doesn't care about capitalization.
463 ** Ispell will generate the correct version when needed.
464 **
465 ** If a word has more than one capitalization, there will be multiple
466 ** entries for it, linked together by the "next" field.  The initial
467 ** entry for such words will be a dummy entry, primarily for use by code
468 ** that ignores capitalization.  The "word" field of this entry will
469 ** again point to an all-uppercase copy of the word.  The "mask" field
470 ** will contain the logical OR of the mask fields of all variants.
471 ** A header entry is indicated by a capitalization type of ALLCAPS,
472 ** with the MOREVARIANTS bit set.
473 **
474 ** The following entries will define the individual variants.  Each
475 ** entry except the last has the MOREVARIANTS flag set, and each
476 ** contains one of the following capitalization options:
477 **
478 **	ALLCAPS		The word must appear in all capitals.
479 **	CAPITALIZED	The word must be capitalized (e.g., London).
480 **			It will also be accepted in all capitals.
481 **	FOLLOWCASE	The word must be capitalized exactly like the
482 **			sample in the entry.  Prefix (suffix) characters
483 **			must be rendered in the case of the first (last)
484 **			"alphabetic" character.  It will also be accepted
485 **			in all capitals.  ("Alphabetic" means "mentioned
486 **			in a 'casechars' statement".)
487 **	ANYCASE		The word may appear in lowercase, capitalized,
488 **			or all-capitals.
489 **
490 ** The "mask" field for the entry contains only the affix flag bits that
491 ** are legal for that capitalization.  The "word" field will be null
492 ** except for FOLLOWCASE entries, where it will point to the
493 ** correctly-capitalized spelling of the root word.
494 **
495 ** It is worth discussing why the ALLCAPS option is used in
496 ** the header entry.  The header entry accepts an all-capitals
497 ** version of the root plus every affix (this is always legal, since
498 ** words get capitalized in headers and so forth).  Further, all of
499 ** the following variant entries will reject any all-capitals form
500 ** that is illegal due to an affix.
501 **
502 ** Finally, note that variations in the KEEP flag can cause a multiple-variant
503 ** entry as well.  For example, if the personal dictionary contains "ALPHA",
504 ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a
505 ** multiple-variant entry will be created so that "alpha" will be accepted
506 ** but only "ALPHA" will actually be kept.
507 #endif
508 */
/*
** "flagfield" names the struct dent member that carries the entry flags:
** the dedicated "flags" member when FULLMASKSET is defined, otherwise the
** second mask word.
*/
#ifndef FULLMASKSET
#define flagfield	mask[1]
#else /* FULLMASKSET */
#define flagfield	flags
#endif /* FULLMASKSET */

/* Entry flag bits, counted upward from FLAGBASE. */
#define USED		((MASKTYPE) 1 << (FLAGBASE + 0))
#define KEEP		((MASKTYPE) 1 << (FLAGBASE + 1))
#ifndef NO_CAPITALIZATION_SUPPORT
/* Two-bit capitalization type, followed by the "more variants" bit. */
#define ANYCASE		((MASKTYPE) 0 << (FLAGBASE + 2))
#define ALLCAPS		((MASKTYPE) 1 << (FLAGBASE + 2))
#define CAPITALIZED	((MASKTYPE) 2 << (FLAGBASE + 2))
#define FOLLOWCASE	((MASKTYPE) 3 << (FLAGBASE + 2))
#define CAPTYPEMASK	((MASKTYPE) 3 << (FLAGBASE + 2))
#define MOREVARIANTS	((MASKTYPE) 1 << (FLAGBASE + 4))
#define ALLFLAGS	(USED | KEEP | CAPTYPEMASK | MOREVARIANTS)
/* Extract just the capitalization type from a flag word. */
#define captype(x)	((x) & CAPTYPEMASK)
#else /* NO_CAPITALIZATION_SUPPORT */
#define ALLFLAGS	(USED | KEEP)
#endif /* NO_CAPITALIZATION_SUPPORT */
528 
529 /*
530  * Language tables used to encode prefix and suffix information.
531  */
532 struct flagent
533 {
534     ichar_t *		strip;		/* String to strip off */
535     ichar_t *		affix;		/* Affix to append */
536     short		flagbit;		/* Flag bit this ent matches */
537     short		stripl;			/* Length of strip */
538     short		affl;			/* Length of affix */
539     short		numconds;		/* Number of char conditions */
540     short		flagflags;		/* Modifiers on this flag */
541     char		conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds */
542 };
543 
/*
 * Bit values stored in the flagflags member of struct flagent.
 */
#define FF_CROSSPRODUCT	0x01		/* bit 0: affix does cross-products */
#define FF_COMPOUNDONLY	0x02		/* bit 1: affix works in compounds */
549 
/*
 * Aid for building flag pointers: a slot holds either a pointer to a
 * further level of indexing or the first of a list of entries.
 */
union ptr_union {
    struct flagptr	*fp;	/* pointer to more indexing */
    struct flagent	*ent;	/* first of a list of entries */
};

/*
 * One index slot.  numents tells which member of pu is in use.
 */
struct flagptr {
    union ptr_union	pu;		/* entry list or more indexes */
    int			numents;	/* if zero, pu.fp is valid */
};
561 
/*
 * Describes one string character type.
 */
struct strchartype {
    char	*name;		/* name of the type */
    char	*deformatter;	/* deformatter to use */
    char	*suffixes;	/* file suffixes, separated by null bytes */
};
571 
572 /*
573  * Header placed at the beginning of the hash file.
574  */
575 struct hashheader
576 {
577     unsigned short magic;    	    	    	/* Magic number for ID */
578     unsigned short compileoptions;				/* How we were compiled */
579     short maxstringchars;						/* Max # strchrs we support */
580     short maxstringcharlen;						/* Max strchr len supported */
581     short compoundmin;							/* Min lth of compound parts */
582     short compoundbit;							/* Flag 4 compounding roots */
583     int stringsize;								/* Size of string table */
584     int lstringsize;							/* Size of lang. str tbl */
585     int tblsize;								/* No. entries in hash tbl */
586     int stblsize;								/* No. entries in sfx tbl */
587     int ptblsize;								/* No. entries in pfx tbl */
588     int sortval;								/* Largest sort ID assigned */
589     int nstrchars;								/* No. strchars defined */
590     int nstrchartype;							/* No. strchar types */
591     int strtypestart;							/* Start of strtype table */
592     char nrchars[5];							/* Nroff special characters */
593     char texchars[13];							/* TeX special characters */
594     char compoundflag;							/* Compund-word handling */
595     char defhardflag;							/* Default tryveryhard flag */
596     char flagmarker;							/* "Start-of-flags" char */
597     unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */
598     ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */
599     ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */
600     char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */
601     char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */
602     char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */
603     char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */
604     char stringstarts[SET_SIZE];		/* NZ if char can start str */
605     char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */
606     unsigned int stringdups[MAXSTRINGCHARS];	/* No. of "base" char */
607     int dupnos[MAXSTRINGCHARS];			/* Dup char ID # */
608     unsigned short magic2;			/* Second magic for dbl chk */
609 };
610 
/* hash table magic number */
#define MAGIC			0x9602

/*
** Compile options, put in the hash header for consistency checking.
** Each option contributes its own bits:
**	MAGIC8BIT		0x01 when NO8BIT is defined
**	MAGICCAPITALIZATION	0x02 unless NO_CAPITALIZATION_SUPPORT is defined
**	MAGICMASKSET		0x00/0x04/0x08/0x0C, chosen by MASKBITS
*/
#ifdef NO8BIT
# define MAGIC8BIT		0x01
#else
# define MAGIC8BIT		0x00
#endif
#ifdef NO_CAPITALIZATION_SUPPORT
# define MAGICCAPITALIZATION	0x00
#else
# define MAGICCAPITALIZATION	0x02
#endif

/*
** MAGICMASKSET is defined exactly once, in the branch matching MASKBITS.
** (It used to be defined unconditionally as 0x04 and then defined again
** for the other MASKBITS ranges, which is an incompatible macro
** redefinition.)
*/
#if MASKBITS <= 32
# define MAGICMASKSET		0x00
#elif MASKBITS <= 64
# define MAGICMASKSET		0x04
#elif MASKBITS <= 128
# define MAGICMASKSET		0x08
#else
# define MAGICMASKSET		0x0C
#endif

#define COMPILEOPTIONS	(MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET)
641 
/*
** Named slots of the nroff special-character array.  Each macro expands
** to an element of "hashheader.nrchars", so an object named hashheader
** must be in scope wherever one is used.
*/
#define NRLEFTPAREN	(hashheader.nrchars[0])
#define NRRIGHTPAREN	(hashheader.nrchars[1])
#define NRDOT		(hashheader.nrchars[2])
#define NRBACKSLASH	(hashheader.nrchars[3])
#define NRSTAR		(hashheader.nrchars[4])
650 
/*
** Named slots of the TeX special-character array.  Each macro expands to
** an element of "hashheader.texchars", so an object named hashheader must
** be in scope wherever one is used.
*/
#define TEXLEFTPAREN	(hashheader.texchars[0])
#define TEXRIGHTPAREN	(hashheader.texchars[1])
#define TEXLEFTSQUARE	(hashheader.texchars[2])
#define TEXRIGHTSQUARE	(hashheader.texchars[3])
#define TEXLEFTCURLY	(hashheader.texchars[4])
#define TEXRIGHTCURLY	(hashheader.texchars[5])
#define TEXLEFTANGLE	(hashheader.texchars[6])
#define TEXRIGHTANGLE	(hashheader.texchars[7])
#define TEXBACKSLASH	(hashheader.texchars[8])
#define TEXDOLLAR	(hashheader.texchars[9])
#define TEXSTAR		(hashheader.texchars[10])
#define TEXDOT		(hashheader.texchars[11])
#define TEXPERCENT	(hashheader.texchars[12])
667 
/*
** Legal values of compoundflag.
*/
#define COMPOUND_NEVER		0	/* compound words are never good */
#define COMPOUND_ANYTIME	1	/* run-together words are accepted */
#define COMPOUND_CONTROLLED	2	/* affix flags control compounds */
674 /*
675 ** These macros are similar to the ones above, but they take into account
676 ** the possibility of string characters.  Note well that they take a POINTER,
677 ** not a character.
678 **
679 ** The "l_" versions set "len" to the length of the string character as a
680 ** handy side effect.  (Note that the global "laststringch" is also set,
681 ** and sometimes used, by these macros.)
682 **
683 ** The "l1_" versions go one step further and guarantee that the "len"
684 ** field is valid for *all* characters, being set to 1 even if the macro
685 ** returns false.  This macro is a great example of how NOT to write
686 ** readable C.
687 */
/*
 * TF NOTE: This is actually defined in code (makedent) now.
 *
 * The three macros below are therefore compiled out with "#if 0" and are
 * kept for reference only.  As written, isstringch and l_isstringch pass
 * *(ptr) to isstringstart unchanged, while l1_isstringch casts it to
 * unsigned char first.
 */
#if 0
#define isstringch(ptr, canon)	(isstringstart (*(ptr)) \
				  &&  stringcharlen ((ptr), (canon)) > 0)
#define l_isstringch(ptr, len, canon)	\
				(isstringstart (*(ptr)) \
				  &&  (len = stringcharlen ((ptr), (canon))) \
				    > 0)
#define l1_isstringch(ptr, len, canon)	\
				(len = 1, \
				  isstringstart ((unsigned char)(*(ptr))) \
				    &&  ((len = \
					  stringcharlen ((ptr), (canon))) \
					> 0 \
				      ? 1 : (len = 1, 0)))
#endif
704 
/*
 * Buffer sizes for the results of ichartosstr / strtosichar.  Both hold
 * the same number of characters; the strtosichar buffer is measured in
 * bytes of ichar_t.
 */
#define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4)
#define STRTOSICHAR_SIZE (ICHARTOSSTR_SIZE * sizeof (ichar_t))
711 /* TF CHANGE: We should fill this as a structure
712               and then use it throughout.
713 */
714 
/*
 * Initialized variables are written with the INIT macro so that they are
 * declared consistently in all programs.  When MAIN is defined the macro
 * produces a definition with its initializer; everywhere else it produces
 * an "extern" declaration and the initializer is discarded.  Numerous
 * examples of usage are given below.
 */
#ifndef MAIN
#define INIT(decl, init)	extern decl
#else /* MAIN */
#define INIT(decl, init)	decl = init
#endif /* MAIN */
725 
726 #ifdef MINIMENU
727 INIT (int minimenusize, 2);		/* MUST be either 2 or zero */
728 #else /* MINIMENU */
729 INIT (int minimenusize, 0);		/* MUST be either 2 or zero */
730 #endif /* MINIMENU */
731 
732 INIT (int eflag, 0);			/* NZ for expand mode */
733 INIT (int dumpflag, 0);			/* NZ to do dump mode */
734 INIT (int fflag, 0);			/* NZ if -f specified */
735 #ifndef USG
736 INIT (int sflag, 0);			/* NZ to stop self after EOF */
737 #endif
738 INIT (int vflag, 0);			/* NZ to display characters as M-xxx */
739 INIT (int xflag, DEFNOBACKUPFLAG);	/* NZ to suppress backups */
740 INIT (int deftflag, -1);		/* NZ for TeX mode by default */
741 INIT (int tflag, DEFTEXFLAG);		/* NZ for TeX mode in current file */
742 INIT (int prefstringchar, -1);		/* Preferred string character type */
743 
744 INIT (int terse, 0);			/* NZ for "terse" mode */
745 
746 INIT (char tempfile[MAXPATHLEN], "");	/* Name of file we're spelling into */
747 
748 INIT (int minword, MINWORD);		/* Longest always-legal word */
749 INIT (int sortit, 1);			/* Sort suggestions alphabetically */
750 INIT (int compoundflag, -1);		/* How to treat compounds: see above */
751 INIT (int tryhardflag, -1);		/* Always call tryveryhard */
752 
753 INIT (char * currentfile, NULL);	/* Name of current input file */
754 
755 /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */
756 INIT (int math_mode, 0);
757 /* P -- paragraph or LR mode
758  * b -- parsing a \begin statement
759  * e -- parsing an \end statement
760  * r -- parsing a \ref type of argument.
761  * m -- looking for a \begin{minipage} argument.
762  */
763 INIT (char LaTeX_Mode, 'P');
764 
765 #ifdef __cplusplus
766 }
767 #endif /* c++ */
768 
769 #endif /* ISPELL_H */
770