1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ 2 #ifndef ISPELL_H 3 #define ISPELL_H 4 5 #include <sys/types.h> 6 7 /* 8 * $Id: ispell.h 28601 2010-01-11 12:40:00Z dom $ 9 */ 10 11 /* 12 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA 13 * All rights reserved. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. All modifications to the source code must be clearly marked as 25 * such. Binary redistributions based on modified source code 26 * must be clearly marked as modified versions in the documentation 27 * and/or other materials provided with the distribution. 28 * 4. All advertising materials mentioning features or use of this software 29 * must display the following acknowledgment: 30 * This product includes software developed by Geoff Kuenning and 31 * other unpaid contributors. 32 * 5. The name of Geoff Kuenning may not be used to endorse or promote 33 * products derived from this software without specific prior 34 * written permission. 35 * 36 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND 37 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 38 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 39 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE 40 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 41 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 42 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 44 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 45 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 46 * SUCH DAMAGE. 47 */ 48 49 /* 50 * $Log$ 51 * Revision 1.4 2003/08/14 17:51:27 dom 52 * update license - exception clause should be Lesser GPL 53 * 54 * Revision 1.3 2003/07/28 20:40:26 dom 55 * fix up the license clause, further win32-registry proof some directory getting functions 56 * 57 * Revision 1.2 2003/07/16 22:52:40 dom 58 * LGPL + exception license 59 * 60 * Revision 1.1 2003/07/15 01:15:06 dom 61 * ispell enchant backend 62 * 63 * Revision 1.10 2003/01/24 05:52:33 hippietrail 64 * 65 * Refactored ispell code. Old ispell global variables had been put into 66 * an allocated structure, a pointer to which was passed to many functions. 67 * I have now made all such functions and variables private members of the 68 * ISpellChecker class. It was C OO, now it's C++ OO. 69 * 70 * I've fixed the makefiles and tested compilation but am unable to test 71 * operation. Please back out my changes if they cause problems which 72 * are not obvious or easy to fix. 73 * 74 * Revision 1.9 2002/09/19 05:31:15 hippietrail 75 * 76 * More Ispell cleanup. Conditional globals and DEREF macros are removed. 77 * K&R function declarations removed, converted to Doxygen style comments 78 * where possible. No code has been changed (I hope). Compiles for me but 79 * unable to test. 80 * 81 * Revision 1.8 2002/09/17 03:03:29 hippietrail 82 * 83 * After seeking permission on the developer list I've reformatted all the 84 * spelling source which seemed to have parts which used 2, 3, 4, and 8 85 * spaces for tabs. It should all look good with our standard 4-space 86 * tabs now. 87 * I've concentrated just on indentation in the actual code. More prettying 88 * could be done. 89 * * NO code changes were made * 90 * 91 * Revision 1.7 2002/03/22 14:31:57 dom 92 * fix mg's compile problem 93 * 94 * Revision 1.6 2002/03/05 16:55:52 dom 95 * compound word support, tested against swedish 96 * 97 * Revision 1.5 2001/08/10 18:32:40 dom 98 * Spelling and iconv updates. god, i hate iconv 99 * 100 * Revision 1.4 2001/06/26 16:33:27 dom 101 * 128 StringChars and some other stuff 102 * 103 * Revision 1.3 2001/05/12 16:05:42 thomasf 104 * Big pseudo changes to ispell to make it pass around a structure rather 105 * than rely on all sorts of gloabals willy nilly here and there. Also 106 * fixed our spelling class to work with accepting suggestions once more. 107 * This code is dirty, gross and ugly (not to mention still not supporting 108 * multiple hash sized just yet) but it works on my machine and will no 109 * doubt break other machines. 110 * 111 * Revision 1.2 2001/04/18 00:59:36 thomasf 112 * Removed the duplicate declarations of variables that was causing build 113 * to bail. This new ispell stuff is a total mess. 114 * 115 * Revision 1.1 2001/04/15 16:01:24 tomas_f 116 * moving to spell/xp 117 * 118 * Revision 1.13 2001/04/13 12:33:12 tamlin 119 * ispell can now be used from C++ 120 * 121 * Revision 1.12 2001/03/25 01:30:02 tomb 122 * 1. Fixed ispell #define problems on Win32 123 * 2. Changed the way that togglable toolbars are tracked so that Full 124 * Screen mode works right on Windows 125 * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h 126 * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded 127 * 5. Added #define for Auto Save checkbox (though I haven't updated the 128 * Prefs dialog yet) 129 * 130 * Revision 1.11 2001/03/24 23:28:41 dom 131 * Make C++ aware and watch out for VOID on Win32 132 * 133 * Revision 1.10 1999/12/21 18:46:29 sterwill 134 * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se> 135 * 136 * Revision 1.9 1999/10/20 03:19:35 paul 137 * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. 138 * 139 * Revision 1.8 1999/09/29 23:33:32 justin 140 * Updates to the underlying ispell-based code to support suggested corrections. 141 * 142 * Revision 1.7 1999/04/13 17:12:51 jeff 143 * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. 144 * Fixed crash on Win32 with the new code. 145 * 146 * Revision 1.6 1999/01/07 05:14:22 sterwill 147 * So it builds on Unix... it might break win32 in ispell, since ut_types 148 * is no longer included. This is a temporary solution to a larger problem 149 * of including C++ headers in C source files. 150 * 151 * Revision 1.6 1999/01/07 05:14:22 sterwill 152 * So it builds on Unix... it might break win32 in ispell, since ut_types 153 * is no longer included. This is a temporary solution to a larger problem 154 * of including C++ headers in C source files. 155 * 156 * Revision 1.5 1999/01/07 05:02:25 sterwill 157 * Checking in half-broken to avoid tree lossage 158 * 159 * Revision 1.4 1999/01/07 01:07:48 paul 160 * Fixed spell leaks. 161 * 162 * Revision 1.3 1998/12/29 15:03:54 eric 163 * 164 * minor fix to ispell.h to get things to compile on Linux again. 165 * 166 * Revision 1.2 1998/12/29 14:55:33 eric 167 * 168 * I've doctored the ispell code pretty extensively here. It is now 169 * warning-free on Win32. It also *works* on Win32 now, since I 170 * replaced all the I/O calls with ANSI standard ones. 171 * 172 * Revision 1.1 1998/12/28 18:04:43 davet 173 * Spell checker code stripped from ispell. At this point, there are 174 * two external routines... the Init routine, and a check-a-word routine 175 * which returns a boolean value, and takes a 16 bit char string. 176 * The code resembles the ispell code as much as possible still. 177 * 178 * Revision 1.68 1995/03/06 02:42:41 geoff 179 * Be vastly more paranoid about parenthesizing macro arguments. This 180 * fixes a bug in defmt.c where a complex argument was passed to 181 * isstringch. 182 * 183 * Revision 1.67 1995/01/03 19:24:12 geoff 184 * Get rid of a non-global declaration. 185 * 186 * Revision 1.66 1994/12/27 23:08:49 geoff 187 * Fix a lot of subtly bad assumptions about the widths of ints and longs 188 * which only show up on 64-bit machines like the Cray and the DEC Alpha. 189 * 190 * Revision 1.65 1994/11/02 06:56:10 geoff 191 * Remove the anyword feature, which I've decided is a bad idea. 192 * 193 * Revision 1.64 1994/10/25 05:46:18 geoff 194 * Add the FF_ANYWORD flag for defining an affix that will apply to any 195 * word, even if not explicitly specified. (Good for French.) 196 * 197 * Revision 1.63 1994/09/16 04:48:28 geoff 198 * Make stringdups and laststringch unsigned ints, and dupnos a plain 199 * int, so that we can handle more than 128 stringchars and stringchar 200 * types. 201 * 202 * Revision 1.62 1994/09/01 06:06:39 geoff 203 * Change erasechar/killchar to uerasechar/ukillchar to avoid 204 * shared-library problems on HP systems. 205 * 206 * Revision 1.61 1994/08/31 05:58:35 geoff 207 * Add contextoffset, used in -a mode to handle extremely long lines. 208 * 209 * Revision 1.60 1994/05/17 06:44:15 geoff 210 * Add support for controlled compound formation and the COMPOUNDONLY 211 * option to affix flags. 212 * 213 * Revision 1.59 1994/03/15 06:25:16 geoff 214 * Change deftflag's initialization so we can tell if -t/-n appeared. 215 * 216 * Revision 1.58 1994/02/07 05:53:28 geoff 217 * Add typecasts to the the 7-bit versions of ichar* routines 218 * 219 * Revision 1.57 1994/01/25 07:11:48 geoff 220 * Get rid of all old RCS log lines in preparation for the 3.1 release. 221 * 222 */ 223 224 #include <stdio.h> 225 /* #include "ut_types.h" */ 226 227 #include "ispell_def.h" 228 229 #ifdef __cplusplus 230 extern "C" { 231 #endif /* c++ */ 232 233 /* largest amount that a word might be extended by adding affixes */ 234 #ifndef MAXAFFIXLEN 235 #define MAXAFFIXLEN 20 236 #endif 237 238 /* 239 ** Number of mask bits (affix flags) supported. Must be 32, 64, 128, or 240 ** 256. If MASKBITS is 32 or 64, there are really only 26 or 58 flags 241 ** available, respectively. If it is 32, the flags are named with the 242 ** 26 English uppercase letters; lowercase will be converted to uppercase. 243 ** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII 244 ** order, including the 6 special characters from 'Z' to 'a': "[\]^_`". 245 ** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters, 246 ** respectively, are theoretically available, though a few (newline, slash, 247 ** null byte) are pretty hard to actually use successfully. 248 ** 249 ** Note that a number of non-English affix files depend on having a 250 ** larger value for MASKBITS. See the affix files for more 251 ** information. 252 */ 253 254 #ifndef MASKBITS 255 #define MASKBITS 64 256 #endif 257 258 extern int gnMaskBits; 259 260 /* 261 ** C type to use for masks. This should be a type that the processor 262 ** accesses efficiently. 263 ** 264 ** MASKTYPE_WIDTH must correctly reflect the number of bits in a 265 ** MASKTYPE. Unfortunately, it is also required to be a constant at 266 ** preprocessor time, which means you can't use the sizeof operator to 267 ** define it. 268 ** 269 ** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get 270 ** division-by-zero errors! 271 */ 272 #ifndef MASKTYPE 273 #define MASKTYPE long 274 #endif 275 #ifndef MASKTYPE_WIDTH 276 #define MASKTYPE_WIDTH 32 277 #endif 278 279 /* program: this should be coded now in init */ 280 281 #if MASKBITS < MASKTYPE_WIDTH 282 #undef MASKBITS 283 #define MASKBITS MASKTYPE_WIDTH 284 #endif /* MASKBITS < MASKTYPE_WIDTH */ 285 286 /* 287 ** Maximum hash table fullness percentage. Larger numbers trade space 288 ** for time. 289 **/ 290 #ifndef MAXPCT 291 #define MAXPCT 70 /* Expand table when 70% full */ 292 #endif 293 294 /* 295 ** Maximum number of "string" characters that can be defined in a 296 ** language (affix) file. Don't forget that an upper/lower string 297 ** character counts as two! 298 */ 299 #ifndef MAXSTRINGCHARS 300 #define MAXSTRINGCHARS 128 301 #endif /* MAXSTRINGCHARS */ 302 303 /* 304 ** Maximum length of a "string" character. The default is appropriate for 305 ** nroff-style characters starting with a backslash. 306 */ 307 #ifndef MAXSTRINGCHARLEN 308 #define MAXSTRINGCHARLEN 10 309 #endif /* MAXSTRINGCHARLEN */ 310 311 /* 312 ** Maximum number of "hits" expected on a word. This is basically the 313 ** number of different ways different affixes can produce the same word. 314 ** For example, with "english.aff", "brothers" can be produced 3 ways: 315 ** "brothers," "brother+s", or "broth+ers". If this is too low, no major 316 ** harm will be done, but ispell may occasionally forget a capitalization. 317 */ 318 #ifndef MAX_HITS 319 #define MAX_HITS 10 320 #endif 321 322 /* 323 ** Maximum number of capitalization variations expected in any word. 324 ** Besides the obvious all-lower, all-upper, and capitalized versions, 325 ** this includes followcase variants. If this is too low, no real 326 ** harm will be done, but ispell may occasionally fail to suggest a 327 ** correct capitalization. 328 */ 329 #ifndef MAX_CAPS 330 #define MAX_CAPS 10 331 #endif /* MAX_CAPS */ 332 333 /* buffer size to use for file names if not in sys/param.h */ 334 #ifndef MAXPATHLEN 335 #define MAXPATHLEN 512 336 #endif 337 338 /* 339 ** Maximum language-table search size. Smaller numbers make ispell 340 ** run faster, at the expense of more memory (the lowest reasonable value 341 ** is 2). If a given character appears in a significant position in 342 ** more than MAXSEARCH suffixes, it will be given its own index table. 343 ** If you change this, define INDEXDUMP in lookup.c to be sure your 344 ** index table looks reasonable. 345 */ 346 #ifndef MAXSEARCH 347 #define MAXSEARCH 4 348 #endif 349 350 #if defined(__STDC__) || defined(__cplusplus) 351 #define P(x) x 352 #ifndef VOID 353 #define VOID void 354 #endif 355 #else /* __STDC__ */ 356 #define P(x) () 357 #ifndef VOID 358 #define VOID char 359 #endif 360 #define const 361 #endif /* __STDC__ */ 362 363 #ifdef NO8BIT 364 #define SET_SIZE 128 365 #else 366 #define SET_SIZE 256 367 #endif 368 369 #define MASKSIZE (gnMaskBits / MASKTYPE_WIDTH) 370 371 #ifdef lint 372 extern int TSTMASKBIT P ((MASKTYPE * mask, int bit)); 373 #else /* lint */ 374 /* The following is really testing for MASKSIZE <= 1, but cpp can't do that */ 375 #define TSTMASKBIT(mask, bit) \ 376 ((mask)[(bit) / MASKTYPE_WIDTH] & \ 377 ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1)))) 378 #endif /* lint */ 379 380 #if MASKBITS > 64 381 #define FULLMASKSET 382 #endif 383 384 #if MASKBITS <= 32 385 #define FLAGBASE ((MASKTYPE_WIDTH) - 6) 386 #else 387 # if MASKBITS <= 64 388 #define FLAGBASE ((MASKTYPE_WIDTH) - 6) 389 # else 390 #define FLAGBASE 0 391 # endif 392 #endif 393 394 /* 395 ** Data type for internal word storage. If necessary, we use shorts rather 396 ** than chars so that string characters can be encoded as a single unit. 397 */ 398 #if (SET_SIZE + MAXSTRINGCHARS) <= 256 399 #ifndef lint 400 #define ICHAR_IS_CHAR 401 #endif /* lint */ 402 #endif 403 404 #ifdef ICHAR_IS_CHAR 405 typedef unsigned char ichar_t; /* Internal character */ 406 #define icharlen(s) strlen ((char *) (s)) 407 #define icharcpy(a, b) strcpy ((char *) (a), (char *) (b)) 408 #define icharcmp(a, b) strcmp ((char *) (a), (char *) (b)) 409 #define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n)) 410 #define chartoichar(x) ((ichar_t) (x)) 411 #else 412 typedef unsigned short ichar_t; /* Internal character */ 413 #define chartoichar(x) ((ichar_t) (unsigned char) (x)) 414 415 /* 416 * Structure used to record data about successful lookups; these values 417 * are used in the ins_root_cap routine to produce correct capitalizations. 418 */ 419 struct success 420 { 421 struct dent * dictent; /* Header of dict entry chain for wd */ 422 struct flagent * prefix; /* Prefix flag used, or NULL */ 423 struct flagent * suffix; /* Suffix flag used, or NULL */ 424 }; 425 426 ichar_t* icharcpy (ichar_t* out, ichar_t* in); 427 int icharlen (ichar_t* in); 428 int icharcmp (ichar_t* s1, ichar_t* s2); 429 int icharncmp (ichar_t* s1, ichar_t* s2, int n); 430 431 #endif 432 433 struct dent 434 { 435 struct dent * next; 436 char * word; 437 MASKTYPE mask[2]; 438 #ifdef FULLMASKSET 439 char flags; 440 #endif 441 }; 442 443 /* 444 ** Flags in the directory entry. If FULLMASKSET is undefined, these are 445 ** stored in the highest bits of the last longword of the mask field. If 446 ** FULLMASKSET is defined, they are stored in the extra "flags" field. 447 #ifndef NO_CAPITALIZATION_SUPPORT 448 ** 449 ** If a word has only one capitalization form, and that form is not 450 ** FOLLOWCASE, it will have exactly one entry in the dictionary. The 451 ** legal capitalizations will be indicated by the 2-bit capitalization 452 ** field, as follows: 453 ** 454 ** ALLCAPS The word must appear in all capitals. 455 ** CAPITALIZED The word must be capitalized (e.g., London). 456 ** It will also be accepted in all capitals. 457 ** ANYCASE The word may appear in lowercase, capitalized, 458 ** or all-capitals. 459 ** 460 ** Regardless of the capitalization flags, the "word" field of the entry 461 ** will point to an all-uppercase copy of the word. This is to simplify 462 ** the large portion of the code that doesn't care about capitalization. 463 ** Ispell will generate the correct version when needed. 464 ** 465 ** If a word has more than one capitalization, there will be multiple 466 ** entries for it, linked together by the "next" field. The initial 467 ** entry for such words will be a dummy entry, primarily for use by code 468 ** that ignores capitalization. The "word" field of this entry will 469 ** again point to an all-uppercase copy of the word. The "mask" field 470 ** will contain the logical OR of the mask fields of all variants. 471 ** A header entry is indicated by a capitalization type of ALLCAPS, 472 ** with the MOREVARIANTS bit set. 473 ** 474 ** The following entries will define the individual variants. Each 475 ** entry except the last has the MOREVARIANTS flag set, and each 476 ** contains one of the following capitalization options: 477 ** 478 ** ALLCAPS The word must appear in all capitals. 479 ** CAPITALIZED The word must be capitalized (e.g., London). 480 ** It will also be accepted in all capitals. 481 ** FOLLOWCASE The word must be capitalized exactly like the 482 ** sample in the entry. Prefix (suffix) characters 483 ** must be rendered in the case of the first (last) 484 ** "alphabetic" character. It will also be accepted 485 ** in all capitals. ("Alphabetic" means "mentioned 486 ** in a 'casechars' statement".) 487 ** ANYCASE The word may appear in lowercase, capitalized, 488 ** or all-capitals. 489 ** 490 ** The "mask" field for the entry contains only the affix flag bits that 491 ** are legal for that capitalization. The "word" field will be null 492 ** except for FOLLOWCASE entries, where it will point to the 493 ** correctly-capitalized spelling of the root word. 494 ** 495 ** It is worth discussing why the ALLCAPS option is used in 496 ** the header entry. The header entry accepts an all-capitals 497 ** version of the root plus every affix (this is always legal, since 498 ** words get capitalized in headers and so forth). Further, all of 499 ** the following variant entries will reject any all-capitals form 500 ** that is illegal due to an affix. 501 ** 502 ** Finally, note that variations in the KEEP flag can cause a multiple-variant 503 ** entry as well. For example, if the personal dictionary contains "ALPHA", 504 ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a 505 ** multiple-variant entry will be created so that "alpha" will be accepted 506 ** but only "ALPHA" will actually be kept. 507 #endif 508 */ 509 #ifdef FULLMASKSET 510 #define flagfield flags 511 #else 512 #define flagfield mask[1] 513 #endif 514 #define USED ((MASKTYPE) 1 << (FLAGBASE + 0)) 515 #define KEEP ((MASKTYPE) 1 << (FLAGBASE + 1)) 516 #ifdef NO_CAPITALIZATION_SUPPORT 517 #define ALLFLAGS (USED | KEEP) 518 #else /* NO_CAPITALIZATION_SUPPORT */ 519 #define ANYCASE ((MASKTYPE) 0 << (FLAGBASE + 2)) 520 #define ALLCAPS ((MASKTYPE) 1 << (FLAGBASE + 2)) 521 #define CAPITALIZED ((MASKTYPE) 2 << (FLAGBASE + 2)) 522 #define FOLLOWCASE ((MASKTYPE) 3 << (FLAGBASE + 2)) 523 #define CAPTYPEMASK ((MASKTYPE) 3 << (FLAGBASE + 2)) 524 #define MOREVARIANTS ((MASKTYPE) 1 << (FLAGBASE + 4)) 525 #define ALLFLAGS (USED | KEEP | CAPTYPEMASK | MOREVARIANTS) 526 #define captype(x) ((x) & CAPTYPEMASK) 527 #endif /* NO_CAPITALIZATION_SUPPORT */ 528 529 /* 530 * Language tables used to encode prefix and suffix information. 531 */ 532 struct flagent 533 { 534 ichar_t * strip; /* String to strip off */ 535 ichar_t * affix; /* Affix to append */ 536 short flagbit; /* Flag bit this ent matches */ 537 short stripl; /* Length of strip */ 538 short affl; /* Length of affix */ 539 short numconds; /* Number of char conditions */ 540 short flagflags; /* Modifiers on this flag */ 541 char conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds */ 542 }; 543 544 /* 545 * Bits in flagflags 546 */ 547 #define FF_CROSSPRODUCT (1 << 0) /* Affix does cross-products */ 548 #define FF_COMPOUNDONLY (1 << 1) /* Afx works in compounds */ 549 550 union ptr_union /* Aid for building flg ptrs */ 551 { 552 struct flagptr * fp; /* Pointer to more indexing */ 553 struct flagent * ent; /* First of a list of ents */ 554 }; 555 556 struct flagptr 557 { 558 union ptr_union pu; /* Ent list or more indexes */ 559 int numents; /* If zero, pu.fp is valid */ 560 }; 561 562 /* 563 * Description of a single string character type. 564 */ 565 struct strchartype 566 { 567 char * name; /* Name of the type */ 568 char * deformatter; /* Deformatter to use */ 569 char * suffixes; /* File suffixes, null seps */ 570 }; 571 572 /* 573 * Header placed at the beginning of the hash file. 574 */ 575 struct hashheader 576 { 577 unsigned short magic; /* Magic number for ID */ 578 unsigned short compileoptions; /* How we were compiled */ 579 short maxstringchars; /* Max # strchrs we support */ 580 short maxstringcharlen; /* Max strchr len supported */ 581 short compoundmin; /* Min lth of compound parts */ 582 short compoundbit; /* Flag 4 compounding roots */ 583 int stringsize; /* Size of string table */ 584 int lstringsize; /* Size of lang. str tbl */ 585 int tblsize; /* No. entries in hash tbl */ 586 int stblsize; /* No. entries in sfx tbl */ 587 int ptblsize; /* No. entries in pfx tbl */ 588 int sortval; /* Largest sort ID assigned */ 589 int nstrchars; /* No. strchars defined */ 590 int nstrchartype; /* No. strchar types */ 591 int strtypestart; /* Start of strtype table */ 592 char nrchars[5]; /* Nroff special characters */ 593 char texchars[13]; /* TeX special characters */ 594 char compoundflag; /* Compund-word handling */ 595 char defhardflag; /* Default tryveryhard flag */ 596 char flagmarker; /* "Start-of-flags" char */ 597 unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */ 598 ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */ 599 ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */ 600 char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */ 601 char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */ 602 char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */ 603 char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */ 604 char stringstarts[SET_SIZE]; /* NZ if char can start str */ 605 char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */ 606 unsigned int stringdups[MAXSTRINGCHARS]; /* No. of "base" char */ 607 int dupnos[MAXSTRINGCHARS]; /* Dup char ID # */ 608 unsigned short magic2; /* Second magic for dbl chk */ 609 }; 610 611 /* hash table magic number */ 612 #define MAGIC 0x9602 613 614 /* compile options, put in the hash header for consistency checking */ 615 #ifdef NO8BIT 616 # define MAGIC8BIT 0x01 617 #else 618 # define MAGIC8BIT 0x00 619 #endif 620 #ifdef NO_CAPITALIZATION_SUPPORT 621 # define MAGICCAPITALIZATION 0x00 622 #else 623 # define MAGICCAPITALIZATION 0x02 624 #endif 625 # define MAGICMASKSET 0x04 626 627 #if MASKBITS <= 32 628 # define MAGICMASKSET 0x00 629 #else 630 # if MASKBITS <= 64 631 # else 632 # if MASKBITS <= 128 633 # define MAGICMASKSET 0x08 634 # else 635 # define MAGICMASKSET 0x0C 636 # endif 637 # endif 638 #endif 639 640 #define COMPILEOPTIONS (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET) 641 642 /* 643 ** Offsets into the nroff special-character array 644 */ 645 #define NRLEFTPAREN hashheader.nrchars[0] 646 #define NRRIGHTPAREN hashheader.nrchars[1] 647 #define NRDOT hashheader.nrchars[2] 648 #define NRBACKSLASH hashheader.nrchars[3] 649 #define NRSTAR hashheader.nrchars[4] 650 651 /* 652 ** Offsets into the TeX special-character array 653 */ 654 #define TEXLEFTPAREN hashheader.texchars[0] 655 #define TEXRIGHTPAREN hashheader.texchars[1] 656 #define TEXLEFTSQUARE hashheader.texchars[2] 657 #define TEXRIGHTSQUARE hashheader.texchars[3] 658 #define TEXLEFTCURLY hashheader.texchars[4] 659 #define TEXRIGHTCURLY hashheader.texchars[5] 660 #define TEXLEFTANGLE hashheader.texchars[6] 661 #define TEXRIGHTANGLE hashheader.texchars[7] 662 #define TEXBACKSLASH hashheader.texchars[8] 663 #define TEXDOLLAR hashheader.texchars[9] 664 #define TEXSTAR hashheader.texchars[10] 665 #define TEXDOT hashheader.texchars[11] 666 #define TEXPERCENT hashheader.texchars[12] 667 668 /* 669 ** Values for compoundflag 670 */ 671 #define COMPOUND_NEVER 0 /* Compound words are never good */ 672 #define COMPOUND_ANYTIME 1 /* Accept run-together words */ 673 #define COMPOUND_CONTROLLED 2 /* Compounds controlled by afx flags */ 674 /* 675 ** These macros are similar to the ones above, but they take into account 676 ** the possibility of string characters. Note well that they take a POINTER, 677 ** not a character. 678 ** 679 ** The "l_" versions set "len" to the length of the string character as a 680 ** handy side effect. (Note that the global "laststringch" is also set, 681 ** and sometimes used, by these macros.) 682 ** 683 ** The "l1_" versions go one step further and guarantee that the "len" 684 ** field is valid for *all* characters, being set to 1 even if the macro 685 ** returns false. This macro is a great example of how NOT to write 686 ** readable C. 687 */ 688 /*TF NOTE: This is actually defined in code (makedent) now */ 689 #if 0 690 #define isstringch(ptr, canon) (isstringstart (*(ptr)) \ 691 && stringcharlen ((ptr), (canon)) > 0) 692 #define l_isstringch(ptr, len, canon) \ 693 (isstringstart (*(ptr)) \ 694 && (len = stringcharlen ((ptr), (canon))) \ 695 > 0) 696 #define l1_isstringch(ptr, len, canon) \ 697 (len = 1, \ 698 isstringstart ((unsigned char)(*(ptr))) \ 699 && ((len = \ 700 stringcharlen ((ptr), (canon))) \ 701 > 0 \ 702 ? 1 : (len = 1, 0))) 703 #endif 704 705 /* 706 * Sizes of buffers returned by ichartosstr/strtosichar. 707 */ 708 #define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) 709 #define STRTOSICHAR_SIZE ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \ 710 * sizeof (ichar_t)) 711 /* TF CHANGE: We should fill this as a structure 712 and then use it throughout. 713 */ 714 715 /* 716 * Initialized variables. These are generated using macros so that they 717 * may be consistently declared in all programs. Numerous examples of 718 * usage are given below. 719 */ 720 #ifdef MAIN 721 #define INIT(decl, init) decl = init 722 #else 723 #define INIT(decl, init) extern decl 724 #endif 725 726 #ifdef MINIMENU 727 INIT (int minimenusize, 2); /* MUST be either 2 or zero */ 728 #else /* MINIMENU */ 729 INIT (int minimenusize, 0); /* MUST be either 2 or zero */ 730 #endif /* MINIMENU */ 731 732 INIT (int eflag, 0); /* NZ for expand mode */ 733 INIT (int dumpflag, 0); /* NZ to do dump mode */ 734 INIT (int fflag, 0); /* NZ if -f specified */ 735 #ifndef USG 736 INIT (int sflag, 0); /* NZ to stop self after EOF */ 737 #endif 738 INIT (int vflag, 0); /* NZ to display characters as M-xxx */ 739 INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */ 740 INIT (int deftflag, -1); /* NZ for TeX mode by default */ 741 INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */ 742 INIT (int prefstringchar, -1); /* Preferred string character type */ 743 744 INIT (int terse, 0); /* NZ for "terse" mode */ 745 746 INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */ 747 748 INIT (int minword, MINWORD); /* Longest always-legal word */ 749 INIT (int sortit, 1); /* Sort suggestions alphabetically */ 750 INIT (int compoundflag, -1); /* How to treat compounds: see above */ 751 INIT (int tryhardflag, -1); /* Always call tryveryhard */ 752 753 INIT (char * currentfile, NULL); /* Name of current input file */ 754 755 /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */ 756 INIT (int math_mode, 0); 757 /* P -- paragraph or LR mode 758 * b -- parsing a \begin statement 759 * e -- parsing an \end statement 760 * r -- parsing a \ref type of argument. 761 * m -- looking for a \begin{minipage} argument. 762 */ 763 INIT (char LaTeX_Mode, 'P'); 764 765 #ifdef __cplusplus 766 } 767 #endif /* c++ */ 768 769 #endif /* ISPELL_H */ 770