1 ///////////////////////////////////////////////////////////////////////////
2 /*
3 Copyright 2001-2002 Ronald S. Burkey
4
5 This file is part of GutenMark.
6
7 GutenMark is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 GutenMark is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GutenMark; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 Filename: MatchWordlists.c
22 Purpose: Sets various fields in an existing in-memory wordlist
23 against wordlists/namelists read from the filesystem.
24 Mods: 11/21/01 RSB Began.
25 11/29/01 RSB Appending to globs, which had been used,
26 worked in Linux but not FreeBSD or
27 MacOS-X. Therefore, appending to the
28 globs has been eliminated.
29 12/01/01 RSB Globbing doesn't appear at all in
30 mingw32. Rats! Fortunately,
31 the DOS-like functions findfirst()
32 and findnext() appear, and they can
33 be used to produce a similar effect.
34 I provide a globlike equivalent in
35 a separate file (winglob.c), but must
36 include winglob.h rather than glob.h.
37 12/05/01 RSB Now check for programname+".cfg" if
38 GutenMark.cfg isn't found. This allows
39 a uniform installation procedure to
40 place the configuration file and the
41 wordlists in the same directory as the
42 executable, but for a user's configuration
43 file to override the global file.
44 12/23/01 RSB Began adding the ability to determine the
45 specific set of languages for each word,
46 rather than just relying on the
47 native vs. foreign dichotomy.
48 06/30/02 RSB Added much more explicit error messages.
49 Found (and hopefully fixed) several cases
50 in which the program might abort or not
51 process subsequent wordlists when some
52 intermediate wordlist pattern wasn't
53 matched.
54 07/13/02 RSB Added some stuff to account for the fact
55 that Win32 adds ".exe" to argv[0].
56 Added ability to specify an alternate
57 configuration file on the command line.
58 07/14/02 RSB Added a bunch of new log messages reporting
59 results of wordlist globbing. Added
60 *.places.gz to the list of default
61 wordlists in the absence of a config file.
62 In the newest version of BSD I find,
63 sadly, that the glob function no longer
64 uses the GLOB_NOMATCH and GLOB_ABORTED
65 constants as in earlier versions or
66 as in Linux. *Sigh!*
67 07/21/02 RSB When wordlist names are pure -- i.e.,
68 without a leading path -- now searches
69 for the wordlists in *both* the current
70 directory and the directory containing
71 the executable. (Previously, was just
72 the current directory.)
73 12/16/02 RSB Changed the GLOB_ABEND case in GlobErrorMessage,
74 because Thomas Klausner has reported that
75 GLOB_ABEND and GLOB_ABORTED are defined
76 identically in NetBSD.
77
78 Note that although the documentation (as of 11/21/01) only refers
79 to gzipped wordlists, the wordlists can also be unzipped ASCII
80 as well. This takes a LOT more disk space, but might be faster
81 under some circumstances. (But maybe not!)
82 */
83
84 ///////////////////////////////////////////////////////////////////////////
85 #include <stdlib.h>
86 #include <stdio.h>
87 #include <string.h>
88 #include <ctype.h>
89 #ifdef WIN32
90 #include "winglob.h"
91 #else /* */
92 #include <glob.h>
93 #endif /* */
94 #include <zlib.h>
95 #include "libGutenSpell.h"
96
// Take care of some discrepancies in the constants used by the glob
// function on different platforms.  Any of these result codes that the
// platform's glob header does not define is given a placeholder value
// here, so that the comparisons and "case" labels further down in this
// file still compile.  (The placeholder values are presumably chosen so
// as not to collide with any genuine glob return code.)
#ifndef GLOB_NOMATCH
#define GLOB_NOMATCH 0x666
#endif
#ifndef GLOB_ABORTED
#define GLOB_ABORTED 0x667
#endif
#ifndef GLOB_ABEND
#define GLOB_ABEND 0x668
#endif

// The flags passed to every glob() call made in this file.
#define FIRSTGLOB (GLOB_NOSORT | GLOB_TILDE)

// Appending to globs has been eliminated (see the 11/29/01 note in the
// file header), so the following is no longer used.
//#define NEXTGLOB (GLOB_APPEND | FIRSTGLOB)

// The number of elements in each of the arrays of a NativeList, and hence
// the maximum number of globbed wordlist files that can be recorded.
#define MAX_WORDLISTS 256
113
// In this list, we keep one entry for each globbed filename, indicating
// (among other things) whether it is supposed to be a SPELL_NATIVE or
// a SPELL_FOREIGN wordlist.  The arrays are parallel:  element j of each
// array describes the same file.  Entries are appended by Mark, which
// stops adding files once MAX_WORDLISTS entries are in use, so any
// globbed files beyond that limit are not recorded.
typedef struct
{
  // Number of entries currently in use.
  int Count;
  // The Type value given to Mark for the file (SPELL_NATIVE or
  // SPELL_FOREIGN).
  unsigned char List[MAX_WORDLISTS];
  // Heap-allocated copy of the globbed pathname of the file.
  char *Names[MAX_WORDLISTS];
  // Heap-allocated copy of the language name associated with the file.
  char *Languages[MAX_WORDLISTS];
  // Bit mask (1, 2, 4, 8, ...) associated with the language name; Mark
  // reuses the mask of an earlier entry having the same language name.
  unsigned long LanguageMasks[MAX_WORDLISTS];
}
NativeList;
127
128 //------------------------------------------------------------------------
129 // Reads and processes an on-disk wordlist. Returns 0 on success or
130 // on file not found. Returns non-zero only on actual error.
131 // Note that "gz" file operations (like gzopen rather than fopen)
132 // are used, because the wordlists are usually gzipped.
133 // The Type parameter is either SPELL_NATIVE or SPELL_FOREIGN.
134 //
135 // The strings read from the wordlist must contain ONLY the characters
136 // identified by IsWordChar as candidates for being in words, and must
137 // not begin with punctuation.
138 static int
ReadWordlist(Wordlist * Words,const char * Filename,unsigned char LanguageType,unsigned long LanguageMask,FILE * LogFile)139 ReadWordlist (Wordlist * Words, const char *Filename,
140 unsigned char LanguageType, unsigned long LanguageMask,
141 FILE * LogFile)
142 {
143 char Full[MAXWORDLENGTH], Normalized[MAXWORDLENGTH],
144 TestFull[MAXWORDLENGTH], TestNormalized[MAXWORDLENGTH], *ss;
145 int i, NumLower, NumUpper, FirstUpper, AnyDiacritical, ReturnValue =
146 1, Matched;
147 unsigned char Capitalization;
148 gzFile fp;
149 fp = gzopen (Filename, "rb");
150 if (fp == NULL)
151 {
152 fprintf (stderr, "Note: Wordlist \"%s\" not found (or corrupt).\n",
153 Filename);
154 if (LogFile != NULL)
155 fprintf (LogFile,
156 "ReadWordlist: Wordlist \"%s\" not found (or corrupt).\n",
157 Filename);
158 goto Okay;
159 }
160
161 // The file is open! Let's read and process it.
162 while (Z_NULL != gzgets (fp, Full, sizeof (Full)))
163 {
164
165 // Make sure the string is an actual word (not a comment),
166 // and trim off garbage like end of line.
167 for (ss = Full; *ss; ss++)
168 if (*ss == '\n')
169 *ss = 0;
170 if (!Full[0] || Full[0] == '#' || isspace (Full[0]))
171 continue;
172
173 // Analyze the capitalization of the word.
174 for (NumLower = NumUpper = FirstUpper = AnyDiacritical = 0, ss = Full;
175 *ss; ss++)
176 {
177 i = IsWordChar (*ss);
178 if (i & WORD_LOWER)
179 NumLower++;
180 else if (i & WORD_UPPER)
181 {
182 NumUpper++;
183 if (!NumLower)
184 FirstUpper = 1;
185 }
186 if (i & WORD_DIACRITICAL)
187 AnyDiacritical = 1;
188 }
189 if (!NumUpper)
190 Capitalization = SPELL_LOWERCASE;
191 else if (!NumLower)
192 Capitalization = SPELL_UPPERCASE;
193 else if (NumUpper == 1 && FirstUpper == 1)
194 Capitalization = SPELL_CAPITALIZED;
195 else
196 Capitalization = SPELL_CUSTOMCAP;
197
198 // Compute the normalized form of the word.
199 if (0 == DiacriticalNormalize (Full, Normalized, sizeof (Normalized)))
200 Normalized[0] = 0;
201
202 // First, see if this word has been used, as-is.
203 i = SearchWordlist (Words, Normalized, Full, &Matched);
204 if (Matched)
205 {
206
207 Words->Words[i].Languages |= LanguageMask;
208
209 // If the word is already marked, we don't have to do
210 // anything more with it. But if it hasn't been ...
211 if (!Words->Words[i].WordlistStatus)
212 Words->Words[i].WordlistStatus = LanguageType | Capitalization;
213
214 // If foreign, though, we set a flag regardless:
215 if (LanguageType == SPELL_FOREIGN)
216 Words->Words[i].WordlistStatus |= SPELL_NONNATIVE;
217 }
218
219 // If all lower-case, check if the capitalized version was used. This is
220 // necessary to prevent short foreign words from matching it.
221 if (Capitalization == SPELL_LOWERCASE)
222 {
223 strcpy (TestFull, Full);
224 strcpy (TestNormalized, Normalized);
225 TestFull[0] = DiacriticalToupper (TestFull[0]);
226 TestNormalized[0] = DiacriticalToupper (TestNormalized[0]);
227 i = SearchWordlist (Words, TestNormalized, TestFull, &Matched);
228 if (Matched)
229 {
230
231 Words->Words[i].Languages |= LanguageMask;
232
233 // If the word is already marked, we don't have to do
234 // anything more with it. But if it hasn't been ...
235 if (!Words->Words[i].WordlistStatus)
236 Words->Words[i].WordlistStatus =
237 LanguageType | Capitalization;
238
239 // If foreign, though, we set a flag regardless:
240 if (LanguageType == SPELL_FOREIGN)
241 Words->Words[i].WordlistStatus |= SPELL_NONNATIVE;
242 }
243 }
244
245 // Next, test the same thing, but for the case of the word having
246 // been used as all-caps.
247 if (Capitalization != SPELL_UPPERCASE)
248 {
249 strcpy (TestFull, Full);
250 strcpy (TestNormalized, Normalized);
251 DiacriticalStrupr (TestFull);
252 DiacriticalStrupr (TestNormalized);
253 i = SearchWordlist (Words, TestNormalized, TestFull, &Matched);
254 if (Matched)
255 {
256
257 Words->Words[i].Languages |= LanguageMask;
258
259 // If the word is already marked, we don't have to do
260 // anything more with it. But if it hasn't been ...
261 if (!Words->Words[i].WordlistStatus)
262 Words->Words[i].WordlistStatus =
263 LanguageType | Capitalization;
264
265 // If foreign, though, we set a flag regardless:
266 if (LanguageType == SPELL_FOREIGN)
267 Words->Words[i].WordlistStatus |= SPELL_NONNATIVE;
268 }
269 }
270
271 // The tests above assume a 7-bit to 7-bit or 8-bit to 8-bit
272 // match of the word in the etext with the word in the wordlist.
273 // Another possibility, though, is that a 7-bit word in the etext
274 // corresponds to an 8-bit word in the wordlist. These tests
275 // are more complex, because we're interested not merely in
276 // using them for handling ALL-CAPS italicizing, but also for
277 // restoration of diacritical marks. So we also need to check
278 // for the merely-capitalized case.
279 if (!AnyDiacritical) // wordlist word not 8-bit anyhow.
280 continue;
281
282 // Check 7-bit to 8-bit with unchanged capitalization.
283 i = SearchWordlist (Words, Normalized, Normalized, &Matched);
284 if (Matched)
285 {
286
287 Words->Words[i].Languages |= LanguageMask;
288
289 // If the word is already marked, we don't have to do
290 // anything more with it. But if it hasn't been ...
291 if (!Words->Words[i].WordlistStatus)
292 {
293 if (NULL !=
294 (Words->Words[i].Match = AllocSpellString (Words, Full)))
295 Words->Words[i].WordlistStatus =
296 SPELL_NORMALIZED | LanguageType | Capitalization;
297 }
298
299 // If foreign, though, we set a flag regardless:
300 if (LanguageType == SPELL_FOREIGN)
301 Words->Words[i].WordlistStatus |= SPELL_NONNATIVE;
302 }
303
304 // Check 7-bit to 8-bit with initial capitalization.
305 Normalized[0] = DiacriticalToupper (Normalized[0]);
306 i = SearchWordlist (Words, Normalized, Normalized, &Matched);
307 if (Matched)
308 {
309
310 Words->Words[i].Languages |= LanguageMask;
311
312 // If the word is already marked, we don't have to do
313 // anything more with it. But if it hasn't been ...
314 if (!Words->Words[i].WordlistStatus)
315 {
316 if (NULL !=
317 (Words->Words[i].Match = AllocSpellString (Words, Full)))
318 Words->Words[i].WordlistStatus =
319 SPELL_NORMALIZED | LanguageType | Capitalization;
320 }
321
322 // If foreign, though, we set a flag regardless:
323 if (LanguageType == SPELL_FOREIGN)
324 Words->Words[i].WordlistStatus |= SPELL_NONNATIVE;
325 }
326
327 // Check 7-bit to 8-bit with all caps.
328 i = SearchWordlist (Words, TestNormalized, TestNormalized, &Matched);
329 if (Matched)
330 {
331
332 Words->Words[i].Languages |= LanguageMask;
333
334 // If the word is already marked, we don't have to do
335 // anything more with it. But if it hasn't been ...
336 if (!Words->Words[i].WordlistStatus)
337 {
338 if (NULL !=
339 (Words->Words[i].Match = AllocSpellString (Words, Full)))
340 Words->Words[i].WordlistStatus =
341 SPELL_NORMALIZED | LanguageType | Capitalization;
342 }
343
344 // If foreign, though, we set a flag regardless:
345 if (LanguageType == SPELL_FOREIGN)
346 Words->Words[i].WordlistStatus |= SPELL_NONNATIVE;
347 }
348 }
349 Okay:ReturnValue = 0;
350
351 //Done:
352 if (fp != NULL)
353 gzclose (fp);
354 return (ReturnValue);
355 }
356
357 //------------------------------------------------------------------------
358 // Update the NativeList array after globbing. The glob_t contains
359 // the globbed filenames derived from an entry in the configuration
360 // file.
361 void
Mark(NativeList * List,glob_t * Glob,char Type,char * Language)362 Mark (NativeList * List, glob_t * Glob, char Type, char *Language)
363 {
364 int i, j, k;
365 unsigned long MaxMask;
366 if (*Language == 0)
367 {
368 if (List->Count == 0 || Type == SPELL_NATIVE)
369 Language = "native";
370 else
371 Language = "foreign";
372 }
373 for (i = 0, j = List->Count; i < Glob->gl_pathc && j < MAX_WORDLISTS;
374 i++, j++)
375 {
376 List->List[j] = Type;
377 List->Names[j] = (char *) calloc (1, strlen (Glob->gl_pathv[i]) + 1);
378 if (List->Names[j] == NULL)
379 break;
380 strcpy (List->Names[j], Glob->gl_pathv[i]);
381 List->Languages[j] = (char *) calloc (1, strlen (Language) + 1);
382 if (List->Languages[j] == NULL)
383 break;
384 strcpy (List->Languages[j], Language);
385 // Figure out the associated language mask. This is a value
386 // 1, 2, 4, 8, ... uniquely associated with the language.
387 MaxMask = 0;
388 for (k = 0; k < List->Count; k++)
389 if (!strcmp (List->Languages[k], Language))
390 {
391 List->LanguageMasks[j] = List->LanguageMasks[k];
392 break;
393 }
394 else if (List->LanguageMasks[k] > MaxMask)
395 MaxMask = List->LanguageMasks[k];
396 if (k == List->Count)
397 {
398 if (k == 0)
399 List->LanguageMasks[j] = 1;
400 else
401 {
402 if (0 != (MaxMask << 1))
403 MaxMask = MaxMask << 1;
404 List->LanguageMasks[j] = MaxMask;
405 }
406 }
407 }
408 List->Count = j;
409 }
410
//------------------------------------------------------------------------
// Convert a string to lower case, in place.  The conversion is whatever
// tolower() performs.
//
// Each character is passed to tolower() as an unsigned char, because
// passing a negative value (as an 8-bit character held in a signed char
// would be) is undefined.

static void
StrLwr (char *s)
{
  for (; *s != '\0'; s++)
    *s = (char) tolower ((unsigned char) *s);
}
420
421 //------------------------------------------------------------------------
422 // Displays an error message related to globbing.
423
424 static void
GlobErrorMessage(int ReturnValue,FILE * LogFile,char * ss)425 GlobErrorMessage (int ReturnValue, FILE * LogFile, char *ss)
426 {
427 if (ReturnValue)
428 {
429 switch (ReturnValue)
430 {
431 case GLOB_NOSPACE:
432 fprintf (stderr, "Out of memory.\n");
433 if (LogFile != NULL)
434 fprintf (LogFile, "MatchWordlists: GLOB_NOSPACE for \"%s\"\n",
435 ss);
436 break;
437 #if GLOB_ABORTED != GLOB_ABEND
438 case GLOB_ABORTED:
439 fprintf (stderr, "Directory-read error.\n");
440 if (LogFile != NULL)
441 fprintf (LogFile, "MatchWordlists: GLOB_ABORTED for \"%s\"\n",
442 ss);
443 break;
444 case GLOB_ABEND:
445 fprintf (stderr, "Possible disk-read error.\n");
446 if (LogFile != NULL)
447 fprintf (LogFile, "MatchWordlists: GLOB_ABEND for \"%s\"\n", ss);
448 break;
449 #else
450 case GLOB_ABORTED:
451 fprintf (stderr, "Possible disk-read error.\n");
452 if (LogFile != NULL)
453 fprintf (LogFile, "MatchWordlists: GLOB_ABORTED/GLOB_ABEND for \"%s\"\n", ss);
454 break;
455 #endif
456 case GLOB_NOMATCH:
457 fprintf (stderr, "Note: No wordlists matched \"%s\".\n", ss);
458 if (LogFile != NULL)
459 fprintf (LogFile, "MatchWordlists: GLOB_NOMATCH for \"%s\"\n",
460 ss);
461 break;
462 default:
463 fprintf (stderr, "Unknown globbing error.\n");
464 if (LogFile != NULL)
465 fprintf (LogFile,
466 "MatchWordlists: glob for \"%s\" returned %d\n", ss,
467 ReturnValue);
468 break;
469 }
470 }
471 }
472
473 //-----------------------------------------------------------------------
474 // Just a little thing that collects some operations that MatchWordlists
475 // does over and over again. Takes a pattern for wordlist filenames,
476 // and processes all of the wordlists following that pattern.
477 // returns 0 on success.
478
479 static int
ProcessWordlist(char * Pattern,glob_t * Glob,int * ReturnValue,FILE * LogFile,NativeList * Natives,char NativeFlag,char * NativeName)480 ProcessWordlist (char *Pattern, glob_t * Glob, int *ReturnValue,
481 FILE * LogFile, NativeList * Natives, char NativeFlag,
482 char *NativeName)
483 {
484 *ReturnValue = glob (Pattern, FIRSTGLOB, NULL, Glob);
485 if (*ReturnValue == GLOB_NOMATCH)
486 GlobErrorMessage (*ReturnValue, LogFile, Pattern);
487 else
488 {
489 if (*ReturnValue != 0)
490 return (1);
491 Mark (Natives, Glob, NativeFlag, NativeName);
492 }
493 return (0);
494 }
495
496 //------------------------------------------------------------------------
497 // libGutenSpell operates in reverse from the way a normal spell-checker
498 // works. A normal spell-checker takes a selected word and tries to
499 // find it in on-disk wordlists. This forces the on-disk wordlists to
500 // be highly organized for fast searching. With libGutenSpell, however,
501 // it is the in-memory words that are highly organized, and the on-disk
502 // wordlists need no particular organization (i.e., they don't have to
503 // be sorted), because EVERY word in the on-disk wordlists is going to
504 // be read and checked against the in-memory list. This has the advantage
505 // of allowing the on-disk wordlists to be highly compressed. In fact,
506 // they are read with zlib rather than stdio.
507 //
508 // The result of running this function is merely to set certain fields
509 // (mostly flags) in the in-memory wordlist, representing properties of
510 // the various words: i.e., whether they were found or not, whether
511 // native language or foreign, whether 7-bit ASCII or 8-bit ASCII, etc.
512 //
513 // The wordlists to be used are taken from the GutenMark.cfg file.
514 // They may contain wildcards or other regular-expression-type stuff,
515 // and so the full list of wordlists needs to be constructed by globbing,
516 // and this is done before any of the wordlists are actually read.
517 //
518 // Returns zero on success.
519 int
MatchWordlists(FILE * LogFile,Wordlist * Words,const char * Language,const char * ProgName,const char * AltCfg)520 MatchWordlists (FILE * LogFile, Wordlist * Words, const char *Language,
521 const char *ProgName, const char *AltCfg)
522 {
523 NativeList Natives = {
524 0
525 };
526 glob_t Glob;
527 char s[256], *ss, Filename[256], ListLanguage[256];
528 char SpellType;
529 FILE *cfg;
530 int i, j, ReturnValue, Found = 0, ProgNamePathLength;
531 for (ss = (char *) ProgName, ProgNamePathLength = 0; *ss; ss++)
532 if (*ss == ':' || *ss == '/' || *ss == '\\')
533 ProgNamePathLength = ss + 1 - ProgName;
534 if (!strncmp (ProgName, "./", ProgNamePathLength))
535 ProgNamePathLength = 0;
536 Glob.gl_offs = Glob.gl_pathc = 0;
537 Glob.gl_pathv = NULL;
538 cfg = NULL;
539 if (AltCfg != NULL)
540 {
541 cfg = fopen (ss = (char *) AltCfg, "r");
542 if (cfg == NULL)
543 {
544 //fprintf (stderr, "Note: Configuration file \"%s\" not found.\n", ss);
545 if (LogFile != NULL)
546 fprintf (LogFile, "Configuration file \"%s\" not found.\n", ss);
547 }
548 else
549 {
550 fprintf (stderr, "Using configuration file \"%s\".\n", ss);
551 if (LogFile != NULL)
552 fprintf (LogFile, "Using configuration file \"%s\".\n", ss);
553 }
554 }
555 if (cfg == NULL)
556 {
557 cfg = fopen (ss = "./GutenMark.cfg", "r");
558 if (cfg == NULL)
559 {
560 //fprintf (stderr, "Note: Configuration file \"%s\" not found.\n", ss);
561 if (LogFile != NULL)
562 fprintf (LogFile, "Configuration file \"%s\" not found.\n", ss);
563 }
564 else
565 {
566 fprintf (stderr, "Using configuration file \"%s\".\n", ss);
567 if (LogFile != NULL)
568 fprintf (LogFile, "Using configuration file \"%s\".\n", ss);
569 }
570 }
571 if (cfg == NULL)
572 {
573 strcpy (s, ProgName);
574 #ifdef WIN32
575 // Remove ".exe" from the end of the executable's name.
576 i = strlen (s);
577 if (i > 4)
578 {
579 ss = &s[i - 4];
580 if (!strcasecmp (ss, ".exe"))
581 *ss = '\0';
582 }
583 #endif // WIN32
584 strcat (s, ".cfg");
585 cfg = fopen (ss = s, "r");
586 if (cfg == NULL)
587 {
588 fprintf (stderr, "Note: Configuration file \"%s\" not found.\n",
589 ss);
590 if (LogFile != NULL)
591 fprintf (LogFile, "Configuration file \"%s\" not found.\n", ss);
592 }
593 else
594 {
595 fprintf (stderr, "Using configuration file \"%s\".\n", ss);
596 if (LogFile != NULL)
597 fprintf (LogFile, "Using configuration file \"%s\".\n", ss);
598 }
599 }
600
601 // Loop on the lines in the configuration file. (If the configuration
602 // file wasn't found, we simply do all of the namelists, followed
603 // by all of the wordlists, in the current directory.)
604 if (cfg == NULL)
605 {
606 fprintf (stderr, "Note: Working without a configuration file.\n");
607 if (LogFile != NULL)
608 fprintf (LogFile, "Note: Working without a configuration file.\n");
609 Default:
610 // Default-language, names, current directory.
611 sprintf (s, "%s.names.gz", Language);
612 if (ProcessWordlist
613 (ss =
614 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE, "native"))
615 goto GlobErrorTrap;
616 // Default-language, names, executable directory.
617 if (ProgNamePathLength)
618 {
619 strncpy (s, ProgName, ProgNamePathLength);
620 sprintf (&s[ProgNamePathLength], "%s.names.gz", Language);
621 if (ProcessWordlist
622 (ss =
623 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE,
624 "native"))
625 goto GlobErrorTrap;
626 }
627 // All languages, names, current directory.
628 if (ProcessWordlist
629 (ss =
630 "*.names.gz", &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE,
631 "native"))
632 goto GlobErrorTrap;
633 // All languages, names, executable directory.
634 if (ProgNamePathLength)
635 {
636 strncpy (s, ProgName, ProgNamePathLength);
637 sprintf (&s[ProgNamePathLength], "*.names.gz");
638 if (ProcessWordlist
639 (ss =
640 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE,
641 "native"))
642 goto GlobErrorTrap;
643 }
644
645 // Default-language, places, current directory.
646 sprintf (s, "%s.places.gz", Language);
647 if (ProcessWordlist
648 (ss =
649 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE, "native"))
650 goto GlobErrorTrap;
651 // Default-language, places, executable directory.
652 if (ProgNamePathLength)
653 {
654 strncpy (s, ProgName, ProgNamePathLength);
655 sprintf (&s[ProgNamePathLength], "%s.places.gz", Language);
656 if (ProcessWordlist
657 (ss =
658 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE,
659 "native"))
660 goto GlobErrorTrap;
661 }
662 // All languages, places, current directory.
663 if (ProcessWordlist
664 (ss =
665 "*.places.gz", &Glob, &ReturnValue, LogFile, &Natives,
666 SPELL_NATIVE, "native"))
667 goto GlobErrorTrap;
668 // All languages, places, executable directory.
669 if (ProgNamePathLength)
670 {
671 strncpy (s, ProgName, ProgNamePathLength);
672 sprintf (&s[ProgNamePathLength], "*.places.gz");
673 if (ProcessWordlist
674 (ss =
675 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE,
676 "native"))
677 goto GlobErrorTrap;
678 }
679
680 // Default-language, words, current directory.
681 sprintf (s, "%s.words.gz", Language);
682 if (ProcessWordlist
683 (ss =
684 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE, "native"))
685 goto GlobErrorTrap;
686 // Default-language, words, executable directory.
687 if (ProgNamePathLength)
688 {
689 strncpy (s, ProgName, ProgNamePathLength);
690 sprintf (&s[ProgNamePathLength], "%s.words.gz", Language);
691 if (ProcessWordlist
692 (ss =
693 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_NATIVE,
694 "native"))
695 goto GlobErrorTrap;
696 }
697 // All languages, words, current directory.
698 if (ProcessWordlist
699 (ss =
700 "*.words.gz", &Glob, &ReturnValue, LogFile, &Natives,
701 SPELL_FOREIGN, "foreign"))
702 goto GlobErrorTrap;
703 // All languages, words, executable directory.
704 if (ProgNamePathLength)
705 {
706 strncpy (s, ProgName, ProgNamePathLength);
707 sprintf (&s[ProgNamePathLength], "*.words.gz");
708 if (ProcessWordlist
709 (ss =
710 s, &Glob, &ReturnValue, LogFile, &Natives, SPELL_FOREIGN,
711 "foreign"))
712 goto GlobErrorTrap;
713 }
714
715
716 GlobErrorTrap:
717 GlobErrorMessage (ReturnValue, LogFile, ss);
718 if (ReturnValue)
719 goto Done;
720 }
721 else
722 {
723
724 // Search the cfg file for the appropriate language profile.
725 // These are headed by lines like "[language]".
726 while (NULL != fgets (s, sizeof (s) - 1, cfg))
727 if (s[0] == '[')
728 {
729 for (ss = &s[1]; *ss; ss++)
730 if (*ss == ']')
731 break;
732 if (*ss == ']')
733 {
734 *ss = 0;
735
736 // Language profile found?
737 if (!strcasecmp (&s[1], Language))
738 {
739 Found = 1;
740
741 // We now use all lines between now and the
742 // next profile (or the end of file) beginning
743 // with "native=" or "foreign=".
744 while (NULL != fgets (s, sizeof (s) - 1, cfg))
745 {
746 if (s[0] == '[')
747 break;
748 ss = strstr (s, "=");
749 if (ss == NULL)
750 continue;
751 *ss = 0;
752 StrLwr (s);
753 *ss = '=';
754 //for (ss = s; *ss; ss++)
755 // if (*ss == '\n')
756 // *ss = 0;
757 ListLanguage[0] = 0;
758 if (sscanf (s, "native=%s%s", Filename, ListLanguage)
759 > 0)
760 SpellType = SPELL_NATIVE;
761 else
762 if (sscanf
763 (s, "foreign=%s%s", Filename, ListLanguage) > 0)
764 SpellType = SPELL_FOREIGN;
765 else
766 continue;
767 if (LogFile != NULL)
768 fprintf (LogFile, "Globbing \"%s\"\n", Filename);
769 ReturnValue = glob (Filename, FIRSTGLOB, NULL, &Glob);
770 if (ReturnValue == GLOB_NOMATCH)
771 {
772 if (LogFile != NULL)
773 fprintf (LogFile, "Note: No matches were found "
774 "for wordlist \"%s\".\n", Filename);
775 }
776 else
777 {
778 GlobErrorMessage (ReturnValue, LogFile, Filename);
779 if (ReturnValue)
780 {
781 if (LogFile != NULL)
782 fprintf (LogFile,
783 "Note: An error code of %d "
784 "was returned for globbing "
785 "\"%s\".\n", ReturnValue,
786 Filename);
787 break;
788 }
789 if (LogFile != NULL)
790 {
791 fprintf (LogFile, "Note: Globbing \"%s\" "
792 "matched %d files.\n",
793 Filename, (int) Glob.gl_pathc);
794 for (i = 0; i < Glob.gl_pathc; i++)
795 fprintf (LogFile, "\t\"%s\"\n",
796 Glob.gl_pathv[i]);
797 }
798 StrLwr (ListLanguage);
799 Mark (&Natives, &Glob, SpellType, ListLanguage);
800 }
801 // Now, if the filename of the wordlist contained no
802 // pathname separators -- in other words, if they were
803 // pure filenames -- it makes sense to look also in
804 // the executable's directory, since that's where the
805 // wordlist was stored.
806 for (ss = Filename; *ss; ss++)
807 if (*ss == ':' || *ss == '/' || *ss == '\\')
808 break;
809 if (LogFile != NULL)
810 fprintf (LogFile, "Filename=\"%s\" ss=\"%s\" ProgNamePathLength=%d\n",
811 Filename, ss, ProgNamePathLength);
812 if (!*ss && ProgNamePathLength)
813 {
814 // Add the executable's directory to the filename.
815 strcpy (s, Filename);
816 strncpy (Filename, ProgName, ProgNamePathLength);
817 strcpy (&Filename[ProgNamePathLength], s);
818 // Now process it just as above.
819 if (LogFile != NULL)
820 fprintf (LogFile, "Globbing \"%s\"\n",
821 Filename);
822 ReturnValue =
823 glob (Filename, FIRSTGLOB, NULL, &Glob);
824 if (ReturnValue == GLOB_NOMATCH)
825 {
826 if (LogFile != NULL)
827 fprintf (LogFile,
828 "Note: No matches were found "
829 "for wordlist \"%s\".\n",
830 Filename);
831 }
832 else
833 {
834 GlobErrorMessage (ReturnValue, LogFile, Filename);
835 if (ReturnValue)
836 {
837 fprintf (LogFile,
838 "Note: An error code of %d "
839 "was returned for globbing "
840 "\"%s\".\n", ReturnValue,
841 Filename);
842 break;
843 }
844 if (LogFile != NULL)
845 {
846 fprintf (LogFile, "Note: Globbing \"%s\" "
847 "matched %d files.\n",
848 Filename, (int) Glob.gl_pathc);
849 for (i = 0; i < Glob.gl_pathc; i++)
850 fprintf (LogFile, "\t\"%s\"\n",
851 Glob.gl_pathv[i]);
852 }
853 StrLwr (ListLanguage);
854 Mark (&Natives, &Glob, SpellType,
855 ListLanguage);
856 }
857 }
858 }
859 break;
860 }
861 }
862 }
863 fclose (cfg);
864 if (!Found)
865 {
866 fprintf (stderr,
867 "Note: Language section found in configuration.\n");
868 if (LogFile != NULL)
869 fprintf (LogFile,
870 "Language section not found in configuration.\n");
871 goto Default;
872 }
873 }
874
875 // Now the list of all wordslists and namelists has been totally
876 // expanded, though possibly with duplicates. We therefore
877 // process them one-by-one.
878 for (i = 0; i < Natives.Count; i++)
879 {
880
881 // Make sure one isn't a duplicate.
882 for (j = 0; j < i; j++)
883 if (!strcmp (Natives.Names[i], Natives.Names[j]))
884 break;
885 if (j == i)
886 {
887 if (LogFile != NULL)
888 fprintf (LogFile, "Wordlist = %s (%s %ld)\n", Natives.Names[i],
889 Natives.Languages[i], Natives.LanguageMasks[i]);
890 fprintf (stderr, "Checking wordlist/namelist %s (%s)\n",
891 Natives.Names[i], Natives.Languages[i]);
892
893 // Not a duplicate. Process it!
894 ReturnValue =
895 ReadWordlist (Words, Natives.Names[i], Natives.List[i],
896 Natives.LanguageMasks[i], LogFile);
897 if (ReturnValue)
898 {
899 fprintf (stderr, "Error processing wordlist.\n");
900 if (LogFile != NULL)
901 fprintf (LogFile, "Error %d processing wordlist.\n",
902 ReturnValue);
903 // Removed 06/30/02. goto Done;
904 }
905 }
906 else
907 {
908 if (LogFile != NULL)
909 fprintf (LogFile, "Duplicate wordlist = %s\n", Natives.Names[i]);
910 }
911 }
912
913 // All done!
914 ReturnValue = 0;
915 Done:
916
917 // For some reason, the following can cause a segmentation fault.
918 //globfree (&Glob);
919 return (ReturnValue);
920 }
921