1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 *
5 * Copyright (C) 2002-2017 Németh László
6 *
7 * The contents of this file are subject to the Mozilla Public License Version
8 * 1.1 (the "License"); you may not use this file except in compliance with
9 * the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
11 *
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 * for the specific language governing rights and limitations under the
15 * License.
16 *
17 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
18 *
19 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
20 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
21 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
22 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
23 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
24 *
25 * Alternatively, the contents of this file may be used under the terms of
26 * either the GNU General Public License Version 2 or later (the "GPL"), or
27 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
36 *
37 * ***** END LICENSE BLOCK ***** */
38
39 // glibc < 3.0 (for mkstemp)
40 #ifndef __USE_MISC
41 #define __USE_MISC
42 #endif
43
44 #include <stdlib.h>
45 #include <stdio.h>
46 #include <sstream>
47 #include <string>
48 #include <string.h>
49 #include <config.h>
50 #include "../hunspell/atypes.hxx"
51 #include "../hunspell/hunspell.hxx"
52 #include "../hunspell/csutil.hxx"
53 #include "../hunspell/hunzip.hxx"
54
// Version string printed in the start-up banner; VERSION comes from config.h.
#define HUNSPELL_VERSION VERSION
// Size of the input buffer used by scanline().
#define INPUTLEN 50

#define HUNSPELL_PIPE_HEADING \
  "@(#) International Ispell Version 3.2.06 (but really Hunspell " VERSION ")" \
  "\n"
#define HUNSPELL_HEADING "Hunspell "
// '|'-separated list of three-letter ODF extensions, searched with strstr()
#define ODF_EXT "odt|ott|odp|otp|odg|otg|ods|ots"
// NOTE(review): as written this is a plain ASCII apostrophe, so every
// mystrrep(..., ENTITY_APOS, "'") call replaces "'" with "'" (a no-op).
// The name suggests an XML entity was intended - confirm against upstream.
#define ENTITY_APOS "'"
// UTF-8 byte sequence E2 80 99 (typographic apostrophe)
#define UTF8_APOS "\xe2\x80\x99"

// for debugging only
//#define LOG

#define DEFAULTDICNAME "default"
70
71 #ifdef WIN32
72
73 #define LIBDIR "C:\\Hunspell\\"
74 #define USEROOODIR { "Application Data\\OpenOffice.org 2\\user\\wordbook" }
75 #define OOODIR \
76 "C:\\Program files\\OpenOffice.org 2.4\\share\\dict\\ooo\\;" \
77 "C:\\Program files\\OpenOffice.org 2.3\\share\\dict\\ooo\\;" \
78 "C:\\Program files\\OpenOffice.org 2.2\\share\\dict\\ooo\\;" \
79 "C:\\Program files\\OpenOffice.org 2.1\\share\\dict\\ooo\\;" \
80 "C:\\Program files\\OpenOffice.org 2.0\\share\\dict\\ooo\\"
81 #define HOME "%USERPROFILE%\\"
82 #define DICBASENAME "hunspell_"
83 #define LOGFILE "C:\\Hunspell\\log"
84 #define DIRSEPCH '\\'
85 #define DIRSEP "\\"
86 #define PATHSEP ";"
87
88 #ifdef __MINGW32__
89 #include <sys/types.h>
90 #include <sys/stat.h>
91 #include <dirent.h>
92 #include <unistd.h>
93 #endif
94
95 #include "../parsers/textparser.hxx"
96 #include "../parsers/htmlparser.hxx"
97 #include "../parsers/latexparser.hxx"
98 #include "../parsers/manparser.hxx"
99 #include "../parsers/firstparser.hxx"
100 #include "../parsers/xmlparser.hxx"
101 #include "../parsers/odfparser.hxx"
102
103 #else
104
105 // Not Windows
106 #include <sys/types.h>
107 #include <sys/stat.h>
108 #include <dirent.h>
109 #include <unistd.h>
110 #include "../parsers/textparser.hxx"
111 #include "../parsers/htmlparser.hxx"
112 #include "../parsers/latexparser.hxx"
113 #include "../parsers/manparser.hxx"
114 #include "../parsers/firstparser.hxx"
115 #include "../parsers/xmlparser.hxx"
116 #include "../parsers/odfparser.hxx"
117
118 #define LIBDIR \
119 "/usr/share/hunspell:" \
120 "/usr/share/myspell:" \
121 "/usr/share/myspell/dicts:" \
122 "/Library/Spelling"
123 #define USEROOODIR { \
124 ".openoffice.org/3/user/wordbook", \
125 ".openoffice.org2/user/wordbook", \
126 ".openoffice.org2.0/user/wordbook",\
127 "Library/Spelling" }
128 #define OOODIR \
129 "/opt/openoffice.org/basis3.0/share/dict/ooo:" \
130 "/usr/lib/openoffice.org/basis3.0/share/dict/ooo:" \
131 "/opt/openoffice.org2.4/share/dict/ooo:" \
132 "/usr/lib/openoffice.org2.4/share/dict/ooo:" \
133 "/opt/openoffice.org2.3/share/dict/ooo:" \
134 "/usr/lib/openoffice.org2.3/share/dict/ooo:" \
135 "/opt/openoffice.org2.2/share/dict/ooo:" \
136 "/usr/lib/openoffice.org2.2/share/dict/ooo:" \
137 "/opt/openoffice.org2.1/share/dict/ooo:" \
138 "/usr/lib/openoffice.org2.1/share/dict/ooo:" \
139 "/opt/openoffice.org2.0/share/dict/ooo:" \
140 "/usr/lib/openoffice.org2.0/share/dict/ooo"
141 #define HOME getenv("HOME")
142 #define DICBASENAME ".hunspell_"
143 #define LOGFILE "/tmp/hunspell.log"
144 #define DIRSEPCH '/'
145 #define DIRSEP "/"
146 #define PATHSEP ":"
147 #endif
148
149 #ifdef HAVE_ICONV
150 #include <iconv.h>
151 #include <errno.h>
152 char text_conv[MAXLNLEN];
153 #endif
154
155 #ifdef HAVE_LOCALE_H
156 # include <locale.h>
157 #endif
158 #ifdef HAVE_LANGINFO_H
159 # include <langinfo.h>
160 #endif
161 #ifdef ENABLE_NLS
162 # include <libintl.h>
163 #else
164 # undef gettext
165 # define gettext(Msgid) ((const char *) (Msgid))
166 # undef textdomain
167 # define textdomain(Domainname) ((const char *) (Domainname))
168 #endif
169
170 #ifdef HAVE_CURSES_H
171 #ifdef HAVE_NCURSESW_CURSES_H
172 #include <ncursesw/curses.h>
173 #else
174 #include <curses.h>
175 #endif
176 #endif
177
178 #ifdef HAVE_READLINE
179 #include <readline/readline.h>
180 #else
181 #define readline scanline
182 #endif
183
184 // file formats:
185
// Input formats understood by get_parser().
enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST, FMT_XML, FMT_ODF };

// global variables

std::string wordchars;  // 8-bit word characters handed to the parsers
char* dicpath = NULL;
const w_char* wordchars_utf16 = NULL;  // UTF-16 word characters (UTF-8 I/O)
std::vector<w_char> new_wordchars_utf16;  // storage for a converted table
int wordchars_utf16_len;  // number of entries behind wordchars_utf16
char* dicname = NULL;  // basename is used to derive the private dictionary name
char* privdicname = NULL;  // explicit private dictionary name, if any
const char* currentfilename = NULL;  // printed in AUTO3 messages

int modified;         // modified file sign
bool multiple_files;  // for listing file names in pipe interface

// Values of filter_mode: what pipe_interface() does with each token/line.
enum {
  NORMAL,
  BADWORD,     // print only bad words
  WORDFILTER,  // print only bad words from 1 word/line input
  BADLINE,     // print only lines with bad words
  STEM,        // stem input words
  ANALYZE,     // analyze input words
  PIPE,        // print only stars for LyX compatibility
  AUTO0,       // search typical error (based on SuggestMgr::suggest())
  AUTO,        // automatic spelling to standard output
  AUTO2,       // automatic spelling to standard output with sed log
  AUTO3,       // automatic spelling to standard output with gcc error format
  SUFFIX       // print suffixes that can be attached to a given word
};
int filter_mode = NORMAL;
int printgood = 0;  // print only good words and lines
int showpath = 0;   // show detected path of the dictionary
int checkurl = 0;   // check URLs and mail addresses
int checkapos = 0;  // force typographic apostrophe
int warn = 0;  // warn potential mistakes (dictionary words with WARN flags)
const char* ui_enc = NULL;  // locale character encoding (default for I/O)
const char* io_enc = NULL;  // I/O character encoding

#define DMAX 10  // maximal count of loaded dictionaries

const char* dic_enc[DMAX];  // dictionary encoding
char* path = NULL;
int dmax = 0;  // dictionary count
230
231 // functions
232
233 #ifdef HAVE_ICONV
// Normalise an encoding name before it is handed to iconv_open().
// "TIS620-2533" is rewritten to "TIS620"; every other name is returned
// unchanged (the very same pointer).
static const char* fix_encoding_name(const char* enc) {
  return (strcmp(enc, "TIS620-2533") == 0) ? "TIS620" : enc;
}
239 #endif
240
241 /* change character encoding */
/* Convert `st` from encoding `enc1` to encoding `enc2`.
 *
 * `st` is returned unchanged when iconv support is compiled out, when `st`
 * is empty, when either encoding name is NULL, when both names are equal, or
 * when the converter cannot be opened.  If the conversion itself fails, an
 * error is printed and the part converted so far is returned. */
std::string chenc(const std::string& st, const char* enc1, const char* enc2) {
#ifndef HAVE_ICONV
  (void)enc1;
  (void)enc2;
  return st;
#else
  if (st.empty() || !enc1 || !enc2 || strcmp(enc1, enc2) == 0)
    return st;

  iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1));
  if (conv == (iconv_t)-1) {
    fprintf(stderr, gettext("error - iconv_open: %s -> %s\n"), enc2, enc1);
    return st;
  }

  // Start with at least 15 bytes of output room; grown on demand below.
  std::string converted(st.size() < 15 ? 15 : st.size(), '\0');
  ICONV_CONST char* in_ptr = (ICONV_CONST char*)&st[0];
  size_t in_left(st.size());
  char* out_ptr = &converted[0];
  size_t out_left(converted.size());

  bool failed = false;
  for (;;) {
    if (iconv(conv, &in_ptr, &in_left, &out_ptr, &out_left) != size_t(-1))
      break;  // everything converted
    if (errno != E2BIG) {
      failed = true;  // any error other than "output buffer full"
      break;
    }
    // Output buffer exhausted: add two bytes of room per unread input byte
    // and continue writing right after what has been produced so far.
    const size_t written = converted.size() - out_left;
    const size_t extra = in_left * 2;
    out_left += extra;
    converted.resize(converted.size() + extra);
    out_ptr = &converted[written];
  }
  if (failed)
    fprintf(stderr, gettext("error - iconv: %s -> %s\n"), enc2, enc1);

  iconv_close(conv);
  converted.resize(out_ptr - &converted[0]);  // drop the unused tail
  return converted;
#endif
}
285
get_parser(int format,const char * extension,Hunspell * pMS)286 TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
287 TextParser* p = NULL;
288 int io_utf8 = 0;
289 const char* denc = pMS->get_dict_encoding().c_str();
290 #ifdef HAVE_ICONV
291 initialize_utf_tbl(); // also need for 8-bit tokenization
292 if (io_enc) {
293 if ((strcmp(io_enc, "UTF-8") == 0) || (strcmp(io_enc, "utf-8") == 0) ||
294 (strcmp(io_enc, "UTF8") == 0) || (strcmp(io_enc, "utf8") == 0)) {
295 io_utf8 = 1;
296 io_enc = "UTF-8";
297 }
298 } else if (ui_enc) {
299 io_enc = ui_enc;
300 if (strcmp(ui_enc, "UTF-8") == 0)
301 io_utf8 = 1;
302 } else {
303 io_enc = denc;
304 if (strcmp(denc, "UTF-8") == 0)
305 io_utf8 = 1;
306 }
307
308 if (io_utf8) {
309 const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
310 const std::string& vec_wordchars = pMS->get_wordchars_cpp();
311 wordchars_utf16_len = vec_wordchars_utf16.size();
312 wordchars_utf16 = wordchars_utf16_len ? &vec_wordchars_utf16[0] : NULL;
313 if ((strcmp(denc, "UTF-8") != 0) && !vec_wordchars.empty()) {
314 const char* wchars = vec_wordchars.c_str();
315 size_t c1 = vec_wordchars.size();
316 size_t c2 = MAXLNLEN;
317 char* dest = text_conv;
318 iconv_t conv = iconv_open("UTF-8", fix_encoding_name(denc));
319 if (conv == (iconv_t)-1) {
320 fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s\n"), denc);
321 wordchars_utf16 = NULL;
322 wordchars_utf16_len = 0;
323 } else {
324 iconv(conv, (ICONV_CONST char**)&wchars, &c1, &dest, &c2);
325 iconv_close(conv);
326 u8_u16(new_wordchars_utf16, text_conv);
327 std::sort(new_wordchars_utf16.begin(), new_wordchars_utf16.end());
328 wordchars_utf16 = &new_wordchars_utf16[0];
329 wordchars_utf16_len = new_wordchars_utf16.size();
330 }
331 }
332 } else {
333 // 8-bit input encoding
334 // detect letters by unicodeisalpha() for tokenization
335 char letters[MAXLNLEN];
336 char* pletters = letters;
337 char ch[2];
338 char u8[10];
339 *pletters = '\0';
340 iconv_t conv = iconv_open("UTF-8", fix_encoding_name(io_enc));
341 if (conv == (iconv_t)-1) {
342 fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s\n"), io_enc);
343 } else {
344 for (int i = 32; i < 256; i++) {
345 size_t c1 = 1;
346 size_t c2 = 10;
347 char* dest = u8;
348 u8[0] = '\0';
349 char* ch8bit = ch;
350 ch[0] = (char)i;
351 ch[1] = '\0';
352 size_t res = iconv(conv, (ICONV_CONST char**)&ch8bit, &c1, &dest, &c2);
353 if (res != (size_t)-1) {
354 std::vector<w_char> w;
355 u8_u16(w, std::string(u8, dest));
356 unsigned short idx = w.empty() ? 0 : (w[0].h << 8) + w[0].l;
357 if (unicodeisalpha(idx)) {
358 *pletters = (char)i;
359 pletters++;
360 }
361 }
362 }
363 iconv_close(conv);
364 }
365 *pletters = '\0';
366
367 // UTF-8 wordchars -> 8 bit wordchars
368 const std::string& vec_wordchars = pMS->get_wordchars_cpp();
369 size_t len = vec_wordchars.size();
370 if (len) {
371 if ((strcmp(denc, "UTF-8") == 0)) {
372 len = pMS->get_wordchars_utf16().size();
373 }
374 char* dest = letters + strlen(letters); // append wordchars
375 size_t c1 = len + 1;
376 size_t c2 = len + 1;
377 conv = iconv_open(fix_encoding_name(io_enc), fix_encoding_name(denc));
378 if (conv == (iconv_t)-1) {
379 fprintf(stderr, gettext("error - iconv_open: %s -> %s\n"), io_enc,
380 denc);
381 } else {
382 const char* wchars = vec_wordchars.c_str();
383 iconv(conv, (ICONV_CONST char**)&wchars, &c1, &dest, &c2);
384 iconv_close(conv);
385 *dest = '\0';
386 }
387 }
388 if (*letters)
389 wordchars.assign(letters);
390 }
391 #else
392 if (strcmp(denc, "UTF-8") == 0) {
393 const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
394 wordchars_utf16 = (vec_wordchars_utf16.size() == 0) ? NULL : &vec_wordchars_utf16[0];
395 wordchars_utf16_len = vec_wordchars_utf16.size();
396 io_utf8 = 1;
397 } else {
398 std::string casechars = get_casechars(denc);
399 std::string wchars = pMS->get_wordchars_cpp();
400 wordchars = casechars + wchars;
401 }
402 io_enc = denc;
403 #endif
404
405 if (io_utf8) {
406 switch (format) {
407 case FMT_LATEX:
408 p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len);
409 break;
410 case FMT_HTML:
411 p = new HTMLParser(wordchars_utf16, wordchars_utf16_len);
412 break;
413 case FMT_MAN:
414 p = new ManParser(wordchars_utf16, wordchars_utf16_len);
415 break;
416 case FMT_XML:
417 p = new XMLParser(wordchars_utf16, wordchars_utf16_len);
418 break;
419 case FMT_ODF:
420 p = new ODFParser(wordchars_utf16, wordchars_utf16_len);
421 break;
422 case FMT_FIRST:
423 p = new FirstParser(wordchars.c_str());
424 }
425 } else {
426 switch (format) {
427 case FMT_LATEX:
428 p = new LaTeXParser(wordchars.c_str());
429 break;
430 case FMT_HTML:
431 p = new HTMLParser(wordchars.c_str());
432 break;
433 case FMT_MAN:
434 p = new ManParser(wordchars.c_str());
435 break;
436 case FMT_XML:
437 p = new XMLParser(wordchars.c_str());
438 break;
439 case FMT_ODF:
440 p = new ODFParser(wordchars.c_str());
441 break;
442 case FMT_FIRST:
443 p = new FirstParser(wordchars.c_str());
444 }
445 }
446
447 if ((!p) && (extension)) {
448 if ((strcmp(extension, "html") == 0) || (strcmp(extension, "htm") == 0) ||
449 (strcmp(extension, "xhtml") == 0)) {
450 if (io_utf8) {
451 p = new HTMLParser(wordchars_utf16, wordchars_utf16_len);
452 } else {
453 p = new HTMLParser(wordchars.c_str());
454 }
455 } else if ((strcmp(extension, "xml") == 0)) {
456 if (io_utf8) {
457 p = new XMLParser(wordchars_utf16, wordchars_utf16_len);
458 } else {
459 p = new XMLParser(wordchars.c_str());
460 }
461 } else if (((strlen(extension) == 3) &&
462 (strstr(ODF_EXT, extension) != NULL)) ||
463 ((strlen(extension) == 4) && (extension[0] == 'f') &&
464 (strstr(ODF_EXT, extension + 1) != NULL))) {
465 if (io_utf8) {
466 p = new ODFParser(wordchars_utf16, wordchars_utf16_len);
467 } else {
468 p = new ODFParser(wordchars.c_str());
469 }
470 } else if (((extension[0] > '0') && (extension[0] <= '9'))) {
471 if (io_utf8) {
472 p = new ManParser(wordchars_utf16, wordchars_utf16_len);
473 } else {
474 p = new ManParser(wordchars.c_str());
475 }
476 } else if ((strcmp(extension, "tex") == 0)) {
477 if (io_utf8) {
478 p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len);
479 } else {
480 p = new LaTeXParser(wordchars.c_str());
481 }
482 }
483 }
484 if (!p) {
485 if (io_utf8) {
486 p = new TextParser(wordchars_utf16, wordchars_utf16_len);
487 } else {
488 p = new TextParser(wordchars.c_str());
489 }
490 }
491 p->set_url_checking(checkurl);
492 return p;
493 }
494
495 #ifdef LOG
log(char * message)496 void log(char* message) {
497 FILE* f = fopen(LOGFILE, "a");
498 if (f) {
499 fprintf(f, "%s\n", message);
500 fclose(f);
501 } else {
502 fprintf(stderr, "Logfile...");
503 }
504 }
505 #endif
506
putdic(const std::string & in_word,Hunspell * pMS)507 int putdic(const std::string& in_word, Hunspell* pMS) {
508 std::string word = chenc(in_word, ui_enc, dic_enc[0]);
509
510 std::string buf;
511 pMS->input_conv(word.c_str(), buf);
512 word = buf;
513
514 if (word.empty())
515 return 0;
516
517 int ret(0);
518 size_t w = word.find('/', 1);
519 if (w == std::string::npos) {
520 if (word[0] == '*')
521 ret = pMS->remove(word.substr(1));
522 else
523 ret = pMS->add(word);
524 } else {
525 std::string affix = word.substr(w + 1);
526 word.resize(w);
527 if (!affix.empty() && affix[0] == '/') // word//pattern (back comp.)
528 affix.erase(0, 1);
529 ret = pMS->add_with_affix(word, affix); // word/pattern
530 }
531 return ret;
532 }
533
load_privdic(const char * filename,Hunspell * pMS)534 void load_privdic(const char* filename, Hunspell* pMS) {
535 std::ifstream dic;
536 dic.open(filename, std::ios_base::in);
537 if (dic.is_open()) {
538 std::string buf;
539 while (std::getline(dic, buf)) {
540 putdic(buf, pMS);
541 }
542 }
543 }
544
// Return true when `filename` can be opened for reading.
bool exist(const char* filename) {
  std::ifstream probe(filename, std::ios_base::in);
  return probe.is_open();
}
553
save_privdic(const std::string & filename,const std::string & filename2,std::vector<std::string> & w)554 int save_privdic(const std::string& filename, const std::string& filename2, std::vector<std::string>& w) {
555 FILE* dic = fopen(filename.c_str(), "r");
556 if (dic) {
557 fclose(dic);
558 dic = fopen(filename.c_str(), "a");
559 } else {
560 dic = fopen(filename2.c_str(), "a");
561 }
562 if (!dic)
563 return 0;
564 for (size_t i = 0; i < w.size(); ++i) {
565 w[i] = chenc(w[i], io_enc, ui_enc);
566 fprintf(dic, "%s\n", w[i].c_str());
567 }
568 fclose(dic);
569 return 1;
570 }
571
// Return a pointer to the part of `s` that follows the last occurrence of
// the separator `c`, or `s` itself when `c` does not occur.  The result
// points into `s`; nothing is copied.
const char* basename(const char* s, char c) {
  const char* last = strrchr(s, c);
  return last ? last + 1 : s;
}
580
581 #ifdef HAVE_CURSES_H
scanline(char * message)582 char* scanline(char* message) {
583 char input[INPUTLEN];
584 printw(message);
585 echo();
586 getnstr(input, INPUTLEN);
587 noecho();
588 return mystrdup(input);
589 }
590 #endif
591
592 // check words in the dictionaries (and set first checked dictionary)
check(Hunspell ** pMS,int * d,const std::string & token,int * info,std::string * root)593 bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) {
594 for (int i = 0; i < dmax; ++i) {
595 std::string buf = chenc(token, io_enc, dic_enc[*d]);
596 mystrrep(buf, ENTITY_APOS, "'");
597 if (checkapos && buf.find('\'') != std::string::npos)
598 return false;
599 // 8-bit encoded dictionaries need ASCII apostrophes (eg. English
600 // dictionaries)
601 if (strcmp(dic_enc[*d], "UTF-8") != 0)
602 mystrrep(buf, UTF8_APOS, "'");
603 if ((pMS[*d]->spell(buf, info, root) &&
604 !(warn && (*info & SPELL_WARN))) ||
605 // UTF-8 encoded dictionaries with ASCII apostrophes, but without ICONV
606 // support,
607 // need also ASCII apostrophes (eg. French dictionaries)
608 ((strcmp(dic_enc[*d], "UTF-8") == 0) &&
609 buf.find(UTF8_APOS) != std::string::npos &&
610 pMS[*d]->spell(mystrrep(buf, UTF8_APOS, "'"), info, root) &&
611 !(warn && (*info & SPELL_WARN)))) {
612 return true;
613 }
614 if (++(*d) == dmax)
615 *d = 0;
616 }
617 return false;
618 }
619
is_zipped_odf(TextParser * parser,const char * extension)620 static bool is_zipped_odf(TextParser* parser, const char* extension) {
621 // ODFParser and not flat ODF
622 return dynamic_cast<ODFParser*>(parser) && (extension && extension[0] != 'f');
623 }
624
// Decide whether `filename` may be spliced into a shell command line.
//
// pipe_interface() embeds the name between double quotes in a command run
// through system().  Inside double quotes the shell still interprets '"',
// '$', '`' and (on POSIX shells) '\', so a name containing any of them
// could break out of the quoting or trigger command substitution.  The
// apostrophe stays rejected as before.
// Returns true when the name contains none of these characters.
static bool secure_filename(const char* filename) {
#ifdef WIN32
  // backslash is the directory separator here and must stay allowed
  static const char unsafe[] = "'\"$`";
#else
  static const char unsafe[] = "'\"$`\\";
#endif
  return strpbrk(filename, unsafe) == NULL;
}
631
// Create a temporary directory and return its name, or NULL on failure.
// Outside Windows this is mkdtemp(templ).  On Windows `templ` is ignored:
// a name is obtained from tmpnam() and the directory is created with the
// "mkdir" shell command.
char* mymkdtemp(char *templ) {
#ifndef WIN32
  return mkdtemp(templ);
#else
  (void)templ;
  char *dirname = tmpnam(NULL);
  if (dirname == NULL)
    return NULL;
  const std::string command = std::string("mkdir ") + dirname;
  return (system(command.c_str()) == 0) ? dirname : NULL;
#endif
}
647
pipe_interface(Hunspell ** pMS,int format,FILE * fileid,char * filename)648 void pipe_interface(Hunspell** pMS, int format, FILE* fileid, char* filename) {
649 char buf[MAXLNLEN];
650 std::vector<std::string> dicwords;
651 int pos;
652 int bad;
653 int lineno = 0;
654 int terse_mode = 0;
655 int verbose_mode = 0;
656 int d = 0;
657 char* odftmpdir = NULL;
658
659 std::string filename_prefix = (multiple_files) ? filename + std::string(": ") : "";
660
661 const char* extension = (filename) ? basename(filename, '.') : NULL;
662 TextParser* parser = get_parser(format, extension, pMS[0]);
663 char tmpdirtemplate[] = "/tmp/hunspellXXXXXX";
664
665 bool bZippedOdf = is_zipped_odf(parser, extension);
666 // access content.xml of ODF
667 if (bZippedOdf) {
668 odftmpdir = mymkdtemp(tmpdirtemplate);
669 if (!odftmpdir) {
670 perror(gettext("Can't create tmp dir"));
671 exit(1);
672 }
673 // break 1-line XML of zipped ODT documents at </style:style> and </text:p>
674 // to avoid tokenization problems (fgets could stop within an XML tag)
675 std::ostringstream sbuf;
676 sbuf << "unzip -p \"" << filename << "\" content.xml | sed "
677 "\"s/\\(<\\/text:p>\\|<\\/style:style>\\)\\(.\\)/\\1\\n\\2/g;s/<\\/\\?text:span[^>]*>//g\" "
678 ">" << odftmpdir << "/content.xml";
679 if (!secure_filename(filename) || system(sbuf.str().c_str()) != 0) {
680 if (secure_filename(filename))
681 perror(gettext("Can't open inputfile"));
682 else
683 fprintf(stderr, gettext("Can't open %s.\n"), filename);
684 if (system((std::string("rmdir ") + odftmpdir).c_str()) != 0) {
685 perror("temp dir delete failed");
686 }
687 exit(1);
688 }
689 std::string file(odftmpdir);
690 file.append("/content.xml");
691 fileid = fopen(file.c_str(), "r");
692 if (fileid == NULL) {
693 perror(gettext("Can't open inputfile"));
694 if (system((std::string("rmdir ") + odftmpdir).c_str()) != 0) {
695 perror("temp dir delete failed");
696 }
697 exit(1);
698 }
699 }
700
701 if (filter_mode == NORMAL) {
702 fprintf(stdout, "%s", gettext(HUNSPELL_HEADING));
703 fprintf(stdout, HUNSPELL_VERSION);
704 const std::string& version = pMS[0]->get_version_cpp();
705 if (!version.empty())
706 fprintf(stdout, " - %s", version.c_str());
707 fprintf(stdout, "\n");
708 fflush(stdout);
709 }
710
711 nextline:
712 while (fgets(buf, MAXLNLEN, fileid)) {
713 buf[strcspn(buf, "\n")] = 0;
714 lineno++;
715 #ifdef LOG
716 log(buf);
717 #endif
718 bad = 0;
719 pos = 0;
720
721 // execute commands
722 if (filter_mode == PIPE) {
723 pos = -1;
724 switch (buf[0]) {
725 case '%': {
726 verbose_mode = terse_mode = 0;
727 break;
728 }
729 case '!': {
730 terse_mode = 1;
731 break;
732 }
733 case '`': {
734 verbose_mode = 1;
735 break;
736 }
737 case '+': {
738 delete parser;
739 parser = get_parser(FMT_LATEX, NULL, pMS[0]);
740 parser->set_url_checking(checkurl);
741 break;
742 }
743 case '-': {
744 delete parser;
745 parser = get_parser(format, NULL, pMS[0]);
746 break;
747 }
748 case '@': {
749 putdic(buf + 1, pMS[d]);
750 break;
751 }
752 case '*': {
753 std::string word(buf + 1);
754 dicwords.push_back(word);
755 putdic(word, pMS[d]);
756 break;
757 }
758 case '#': {
759 std::string sbuf;
760 if (HOME) {
761 sbuf.append(HOME);
762 } else {
763 fprintf(stderr, "%s", gettext("error - missing HOME variable\n"));
764 continue;
765 }
766 #ifndef WIN32
767 sbuf.append("/");
768 #endif
769 size_t offset = sbuf.size();
770 if (!privdicname) {
771 sbuf.append(DICBASENAME);
772 sbuf.append(basename(dicname, DIRSEPCH));
773 } else {
774 sbuf.append(privdicname);
775 }
776 if (save_privdic(sbuf.substr(offset), sbuf, dicwords)) {
777 dicwords.clear();
778 }
779 break;
780 }
781 case '^': {
782 pos = 1;
783 break;
784 }
785
786 default: {
787 pos = 0;
788 break;
789 }
790
791 } // end switch
792 } // end filter_mode == PIPE
793
794 if (pos >= 0) {
795 parser->put_line(buf + pos);
796 std::string token;
797 while (parser->next_token(token)) {
798 token = parser->get_word(token);
799 mystrrep(token, ENTITY_APOS, "'");
800 switch (filter_mode) {
801 case BADWORD: {
802 if (!check(pMS, &d, token, NULL, NULL)) {
803 bad = 1;
804 if (!printgood)
805 fprintf(stdout, "%s%s\n", filename_prefix.c_str(), token.c_str());
806 } else {
807 if (printgood)
808 fprintf(stdout, "%s%s\n", filename_prefix.c_str(), token.c_str());
809 }
810 continue;
811 }
812
813 case WORDFILTER: {
814 if (!check(pMS, &d, parser->get_word(token), NULL, NULL)) {
815 if (!printgood)
816 fprintf(stdout, "%s\n", buf);
817 } else {
818 if (printgood)
819 fprintf(stdout, "%s\n", buf);
820 }
821 goto nextline;
822 }
823
824 case BADLINE: {
825 if (!check(pMS, &d, parser->get_word(token), NULL, NULL)) {
826 bad = 1;
827 }
828 continue;
829 }
830
831 case AUTO0:
832 case AUTO:
833 case AUTO2:
834 case AUTO3: {
835 FILE* f = (filter_mode == AUTO) ? stderr : stdout;
836 if (!check(pMS, &d, parser->get_word(token), NULL, NULL)) {
837 bad = 1;
838 std::vector<std::string> wlst =
839 pMS[d]->suggest(chenc(parser->get_word(token), io_enc, dic_enc[d]));
840 if (!wlst.empty()) {
841 parser->change_token(chenc(wlst[0], dic_enc[d], io_enc).c_str());
842 if (filter_mode == AUTO3) {
843 fprintf(f, "%s:%d: Locate: %s | Try: %s\n", currentfilename,
844 lineno, token.c_str(), chenc(wlst[0], dic_enc[d], io_enc).c_str());
845 } else if (filter_mode == AUTO2) {
846 fprintf(f, "%ds/%s/%s/g; # %s\n", lineno, token.c_str(),
847 chenc(wlst[0], dic_enc[d], io_enc).c_str(), buf);
848 } else {
849 fprintf(f, gettext("Line %d: %s -> "), lineno,
850 chenc(token, io_enc, ui_enc).c_str());
851 fprintf(f, "%s\n", chenc(wlst[0], dic_enc[d], ui_enc).c_str());
852 }
853 }
854 }
855 continue;
856 }
857
858 case STEM: {
859 std::vector<std::string> result =
860 pMS[d]->stem(chenc(token, io_enc, dic_enc[d]));
861 for (size_t i = 0; i < result.size(); ++i) {
862 fprintf(stdout, "%s %s\n", token.c_str(),
863 chenc(result[i], dic_enc[d], ui_enc).c_str());
864 }
865 if (result.empty() && !token.empty() && token[token.size() - 1] == '.') {
866 token.resize(token.size() - 1);
867 result = pMS[d]->stem(token);
868 for (size_t i = 0; i < result.size(); ++i) {
869 fprintf(stdout, "%s %s\n", token.c_str(),
870 chenc(result[i], dic_enc[d], ui_enc).c_str());
871 }
872 }
873 if (result.empty())
874 fprintf(stdout, "%s\n", chenc(token, dic_enc[d], ui_enc).c_str());
875 fprintf(stdout, "\n");
876 continue;
877 }
878
879 case SUFFIX: {
880 std::vector<std::string> wlst = pMS[d]->suffix_suggest(token);
881 for (size_t j = 0; j < wlst.size(); ++j) {
882 fprintf(stdout, "Suffix Suggestions are %s \n",
883 chenc(wlst[j], dic_enc[d], io_enc).c_str());
884 }
885 fflush(stdout);
886 continue;
887 }
888 case ANALYZE: {
889 std::vector<std::string> result =
890 pMS[d]->analyze(chenc(token, io_enc, dic_enc[d]));
891 for (size_t i = 0; i < result.size(); ++i) {
892 fprintf(stdout, "%s %s\n", token.c_str(),
893 chenc(result[i], dic_enc[d], ui_enc).c_str());
894 }
895 if (result.empty() && !token.empty() && token[token.size() - 1] == '.') {
896 token.resize(token.size() - 1);
897 result = pMS[d]->analyze(token);
898 for (size_t i = 0; i < result.size(); ++i) {
899 fprintf(stdout, "%s %s\n", token.c_str(),
900 chenc(result[i], dic_enc[d], ui_enc).c_str());
901 }
902 }
903 if (result.empty())
904 fprintf(stdout, "%s\n", chenc(token, dic_enc[d], ui_enc).c_str());
905 fprintf(stdout, "\n");
906 continue;
907 }
908
909 case PIPE: {
910 int info;
911 std::string root;
912 if (check(pMS, &d, parser->get_word(token), &info, &root)) {
913 if (!terse_mode) {
914 if (verbose_mode)
915 fprintf(stdout, "* %s\n", token.c_str());
916 else
917 fprintf(stdout, "*\n");
918 }
919 fflush(stdout);
920 } else {
921 int byte_offset = parser->get_tokenpos() + pos;
922 int char_offset = 0;
923 if (strcmp(io_enc, "UTF-8") == 0) {
924 for (int i = 0; i < byte_offset; i++) {
925 if ((buf[i] & 0xc0) != 0x80)
926 char_offset++;
927 }
928 } else {
929 char_offset = byte_offset;
930 }
931 std::vector<std::string> wlst =
932 pMS[d]->suggest(chenc(token, io_enc, dic_enc[d]));
933 if (wlst.empty()) {
934 fprintf(stdout, "# %s %d", token.c_str(), char_offset);
935 } else {
936 fprintf(stdout, "& %s %u %d: ", token.c_str(), static_cast<unsigned int>(wlst.size()), char_offset);
937 fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str());
938 }
939 for (size_t j = 1; j < wlst.size(); ++j) {
940 fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
941 }
942 fprintf(stdout, "\n");
943 fflush(stdout);
944 }
945 continue;
946 }
947 case NORMAL: {
948 int info;
949 std::string root;
950 if (check(pMS, &d, token, &info, &root)) {
951 if (info & SPELL_COMPOUND) {
952 fprintf(stdout, "-\n");
953 } else if (!root.empty()) {
954 fprintf(stdout, "+ %s\n", chenc(root, dic_enc[d], ui_enc).c_str());
955 } else {
956 fprintf(stdout, "*\n");
957 }
958 fflush(stdout);
959 } else {
960 int byte_offset = parser->get_tokenpos() + pos;
961 int char_offset = 0;
962 if (strcmp(io_enc, "UTF-8") == 0) {
963 for (int i = 0; i < byte_offset; i++) {
964 if ((buf[i] & 0xc0) != 0x80)
965 char_offset++;
966 }
967 } else {
968 char_offset = byte_offset;
969 }
970 std::vector<std::string> wlst =
971 pMS[d]->suggest(chenc(token, io_enc, dic_enc[d]));
972 if (wlst.empty()) {
973 fprintf(stdout, "# %s %d", chenc(token, io_enc, ui_enc).c_str(),
974 char_offset);
975 } else {
976 fprintf(stdout, "& %s %u %d: ", chenc(token, io_enc, ui_enc).c_str(),
977 static_cast<unsigned int>(wlst.size()), char_offset);
978 fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], ui_enc).c_str());
979 }
980 for (size_t j = 1; j < wlst.size(); ++j) {
981 fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], ui_enc).c_str());
982 }
983 fprintf(stdout, "\n");
984 fflush(stdout);
985 }
986 }
987 }
988 }
989
990 switch (filter_mode) {
991 case AUTO: {
992 std::string pLine = parser->get_line();
993 fprintf(stdout, "%s\n", pLine.c_str());
994 break;
995 }
996
997 case BADLINE: {
998 if (((printgood) && (!bad)) || (!printgood && (bad)))
999 fprintf(stdout, "%s\n", buf);
1000 break;
1001 }
1002
1003 case PIPE:
1004 case NORMAL: {
1005 fprintf(stdout, "\n");
1006 fflush(stdout);
1007 break;
1008 }
1009 }
1010 } // if
1011 } // while
1012
1013 if (bZippedOdf) {
1014 fclose(fileid);
1015 std::ostringstream sbuf;
1016 sbuf << odftmpdir << "/content.xml";
1017 if (remove(sbuf.str().c_str()) != 0) {
1018 perror("temp file delete failed");
1019 }
1020 sbuf.str("");
1021 sbuf << "rmdir " << odftmpdir;
1022 if (system(sbuf.str().c_str()) != 0) {
1023 perror("temp dir delete failed");
1024 }
1025 }
1026
1027 delete parser;
1028 } // pipe_interface
1029
1030 #ifdef HAVE_READLINE
1031
1032 #ifdef HAVE_CURSES_H
1033 static const char* rltext;
1034
1035 // set base text of input line
set_rltext()1036 static int set_rltext() {
1037 if (rltext) {
1038 rl_insert_text(rltext);
1039 rltext = NULL;
1040 rl_startup_hook = (rl_hook_func_t*)NULL;
1041 }
1042 return 0;
1043 }
1044
1045 #endif
1046
1047 // Readline escape
rl_escape(int count,int key)1048 static int rl_escape(int count, int key) {
1049 rl_delete_text(0, rl_end);
1050 rl_done = 1;
1051 return 0;
1052 }
1053 #endif
1054
1055 #ifdef HAVE_CURSES_H
expand_tab(std::string & dest,const std::string & in_src)1056 int expand_tab(std::string& dest, const std::string& in_src) {
1057 dest.clear();
1058 const char *src = in_src.c_str();
1059 int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
1060 int chpos = 0;
1061 for (int j = 0; (src[j] != '\0') && (src[j] != '\r'); j++) {
1062 if (src[j] == '\t') {
1063 int end = 8 - (chpos % 8);
1064 for (int k = 0; k < end; k++) {
1065 dest.push_back(' ');
1066 chpos++;
1067 }
1068 } else {
1069 dest.push_back(src[j]);
1070 if (!u8 || (src[j] & 0xc0) != 0x80)
1071 chpos++;
1072 }
1073 }
1074 return chpos;
1075 }
1076
1077 // UTF-8-aware version of strncpy (but output is always null terminated)
1078 // What we should deal in is cursor position cells in a terminal emulator,
1079 // i.e. the number of visual columns occupied like wcwidth/wcswidth does
1080 // What we're really current doing is to deal in the number of characters,
1081 // like mbstowcs which isn't quite correct, but close enough for western
1082 // text in UTF-8
strncpyu8(std::string & dest,const std::string & in_src,int begin,int n)1083 void strncpyu8(std::string& dest, const std::string& in_src, int begin, int n) {
1084 dest.clear();
1085 const char *src = in_src.c_str();
1086 if (n) {
1087 int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
1088 for (int i = 0; i < begin + n;) {
1089 if (!*src)
1090 break; // source is at it's end
1091 if (!u8 || (*src & 0xc0) != 0x80)
1092 i++; // new character
1093 if (i > begin) { // copy char (w/ utf-8 bytes)
1094 dest.push_back(*src++);
1095 while (u8 && (*src & 0xc0) == 0x80)
1096 dest.push_back(*src++);
1097 } else { // skip char (w/ utf-8 bytes)
1098 ++src;
1099 while (u8 && (*src & 0xc0) == 0x80)
1100 ++src;
1101 }
1102 }
1103 }
1104 }
1105
1106 // See strncpyu8 for gotchas
strlenu8(const std::string & in_src)1107 int strlenu8(const std::string& in_src) {
1108 const char *src = in_src.c_str();
1109 int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
1110 int i = 0;
1111 while (*src) {
1112 if (!u8 || (*src & 0xc0) != 0x80)
1113 i++;
1114 ++src;
1115 }
1116 return i;
1117 }
1118
dialogscreen(TextParser * parser,std::string & token,char * filename,int forbidden,std::vector<std::string> & wlst)1119 void dialogscreen(TextParser* parser,
1120 std::string& token,
1121 char* filename,
1122 int forbidden,
1123 std::vector<std::string>& wlst) {
1124 int x, y;
1125 getmaxyx(stdscr, y, x);
1126 clear();
1127
1128 if (forbidden & SPELL_FORBIDDEN)
1129 printw(gettext("FORBIDDEN!"));
1130 else if (forbidden & SPELL_WARN)
1131 printw(gettext("Spelling mistake?"));
1132
1133 printw(gettext("\t%s\t\tFile: %s\n\n"), chenc(token, io_enc, ui_enc).c_str(),
1134 filename);
1135
1136 // handle long lines and tabulators
1137 std::string lines[MAXPREVLINE];
1138 std::string prevLine;
1139 for (int i = 0; i < MAXPREVLINE; i++) {
1140 prevLine = parser->get_prevline(i);
1141 expand_tab(lines[i], chenc(prevLine, io_enc, ui_enc));
1142 }
1143
1144 prevLine = parser->get_prevline(0);
1145 std::string line = prevLine.substr(0, parser->get_tokenpos());
1146 std::string line2;
1147 int tokenbeg = expand_tab(line2, chenc(line, io_enc, ui_enc));
1148
1149 prevLine = parser->get_prevline(0);
1150 line = prevLine.substr(0, parser->get_tokenpos() + token.size());
1151 int tokenend = expand_tab(line2, chenc(line, io_enc, ui_enc));
1152
1153 int rowindex = (tokenend - 1) / x;
1154 int beginrow = rowindex - tokenbeg / x;
1155 if (beginrow >= MAXPREVLINE)
1156 beginrow = MAXPREVLINE - 1;
1157
1158 int ri = rowindex;
1159 int prevline = 0;
1160
1161 for (int i = 0; i < MAXPREVLINE; i++) {
1162 strncpyu8(line, lines[prevline], x * rowindex, x);
1163 mvprintw(MAXPREVLINE + 1 - i, 0, "%s", line.c_str());
1164 const bool finished = i == MAXPREVLINE - 1;
1165 if (!finished) {
1166 rowindex--;
1167 if (rowindex == -1) {
1168 prevline++;
1169 rowindex = strlenu8(lines[prevline]) / x;
1170 }
1171 }
1172 }
1173
1174 strncpyu8(line, lines[0], x * (ri - beginrow), tokenbeg % x);
1175 mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", line.c_str());
1176 attron(A_REVERSE);
1177 printw("%s", chenc(token, io_enc, ui_enc).c_str());
1178 attroff(A_REVERSE);
1179
1180 mvprintw(MAXPREVLINE + 2, 0, "\n");
1181 for (size_t i = 0; i < wlst.size(); ++i) {
1182 if ((wlst.size() > 10) && (i < 10)) {
1183 printw(" 0%zu: %s\n", i, chenc(wlst[i], io_enc, ui_enc).c_str());
1184 } else {
1185 printw(" %u: %s\n", i, chenc(wlst[i], io_enc, ui_enc).c_str());
1186 }
1187 }
1188
1189 /* TRANSLATORS: the capital letters are shortcuts, mark one letter similarly
1190 in your translation and translate the standalone letter accordingly later
1191 */
1192 mvprintw(y - 3, 0, "%s\n", gettext("\n[SPACE] R)epl A)ccept I)nsert U)ncap "
1193 "S)tem Q)uit e(X)it or ? for help\n"));
1194 }
1195
lower_first_char(const std::string & token,const char * ioenc,int langnum)1196 std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
1197 std::string utf8str = chenc(token, ioenc, "UTF-8");
1198 std::vector<w_char> u;
1199 u8_u16(u, utf8str);
1200 if (!u.empty()) {
1201 unsigned short idx = (u[0].h << 8) + u[0].l;
1202 idx = unicodetolower(idx, langnum);
1203 u[0].h = (unsigned char)(idx >> 8);
1204 u[0].l = (unsigned char)(idx & 0x00FF);
1205 }
1206 std::string scratch;
1207 u16_u8(scratch, u);
1208 return chenc(scratch, "UTF-8", ioenc);
1209 }
1210
1211 // for terminal interface
dialog(TextParser * parser,Hunspell * pMS,std::string & token,char * filename,std::vector<std::string> & wlst,int forbidden)1212 int dialog(TextParser* parser,
1213 Hunspell* pMS,
1214 std::string& token,
1215 char* filename,
1216 std::vector<std::string>& wlst,
1217 int forbidden) {
1218 std::vector<std::string> dicwords;
1219 int c;
1220
1221 dialogscreen(parser, token, filename, forbidden, wlst);
1222
1223 char firstletter = '\0';
1224
1225 while ((c = getch())) {
1226 switch (c) {
1227 case '0':
1228 case '1':
1229 if ((firstletter == '\0') && (wlst.size() > 10)) {
1230 firstletter = c;
1231 break;
1232 }
1233 case '2':
1234 case '3':
1235 case '4':
1236 case '5':
1237 case '6':
1238 case '7':
1239 case '8':
1240 case '9':
1241 modified = 1;
1242 if (firstletter == '1') {
1243 c += 10;
1244 }
1245 c -= '0';
1246 if (c >= static_cast<int>(wlst.size()))
1247 break;
1248 if (checkapos) {
1249 std::string sbuf(wlst[c]);
1250 mystrrep(sbuf, "'", UTF8_APOS);
1251 parser->change_token(sbuf.c_str());
1252 } else {
1253 parser->change_token(wlst[c].c_str());
1254 }
1255 return 0;
1256 case ' ':
1257 return 0;
1258 case '?':
1259 clear();
1260 printw(gettext(
1261 "Whenever a word is found that is not in the dictionary\n"
1262 "it is printed on the first line of the screen. If the "
1263 "dictionary\n"
1264 "contains any similar words, they are listed with a number\n"
1265 "next to each one. You have the option of replacing the word\n"
1266 "completely, or choosing one of the suggested words.\n"));
1267 printw(gettext("\nCommands are:\n\n"));
1268 printw(gettext("R Replace the misspelled word completely.\n"));
1269 printw(gettext("Space Accept the word this time only.\n"));
1270 printw(
1271 gettext("A Accept the word for the rest of this session.\n"));
1272 printw(gettext(
1273 "I Accept the word, and put it in your private dictionary.\n"));
1274 printw(gettext(
1275 "U Accept and add lowercase version to private dictionary.\n"));
1276 printw(
1277 gettext("S\tAsk a stem and a model word and store them in the "
1278 "private dictionary.\n"
1279 "\tThe stem will be accepted also with the affixes of the "
1280 "model word.\n"));
1281 printw(gettext("0-n Replace with one of the suggested words.\n"));
1282 printw(gettext(
1283 "X Write the rest of this file, ignoring misspellings, and start "
1284 "next file.\n"));
1285 printw(
1286 gettext("Q Quit immediately. Asks for confirmation. Leaves file "
1287 "unchanged.\n"));
1288 printw(gettext("^Z Suspend program. Restart with fg command.\n"));
1289 printw(gettext("? Show this help screen.\n"));
1290 printw(gettext("\n-- Type space to continue -- \n"));
1291 while (getch() != ' ')
1292 ;
1293 // fall-through
1294 case 12: {
1295 dialogscreen(parser, token, filename, forbidden, wlst);
1296 break;
1297 }
1298 default: {
1299 /* TRANSLATORS: translate this letter according to the shortcut letter
1300 used
1301 previously in the translation of "R)epl" before */
1302 if (c == (gettext("r"))[0]) {
1303 modified = 1;
1304
1305 #ifdef HAVE_READLINE
1306 endwin();
1307 rltext = "";
1308 if (rltext && *rltext)
1309 rl_startup_hook = set_rltext;
1310 #endif
1311 char* temp = readline(gettext("Replace with: "));
1312 #ifdef HAVE_READLINE
1313 initscr();
1314 cbreak();
1315 #endif
1316
1317 if ((!temp) || (temp[0] == '\0')) {
1318 free(temp);
1319 dialogscreen(parser, token, filename, forbidden, wlst);
1320 break;
1321 }
1322
1323 std::string i(temp);
1324 free(temp);
1325 if (checkapos) {
1326 mystrrep(i, "'", UTF8_APOS);
1327 }
1328 parser->change_token(i.c_str());
1329
1330 return 2; // replace
1331 }
1332 /* TRANSLATORS: translate these letters according to the shortcut letter
1333 used
1334 previously in the translation of "U)ncap" and I)nsert before */
1335 int u_key = gettext("u")[0];
1336 int i_key = gettext("i")[0];
1337
1338 if (c == u_key || c == i_key) {
1339 std::string word = (c == i_key)
1340 ? token
1341 : lower_first_char(token, io_enc, pMS->get_langnum());
1342 dicwords.push_back(word);
1343 std::string sbuf;
1344 // save
1345 if (HOME) {
1346 sbuf.append(HOME);
1347 } else {
1348 fprintf(stderr, gettext("error - missing HOME variable\n"));
1349 break;
1350 }
1351 #ifndef WIN32
1352 sbuf.append("/");
1353 #endif
1354 size_t offset = sbuf.size();
1355 if (!privdicname) {
1356 sbuf.append(DICBASENAME);
1357 sbuf.append(basename(dicname, DIRSEPCH));
1358 } else {
1359 sbuf.append(privdicname);
1360 }
1361 if (save_privdic(sbuf.substr(offset), sbuf, dicwords)) {
1362 dicwords.clear();
1363 } else {
1364 fprintf(stderr, gettext("Cannot update personal dictionary."));
1365 break;
1366 }
1367 } // no break
1368 /* TRANSLATORS: translate this letter according to the shortcut letter
1369 used
1370 previously in the translation of "U)ncap" and I)nsert before */
1371 if ((c == (gettext("u"))[0]) || (c == (gettext("i"))[0]) ||
1372 (c == (gettext("a"))[0])) {
1373 modified = 1;
1374 putdic(token, pMS);
1375 return 0;
1376 }
1377 /* TRANSLATORS: translate this letter according to the shortcut letter
1378 used
1379 previously in the translation of "S)tem" before */
1380 if (c == (gettext("s"))[0]) {
1381 modified = 1;
1382
1383 std::string w(token);
1384 size_t n_last_of = w.find_last_of('-');
1385 if (n_last_of != std::string::npos) {
1386 w.resize(n_last_of);
1387 }
1388
1389 #ifdef HAVE_READLINE
1390 endwin();
1391 rltext = w.c_str();
1392 if (rltext && *rltext)
1393 rl_startup_hook = set_rltext;
1394 #endif
1395 char* temp = readline(gettext("New word (stem): "));
1396
1397 if ((!temp) || (temp[0] == '\0')) {
1398 free(temp);
1399 #ifdef HAVE_READLINE
1400 initscr();
1401 cbreak();
1402 #endif
1403 dialogscreen(parser, token, filename, forbidden, wlst);
1404 break;
1405 }
1406
1407 w.assign(temp);
1408 free(temp);
1409
1410 #ifdef HAVE_READLINE
1411 initscr();
1412 cbreak();
1413 #endif
1414 dialogscreen(parser, token, filename, forbidden, wlst);
1415 refresh();
1416
1417 #ifdef HAVE_READLINE
1418 endwin();
1419 rltext = "";
1420 if (rltext && *rltext)
1421 rl_startup_hook = set_rltext;
1422 #endif
1423 temp = readline(gettext("Model word (a similar dictionary word): "));
1424
1425 #ifdef HAVE_READLINE
1426 initscr();
1427 cbreak();
1428 #endif
1429
1430 if ((!temp) || (temp[0] == '\0')) {
1431 free(temp);
1432 dialogscreen(parser, token, filename, forbidden, wlst);
1433 break;
1434 }
1435
1436 std::string w2(temp);
1437 free(temp);
1438
1439 std::string w3;
1440 w3.append(w);
1441 w3.append("/");
1442 w3.append(w2);
1443
1444 if (!putdic(w3, pMS)) {
1445 dicwords.push_back(w3);
1446
1447 w3.clear();
1448 w3.append(w);
1449 w3.append("-/");
1450 w3.append(w2);
1451 w3.append("-");
1452 if (putdic(w3, pMS)) {
1453 dicwords.push_back(w3);
1454 }
1455 // save
1456 std::string sbuf;
1457 if (HOME) {
1458 sbuf.append(HOME);
1459 } else {
1460 fprintf(stderr, gettext("error - missing HOME variable\n"));
1461 continue;
1462 }
1463 #ifndef WIN32
1464 sbuf.append("/");
1465 #endif
1466 size_t offset = sbuf.size();
1467 if (!privdicname) {
1468 sbuf.append(DICBASENAME);
1469 sbuf.append(basename(dicname, DIRSEPCH));
1470 } else {
1471 sbuf.append(privdicname);
1472 }
1473 if (save_privdic(sbuf.substr(offset), sbuf, dicwords)) {
1474 dicwords.clear();
1475 } else {
1476 fprintf(stderr, gettext("Cannot update personal dictionary."));
1477 break;
1478 }
1479
1480 } else {
1481 dialogscreen(parser, token, filename, forbidden, wlst);
1482 printw(gettext(
1483 "Model word must be in the dictionary. Press any key!"));
1484 getch();
1485 dialogscreen(parser, token, filename, forbidden, wlst);
1486 break;
1487 }
1488 return 0;
1489 }
1490 /* TRANSLATORS: translate this letter according to the shortcut letter
1491 used
1492 previously in the translation of "e(X)it" before */
1493 if (c == (gettext("x"))[0]) {
1494 return 1;
1495 }
1496 /* TRANSLATORS: translate this letter according to the shortcut letter
1497 used
1498 previously in the translation of "Q)uit" before */
1499 if (c == (gettext("q"))[0]) {
1500 if (modified) {
1501 printw(
1502 gettext("Are you sure you want to throw away your changes? "));
1503 /* TRANSLATORS: translate this letter according to the shortcut
1504 * letter y)es */
1505 if (getch() == (gettext("y"))[0]) {
1506 return -1;
1507 }
1508 dialogscreen(parser, token, filename, forbidden, wlst);
1509 break;
1510 } else {
1511 return -1;
1512 }
1513 }
1514 }
1515 }
1516 }
1517 return 0;
1518 }
1519
interactive_line(TextParser * parser,Hunspell ** pMS,char * filename,FILE * tempfile)1520 int interactive_line(TextParser* parser,
1521 Hunspell** pMS,
1522 char* filename,
1523 FILE* tempfile) {
1524 int dialogexit = 0;
1525 int info = 0;
1526 int d = 0;
1527 std::string token;
1528 while (parser->next_token(token)) {
1529 if (!check(pMS, &d, parser->get_word(token), &info, NULL)) {
1530 std::vector<std::string> wlst;
1531 dialogscreen(parser, token, filename, info, wlst); // preview
1532 refresh();
1533 std::string dicbuf = chenc(parser->get_word(token), io_enc, dic_enc[d]);
1534 wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str());
1535 if (wlst.empty()) {
1536 dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
1537 } else {
1538 for (size_t j = 0; j < wlst.size(); ++j) {
1539 wlst[j] = chenc(wlst[j], dic_enc[d], io_enc);
1540 }
1541 dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
1542 }
1543 }
1544 if ((dialogexit == -1) || (dialogexit == 1))
1545 goto ki2;
1546 }
1547
1548 ki2:
1549 fprintf(tempfile, "%s", parser->get_line().c_str());
1550 return dialogexit;
1551 }
1552
// Interactive (curses) spell check of one file.
//
//   pMS      - array of loaded dictionaries; pMS[0] is used to create the
//              parser
//   filename - file to check; rewritten in place if anything was modified
//   format   - input format passed to get_parser()
//
// Every input line is run through interactive_line(), which writes the
// resulting line to a temporary file. If the global `modified` flag is set at
// the end, the temporary file is copied back over the input file. For zipped
// ODF input, content.xml is first extracted into a temporary directory,
// checked there, and put back into the archive with zip.
// Exits the process on open/temp-file errors and when the user quits.
void interactive_interface(Hunspell** pMS, char* filename, int format) {
  char buf[MAXLNLEN];
  char* odffilename = NULL;
  char* odftmpdir = NULL;  // external zip works only with temporary directories
                           // (option -j)

  FILE* text = fopen(filename, "r");
  if (!text) {
    perror(gettext("Can't open inputfile"));
    endwin();
    exit(1);
  }

  int dialogexit;
  // 1 while lines are still spell-checked, 0 once the user chose to copy the
  // rest of the file unchecked (inside this function the name hides check())
  int check = 1;

  const char* extension = basename(filename, '.');
  TextParser* parser = get_parser(format, extension, pMS[0]);
  char tmpdirtemplate[] = "/tmp/hunspellXXXXXX";

  bool bZippedOdf = is_zipped_odf(parser, extension);
  // access content.xml of ODF
  if (bZippedOdf) {
    odftmpdir = mymkdtemp(tmpdirtemplate);
    if (!odftmpdir) {
      perror(gettext("Can't create tmp dir"));
      endwin();
      exit(1);
    }
    fclose(text);
    // break 1-line XML of zipped ODT documents at </style:style> and </text:p>
    // to avoid tokenization problems (fgets could stop within an XML tag)
    std::ostringstream sbuf;
    sbuf << "unzip -p \"" << filename << "\" content.xml | sed "
            "\"s/\\(<\\/text:p>\\|<\\/style:style>\\)\\(.\\)/\\1\\n\\2/g\" "
            ">" << odftmpdir << "/content.xml";
    // the file name goes into a shell command, so the command is run only if
    // secure_filename() accepts it
    if (!secure_filename(filename) || system(sbuf.str().c_str()) != 0) {
      if (secure_filename(filename))
        perror(gettext("Can't open inputfile"));
      else
        fprintf(stderr, gettext("Can't open %s.\n"), filename);
      endwin();
      (void)system((std::string("rmdir ") + odftmpdir).c_str());
      exit(1);
    }
    // from here on `filename` is the extracted content.xml (heap copy, freed
    // below) and `odffilename` keeps the name of the archive
    odffilename = filename;
    std::string file(odftmpdir);
    file.append("/content.xml");
    filename = mystrdup(file.c_str());
    text = fopen(filename, "r");
    if (!text) {
      perror(gettext("Can't open inputfile"));
      endwin();
      (void)system((std::string("rmdir ") + odftmpdir).c_str());
      exit(1);
    }
  }

  // receives every processed line; copied back over the input at the end
  FILE* tempfile = tmpfile();

  if (!tempfile) {
    perror(gettext("Can't create tempfile"));
    delete parser;
    fclose(text);
    endwin();
    exit(1);
  }

  while (fgets(buf, MAXLNLEN, text)) {
    if (check) {
      parser->put_line(buf);
      // the user is shown the archive name for ODF, not the temporary file
      dialogexit = interactive_line(
          parser, pMS, odffilename ? odffilename : filename, tempfile);
      switch (dialogexit) {
        // -1: quit; drop the temporary data and leave the input untouched
        case -1: {
          clear();
          refresh();
          fclose(tempfile);  // automatically deleted when closed
          if (bZippedOdf) {
            if (remove(filename) != 0) {
              perror("temp file delete failed");
            }
            std::ostringstream sbuf;
            sbuf << "rmdir " << odftmpdir;
            if (system(sbuf.str().c_str()) != 0) {
              perror("temp dir delete failed");
            }
            free(filename);
          }
          endwin();
          exit(0);
        }
        // 1: stop checking; the remaining lines are copied verbatim
        case 1: {
          check = 0;
        }
      }
    } else {
      fprintf(tempfile, "%s", buf);
    }
  }
  fclose(text);

  // write the result back only if something was changed
  if (modified) {
    rewind(tempfile);
    text = fopen(filename, "wb");
    if (text == NULL)
      perror(gettext("Can't open outputfile"));
    else {
      size_t n;
      while ((n = fread(buf, 1, MAXLNLEN, tempfile)) > 0) {
        if (fwrite(buf, 1, n, text) != n)
          perror("write failed");
      }
      fclose(text);
      // put the rewritten content.xml back into the ODF archive
      if (bZippedOdf && odffilename) {
        std::ostringstream sbuf;
        sbuf << "zip -j '" << odffilename << "' " << filename;
        if (system(sbuf.str().c_str()) != 0)
          perror("write failed");
      }
    }
  }

  // remove the extracted content.xml and its temporary directory
  if (bZippedOdf) {
    if (remove(filename) != 0) {
      perror("temp file delete failed");
    }
    std::ostringstream sbuf;
    sbuf << "rmdir " << odftmpdir;
    if (system(sbuf.str().c_str()) != 0) {
      perror("temp dir delete failed");
    }
    free(filename);
  }

  delete parser;
  fclose(tempfile);  // automatically deleted when closed
}
1691
1692 #endif
1693
exist2(char * dir,int len,const char * name,const char * ext)1694 char* exist2(char* dir, int len, const char* name, const char* ext) {
1695 std::string buf;
1696 const char* sep = (len == 0) ? "" : DIRSEP;
1697 buf.assign(dir, len);
1698 buf.append(sep);
1699 buf.append(name);
1700 buf.append(ext);
1701 if (exist(buf.c_str()))
1702 return mystrdup(buf.c_str());
1703 buf.append(HZIP_EXTENSION);
1704 if (exist(buf.c_str())) {
1705 buf.erase(buf.size() - strlen(HZIP_EXTENSION));
1706 return mystrdup(buf.c_str());
1707 }
1708 return NULL;
1709 }
1710
1711 #if !defined(WIN32) || defined(__MINGW32__)
listdicpath(char * dir,int len)1712 int listdicpath(char* dir, int len) {
1713 std::string buf;
1714 const char* sep = (len == 0) ? "" : DIRSEP;
1715 buf.assign(dir, len);
1716 buf.append(sep);
1717 DIR* d = opendir(buf.c_str());
1718 if (!d)
1719 return 0;
1720 struct dirent* de;
1721 while ((de = readdir(d))) {
1722 len = strlen(de->d_name);
1723 if ((len > 4 && strcmp(de->d_name + len - 4, ".dic") == 0) ||
1724 (len > 7 && strcmp(de->d_name + len - 7, ".dic.hz") == 0)) {
1725 char* s = mystrdup(de->d_name);
1726 s[len - ((s[len - 1] == 'z') ? 7 : 4)] = '\0';
1727 fprintf(stderr, "%s%s\n", buf.c_str(), s);
1728 free(s);
1729 }
1730 }
1731 closedir(d);
1732 return 1;
1733 }
1734 #endif
1735
1736 // search existing path for file "name + ext"
search(char * begin,char * name,const char * ext)1737 char* search(char* begin, char* name, const char* ext) {
1738 char* end = begin;
1739 while (1) {
1740 while (!((*end == *PATHSEP) || (*end == '\0')))
1741 end++;
1742 char* res = NULL;
1743 if (name) {
1744 res = exist2(begin, int(end - begin), name, ext);
1745 } else {
1746 #if !defined(WIN32) || defined(__MINGW32__)
1747 listdicpath(begin, end - begin);
1748 #endif
1749 }
1750 if ((*end == '\0') || res)
1751 return res;
1752 end++;
1753 begin = end;
1754 }
1755 }
1756
main(int argc,char ** argv)1757 int main(int argc, char** argv) {
1758 std::string buf;
1759 Hunspell* pMS[DMAX];
1760 char* key = NULL;
1761 int arg_files = -1; // first filename argumentum position in argv
1762 int format = FMT_TEXT;
1763 int argstate = 0;
1764
1765 #ifdef HAVE_LOCALE_H
1766 setlocale(LC_ALL, "");
1767 #endif
1768 #ifdef HAVE_LANGINFO_H
1769 ui_enc = nl_langinfo(CODESET);
1770 #endif
1771 textdomain("hunspell"); //for gettext
1772
1773 #ifdef HAVE_READLINE
1774 rl_set_key("\x1b\x1b", rl_escape, rl_get_keymap());
1775 rl_bind_key('\t', rl_insert);
1776 #endif
1777
1778 #ifdef LOG
1779 log("START");
1780 #endif
1781
1782 for (int i = 1; i < argc; i++) {
1783 #ifdef LOG
1784 log(argv[i]);
1785 #endif
1786
1787 if (argstate == 1) {
1788 if (dicname)
1789 free(dicname);
1790 dicname = mystrdup(argv[i]);
1791 argstate = 0;
1792 } else if (argstate == 2) {
1793 if (privdicname)
1794 free(privdicname);
1795 privdicname = mystrdup(argv[i]);
1796 argstate = 0;
1797 } else if (argstate == 3) {
1798 io_enc = argv[i];
1799 argstate = 0;
1800 } else if (argstate == 4) {
1801 key = argv[i];
1802 argstate = 0;
1803 } else if (strcmp(argv[i], "-d") == 0)
1804 argstate = 1;
1805 else if (strcmp(argv[i], "-p") == 0)
1806 argstate = 2;
1807 else if (strcmp(argv[i], "-i") == 0)
1808 argstate = 3;
1809 else if (strcmp(argv[i], "-P") == 0)
1810 argstate = 4;
1811 else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
1812 fprintf(stderr, "%s", gettext("Usage: hunspell [OPTION]... [FILE]...\n"));
1813 fprintf(stderr, "%s", gettext("Check spelling of each FILE. Without FILE, "
1814 "check standard input.\n\n"));
1815 fprintf(stderr, "%s", gettext(" -1\t\tcheck only first field in lines "
1816 "(delimiter = tabulator)\n"));
1817 fprintf(stderr, "%s", gettext(" -a\t\tIspell's pipe interface\n"));
1818 fprintf(stderr, "%s", gettext(" --check-url\tcheck URLs, e-mail addresses and "
1819 "directory paths\n"));
1820 fprintf(
1821 stderr, "%s",
1822 gettext(
1823 " --check-apostrophe\tcheck Unicode typographic apostrophe\n"));
1824 fprintf(stderr, "%s",
1825 gettext(" -d d[,d2,...]\tuse d (d2 etc.) dictionaries\n"));
1826 fprintf(stderr, "%s", gettext(" -D\t\tshow available dictionaries\n"));
1827 fprintf(stderr, "%s", gettext(" -G\t\tprint only correct words or lines\n"));
1828 fprintf(stderr, "%s", gettext(" -h, --help\tdisplay this help and exit\n"));
1829 fprintf(stderr, "%s", gettext(" -H\t\tHTML input file format\n"));
1830 fprintf(stderr, "%s", gettext(" -i enc\tinput encoding\n"));
1831 fprintf(stderr, "%s", gettext(" -l\t\tprint misspelled words\n"));
1832 fprintf(stderr, "%s", gettext(" -L\t\tprint lines with misspelled words\n"));
1833 fprintf(stderr, "%s",
1834 gettext(" -m \t\tanalyze the words of the input text\n"));
1835 fprintf(stderr, "%s", gettext(" -n\t\tnroff/troff input file format\n"));
1836 fprintf(
1837 stderr, "%s",
1838 gettext(
1839 " -O\t\tOpenDocument (ODF or Flat ODF) input file format\n"));
1840 fprintf(stderr, "%s", gettext(" -p dict\tset dict custom dictionary\n"));
1841 fprintf(stderr, "%s",
1842 gettext(" -r\t\twarn of the potential mistakes (rare words)\n"));
1843 fprintf(
1844 stderr, "%s",
1845 gettext(" -P password\tset password for encrypted dictionaries\n"));
1846 fprintf(stderr, "%s", gettext(" -s \t\tstem the words of the input text\n"));
1847 fprintf(stderr, "%s", gettext(" -S \t\tsuffix words of the input text\n"));
1848 fprintf(stderr, "%s", gettext(" -t\t\tTeX/LaTeX input file format\n"));
1849 fprintf(stderr, "%s", gettext(" -v, --version\tprint version number\n"));
1850 fprintf(stderr, "%s",
1851 gettext(" -vv\t\tprint Ispell compatible version number\n"));
1852 fprintf(stderr, "%s", gettext(" -w\t\tprint misspelled words (= lines) from "
1853 "one word/line input.\n"));
1854 fprintf(stderr, "%s", gettext(" -X\t\tXML input file format\n\n"));
1855 fprintf(
1856 stderr, "%s",
1857 gettext(
1858 "Example: hunspell -d en_US file.txt # interactive spelling\n"
1859 " hunspell -i utf-8 file.txt # check UTF-8 encoded "
1860 "file\n"
1861 " hunspell -l *.odt # print misspelled words "
1862 "of ODF files\n\n"
1863 " # Quick fix of ODF documents by personal dictionary "
1864 "creation\n\n"
1865 " # 1 Make a reduced list from misspelled and unknown "
1866 "words:\n\n"
1867 " hunspell -l *.odt | sort | uniq >words\n\n"
1868 " # 2 Delete misspelled words of the file by a text "
1869 "editor.\n"
1870 " # 3 Use this personal dictionary to fix the deleted "
1871 "words:\n\n"
1872 " hunspell -p words *.odt\n\n"));
1873 fprintf(stderr, "%s", gettext("Bug reports: http://hunspell.github.io/\n"));
1874 exit(0);
1875 } else if ((strcmp(argv[i], "-vv") == 0) || (strcmp(argv[i], "-v") == 0) ||
1876 (strcmp(argv[i], "--version") == 0)) {
1877 fprintf(stdout, "%s", gettext(HUNSPELL_PIPE_HEADING));
1878 fprintf(stdout, "\n");
1879 if (strcmp(argv[i], "-vv") != 0) {
1880 fprintf(stdout, "%s",
1881 gettext("\nCopyright (C) 2002-2014 L\303\241szl\303\263 "
1882 "N\303\251meth. License: MPL/GPL/LGPL.\n\n"
1883 "Based on OpenOffice.org's Myspell library.\n"
1884 "Myspell's copyright (C) Kevin Hendricks, 2001-2002, "
1885 "License: BSD.\n\n"));
1886 fprintf(stdout, "%s", gettext("This is free software; see the source for "
1887 "copying conditions. There is NO\n"
1888 "warranty; not even for MERCHANTABILITY or "
1889 "FITNESS FOR A PARTICULAR PURPOSE,\n"
1890 "to the extent permitted by law.\n"));
1891 }
1892 exit(0);
1893 } else if ((strcmp(argv[i], "-a") == 0)) {
1894 filter_mode = PIPE;
1895 } else if ((strcmp(argv[i], "-m") == 0)) {
1896 /*
1897 if -a was used, don't override, i.e. keep ispell compatability
1898 ispell: Make possible root/affix combinations that aren't in the
1899 dictionary.
1900 hunspell: Analyze the words of the input text
1901 */
1902 if (filter_mode != PIPE)
1903 filter_mode = ANALYZE;
1904 } else if ((strcmp(argv[i], "-s") == 0)) {
1905 /*
1906 if -a was used, don't override, i.e. keep ispell compatability
1907 ispell: Stop itself with a SIGTSTP signal after each line of input.
1908 hunspell: Stem the words of the input text
1909 */
1910 if (filter_mode != PIPE)
1911 filter_mode = STEM;
1912 } else if ((strcmp(argv[i], "-S") == 0)) {
1913 if (filter_mode != PIPE)
1914 filter_mode = SUFFIX;
1915 } else if ((strcmp(argv[i], "-t") == 0)) {
1916 format = FMT_LATEX;
1917 } else if ((strcmp(argv[i], "-n") == 0)) {
1918 format = FMT_MAN;
1919 } else if ((strcmp(argv[i], "-H") == 0)) {
1920 format = FMT_HTML;
1921 } else if ((strcmp(argv[i], "-X") == 0)) {
1922 format = FMT_XML;
1923 } else if ((strcmp(argv[i], "-O") == 0)) {
1924 format = FMT_ODF;
1925 } else if ((strcmp(argv[i], "-l") == 0)) {
1926 filter_mode = BADWORD;
1927 } else if ((strcmp(argv[i], "-w") == 0)) {
1928 /*
1929 if -a was used, don't override, i.e. keep ispell compatability
1930 ispell: Specify additional characters that can be part of a word.
1931 hunspell: Print misspelled words (= lines) from one word/line input
1932 */
1933 if (filter_mode != PIPE)
1934 filter_mode = WORDFILTER;
1935 } else if ((strcmp(argv[i], "-L") == 0)) {
1936 /*
1937 if -a was used, don't override, i.e. keep ispell compatability
1938 ispell: Number of lines of context to be shown at the bottom of the
1939 screen
1940 hunspell: Print lines with misspelled words
1941 */
1942 if (filter_mode != PIPE)
1943 filter_mode = BADLINE;
1944 } else if ((strcmp(argv[i], "-u") == 0)) {
1945 /*
1946 if -a was used, don't override, i.e. keep ispell compatability
1947 ispell: None
1948 hunspell: Show typical misspellings
1949 */
1950 if (filter_mode != PIPE)
1951 filter_mode = AUTO0;
1952 } else if ((strcmp(argv[i], "-U") == 0)) {
1953 /*
1954 if -a was used, don't override, i.e. keep ispell compatability
1955 ispell: None
1956 hunspell: Automatic correction of typical misspellings to stdout
1957 */
1958 if (filter_mode != PIPE)
1959 filter_mode = AUTO;
1960 } else if ((strcmp(argv[i], "-u2") == 0)) {
1961 /*
1962 if -a was used, don't override, i.e. keep ispell compatability
1963 ispell: None
1964 hunspell: Print typical misspellings in sed format
1965 */
1966 if (filter_mode != PIPE)
1967 filter_mode = AUTO2;
1968 } else if ((strcmp(argv[i], "-u3") == 0)) {
1969 /*
1970 if -a was used, don't override, i.e. keep ispell compatability
1971 ispell: None
1972 hunspell: Print typical misspellings in gcc error format
1973 */
1974 if (filter_mode != PIPE)
1975 filter_mode = AUTO3;
1976 } else if ((strcmp(argv[i], "-G") == 0)) {
1977 printgood = 1;
1978 } else if ((strcmp(argv[i], "-1") == 0)) {
1979 format = FMT_FIRST;
1980 } else if ((strcmp(argv[i], "-D") == 0)) {
1981 showpath = 1;
1982 } else if ((strcmp(argv[i], "-r") == 0)) {
1983 warn = 1;
1984 } else if ((strcmp(argv[i], "--check-url") == 0)) {
1985 checkurl = 1;
1986 } else if ((strcmp(argv[i], "--check-apostrophe") == 0)) {
1987 checkapos = 1;
1988 } else if ((arg_files == -1) &&
1989 ((argv[i][0] != '-') && (argv[i][0] != '\0'))) {
1990 arg_files = i;
1991 if (!exist(argv[i])) { // first check (before time-consuming dic. load)
1992 fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
1993 #ifdef HAVE_CURSES_H
1994 endwin();
1995 #endif
1996 exit(1);
1997 }
1998 }
1999 }
2000
2001 multiple_files = (arg_files > 0) && (argc - arg_files > 1);
2002
2003 if (printgood && (filter_mode == NORMAL))
2004 filter_mode = BADWORD;
2005
2006 if (!dicname) {
2007 if (!(dicname = getenv("DICTIONARY"))) {
2008 /*
2009 * Search in order of LC_ALL, LC_MESSAGES &
2010 * LANG
2011 */
2012 const char* tests[] = {"LC_ALL", "LC_MESSAGES", "LANG"};
2013 for (size_t i = 0; i < sizeof(tests) / sizeof(const char*); ++i) {
2014 if ((dicname = getenv(tests[i])) && strcmp(dicname, "") != 0) {
2015 dicname = mystrdup(dicname);
2016 char* dot = strchr(dicname, '.');
2017 if (dot)
2018 *dot = '\0';
2019 char* at = strchr(dicname, '@');
2020 if (at)
2021 *at = '\0';
2022 break;
2023 }
2024 }
2025
2026 if (dicname &&
2027 ((strcmp(dicname, "C") == 0) || (strcmp(dicname, "POSIX") == 0))) {
2028 free(dicname);
2029 dicname = mystrdup("en_US");
2030 }
2031
2032 if (!dicname) {
2033 dicname = mystrdup(DEFAULTDICNAME);
2034 }
2035 } else {
2036 dicname = mystrdup(dicname);
2037 }
2038 }
2039
2040 {
2041 std::string path_std_str = ".";
2042 path_std_str.append(PATHSEP); // <- check path in local directory
2043 path_std_str.append(PATHSEP); // <- check path in root directory
2044 if (getenv("DICPATH")) {
2045 path_std_str.append(getenv("DICPATH")).append(PATHSEP);
2046 }
2047 path_std_str.append(LIBDIR).append(PATHSEP);
2048 if (HOME) {
2049 const char * userooodir[] = USEROOODIR;
2050 for(size_t i = 0; i < sizeof(userooodir)/sizeof(userooodir[0]); ++i) {
2051 path_std_str += HOME;
2052 #ifndef _WIN32
2053 path_std_str += DIRSEP;
2054 #endif
2055 path_std_str.append(userooodir[i]).append(PATHSEP);
2056 }
2057 path_std_str.append(OOODIR);
2058 }
2059 path = mystrdup(path_std_str.c_str());
2060 }
2061
2062 if (showpath) {
2063 fprintf(stderr, gettext("SEARCH PATH:\n%s\n"), path);
2064 fprintf(
2065 stderr, "%s",
2066 gettext(
2067 "AVAILABLE DICTIONARIES (path is not mandatory for -d option):\n"));
2068 search(path, NULL, NULL);
2069 if (-1 == arg_files) {
2070 exit(0);
2071 }
2072 }
2073
2074 if (!privdicname)
2075 privdicname = mystrdup(getenv("WORDLIST"));
2076
2077 char* dicplus = strchr(dicname, ',');
2078 if (dicplus)
2079 *dicplus = '\0';
2080 char* aff = search(path, dicname, ".aff");
2081 char* dic = search(path, dicname, ".dic");
2082 if (aff && dic) {
2083 if (showpath) {
2084 fprintf(stderr, gettext("LOADED DICTIONARY:\n%s\n%s\n"), aff, dic);
2085 }
2086 pMS[0] = new Hunspell(aff, dic, key);
2087 dic_enc[0] = pMS[0]->get_dict_encoding().c_str();
2088 dmax = 1;
2089 while (dicplus) {
2090 char* dicname2 = dicplus + 1;
2091 dicplus = strchr(dicname2, ',');
2092 if (dicplus)
2093 *dicplus = '\0';
2094 free(aff);
2095 free(dic);
2096 aff = search(path, dicname2, ".aff");
2097 dic = search(path, dicname2, ".dic");
2098 if (aff && dic) {
2099 if (dmax < DMAX) {
2100 pMS[dmax] = new Hunspell(aff, dic, key);
2101 dic_enc[dmax] = pMS[dmax]->get_dict_encoding().c_str();
2102 dmax++;
2103 if (showpath) {
2104 fprintf(stderr, gettext("LOADED DICTIONARY:\n%s\n%s\n"), aff, dic);
2105 }
2106 } else
2107 fprintf(stderr, gettext("error - %s exceeds dictionary limit.\n"),
2108 dicname2);
2109 } else if (dic)
2110 pMS[dmax - 1]->add_dic(dic);
2111 }
2112 } else {
2113 fprintf(stderr, gettext("Can't open affix or dictionary files for "
2114 "dictionary named \"%s\".\n"),
2115 dicname);
2116 exit(1);
2117 }
2118
2119 /* open the private dictionaries */
2120 if (HOME) {
2121 buf.assign(HOME);
2122 #ifndef WIN32
2123 buf.append("/");
2124 #endif
2125 buf.append(DICBASENAME);
2126 buf.append(basename(dicname, DIRSEPCH));
2127 load_privdic(buf.c_str(), pMS[0]);
2128 buf.assign(HOME);
2129 #ifndef WIN32
2130 buf.append("/");
2131 #endif
2132 if (!privdicname) {
2133 buf.assign(DICBASENAME);
2134 buf.append(basename(dicname, DIRSEPCH));
2135 load_privdic(buf.c_str(), pMS[0]);
2136 } else {
2137 buf.append(privdicname);
2138 load_privdic(buf.c_str(), pMS[0]);
2139 buf.assign(privdicname);
2140 load_privdic(buf.c_str(), pMS[0]);
2141 }
2142 }
2143
2144 /*
2145 If in pipe mode, output pipe mode version string only when
2146 hunspell has properly been started.
2147 Emacs, and maybe other programs, rely on the English version format.
2148 Do not gettextize.
2149 */
2150 if (filter_mode == PIPE) {
2151 fprintf(stdout, HUNSPELL_PIPE_HEADING);
2152 fflush(stdout);
2153 }
2154
2155 if (arg_files == -1) {
2156 pipe_interface(pMS, format, stdin, NULL);
2157 } else if (filter_mode != NORMAL) {
2158 for (int i = arg_files; i < argc; i++) {
2159 if (exist(argv[i])) {
2160 modified = 0;
2161 currentfilename = argv[i];
2162 FILE* f = fopen(argv[i], "r");
2163 pipe_interface(pMS, format, f, argv[i]);
2164 fclose(f);
2165 } else {
2166 fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
2167 exit(1);
2168 }
2169 }
2170 } else /*filter_mode == NORMAL*/ {
2171 #ifdef HAVE_CURSES_H
2172 initscr();
2173 cbreak();
2174 noecho();
2175 nonl();
2176 intrflush(stdscr, FALSE);
2177
2178 for (int i = arg_files; i < argc; i++) {
2179 if (exist(argv[i])) {
2180 modified = 0;
2181 interactive_interface(pMS, argv[i], format);
2182 } else {
2183 fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
2184 endwin();
2185 exit(1);
2186 }
2187 }
2188
2189 clear();
2190 refresh();
2191 endwin();
2192 #else
2193 fprintf(
2194 stderr, "%s",
2195 gettext(
2196 "Hunspell has been compiled without Ncurses user interface.\n"));
2197 #endif
2198 }
2199
2200 if (dicname)
2201 free(dicname);
2202 if (privdicname)
2203 free(privdicname);
2204 if (path)
2205 free(path);
2206 if (aff)
2207 free(aff);
2208 if (dic)
2209 free(dic);
2210 #ifdef HAVE_ICONV
2211 free_utf_tbl();
2212 #endif
2213 for (int i = 0; i < dmax; i++)
2214 delete pMS[i];
2215 return 0;
2216 }
2217
2218 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2219