1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4  *
5  * Copyright (C) 2002-2017 Németh László
6  *
7  * The contents of this file are subject to the Mozilla Public License Version
8  * 1.1 (the "License"); you may not use this file except in compliance with
9  * the License. You may obtain a copy of the License at
10  * http://www.mozilla.org/MPL/
11  *
12  * Software distributed under the License is distributed on an "AS IS" basis,
13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14  * for the specific language governing rights and limitations under the
15  * License.
16  *
17  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
18  *
19  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
20  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
21  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
22  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
23  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
24  *
25  * Alternatively, the contents of this file may be used under the terms of
26  * either the GNU General Public License Version 2 or later (the "GPL"), or
27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28  * in which case the provisions of the GPL or the LGPL are applicable instead
29  * of those above. If you wish to allow use of your version of this file only
30  * under the terms of either the GPL or the LGPL, and not to allow others to
31  * use your version of this file under the terms of the MPL, indicate your
32  * decision by deleting the provisions above and replace them with the notice
33  * and other provisions required by the GPL or the LGPL. If you do not delete
34  * the provisions above, a recipient may use your version of this file under
35  * the terms of any one of the MPL, the GPL or the LGPL.
36  *
37  * ***** END LICENSE BLOCK ***** */
38 
39 // glibc < 3.0 (for mkstemp)
40 #ifndef __USE_MISC
41 #define __USE_MISC
42 #endif
43 
44 #include <stdlib.h>
45 #include <stdio.h>
46 #include <sstream>
47 #include <string>
48 #include <string.h>
49 #include <config.h>
50 #include "../hunspell/atypes.hxx"
51 #include "../hunspell/hunspell.hxx"
52 #include "../hunspell/csutil.hxx"
53 #include "../hunspell/hunzip.hxx"
54 
55 #define HUNSPELL_VERSION VERSION
56 #define INPUTLEN 50
57 
58 #define HUNSPELL_PIPE_HEADING                                                  \
59   "@(#) International Ispell Version 3.2.06 (but really Hunspell " VERSION ")" \
60                                                                            "\n"
61 #define HUNSPELL_HEADING "Hunspell "
62 #define ODF_EXT "odt|ott|odp|otp|odg|otg|ods|ots"
63 #define ENTITY_APOS "&apos;"
64 #define UTF8_APOS "\xe2\x80\x99"
65 
66 // for debugging only
67 //#define LOG
68 
69 #define DEFAULTDICNAME "default"
70 
71 #ifdef WIN32
72 
73 #define LIBDIR "C:\\Hunspell\\"
74 #define USEROOODIR { "Application Data\\OpenOffice.org 2\\user\\wordbook" }
75 #define OOODIR                                                 \
76   "C:\\Program files\\OpenOffice.org 2.4\\share\\dict\\ooo\\;" \
77   "C:\\Program files\\OpenOffice.org 2.3\\share\\dict\\ooo\\;" \
78   "C:\\Program files\\OpenOffice.org 2.2\\share\\dict\\ooo\\;" \
79   "C:\\Program files\\OpenOffice.org 2.1\\share\\dict\\ooo\\;" \
80   "C:\\Program files\\OpenOffice.org 2.0\\share\\dict\\ooo\\"
81 #define HOME "%USERPROFILE%\\"
82 #define DICBASENAME "hunspell_"
83 #define LOGFILE "C:\\Hunspell\\log"
84 #define DIRSEPCH '\\'
85 #define DIRSEP "\\"
86 #define PATHSEP ";"
87 
88 #ifdef __MINGW32__
89 #include <sys/types.h>
90 #include <sys/stat.h>
91 #include <dirent.h>
92 #include <unistd.h>
93 #endif
94 
95 #include "../parsers/textparser.hxx"
96 #include "../parsers/htmlparser.hxx"
97 #include "../parsers/latexparser.hxx"
98 #include "../parsers/manparser.hxx"
99 #include "../parsers/firstparser.hxx"
100 #include "../parsers/xmlparser.hxx"
101 #include "../parsers/odfparser.hxx"
102 
103 #else
104 
105 // Not Windows
106 #include <sys/types.h>
107 #include <sys/stat.h>
108 #include <dirent.h>
109 #include <unistd.h>
110 #include "../parsers/textparser.hxx"
111 #include "../parsers/htmlparser.hxx"
112 #include "../parsers/latexparser.hxx"
113 #include "../parsers/manparser.hxx"
114 #include "../parsers/firstparser.hxx"
115 #include "../parsers/xmlparser.hxx"
116 #include "../parsers/odfparser.hxx"
117 
118 #define LIBDIR                \
119   "/usr/share/hunspell:"      \
120   "/usr/share/myspell:"       \
121   "/usr/share/myspell/dicts:" \
122   "/Library/Spelling"
123 #define USEROOODIR {                  \
124   ".openoffice.org/3/user/wordbook", \
125   ".openoffice.org2/user/wordbook",  \
126   ".openoffice.org2.0/user/wordbook",\
127   "Library/Spelling" }
128 #define OOODIR                                       \
129   "/opt/openoffice.org/basis3.0/share/dict/ooo:"     \
130   "/usr/lib/openoffice.org/basis3.0/share/dict/ooo:" \
131   "/opt/openoffice.org2.4/share/dict/ooo:"           \
132   "/usr/lib/openoffice.org2.4/share/dict/ooo:"       \
133   "/opt/openoffice.org2.3/share/dict/ooo:"           \
134   "/usr/lib/openoffice.org2.3/share/dict/ooo:"       \
135   "/opt/openoffice.org2.2/share/dict/ooo:"           \
136   "/usr/lib/openoffice.org2.2/share/dict/ooo:"       \
137   "/opt/openoffice.org2.1/share/dict/ooo:"           \
138   "/usr/lib/openoffice.org2.1/share/dict/ooo:"       \
139   "/opt/openoffice.org2.0/share/dict/ooo:"           \
140   "/usr/lib/openoffice.org2.0/share/dict/ooo"
141 #define HOME getenv("HOME")
142 #define DICBASENAME ".hunspell_"
143 #define LOGFILE "/tmp/hunspell.log"
144 #define DIRSEPCH '/'
145 #define DIRSEP "/"
146 #define PATHSEP ":"
147 #endif
148 
149 #ifdef HAVE_ICONV
150 #include <iconv.h>
151 #include <errno.h>
152 char text_conv[MAXLNLEN];
153 #endif
154 
155 #ifdef HAVE_LOCALE_H
156 # include <locale.h>
157 #endif
158 #ifdef HAVE_LANGINFO_H
159 # include <langinfo.h>
160 #endif
161 #ifdef ENABLE_NLS
162 # include <libintl.h>
163 #else
164 # undef gettext
165 # define gettext(Msgid) ((const char *) (Msgid))
166 # undef textdomain
167 # define textdomain(Domainname) ((const char *) (Domainname))
168 #endif
169 
170 #ifdef HAVE_CURSES_H
171 #ifdef HAVE_NCURSESW_CURSES_H
172 #include <ncursesw/curses.h>
173 #else
174 #include <curses.h>
175 #endif
176 #endif
177 
178 #ifdef HAVE_READLINE
179 #include <readline/readline.h>
180 #else
181 #define readline scanline
182 #endif
183 
184 // file formats:
185 
186 enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST, FMT_XML, FMT_ODF };
187 
188 // global variables
189 
// 8-bit word characters handed to the parsers when I/O is not UTF-8
// (assigned in get_parser())
std::string wordchars;
// NOTE(review): presumably the dictionary search path; it is not assigned in
// this part of the file -- confirm against the option parsing code
char* dicpath = NULL;
// UTF-16 word characters handed to the parsers for UTF-8 I/O; get_parser()
// points it at the dictionary's own table or at new_wordchars_utf16
const w_char* wordchars_utf16 = NULL;
// storage for word characters that get_parser() converts to UTF-16 itself
std::vector<w_char> new_wordchars_utf16;
// number of entries in wordchars_utf16
int wordchars_utf16_len;
// dictionary name; its base name forms the default private dictionary name
char* dicname = NULL;
// private dictionary name used instead of the default one when non-NULL
char* privdicname = NULL;
// file name printed in the AUTO3 ("file:line:") messages
const char* currentfilename = NULL;

int modified;  // modified file sign
bool multiple_files; // for listing file names in pipe interface
201 
// output/filter modes (the current one is stored in filter_mode)
enum {
  NORMAL,
  BADWORD,     // print only bad words
  WORDFILTER,  // print only bad words from 1 word/line input
  BADLINE,     // print only lines with bad words
  STEM,        // stem input words
  ANALYZE,     // analyze input words
  PIPE,        // print only stars for LyX compatibility
  AUTO0,       // search typical error (based on SuggestMgr::suggest())
  AUTO,        // automatic spelling to standard output
  AUTO2,       // automatic spelling to standard output with sed log
  AUTO3,       // automatic spelling to standard output with gcc error format
  SUFFIX  // print suffixes that can be attached to a given word
};
216 int filter_mode = NORMAL;
217 int printgood = 0;  // print only good words and lines
218 int showpath = 0;   // show detected path of the dictionary
219 int checkurl = 0;   // check URLs and mail addresses
220 int checkapos = 0;  // force typographic apostrophe
221 int warn = 0;  // warn potential mistakes (dictionary words with WARN flags)
222 const char* ui_enc = NULL;  // locale character encoding (default for I/O)
223 const char* io_enc = NULL;  // I/O character encoding
224 
225 #define DMAX 10  // maximal count of loaded dictionaries
226 
227 const char* dic_enc[DMAX];  // dictionary encoding
228 char* path = NULL;
229 int dmax = 0;  // dictionary count
230 
231 // functions
232 
233 #ifdef HAVE_ICONV
// Translate the encoding name "TIS620-2533" to "TIS620" before it is handed
// to iconv_open(); every other name is returned unchanged.
static const char* fix_encoding_name(const char* enc) {
  return (strcmp(enc, "TIS620-2533") == 0) ? "TIS620" : enc;
}
239 #endif
240 
241 /* change character encoding */
/* change character encoding
 *
 * st   - text to convert
 * enc1 - encoding st is currently in
 * enc2 - encoding to convert to
 *
 * Returns st unchanged when the build has no iconv, when st is empty, when
 * either encoding is NULL, when both encodings are equal or when the
 * converter cannot be opened. If the conversion itself fails, an error is
 * printed and the part converted so far is returned.
 */
std::string chenc(const std::string& st, const char* enc1, const char* enc2) {
#ifdef HAVE_ICONV
  const bool convertible =
      !st.empty() && enc1 && enc2 && strcmp(enc1, enc2) != 0;
  if (!convertible)
    return st;

  // start with at least 15 bytes of output space; grown on demand below
  std::string out(st.size() < 15 ? 15 : st.size(), '\0');
  size_t in_left = st.size();
  size_t out_left = out.size();
  ICONV_CONST char* in = (ICONV_CONST char*) &st[0];
  char* wr = &out[0];

  iconv_t cd = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1));
  if (cd == (iconv_t)-1) {
    fprintf(stderr, gettext("error - iconv_open: %s -> %s\n"), enc2, enc1);
    return st;
  }

  size_t status;
  for (;;) {
    status = iconv(cd, &in, &in_left, &wr, &out_left);
    if (status != size_t(-1) || errno != E2BIG)
      break;
    // output buffer exhausted: add twice the remaining input length and
    // continue writing where iconv stopped
    const size_t written = out.size() - out_left;
    const size_t extra = in_left * 2;
    out_left += extra;
    out.resize(out.size() + extra);
    wr = &out[written];
  }

  if (status == (size_t)-1)
    fprintf(stderr, gettext("error - iconv: %s -> %s\n"), enc2, enc1);

  iconv_close(cd);
  out.resize(wr - &out[0]);  // drop the unused tail of the buffer
  return out;
#else
  (void)enc1;
  (void)enc2;
  return st;
#endif
}
285 
get_parser(int format,const char * extension,Hunspell * pMS)286 TextParser* get_parser(int format, const char* extension, Hunspell* pMS) {
287   TextParser* p = NULL;
288   int io_utf8 = 0;
289   const char* denc = pMS->get_dict_encoding().c_str();
290 #ifdef HAVE_ICONV
291   initialize_utf_tbl();  // also need for 8-bit tokenization
292   if (io_enc) {
293     if ((strcmp(io_enc, "UTF-8") == 0) || (strcmp(io_enc, "utf-8") == 0) ||
294         (strcmp(io_enc, "UTF8") == 0) || (strcmp(io_enc, "utf8") == 0)) {
295       io_utf8 = 1;
296       io_enc = "UTF-8";
297     }
298   } else if (ui_enc) {
299     io_enc = ui_enc;
300     if (strcmp(ui_enc, "UTF-8") == 0)
301       io_utf8 = 1;
302   } else {
303     io_enc = denc;
304     if (strcmp(denc, "UTF-8") == 0)
305       io_utf8 = 1;
306   }
307 
308   if (io_utf8) {
309     const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
310     const std::string& vec_wordchars = pMS->get_wordchars_cpp();
311     wordchars_utf16_len = vec_wordchars_utf16.size();
312     wordchars_utf16 = wordchars_utf16_len ? &vec_wordchars_utf16[0] : NULL;
313     if ((strcmp(denc, "UTF-8") != 0) && !vec_wordchars.empty()) {
314       const char* wchars = vec_wordchars.c_str();
315       size_t c1 = vec_wordchars.size();
316       size_t c2 = MAXLNLEN;
317       char* dest = text_conv;
318       iconv_t conv = iconv_open("UTF-8", fix_encoding_name(denc));
319       if (conv == (iconv_t)-1) {
320         fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s\n"), denc);
321         wordchars_utf16 = NULL;
322         wordchars_utf16_len = 0;
323       } else {
324         iconv(conv, (ICONV_CONST char**)&wchars, &c1, &dest, &c2);
325         iconv_close(conv);
326         u8_u16(new_wordchars_utf16, text_conv);
327         std::sort(new_wordchars_utf16.begin(), new_wordchars_utf16.end());
328         wordchars_utf16 = &new_wordchars_utf16[0];
329         wordchars_utf16_len = new_wordchars_utf16.size();
330       }
331     }
332   } else {
333     // 8-bit input encoding
334     // detect letters by unicodeisalpha() for tokenization
335     char letters[MAXLNLEN];
336     char* pletters = letters;
337     char ch[2];
338     char u8[10];
339     *pletters = '\0';
340     iconv_t conv = iconv_open("UTF-8", fix_encoding_name(io_enc));
341     if (conv == (iconv_t)-1) {
342       fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s\n"), io_enc);
343     } else {
344       for (int i = 32; i < 256; i++) {
345         size_t c1 = 1;
346         size_t c2 = 10;
347         char* dest = u8;
348         u8[0] = '\0';
349         char* ch8bit = ch;
350         ch[0] = (char)i;
351         ch[1] = '\0';
352         size_t res = iconv(conv, (ICONV_CONST char**)&ch8bit, &c1, &dest, &c2);
353         if (res != (size_t)-1) {
354           std::vector<w_char> w;
355           u8_u16(w, std::string(u8, dest));
356           unsigned short idx = w.empty() ? 0 : (w[0].h << 8) + w[0].l;
357           if (unicodeisalpha(idx)) {
358             *pletters = (char)i;
359             pletters++;
360           }
361         }
362       }
363       iconv_close(conv);
364     }
365     *pletters = '\0';
366 
367     // UTF-8 wordchars -> 8 bit wordchars
368     const std::string& vec_wordchars = pMS->get_wordchars_cpp();
369     size_t len = vec_wordchars.size();
370     if (len) {
371       if ((strcmp(denc, "UTF-8") == 0)) {
372         len = pMS->get_wordchars_utf16().size();
373       }
374       char* dest = letters + strlen(letters);  // append wordchars
375       size_t c1 = len + 1;
376       size_t c2 = len + 1;
377       conv = iconv_open(fix_encoding_name(io_enc), fix_encoding_name(denc));
378       if (conv == (iconv_t)-1) {
379         fprintf(stderr, gettext("error - iconv_open: %s -> %s\n"), io_enc,
380                 denc);
381       } else {
382         const char* wchars = vec_wordchars.c_str();
383         iconv(conv, (ICONV_CONST char**)&wchars, &c1, &dest, &c2);
384         iconv_close(conv);
385         *dest = '\0';
386       }
387     }
388     if (*letters)
389       wordchars.assign(letters);
390   }
391 #else
392   if (strcmp(denc, "UTF-8") == 0) {
393     const std::vector<w_char>& vec_wordchars_utf16 = pMS->get_wordchars_utf16();
394     wordchars_utf16 = (vec_wordchars_utf16.size() == 0) ? NULL : &vec_wordchars_utf16[0];
395     wordchars_utf16_len = vec_wordchars_utf16.size();
396     io_utf8 = 1;
397   } else {
398     std::string casechars = get_casechars(denc);
399     std::string wchars = pMS->get_wordchars_cpp();
400     wordchars = casechars + wchars;
401   }
402   io_enc = denc;
403 #endif
404 
405   if (io_utf8) {
406     switch (format) {
407       case FMT_LATEX:
408         p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len);
409         break;
410       case FMT_HTML:
411         p = new HTMLParser(wordchars_utf16, wordchars_utf16_len);
412         break;
413       case FMT_MAN:
414         p = new ManParser(wordchars_utf16, wordchars_utf16_len);
415         break;
416       case FMT_XML:
417         p = new XMLParser(wordchars_utf16, wordchars_utf16_len);
418         break;
419       case FMT_ODF:
420         p = new ODFParser(wordchars_utf16, wordchars_utf16_len);
421         break;
422       case FMT_FIRST:
423         p = new FirstParser(wordchars.c_str());
424     }
425   } else {
426     switch (format) {
427       case FMT_LATEX:
428         p = new LaTeXParser(wordchars.c_str());
429         break;
430       case FMT_HTML:
431         p = new HTMLParser(wordchars.c_str());
432         break;
433       case FMT_MAN:
434         p = new ManParser(wordchars.c_str());
435         break;
436       case FMT_XML:
437         p = new XMLParser(wordchars.c_str());
438         break;
439       case FMT_ODF:
440         p = new ODFParser(wordchars.c_str());
441         break;
442       case FMT_FIRST:
443         p = new FirstParser(wordchars.c_str());
444     }
445   }
446 
447   if ((!p) && (extension)) {
448     if ((strcmp(extension, "html") == 0) || (strcmp(extension, "htm") == 0) ||
449         (strcmp(extension, "xhtml") == 0)) {
450       if (io_utf8) {
451         p = new HTMLParser(wordchars_utf16, wordchars_utf16_len);
452       } else {
453         p = new HTMLParser(wordchars.c_str());
454       }
455     } else if ((strcmp(extension, "xml") == 0)) {
456       if (io_utf8) {
457         p = new XMLParser(wordchars_utf16, wordchars_utf16_len);
458       } else {
459         p = new XMLParser(wordchars.c_str());
460       }
461     } else if (((strlen(extension) == 3) &&
462                 (strstr(ODF_EXT, extension) != NULL)) ||
463                ((strlen(extension) == 4) && (extension[0] == 'f') &&
464                 (strstr(ODF_EXT, extension + 1) != NULL))) {
465       if (io_utf8) {
466         p = new ODFParser(wordchars_utf16, wordchars_utf16_len);
467       } else {
468         p = new ODFParser(wordchars.c_str());
469       }
470     } else if (((extension[0] > '0') && (extension[0] <= '9'))) {
471       if (io_utf8) {
472         p = new ManParser(wordchars_utf16, wordchars_utf16_len);
473       } else {
474         p = new ManParser(wordchars.c_str());
475       }
476     } else if ((strcmp(extension, "tex") == 0)) {
477       if (io_utf8) {
478         p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len);
479       } else {
480         p = new LaTeXParser(wordchars.c_str());
481       }
482     }
483   }
484   if (!p) {
485     if (io_utf8) {
486       p = new TextParser(wordchars_utf16, wordchars_utf16_len);
487     } else {
488       p = new TextParser(wordchars.c_str());
489     }
490   }
491   p->set_url_checking(checkurl);
492   return p;
493 }
494 
495 #ifdef LOG
log(char * message)496 void log(char* message) {
497   FILE* f = fopen(LOGFILE, "a");
498   if (f) {
499     fprintf(f, "%s\n", message);
500     fclose(f);
501   } else {
502     fprintf(stderr, "Logfile...");
503   }
504 }
505 #endif
506 
putdic(const std::string & in_word,Hunspell * pMS)507 int putdic(const std::string& in_word, Hunspell* pMS) {
508   std::string word = chenc(in_word, ui_enc, dic_enc[0]);
509 
510   std::string buf;
511   pMS->input_conv(word.c_str(), buf);
512   word = buf;
513 
514   if (word.empty())
515     return 0;
516 
517   int ret(0);
518   size_t w = word.find('/', 1);
519   if (w == std::string::npos) {
520     if (word[0] == '*')
521       ret = pMS->remove(word.substr(1));
522     else
523       ret = pMS->add(word);
524   } else {
525     std::string affix = word.substr(w + 1);
526     word.resize(w);
527     if (!affix.empty() && affix[0] == '/') // word//pattern (back comp.)
528         affix.erase(0, 1);
529     ret = pMS->add_with_affix(word, affix);  // word/pattern
530   }
531   return ret;
532 }
533 
load_privdic(const char * filename,Hunspell * pMS)534 void load_privdic(const char* filename, Hunspell* pMS) {
535   std::ifstream dic;
536   dic.open(filename, std::ios_base::in);
537   if (dic.is_open()) {
538     std::string buf;
539     while (std::getline(dic, buf)) {
540       putdic(buf, pMS);
541     }
542   }
543 }
544 
// Report whether "filename" can be opened for reading.
bool exist(const char* filename) {
  std::ifstream probe(filename, std::ios_base::in);
  return probe.is_open();
}
553 
save_privdic(const std::string & filename,const std::string & filename2,std::vector<std::string> & w)554 int save_privdic(const std::string& filename, const std::string& filename2, std::vector<std::string>& w) {
555   FILE* dic = fopen(filename.c_str(), "r");
556   if (dic) {
557     fclose(dic);
558     dic = fopen(filename.c_str(), "a");
559   } else {
560     dic = fopen(filename2.c_str(), "a");
561   }
562   if (!dic)
563     return 0;
564   for (size_t i = 0; i < w.size(); ++i) {
565     w[i] = chenc(w[i], io_enc, ui_enc);
566     fprintf(dic, "%s\n", w[i].c_str());
567   }
568   fclose(dic);
569   return 1;
570 }
571 
// Return the part of s that follows the last occurrence of c, or s itself
// when c does not occur in it. The result points into s.
const char* basename(const char* s, char c) {
  const char* last = strrchr(s, c);
  return last ? last + 1 : s;
}
580 
581 #ifdef HAVE_CURSES_H
scanline(char * message)582 char* scanline(char* message) {
583   char input[INPUTLEN];
584   printw(message);
585   echo();
586   getnstr(input, INPUTLEN);
587   noecho();
588   return mystrdup(input);
589 }
590 #endif
591 
592 // check words in the dictionaries (and set first checked dictionary)
check(Hunspell ** pMS,int * d,const std::string & token,int * info,std::string * root)593 bool check(Hunspell** pMS, int* d, const std::string& token, int* info, std::string* root) {
594   for (int i = 0; i < dmax; ++i) {
595     std::string buf = chenc(token, io_enc, dic_enc[*d]);
596     mystrrep(buf, ENTITY_APOS, "'");
597     if (checkapos && buf.find('\'') != std::string::npos)
598       return false;
599     // 8-bit encoded dictionaries need ASCII apostrophes (eg. English
600     // dictionaries)
601     if (strcmp(dic_enc[*d], "UTF-8") != 0)
602       mystrrep(buf, UTF8_APOS, "'");
603     if ((pMS[*d]->spell(buf, info, root) &&
604          !(warn && (*info & SPELL_WARN))) ||
605         // UTF-8 encoded dictionaries with ASCII apostrophes, but without ICONV
606         // support,
607         // need also ASCII apostrophes (eg. French dictionaries)
608         ((strcmp(dic_enc[*d], "UTF-8") == 0) &&
609          buf.find(UTF8_APOS) != std::string::npos &&
610          pMS[*d]->spell(mystrrep(buf, UTF8_APOS, "'"), info, root) &&
611          !(warn && (*info & SPELL_WARN)))) {
612       return true;
613     }
614     if (++(*d) == dmax)
615       *d = 0;
616   }
617   return false;
618 }
619 
is_zipped_odf(TextParser * parser,const char * extension)620 static bool is_zipped_odf(TextParser* parser, const char* extension) {
621   // ODFParser and not flat ODF
622   return dynamic_cast<ODFParser*>(parser) && (extension && extension[0] != 'f');
623 }
624 
// Decide whether filename may be pasted into the shell command that unpacks
// zipped ODF files.
//
// That command wraps the name in double quotes, so besides the apostrophe
// that was always refused, the characters that stay special to the shell
// inside double quotes (", $, backquote and backslash) are refused too.
// On Windows only the two quote characters are refused, because backslashes
// are ordinary path separators there.
//
// Returns true when the name contains none of the refused characters.
static bool secure_filename(const char* filename) {
#ifdef WIN32
  static const char unsafe[] = "'\"";
#else
  static const char unsafe[] = "'\"$`\\";
#endif
  return strpbrk(filename, unsafe) == NULL;
}
631 
// Create a temporary directory and return its name, or NULL on failure.
//
// Outside Windows this is mkdtemp(templ): templ is the name template and is
// modified in place. On Windows templ is ignored; the name comes from
// tmpnam() and the directory is created with the "mkdir" shell command.
char* mymkdtemp(char *templ) {
#ifndef WIN32
  return mkdtemp(templ);
#else
  (void)templ;
  char* odftmpdir = tmpnam(NULL);
  if (odftmpdir == NULL)
    return NULL;
  const std::string cmd = std::string("mkdir ") + odftmpdir;
  return (system(cmd.c_str()) == 0) ? odftmpdir : NULL;
#endif
}
647 
pipe_interface(Hunspell ** pMS,int format,FILE * fileid,char * filename)648 void pipe_interface(Hunspell** pMS, int format, FILE* fileid, char* filename) {
649   char buf[MAXLNLEN];
650   std::vector<std::string> dicwords;
651   int pos;
652   int bad;
653   int lineno = 0;
654   int terse_mode = 0;
655   int verbose_mode = 0;
656   int d = 0;
657   char* odftmpdir = NULL;
658 
659   std::string filename_prefix = (multiple_files) ? filename + std::string(": ") : "";
660 
661   const char* extension = (filename) ? basename(filename, '.') : NULL;
662   TextParser* parser = get_parser(format, extension, pMS[0]);
663   char tmpdirtemplate[] = "/tmp/hunspellXXXXXX";
664 
665   bool bZippedOdf = is_zipped_odf(parser, extension);
666   // access content.xml of ODF
667   if (bZippedOdf) {
668     odftmpdir = mymkdtemp(tmpdirtemplate);
669     if (!odftmpdir) {
670       perror(gettext("Can't create tmp dir"));
671       exit(1);
672     }
673     // break 1-line XML of zipped ODT documents at </style:style> and </text:p>
674     // to avoid tokenization problems (fgets could stop within an XML tag)
675     std::ostringstream sbuf;
676     sbuf << "unzip -p \"" << filename << "\" content.xml | sed "
677             "\"s/\\(<\\/text:p>\\|<\\/style:style>\\)\\(.\\)/\\1\\n\\2/g;s/<\\/\\?text:span[^>]*>//g\" "
678             ">" << odftmpdir << "/content.xml";
679     if (!secure_filename(filename) || system(sbuf.str().c_str()) != 0) {
680       if (secure_filename(filename))
681         perror(gettext("Can't open inputfile"));
682       else
683         fprintf(stderr, gettext("Can't open %s.\n"), filename);
684       if (system((std::string("rmdir ") + odftmpdir).c_str()) != 0) {
685         perror("temp dir delete failed");
686       }
687       exit(1);
688     }
689     std::string file(odftmpdir);
690     file.append("/content.xml");
691     fileid = fopen(file.c_str(), "r");
692     if (fileid == NULL) {
693       perror(gettext("Can't open inputfile"));
694       if (system((std::string("rmdir ") + odftmpdir).c_str()) != 0) {
695         perror("temp dir delete failed");
696       }
697       exit(1);
698     }
699   }
700 
701   if (filter_mode == NORMAL) {
702     fprintf(stdout, "%s", gettext(HUNSPELL_HEADING));
703     fprintf(stdout, HUNSPELL_VERSION);
704     const std::string& version = pMS[0]->get_version_cpp();
705     if (!version.empty())
706       fprintf(stdout, " - %s", version.c_str());
707     fprintf(stdout, "\n");
708     fflush(stdout);
709   }
710 
711 nextline:
712   while (fgets(buf, MAXLNLEN, fileid)) {
713     buf[strcspn(buf, "\n")] = 0;
714     lineno++;
715 #ifdef LOG
716     log(buf);
717 #endif
718     bad = 0;
719     pos = 0;
720 
721     // execute commands
722     if (filter_mode == PIPE) {
723       pos = -1;
724       switch (buf[0]) {
725         case '%': {
726           verbose_mode = terse_mode = 0;
727           break;
728         }
729         case '!': {
730           terse_mode = 1;
731           break;
732         }
733         case '`': {
734           verbose_mode = 1;
735           break;
736         }
737         case '+': {
738           delete parser;
739           parser = get_parser(FMT_LATEX, NULL, pMS[0]);
740           parser->set_url_checking(checkurl);
741           break;
742         }
743         case '-': {
744           delete parser;
745           parser = get_parser(format, NULL, pMS[0]);
746           break;
747         }
748         case '@': {
749           putdic(buf + 1, pMS[d]);
750           break;
751         }
752         case '*': {
753           std::string word(buf + 1);
754           dicwords.push_back(word);
755           putdic(word, pMS[d]);
756           break;
757         }
758         case '#': {
759           std::string sbuf;
760           if (HOME) {
761             sbuf.append(HOME);
762           } else {
763             fprintf(stderr, "%s", gettext("error - missing HOME variable\n"));
764             continue;
765           }
766 #ifndef WIN32
767           sbuf.append("/");
768 #endif
769           size_t offset = sbuf.size();
770           if (!privdicname) {
771             sbuf.append(DICBASENAME);
772             sbuf.append(basename(dicname, DIRSEPCH));
773           } else {
774             sbuf.append(privdicname);
775           }
776           if (save_privdic(sbuf.substr(offset), sbuf, dicwords)) {
777             dicwords.clear();
778           }
779           break;
780         }
781         case '^': {
782           pos = 1;
783           break;
784         }
785 
786         default: {
787           pos = 0;
788           break;
789         }
790 
791       }  // end switch
792     }    // end filter_mode == PIPE
793 
794     if (pos >= 0) {
795       parser->put_line(buf + pos);
796       std::string token;
797       while (parser->next_token(token)) {
798         token = parser->get_word(token);
799         mystrrep(token, ENTITY_APOS, "'");
800         switch (filter_mode) {
801           case BADWORD: {
802             if (!check(pMS, &d, token, NULL, NULL)) {
803               bad = 1;
804               if (!printgood)
805                 fprintf(stdout, "%s%s\n", filename_prefix.c_str(), token.c_str());
806             } else {
807               if (printgood)
808                 fprintf(stdout, "%s%s\n", filename_prefix.c_str(), token.c_str());
809             }
810             continue;
811           }
812 
813           case WORDFILTER: {
814             if (!check(pMS, &d, parser->get_word(token), NULL, NULL)) {
815               if (!printgood)
816                 fprintf(stdout, "%s\n", buf);
817             } else {
818               if (printgood)
819                 fprintf(stdout, "%s\n", buf);
820             }
821             goto nextline;
822           }
823 
824           case BADLINE: {
825             if (!check(pMS, &d, parser->get_word(token), NULL, NULL)) {
826               bad = 1;
827             }
828             continue;
829           }
830 
831           case AUTO0:
832           case AUTO:
833           case AUTO2:
834           case AUTO3: {
835             FILE* f = (filter_mode == AUTO) ? stderr : stdout;
836             if (!check(pMS, &d, parser->get_word(token), NULL, NULL)) {
837               bad = 1;
838               std::vector<std::string> wlst =
839                   pMS[d]->suggest(chenc(parser->get_word(token), io_enc, dic_enc[d]));
840               if (!wlst.empty()) {
841                 parser->change_token(chenc(wlst[0], dic_enc[d], io_enc).c_str());
842                 if (filter_mode == AUTO3) {
843                   fprintf(f, "%s:%d: Locate: %s | Try: %s\n", currentfilename,
844                           lineno, token.c_str(), chenc(wlst[0], dic_enc[d], io_enc).c_str());
845                 } else if (filter_mode == AUTO2) {
846                   fprintf(f, "%ds/%s/%s/g; # %s\n", lineno, token.c_str(),
847                           chenc(wlst[0], dic_enc[d], io_enc).c_str(), buf);
848                 } else {
849                   fprintf(f, gettext("Line %d: %s -> "), lineno,
850                           chenc(token, io_enc, ui_enc).c_str());
851                   fprintf(f, "%s\n", chenc(wlst[0], dic_enc[d], ui_enc).c_str());
852                 }
853               }
854             }
855             continue;
856           }
857 
858           case STEM: {
859             std::vector<std::string> result =
860               pMS[d]->stem(chenc(token, io_enc, dic_enc[d]));
861             for (size_t i = 0; i < result.size(); ++i) {
862               fprintf(stdout, "%s %s\n", token.c_str(),
863                       chenc(result[i], dic_enc[d], ui_enc).c_str());
864             }
865             if (result.empty() && !token.empty() && token[token.size() - 1] == '.') {
866               token.resize(token.size() - 1);
867               result = pMS[d]->stem(token);
868               for (size_t i = 0; i < result.size(); ++i) {
869                 fprintf(stdout, "%s %s\n", token.c_str(),
870                         chenc(result[i], dic_enc[d], ui_enc).c_str());
871               }
872             }
873             if (result.empty())
874               fprintf(stdout, "%s\n", chenc(token, dic_enc[d], ui_enc).c_str());
875             fprintf(stdout, "\n");
876             continue;
877           }
878 
879           case SUFFIX: {
880             std::vector<std::string> wlst = pMS[d]->suffix_suggest(token);
881             for (size_t j = 0; j < wlst.size(); ++j) {
882               fprintf(stdout, "Suffix Suggestions are %s \n",
883                       chenc(wlst[j], dic_enc[d], io_enc).c_str());
884             }
885             fflush(stdout);
886             continue;
887           }
888           case ANALYZE: {
889             std::vector<std::string> result =
890               pMS[d]->analyze(chenc(token, io_enc, dic_enc[d]));
891             for (size_t i = 0; i < result.size(); ++i) {
892               fprintf(stdout, "%s %s\n", token.c_str(),
893                       chenc(result[i], dic_enc[d], ui_enc).c_str());
894             }
895             if (result.empty() && !token.empty() && token[token.size() - 1] == '.') {
896               token.resize(token.size() - 1);
897               result = pMS[d]->analyze(token);
898               for (size_t i = 0; i < result.size(); ++i) {
899                 fprintf(stdout, "%s %s\n", token.c_str(),
900                         chenc(result[i], dic_enc[d], ui_enc).c_str());
901               }
902             }
903             if (result.empty())
904               fprintf(stdout, "%s\n", chenc(token, dic_enc[d], ui_enc).c_str());
905             fprintf(stdout, "\n");
906             continue;
907           }
908 
909           case PIPE: {
910             int info;
911             std::string root;
912             if (check(pMS, &d, parser->get_word(token), &info, &root)) {
913               if (!terse_mode) {
914                 if (verbose_mode)
915                   fprintf(stdout, "* %s\n", token.c_str());
916                 else
917                   fprintf(stdout, "*\n");
918               }
919               fflush(stdout);
920             } else {
921               int byte_offset = parser->get_tokenpos() + pos;
922               int char_offset = 0;
923               if (strcmp(io_enc, "UTF-8") == 0) {
924                 for (int i = 0; i < byte_offset; i++) {
925                   if ((buf[i] & 0xc0) != 0x80)
926                     char_offset++;
927                 }
928               } else {
929                 char_offset = byte_offset;
930               }
931               std::vector<std::string> wlst =
932                 pMS[d]->suggest(chenc(token, io_enc, dic_enc[d]));
933               if (wlst.empty()) {
934                 fprintf(stdout, "# %s %d", token.c_str(), char_offset);
935               } else {
936                 fprintf(stdout, "& %s %u %d: ", token.c_str(), static_cast<unsigned int>(wlst.size()), char_offset);
937                 fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], io_enc).c_str());
938               }
939               for (size_t j = 1; j < wlst.size(); ++j) {
940                   fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc).c_str());
941               }
942               fprintf(stdout, "\n");
943               fflush(stdout);
944             }
945             continue;
946           }
947           case NORMAL: {
948             int info;
949             std::string root;
950             if (check(pMS, &d, token, &info, &root)) {
951               if (info & SPELL_COMPOUND) {
952                 fprintf(stdout, "-\n");
953               } else if (!root.empty()) {
954                 fprintf(stdout, "+ %s\n", chenc(root, dic_enc[d], ui_enc).c_str());
955               } else {
956                 fprintf(stdout, "*\n");
957               }
958               fflush(stdout);
959             } else {
960               int byte_offset = parser->get_tokenpos() + pos;
961               int char_offset = 0;
962               if (strcmp(io_enc, "UTF-8") == 0) {
963                 for (int i = 0; i < byte_offset; i++) {
964                   if ((buf[i] & 0xc0) != 0x80)
965                     char_offset++;
966                 }
967               } else {
968                 char_offset = byte_offset;
969               }
970               std::vector<std::string> wlst =
971                 pMS[d]->suggest(chenc(token, io_enc, dic_enc[d]));
972               if (wlst.empty()) {
973                 fprintf(stdout, "# %s %d", chenc(token, io_enc, ui_enc).c_str(),
974                         char_offset);
975               } else {
976                 fprintf(stdout, "& %s %u %d: ", chenc(token, io_enc, ui_enc).c_str(),
977                         static_cast<unsigned int>(wlst.size()), char_offset);
978                 fprintf(stdout, "%s", chenc(wlst[0], dic_enc[d], ui_enc).c_str());
979               }
980               for (size_t j = 1; j < wlst.size(); ++j) {
981                 fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], ui_enc).c_str());
982               }
983               fprintf(stdout, "\n");
984               fflush(stdout);
985             }
986           }
987         }
988       }
989 
990       switch (filter_mode) {
991         case AUTO: {
992           std::string pLine = parser->get_line();
993           fprintf(stdout, "%s\n", pLine.c_str());
994           break;
995         }
996 
997         case BADLINE: {
998           if (((printgood) && (!bad)) || (!printgood && (bad)))
999             fprintf(stdout, "%s\n", buf);
1000           break;
1001         }
1002 
1003         case PIPE:
1004         case NORMAL: {
1005           fprintf(stdout, "\n");
1006           fflush(stdout);
1007           break;
1008         }
1009       }
1010     }  // if
1011   }    // while
1012 
1013   if (bZippedOdf) {
1014     fclose(fileid);
1015     std::ostringstream sbuf;
1016     sbuf << odftmpdir << "/content.xml";
1017     if (remove(sbuf.str().c_str()) != 0) {
1018       perror("temp file delete failed");
1019     }
1020     sbuf.str("");
1021     sbuf << "rmdir " << odftmpdir;
1022     if (system(sbuf.str().c_str()) != 0) {
1023       perror("temp dir delete failed");
1024     }
1025   }
1026 
1027   delete parser;
1028 }  // pipe_interface
1029 
1030 #ifdef HAVE_READLINE
1031 
1032 #ifdef HAVE_CURSES_H
1033 static const char* rltext;
1034 
1035 // set base text of input line
set_rltext()1036 static int set_rltext() {
1037   if (rltext) {
1038     rl_insert_text(rltext);
1039     rltext = NULL;
1040     rl_startup_hook = (rl_hook_func_t*)NULL;
1041   }
1042   return 0;
1043 }
1044 
1045 #endif
1046 
1047 // Readline escape
rl_escape(int count,int key)1048 static int rl_escape(int count, int key) {
1049   rl_delete_text(0, rl_end);
1050   rl_done = 1;
1051   return 0;
1052 }
1053 #endif
1054 
1055 #ifdef HAVE_CURSES_H
expand_tab(std::string & dest,const std::string & in_src)1056 int expand_tab(std::string& dest, const std::string& in_src) {
1057   dest.clear();
1058   const char *src = in_src.c_str();
1059   int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
1060   int chpos = 0;
1061   for (int j = 0; (src[j] != '\0') && (src[j] != '\r'); j++) {
1062     if (src[j] == '\t') {
1063       int end = 8 - (chpos % 8);
1064       for (int k = 0; k < end; k++) {
1065         dest.push_back(' ');
1066         chpos++;
1067       }
1068     } else {
1069       dest.push_back(src[j]);
1070       if (!u8 || (src[j] & 0xc0) != 0x80)
1071         chpos++;
1072     }
1073   }
1074   return chpos;
1075 }
1076 
// UTF-8-aware version of strncpy (but output is always null terminated).
// What we should deal in is cursor position cells in a terminal emulator,
// i.e. the number of visual columns occupied, like wcwidth/wcswidth does.
// What we're really currently doing is dealing in the number of characters,
// like mbstowcs, which isn't quite correct, but close enough for western
// text in UTF-8.
strncpyu8(std::string & dest,const std::string & in_src,int begin,int n)1083 void strncpyu8(std::string& dest, const std::string& in_src, int begin, int n) {
1084   dest.clear();
1085   const char *src = in_src.c_str();
1086   if (n) {
1087     int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
1088     for (int i = 0; i < begin + n;) {
1089       if (!*src)
1090         break;  // source is at it's end
1091       if (!u8 || (*src & 0xc0) != 0x80)
1092         i++;            // new character
1093       if (i > begin) {  // copy char (w/ utf-8 bytes)
1094         dest.push_back(*src++);
1095         while (u8 && (*src & 0xc0) == 0x80)
1096           dest.push_back(*src++);
1097       } else {  // skip char (w/ utf-8 bytes)
1098         ++src;
1099         while (u8 && (*src & 0xc0) == 0x80)
1100           ++src;
1101       }
1102     }
1103   }
1104 }
1105 
1106 // See strncpyu8 for gotchas
strlenu8(const std::string & in_src)1107 int strlenu8(const std::string& in_src) {
1108   const char *src = in_src.c_str();
1109   int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
1110   int i = 0;
1111   while (*src) {
1112     if (!u8 || (*src & 0xc0) != 0x80)
1113       i++;
1114     ++src;
1115   }
1116   return i;
1117 }
1118 
dialogscreen(TextParser * parser,std::string & token,char * filename,int forbidden,std::vector<std::string> & wlst)1119 void dialogscreen(TextParser* parser,
1120                   std::string& token,
1121                   char* filename,
1122                   int forbidden,
1123                   std::vector<std::string>& wlst) {
1124   int x, y;
1125   getmaxyx(stdscr, y, x);
1126   clear();
1127 
1128   if (forbidden & SPELL_FORBIDDEN)
1129     printw(gettext("FORBIDDEN!"));
1130   else if (forbidden & SPELL_WARN)
1131     printw(gettext("Spelling mistake?"));
1132 
1133   printw(gettext("\t%s\t\tFile: %s\n\n"), chenc(token, io_enc, ui_enc).c_str(),
1134          filename);
1135 
1136   // handle long lines and tabulators
1137   std::string lines[MAXPREVLINE];
1138   std::string prevLine;
1139   for (int i = 0; i < MAXPREVLINE; i++) {
1140     prevLine = parser->get_prevline(i);
1141     expand_tab(lines[i], chenc(prevLine, io_enc, ui_enc));
1142   }
1143 
1144   prevLine = parser->get_prevline(0);
1145   std::string line = prevLine.substr(0, parser->get_tokenpos());
1146   std::string line2;
1147   int tokenbeg = expand_tab(line2, chenc(line, io_enc, ui_enc));
1148 
1149   prevLine = parser->get_prevline(0);
1150   line = prevLine.substr(0, parser->get_tokenpos() + token.size());
1151   int tokenend = expand_tab(line2, chenc(line, io_enc, ui_enc));
1152 
1153   int rowindex = (tokenend - 1) / x;
1154   int beginrow = rowindex - tokenbeg / x;
1155   if (beginrow >= MAXPREVLINE)
1156     beginrow = MAXPREVLINE - 1;
1157 
1158   int ri = rowindex;
1159   int prevline = 0;
1160 
1161   for (int i = 0; i < MAXPREVLINE; i++) {
1162     strncpyu8(line, lines[prevline], x * rowindex, x);
1163     mvprintw(MAXPREVLINE + 1 - i, 0, "%s", line.c_str());
1164     const bool finished = i == MAXPREVLINE - 1;
1165     if (!finished) {
1166       rowindex--;
1167       if (rowindex == -1) {
1168         prevline++;
1169         rowindex = strlenu8(lines[prevline]) / x;
1170       }
1171     }
1172   }
1173 
1174   strncpyu8(line, lines[0], x * (ri - beginrow), tokenbeg % x);
1175   mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", line.c_str());
1176   attron(A_REVERSE);
1177   printw("%s", chenc(token, io_enc, ui_enc).c_str());
1178   attroff(A_REVERSE);
1179 
1180   mvprintw(MAXPREVLINE + 2, 0, "\n");
1181   for (size_t i = 0; i < wlst.size(); ++i) {
1182     if ((wlst.size() > 10) && (i < 10)) {
1183       printw(" 0%zu: %s\n", i, chenc(wlst[i], io_enc, ui_enc).c_str());
1184     } else {
1185       printw(" %u: %s\n", i, chenc(wlst[i], io_enc, ui_enc).c_str());
1186     }
1187   }
1188 
1189   /* TRANSLATORS: the capital letters are shortcuts, mark one letter similarly
1190      in your translation and translate the standalone letter accordingly later
1191      */
1192   mvprintw(y - 3, 0, "%s\n", gettext("\n[SPACE] R)epl A)ccept I)nsert U)ncap "
1193                                      "S)tem Q)uit e(X)it or ? for help\n"));
1194 }
1195 
lower_first_char(const std::string & token,const char * ioenc,int langnum)1196 std::string lower_first_char(const std::string& token, const char* ioenc, int langnum) {
1197   std::string utf8str = chenc(token, ioenc, "UTF-8");
1198   std::vector<w_char> u;
1199   u8_u16(u, utf8str);
1200   if (!u.empty()) {
1201     unsigned short idx = (u[0].h << 8) + u[0].l;
1202     idx = unicodetolower(idx, langnum);
1203     u[0].h = (unsigned char)(idx >> 8);
1204     u[0].l = (unsigned char)(idx & 0x00FF);
1205   }
1206   std::string scratch;
1207   u16_u8(scratch, u);
1208   return chenc(scratch, "UTF-8", ioenc);
1209 }
1210 
1211 // for terminal interface
dialog(TextParser * parser,Hunspell * pMS,std::string & token,char * filename,std::vector<std::string> & wlst,int forbidden)1212 int dialog(TextParser* parser,
1213            Hunspell* pMS,
1214            std::string& token,
1215            char* filename,
1216            std::vector<std::string>& wlst,
1217            int forbidden) {
1218   std::vector<std::string> dicwords;
1219   int c;
1220 
1221   dialogscreen(parser, token, filename, forbidden, wlst);
1222 
1223   char firstletter = '\0';
1224 
1225   while ((c = getch())) {
1226     switch (c) {
1227       case '0':
1228       case '1':
1229         if ((firstletter == '\0') && (wlst.size() > 10)) {
1230           firstletter = c;
1231           break;
1232         }
1233       case '2':
1234       case '3':
1235       case '4':
1236       case '5':
1237       case '6':
1238       case '7':
1239       case '8':
1240       case '9':
1241         modified = 1;
1242         if (firstletter == '1') {
1243           c += 10;
1244         }
1245         c -= '0';
1246         if (c >= static_cast<int>(wlst.size()))
1247           break;
1248         if (checkapos) {
1249           std::string sbuf(wlst[c]);
1250           mystrrep(sbuf, "'", UTF8_APOS);
1251           parser->change_token(sbuf.c_str());
1252         } else {
1253           parser->change_token(wlst[c].c_str());
1254         }
1255         return 0;
1256       case ' ':
1257         return 0;
1258       case '?':
1259         clear();
1260         printw(gettext(
1261             "Whenever a word is found that is not in the dictionary\n"
1262             "it is printed on the first line of the screen.  If the "
1263             "dictionary\n"
1264             "contains any similar words, they are listed with a number\n"
1265             "next to each one.  You have the option of replacing the word\n"
1266             "completely, or choosing one of the suggested words.\n"));
1267         printw(gettext("\nCommands are:\n\n"));
1268         printw(gettext("R	Replace the misspelled word completely.\n"));
1269         printw(gettext("Space	Accept the word this time only.\n"));
1270         printw(
1271             gettext("A	Accept the word for the rest of this session.\n"));
1272         printw(gettext(
1273             "I	Accept the word, and put it in your private dictionary.\n"));
1274         printw(gettext(
1275             "U	Accept and add lowercase version to private dictionary.\n"));
1276         printw(
1277             gettext("S\tAsk a stem and a model word and store them in the "
1278                     "private dictionary.\n"
1279                     "\tThe stem will be accepted also with the affixes of the "
1280                     "model word.\n"));
1281         printw(gettext("0-n	Replace with one of the suggested words.\n"));
1282         printw(gettext(
1283             "X	Write the rest of this file, ignoring misspellings, and start "
1284             "next file.\n"));
1285         printw(
1286             gettext("Q	Quit immediately. Asks for confirmation. Leaves file "
1287                     "unchanged.\n"));
1288         printw(gettext("^Z	Suspend program. Restart with fg command.\n"));
1289         printw(gettext("?	Show this help screen.\n"));
1290         printw(gettext("\n-- Type space to continue -- \n"));
1291         while (getch() != ' ')
1292           ;
1293       // fall-through
1294       case 12: {
1295         dialogscreen(parser, token, filename, forbidden, wlst);
1296         break;
1297       }
1298       default: {
1299         /* TRANSLATORS: translate this letter according to the shortcut letter
1300            used
1301            previously in the  translation of "R)epl" before */
1302         if (c == (gettext("r"))[0]) {
1303           modified = 1;
1304 
1305 #ifdef HAVE_READLINE
1306           endwin();
1307           rltext = "";
1308           if (rltext && *rltext)
1309             rl_startup_hook = set_rltext;
1310 #endif
1311           char* temp = readline(gettext("Replace with: "));
1312 #ifdef HAVE_READLINE
1313           initscr();
1314           cbreak();
1315 #endif
1316 
1317           if ((!temp) || (temp[0] == '\0')) {
1318             free(temp);
1319             dialogscreen(parser, token, filename, forbidden, wlst);
1320             break;
1321           }
1322 
1323           std::string i(temp);
1324           free(temp);
1325           if (checkapos) {
1326             mystrrep(i, "'", UTF8_APOS);
1327           }
1328           parser->change_token(i.c_str());
1329 
1330           return 2;  // replace
1331         }
1332         /* TRANSLATORS: translate these letters according to the shortcut letter
1333            used
1334            previously in the  translation of "U)ncap" and I)nsert before */
1335         int u_key = gettext("u")[0];
1336         int i_key = gettext("i")[0];
1337 
1338         if (c == u_key || c == i_key) {
1339           std::string word = (c == i_key)
1340                       ? token
1341                       : lower_first_char(token, io_enc, pMS->get_langnum());
1342           dicwords.push_back(word);
1343           std::string sbuf;
1344           // save
1345           if (HOME) {
1346             sbuf.append(HOME);
1347           } else {
1348             fprintf(stderr, gettext("error - missing HOME variable\n"));
1349             break;
1350           }
1351 #ifndef WIN32
1352           sbuf.append("/");
1353 #endif
1354           size_t offset = sbuf.size();
1355           if (!privdicname) {
1356             sbuf.append(DICBASENAME);
1357             sbuf.append(basename(dicname, DIRSEPCH));
1358           } else {
1359             sbuf.append(privdicname);
1360           }
1361           if (save_privdic(sbuf.substr(offset), sbuf, dicwords)) {
1362             dicwords.clear();
1363           } else {
1364             fprintf(stderr, gettext("Cannot update personal dictionary."));
1365             break;
1366           }
1367         }  // no break
1368         /* TRANSLATORS: translate this letter according to the shortcut letter
1369            used
1370            previously in the  translation of "U)ncap" and I)nsert before */
1371         if ((c == (gettext("u"))[0]) || (c == (gettext("i"))[0]) ||
1372             (c == (gettext("a"))[0])) {
1373           modified = 1;
1374           putdic(token, pMS);
1375           return 0;
1376         }
1377         /* TRANSLATORS: translate this letter according to the shortcut letter
1378            used
1379            previously in the  translation of "S)tem" before */
1380         if (c == (gettext("s"))[0]) {
1381           modified = 1;
1382 
1383           std::string w(token);
1384           size_t n_last_of = w.find_last_of('-');
1385           if (n_last_of != std::string::npos) {
1386             w.resize(n_last_of);
1387           }
1388 
1389 #ifdef HAVE_READLINE
1390           endwin();
1391           rltext = w.c_str();
1392           if (rltext && *rltext)
1393             rl_startup_hook = set_rltext;
1394 #endif
1395           char* temp = readline(gettext("New word (stem): "));
1396 
1397           if ((!temp) || (temp[0] == '\0')) {
1398             free(temp);
1399 #ifdef HAVE_READLINE
1400             initscr();
1401             cbreak();
1402 #endif
1403             dialogscreen(parser, token, filename, forbidden, wlst);
1404             break;
1405           }
1406 
1407           w.assign(temp);
1408           free(temp);
1409 
1410 #ifdef HAVE_READLINE
1411           initscr();
1412           cbreak();
1413 #endif
1414           dialogscreen(parser, token, filename, forbidden, wlst);
1415           refresh();
1416 
1417 #ifdef HAVE_READLINE
1418           endwin();
1419           rltext = "";
1420           if (rltext && *rltext)
1421             rl_startup_hook = set_rltext;
1422 #endif
1423           temp = readline(gettext("Model word (a similar dictionary word): "));
1424 
1425 #ifdef HAVE_READLINE
1426           initscr();
1427           cbreak();
1428 #endif
1429 
1430           if ((!temp) || (temp[0] == '\0')) {
1431             free(temp);
1432             dialogscreen(parser, token, filename, forbidden, wlst);
1433             break;
1434           }
1435 
1436           std::string w2(temp);
1437           free(temp);
1438 
1439           std::string w3;
1440           w3.append(w);
1441           w3.append("/");
1442           w3.append(w2);
1443 
1444           if (!putdic(w3, pMS)) {
1445             dicwords.push_back(w3);
1446 
1447             w3.clear();
1448             w3.append(w);
1449             w3.append("-/");
1450             w3.append(w2);
1451             w3.append("-");
1452             if (putdic(w3, pMS)) {
1453               dicwords.push_back(w3);
1454             }
1455             // save
1456             std::string sbuf;
1457             if (HOME) {
1458               sbuf.append(HOME);
1459             } else {
1460               fprintf(stderr, gettext("error - missing HOME variable\n"));
1461               continue;
1462             }
1463 #ifndef WIN32
1464             sbuf.append("/");
1465 #endif
1466             size_t offset = sbuf.size();
1467             if (!privdicname) {
1468               sbuf.append(DICBASENAME);
1469               sbuf.append(basename(dicname, DIRSEPCH));
1470             } else {
1471               sbuf.append(privdicname);
1472             }
1473             if (save_privdic(sbuf.substr(offset), sbuf, dicwords)) {
1474               dicwords.clear();
1475             } else {
1476               fprintf(stderr, gettext("Cannot update personal dictionary."));
1477               break;
1478             }
1479 
1480           } else {
1481             dialogscreen(parser, token, filename, forbidden, wlst);
1482             printw(gettext(
1483                 "Model word must be in the dictionary. Press any key!"));
1484             getch();
1485             dialogscreen(parser, token, filename, forbidden, wlst);
1486             break;
1487           }
1488           return 0;
1489         }
1490         /* TRANSLATORS: translate this letter according to the shortcut letter
1491            used
1492            previously in the  translation of "e(X)it" before */
1493         if (c == (gettext("x"))[0]) {
1494           return 1;
1495         }
1496         /* TRANSLATORS: translate this letter according to the shortcut letter
1497            used
1498            previously in the  translation of "Q)uit" before */
1499         if (c == (gettext("q"))[0]) {
1500           if (modified) {
1501             printw(
1502                 gettext("Are you sure you want to throw away your changes? "));
1503             /* TRANSLATORS: translate this letter according to the shortcut
1504              * letter y)es */
1505             if (getch() == (gettext("y"))[0]) {
1506               return -1;
1507             }
1508             dialogscreen(parser, token, filename, forbidden, wlst);
1509             break;
1510           } else {
1511             return -1;
1512           }
1513         }
1514       }
1515     }
1516   }
1517   return 0;
1518 }
1519 
interactive_line(TextParser * parser,Hunspell ** pMS,char * filename,FILE * tempfile)1520 int interactive_line(TextParser* parser,
1521                      Hunspell** pMS,
1522                      char* filename,
1523                      FILE* tempfile) {
1524   int dialogexit = 0;
1525   int info = 0;
1526   int d = 0;
1527   std::string token;
1528   while (parser->next_token(token)) {
1529     if (!check(pMS, &d, parser->get_word(token), &info, NULL)) {
1530       std::vector<std::string> wlst;
1531       dialogscreen(parser, token, filename, info, wlst);  // preview
1532       refresh();
1533       std::string dicbuf = chenc(parser->get_word(token), io_enc, dic_enc[d]);
1534       wlst = pMS[d]->suggest(mystrrep(dicbuf, ENTITY_APOS, "'").c_str());
1535       if (wlst.empty()) {
1536         dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
1537       } else {
1538         for (size_t j = 0; j < wlst.size(); ++j) {
1539           wlst[j] = chenc(wlst[j], dic_enc[d], io_enc);
1540         }
1541         dialogexit = dialog(parser, pMS[d], token, filename, wlst, info);
1542       }
1543     }
1544     if ((dialogexit == -1) || (dialogexit == 1))
1545       goto ki2;
1546   }
1547 
1548 ki2:
1549   fprintf(tempfile, "%s", parser->get_line().c_str());
1550   return dialogexit;
1551 }
1552 
interactive_interface(Hunspell ** pMS,char * filename,int format)1553 void interactive_interface(Hunspell** pMS, char* filename, int format) {
1554   char buf[MAXLNLEN];
1555   char* odffilename = NULL;
1556   char* odftmpdir = NULL;  // external zip works only with temporary directories
1557                             // (option -j)
1558 
1559   FILE* text = fopen(filename, "r");
1560   if (!text) {
1561     perror(gettext("Can't open inputfile"));
1562     endwin();
1563     exit(1);
1564   }
1565 
1566   int dialogexit;
1567   int check = 1;
1568 
1569   const char* extension = basename(filename, '.');
1570   TextParser* parser = get_parser(format, extension, pMS[0]);
1571   char tmpdirtemplate[] = "/tmp/hunspellXXXXXX";
1572 
1573   bool bZippedOdf = is_zipped_odf(parser, extension);
1574   // access content.xml of ODF
1575   if (bZippedOdf) {
1576     odftmpdir = mymkdtemp(tmpdirtemplate);
1577     if (!odftmpdir) {
1578       perror(gettext("Can't create tmp dir"));
1579       endwin();
1580       exit(1);
1581     }
1582     fclose(text);
1583     // break 1-line XML of zipped ODT documents at </style:style> and </text:p>
1584     // to avoid tokenization problems (fgets could stop within an XML tag)
1585     std::ostringstream sbuf;
1586     sbuf << "unzip -p \"" << filename << "\" content.xml | sed "
1587             "\"s/\\(<\\/text:p>\\|<\\/style:style>\\)\\(.\\)/\\1\\n\\2/g\" "
1588             ">" << odftmpdir << "/content.xml";
1589     if (!secure_filename(filename) || system(sbuf.str().c_str()) != 0) {
1590       if (secure_filename(filename))
1591         perror(gettext("Can't open inputfile"));
1592       else
1593         fprintf(stderr, gettext("Can't open %s.\n"), filename);
1594       endwin();
1595       (void)system((std::string("rmdir ") + odftmpdir).c_str());
1596       exit(1);
1597     }
1598     odffilename = filename;
1599     std::string file(odftmpdir);
1600     file.append("/content.xml");
1601     filename = mystrdup(file.c_str());
1602     text = fopen(filename, "r");
1603     if (!text) {
1604       perror(gettext("Can't open inputfile"));
1605       endwin();
1606       (void)system((std::string("rmdir ") + odftmpdir).c_str());
1607       exit(1);
1608     }
1609   }
1610 
1611   FILE* tempfile = tmpfile();
1612 
1613   if (!tempfile) {
1614     perror(gettext("Can't create tempfile"));
1615     delete parser;
1616     fclose(text);
1617     endwin();
1618     exit(1);
1619   }
1620 
1621   while (fgets(buf, MAXLNLEN, text)) {
1622     if (check) {
1623       parser->put_line(buf);
1624       dialogexit = interactive_line(
1625           parser, pMS, odffilename ? odffilename : filename, tempfile);
1626       switch (dialogexit) {
1627         case -1: {
1628           clear();
1629           refresh();
1630           fclose(tempfile);  // automatically deleted when closed
1631           if (bZippedOdf) {
1632             if (remove(filename) != 0) {
1633               perror("temp file delete failed");
1634             }
1635             std::ostringstream sbuf;
1636             sbuf << "rmdir " << odftmpdir;
1637             if (system(sbuf.str().c_str()) != 0) {
1638               perror("temp dir delete failed");
1639             }
1640             free(filename);
1641           }
1642           endwin();
1643           exit(0);
1644         }
1645         case 1: {
1646           check = 0;
1647         }
1648       }
1649     } else {
1650       fprintf(tempfile, "%s", buf);
1651     }
1652   }
1653   fclose(text);
1654 
1655   if (modified) {
1656     rewind(tempfile);
1657     text = fopen(filename, "wb");
1658     if (text == NULL)
1659       perror(gettext("Can't open outputfile"));
1660     else {
1661       size_t n;
1662       while ((n = fread(buf, 1, MAXLNLEN, tempfile)) > 0) {
1663         if (fwrite(buf, 1, n, text) != n)
1664           perror("write failed");
1665       }
1666       fclose(text);
1667       if (bZippedOdf && odffilename) {
1668         std::ostringstream sbuf;
1669         sbuf << "zip -j '" << odffilename << "' " << filename;
1670         if (system(sbuf.str().c_str()) != 0)
1671           perror("write failed");
1672       }
1673     }
1674   }
1675 
1676   if (bZippedOdf) {
1677     if (remove(filename) != 0) {
1678       perror("temp file delete failed");
1679     }
1680     std::ostringstream sbuf;
1681     sbuf << "rmdir " << odftmpdir;
1682     if (system(sbuf.str().c_str()) != 0) {
1683       perror("temp dir delete failed");
1684     }
1685     free(filename);
1686   }
1687 
1688   delete parser;
1689   fclose(tempfile);  // automatically deleted when closed
1690 }
1691 
1692 #endif
1693 
exist2(char * dir,int len,const char * name,const char * ext)1694 char* exist2(char* dir, int len, const char* name, const char* ext) {
1695   std::string buf;
1696   const char* sep = (len == 0) ? "" : DIRSEP;
1697   buf.assign(dir, len);
1698   buf.append(sep);
1699   buf.append(name);
1700   buf.append(ext);
1701   if (exist(buf.c_str()))
1702     return mystrdup(buf.c_str());
1703   buf.append(HZIP_EXTENSION);
1704   if (exist(buf.c_str())) {
1705     buf.erase(buf.size() - strlen(HZIP_EXTENSION));
1706     return mystrdup(buf.c_str());
1707   }
1708   return NULL;
1709 }
1710 
1711 #if !defined(WIN32) || defined(__MINGW32__)
listdicpath(char * dir,int len)1712 int listdicpath(char* dir, int len) {
1713   std::string buf;
1714   const char* sep = (len == 0) ? "" : DIRSEP;
1715   buf.assign(dir, len);
1716   buf.append(sep);
1717   DIR* d = opendir(buf.c_str());
1718   if (!d)
1719     return 0;
1720   struct dirent* de;
1721   while ((de = readdir(d))) {
1722     len = strlen(de->d_name);
1723     if ((len > 4 && strcmp(de->d_name + len - 4, ".dic") == 0) ||
1724         (len > 7 && strcmp(de->d_name + len - 7, ".dic.hz") == 0)) {
1725       char* s = mystrdup(de->d_name);
1726       s[len - ((s[len - 1] == 'z') ? 7 : 4)] = '\0';
1727       fprintf(stderr, "%s%s\n", buf.c_str(), s);
1728       free(s);
1729     }
1730   }
1731   closedir(d);
1732   return 1;
1733 }
1734 #endif
1735 
1736 // search existing path for file "name + ext"
search(char * begin,char * name,const char * ext)1737 char* search(char* begin, char* name, const char* ext) {
1738   char* end = begin;
1739   while (1) {
1740     while (!((*end == *PATHSEP) || (*end == '\0')))
1741       end++;
1742     char* res = NULL;
1743     if (name) {
1744       res = exist2(begin, int(end - begin), name, ext);
1745     } else {
1746 #if !defined(WIN32) || defined(__MINGW32__)
1747       listdicpath(begin, end - begin);
1748 #endif
1749     }
1750     if ((*end == '\0') || res)
1751       return res;
1752     end++;
1753     begin = end;
1754   }
1755 }
1756 
main(int argc,char ** argv)1757 int main(int argc, char** argv) {
1758   std::string buf;
1759   Hunspell* pMS[DMAX];
1760   char* key = NULL;
1761   int arg_files = -1;  // first filename argumentum position in argv
1762   int format = FMT_TEXT;
1763   int argstate = 0;
1764 
1765 #ifdef HAVE_LOCALE_H
1766   setlocale(LC_ALL, "");
1767 #endif
1768 #ifdef HAVE_LANGINFO_H
1769   ui_enc = nl_langinfo(CODESET);
1770 #endif
1771   textdomain("hunspell"); //for gettext
1772 
1773 #ifdef HAVE_READLINE
1774   rl_set_key("\x1b\x1b", rl_escape, rl_get_keymap());
1775   rl_bind_key('\t', rl_insert);
1776 #endif
1777 
1778 #ifdef LOG
1779   log("START");
1780 #endif
1781 
1782   for (int i = 1; i < argc; i++) {
1783 #ifdef LOG
1784     log(argv[i]);
1785 #endif
1786 
1787     if (argstate == 1) {
1788       if (dicname)
1789         free(dicname);
1790       dicname = mystrdup(argv[i]);
1791       argstate = 0;
1792     } else if (argstate == 2) {
1793       if (privdicname)
1794         free(privdicname);
1795       privdicname = mystrdup(argv[i]);
1796       argstate = 0;
1797     } else if (argstate == 3) {
1798       io_enc = argv[i];
1799       argstate = 0;
1800     } else if (argstate == 4) {
1801       key = argv[i];
1802       argstate = 0;
1803     } else if (strcmp(argv[i], "-d") == 0)
1804       argstate = 1;
1805     else if (strcmp(argv[i], "-p") == 0)
1806       argstate = 2;
1807     else if (strcmp(argv[i], "-i") == 0)
1808       argstate = 3;
1809     else if (strcmp(argv[i], "-P") == 0)
1810       argstate = 4;
1811     else if ((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
1812       fprintf(stderr, "%s", gettext("Usage: hunspell [OPTION]... [FILE]...\n"));
1813       fprintf(stderr, "%s", gettext("Check spelling of each FILE. Without FILE, "
1814                               "check standard input.\n\n"));
1815       fprintf(stderr, "%s", gettext("  -1\t\tcheck only first field in lines "
1816                               "(delimiter = tabulator)\n"));
1817       fprintf(stderr, "%s", gettext("  -a\t\tIspell's pipe interface\n"));
1818       fprintf(stderr, "%s", gettext("  --check-url\tcheck URLs, e-mail addresses and "
1819                               "directory paths\n"));
1820       fprintf(
1821           stderr, "%s",
1822           gettext(
1823               "  --check-apostrophe\tcheck Unicode typographic apostrophe\n"));
1824       fprintf(stderr, "%s",
1825               gettext("  -d d[,d2,...]\tuse d (d2 etc.) dictionaries\n"));
1826       fprintf(stderr, "%s", gettext("  -D\t\tshow available dictionaries\n"));
1827       fprintf(stderr, "%s", gettext("  -G\t\tprint only correct words or lines\n"));
1828       fprintf(stderr, "%s", gettext("  -h, --help\tdisplay this help and exit\n"));
1829       fprintf(stderr, "%s", gettext("  -H\t\tHTML input file format\n"));
1830       fprintf(stderr, "%s", gettext("  -i enc\tinput encoding\n"));
1831       fprintf(stderr, "%s", gettext("  -l\t\tprint misspelled words\n"));
1832       fprintf(stderr, "%s", gettext("  -L\t\tprint lines with misspelled words\n"));
1833       fprintf(stderr, "%s",
1834               gettext("  -m \t\tanalyze the words of the input text\n"));
1835       fprintf(stderr, "%s", gettext("  -n\t\tnroff/troff input file format\n"));
1836       fprintf(
1837           stderr, "%s",
1838           gettext(
1839               "  -O\t\tOpenDocument (ODF or Flat ODF) input file format\n"));
1840       fprintf(stderr, "%s", gettext("  -p dict\tset dict custom dictionary\n"));
1841       fprintf(stderr, "%s",
1842               gettext("  -r\t\twarn of the potential mistakes (rare words)\n"));
1843       fprintf(
1844           stderr, "%s",
1845           gettext("  -P password\tset password for encrypted dictionaries\n"));
1846       fprintf(stderr, "%s", gettext("  -s \t\tstem the words of the input text\n"));
1847       fprintf(stderr, "%s", gettext("  -S \t\tsuffix words of the input text\n"));
1848       fprintf(stderr, "%s", gettext("  -t\t\tTeX/LaTeX input file format\n"));
1849       fprintf(stderr, "%s", gettext("  -v, --version\tprint version number\n"));
1850       fprintf(stderr, "%s",
1851               gettext("  -vv\t\tprint Ispell compatible version number\n"));
1852       fprintf(stderr, "%s", gettext("  -w\t\tprint misspelled words (= lines) from "
1853                               "one word/line input.\n"));
1854       fprintf(stderr, "%s", gettext("  -X\t\tXML input file format\n\n"));
1855       fprintf(
1856           stderr, "%s",
1857           gettext(
1858               "Example: hunspell -d en_US file.txt    # interactive spelling\n"
1859               "         hunspell -i utf-8 file.txt    # check UTF-8 encoded "
1860               "file\n"
1861               "         hunspell -l *.odt             # print misspelled words "
1862               "of ODF files\n\n"
1863               "         # Quick fix of ODF documents by personal dictionary "
1864               "creation\n\n"
1865               "         # 1 Make a reduced list from misspelled and unknown "
1866               "words:\n\n"
1867               "         hunspell -l *.odt | sort | uniq >words\n\n"
1868               "         # 2 Delete misspelled words of the file by a text "
1869               "editor.\n"
1870               "         # 3 Use this personal dictionary to fix the deleted "
1871               "words:\n\n"
1872               "         hunspell -p words *.odt\n\n"));
1873       fprintf(stderr, "%s", gettext("Bug reports: http://hunspell.github.io/\n"));
1874       exit(0);
1875     } else if ((strcmp(argv[i], "-vv") == 0) || (strcmp(argv[i], "-v") == 0) ||
1876                (strcmp(argv[i], "--version") == 0)) {
1877       fprintf(stdout, "%s", gettext(HUNSPELL_PIPE_HEADING));
1878       fprintf(stdout, "\n");
1879       if (strcmp(argv[i], "-vv") != 0) {
1880         fprintf(stdout, "%s",
1881                 gettext("\nCopyright (C) 2002-2014 L\303\241szl\303\263 "
1882                         "N\303\251meth. License: MPL/GPL/LGPL.\n\n"
1883                         "Based on OpenOffice.org's Myspell library.\n"
1884                         "Myspell's copyright (C) Kevin Hendricks, 2001-2002, "
1885                         "License: BSD.\n\n"));
1886         fprintf(stdout, "%s", gettext("This is free software; see the source for "
1887                                 "copying conditions.  There is NO\n"
1888                                 "warranty; not even for MERCHANTABILITY or "
1889                                 "FITNESS FOR A PARTICULAR PURPOSE,\n"
1890                                 "to the extent permitted by law.\n"));
1891       }
1892       exit(0);
1893     } else if ((strcmp(argv[i], "-a") == 0)) {
1894       filter_mode = PIPE;
1895     } else if ((strcmp(argv[i], "-m") == 0)) {
1896       /*
1897        if -a was used, don't override, i.e. keep ispell compatability
1898        ispell:   Make possible root/affix combinations that aren't in the
1899        dictionary.
1900        hunspell: Analyze the words of the input text
1901       */
1902       if (filter_mode != PIPE)
1903         filter_mode = ANALYZE;
1904     } else if ((strcmp(argv[i], "-s") == 0)) {
1905       /*
1906        if -a was used, don't override, i.e. keep ispell compatability
1907        ispell:   Stop itself with a SIGTSTP signal after each line of input.
1908        hunspell: Stem the words of the input text
1909       */
1910       if (filter_mode != PIPE)
1911         filter_mode = STEM;
1912     } else if ((strcmp(argv[i], "-S") == 0)) {
1913       if (filter_mode != PIPE)
1914         filter_mode = SUFFIX;
1915     } else if ((strcmp(argv[i], "-t") == 0)) {
1916       format = FMT_LATEX;
1917     } else if ((strcmp(argv[i], "-n") == 0)) {
1918       format = FMT_MAN;
1919     } else if ((strcmp(argv[i], "-H") == 0)) {
1920       format = FMT_HTML;
1921     } else if ((strcmp(argv[i], "-X") == 0)) {
1922       format = FMT_XML;
1923     } else if ((strcmp(argv[i], "-O") == 0)) {
1924       format = FMT_ODF;
1925     } else if ((strcmp(argv[i], "-l") == 0)) {
1926       filter_mode = BADWORD;
1927     } else if ((strcmp(argv[i], "-w") == 0)) {
1928       /*
1929        if -a was used, don't override, i.e. keep ispell compatability
1930        ispell:   Specify additional characters that can be part of a word.
1931        hunspell: Print misspelled words (= lines) from one word/line input
1932       */
1933       if (filter_mode != PIPE)
1934         filter_mode = WORDFILTER;
1935     } else if ((strcmp(argv[i], "-L") == 0)) {
1936       /*
1937        if -a was used, don't override, i.e. keep ispell compatability
1938        ispell:   Number of lines of context to be shown at the bottom of the
1939        screen
1940        hunspell: Print lines with misspelled words
1941       */
1942       if (filter_mode != PIPE)
1943         filter_mode = BADLINE;
1944     } else if ((strcmp(argv[i], "-u") == 0)) {
1945       /*
1946        if -a was used, don't override, i.e. keep ispell compatability
1947        ispell: None
1948        hunspell: Show typical misspellings
1949       */
1950       if (filter_mode != PIPE)
1951         filter_mode = AUTO0;
1952     } else if ((strcmp(argv[i], "-U") == 0)) {
1953       /*
1954        if -a was used, don't override, i.e. keep ispell compatability
1955        ispell: None
1956        hunspell: Automatic correction of typical misspellings to stdout
1957       */
1958       if (filter_mode != PIPE)
1959         filter_mode = AUTO;
1960     } else if ((strcmp(argv[i], "-u2") == 0)) {
1961       /*
1962        if -a was used, don't override, i.e. keep ispell compatability
1963        ispell: None
1964        hunspell: Print typical misspellings in sed format
1965       */
1966       if (filter_mode != PIPE)
1967         filter_mode = AUTO2;
1968     } else if ((strcmp(argv[i], "-u3") == 0)) {
1969       /*
1970        if -a was used, don't override, i.e. keep ispell compatability
1971        ispell: None
1972        hunspell: Print typical misspellings in gcc error format
1973       */
1974       if (filter_mode != PIPE)
1975         filter_mode = AUTO3;
1976     } else if ((strcmp(argv[i], "-G") == 0)) {
1977       printgood = 1;
1978     } else if ((strcmp(argv[i], "-1") == 0)) {
1979       format = FMT_FIRST;
1980     } else if ((strcmp(argv[i], "-D") == 0)) {
1981       showpath = 1;
1982     } else if ((strcmp(argv[i], "-r") == 0)) {
1983       warn = 1;
1984     } else if ((strcmp(argv[i], "--check-url") == 0)) {
1985       checkurl = 1;
1986     } else if ((strcmp(argv[i], "--check-apostrophe") == 0)) {
1987       checkapos = 1;
1988     } else if ((arg_files == -1) &&
1989                ((argv[i][0] != '-') && (argv[i][0] != '\0'))) {
1990       arg_files = i;
1991       if (!exist(argv[i])) {  // first check (before time-consuming dic. load)
1992         fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
1993 #ifdef HAVE_CURSES_H
1994         endwin();
1995 #endif
1996         exit(1);
1997       }
1998     }
1999   }
2000 
2001   multiple_files = (arg_files > 0) && (argc - arg_files > 1);
2002 
2003   if (printgood && (filter_mode == NORMAL))
2004     filter_mode = BADWORD;
2005 
2006   if (!dicname) {
2007     if (!(dicname = getenv("DICTIONARY"))) {
2008       /*
2009        * Search in order of LC_ALL, LC_MESSAGES &
2010        * LANG
2011       */
2012       const char* tests[] = {"LC_ALL", "LC_MESSAGES", "LANG"};
2013       for (size_t i = 0; i < sizeof(tests) / sizeof(const char*); ++i) {
2014         if ((dicname = getenv(tests[i])) && strcmp(dicname, "") != 0) {
2015           dicname = mystrdup(dicname);
2016           char* dot = strchr(dicname, '.');
2017           if (dot)
2018             *dot = '\0';
2019           char* at = strchr(dicname, '@');
2020           if (at)
2021             *at = '\0';
2022           break;
2023         }
2024       }
2025 
2026       if (dicname &&
2027           ((strcmp(dicname, "C") == 0) || (strcmp(dicname, "POSIX") == 0))) {
2028         free(dicname);
2029         dicname = mystrdup("en_US");
2030       }
2031 
2032       if (!dicname) {
2033         dicname = mystrdup(DEFAULTDICNAME);
2034       }
2035     } else {
2036       dicname = mystrdup(dicname);
2037     }
2038   }
2039 
2040   {
2041     std::string path_std_str = ".";
2042     path_std_str.append(PATHSEP); // <- check path in local directory
2043     path_std_str.append(PATHSEP); // <- check path in root directory
2044     if (getenv("DICPATH")) {
2045       path_std_str.append(getenv("DICPATH")).append(PATHSEP);
2046     }
2047     path_std_str.append(LIBDIR).append(PATHSEP);
2048     if (HOME) {
2049       const char * userooodir[] = USEROOODIR;
2050       for(size_t i = 0; i < sizeof(userooodir)/sizeof(userooodir[0]); ++i) {
2051         path_std_str += HOME;
2052 #ifndef _WIN32
2053         path_std_str += DIRSEP;
2054 #endif
2055         path_std_str.append(userooodir[i]).append(PATHSEP);
2056       }
2057       path_std_str.append(OOODIR);
2058     }
2059     path = mystrdup(path_std_str.c_str());
2060   }
2061 
2062   if (showpath) {
2063     fprintf(stderr, gettext("SEARCH PATH:\n%s\n"), path);
2064     fprintf(
2065         stderr, "%s",
2066         gettext(
2067             "AVAILABLE DICTIONARIES (path is not mandatory for -d option):\n"));
2068     search(path, NULL, NULL);
2069     if (-1 == arg_files) {
2070       exit(0);
2071     }
2072   }
2073 
2074   if (!privdicname)
2075     privdicname = mystrdup(getenv("WORDLIST"));
2076 
2077   char* dicplus = strchr(dicname, ',');
2078   if (dicplus)
2079     *dicplus = '\0';
2080   char* aff = search(path, dicname, ".aff");
2081   char* dic = search(path, dicname, ".dic");
2082   if (aff && dic) {
2083     if (showpath) {
2084       fprintf(stderr, gettext("LOADED DICTIONARY:\n%s\n%s\n"), aff, dic);
2085     }
2086     pMS[0] = new Hunspell(aff, dic, key);
2087     dic_enc[0] = pMS[0]->get_dict_encoding().c_str();
2088     dmax = 1;
2089     while (dicplus) {
2090       char* dicname2 = dicplus + 1;
2091       dicplus = strchr(dicname2, ',');
2092       if (dicplus)
2093         *dicplus = '\0';
2094       free(aff);
2095       free(dic);
2096       aff = search(path, dicname2, ".aff");
2097       dic = search(path, dicname2, ".dic");
2098       if (aff && dic) {
2099         if (dmax < DMAX) {
2100           pMS[dmax] = new Hunspell(aff, dic, key);
2101           dic_enc[dmax] = pMS[dmax]->get_dict_encoding().c_str();
2102           dmax++;
2103           if (showpath) {
2104             fprintf(stderr, gettext("LOADED DICTIONARY:\n%s\n%s\n"), aff, dic);
2105           }
2106         } else
2107           fprintf(stderr, gettext("error - %s exceeds dictionary limit.\n"),
2108                   dicname2);
2109       } else if (dic)
2110         pMS[dmax - 1]->add_dic(dic);
2111     }
2112   } else {
2113     fprintf(stderr, gettext("Can't open affix or dictionary files for "
2114                             "dictionary named \"%s\".\n"),
2115             dicname);
2116     exit(1);
2117   }
2118 
2119   /* open the private dictionaries */
2120   if (HOME) {
2121     buf.assign(HOME);
2122 #ifndef WIN32
2123     buf.append("/");
2124 #endif
2125     buf.append(DICBASENAME);
2126     buf.append(basename(dicname, DIRSEPCH));
2127     load_privdic(buf.c_str(), pMS[0]);
2128     buf.assign(HOME);
2129 #ifndef WIN32
2130     buf.append("/");
2131 #endif
2132     if (!privdicname) {
2133       buf.assign(DICBASENAME);
2134       buf.append(basename(dicname, DIRSEPCH));
2135       load_privdic(buf.c_str(), pMS[0]);
2136     } else {
2137       buf.append(privdicname);
2138       load_privdic(buf.c_str(), pMS[0]);
2139       buf.assign(privdicname);
2140       load_privdic(buf.c_str(), pMS[0]);
2141     }
2142   }
2143 
2144   /*
2145      If in pipe mode, output pipe mode version string only when
2146      hunspell has properly been started.
2147      Emacs and may be others relies in the English version format.
2148      Do not gettextize.
2149   */
2150   if (filter_mode == PIPE) {
2151     fprintf(stdout, HUNSPELL_PIPE_HEADING);
2152     fflush(stdout);
2153   }
2154 
2155   if (arg_files == -1) {
2156     pipe_interface(pMS, format, stdin, NULL);
2157   } else if (filter_mode != NORMAL) {
2158     for (int i = arg_files; i < argc; i++) {
2159       if (exist(argv[i])) {
2160         modified = 0;
2161         currentfilename = argv[i];
2162         FILE* f = fopen(argv[i], "r");
2163         pipe_interface(pMS, format, f, argv[i]);
2164         fclose(f);
2165       } else {
2166         fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
2167         exit(1);
2168       }
2169     }
2170   } else /*filter_mode == NORMAL*/ {
2171 #ifdef HAVE_CURSES_H
2172     initscr();
2173     cbreak();
2174     noecho();
2175     nonl();
2176     intrflush(stdscr, FALSE);
2177 
2178     for (int i = arg_files; i < argc; i++) {
2179       if (exist(argv[i])) {
2180         modified = 0;
2181         interactive_interface(pMS, argv[i], format);
2182       } else {
2183         fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
2184         endwin();
2185         exit(1);
2186       }
2187     }
2188 
2189     clear();
2190     refresh();
2191     endwin();
2192 #else
2193     fprintf(
2194         stderr, "%s",
2195         gettext(
2196             "Hunspell has been compiled without Ncurses user interface.\n"));
2197 #endif
2198   }
2199 
2200   if (dicname)
2201     free(dicname);
2202   if (privdicname)
2203     free(privdicname);
2204   if (path)
2205     free(path);
2206   if (aff)
2207     free(aff);
2208   if (dic)
2209     free(dic);
2210 #ifdef HAVE_ICONV
2211   free_utf_tbl();
2212 #endif
2213   for (int i = 0; i < dmax; i++)
2214     delete pMS[i];
2215   return 0;
2216 }
2217 
2218 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
2219