1 /* Copyright 2016-2021 Dimitrij Mijoski
2 *
3 * This file is part of Nuspell.
4 *
5 * Nuspell is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * Nuspell is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "finder.hxx"
20 #include "utils.hxx"
21
22 #include <algorithm>
23 #include <array>
24 #include <iostream>
25 #include <iterator>
26 #include <sstream>
27 #include <unordered_set>
28 #include <utility>
29
30 #if !defined(_WIN32) && \
31 (defined(__unix__) || defined(__unix) || \
32 (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__))
33 #include <unistd.h>
34 #ifdef _POSIX_VERSION
35 #include <dirent.h>
36 #include <glob.h>
37 #include <sys/stat.h>
38 #include <sys/types.h>
39 #endif
40
41 #elif defined(_WIN32)
42
43 #include <io.h>
44 #include <windows.h>
45
46 #ifdef __MINGW32__
47 #include <dirent.h>
48 //#include <glob.h> //not present in mingw-w64. present in vanilla mingw
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #endif //__MINGW32__
52
53 #endif
54
55 using namespace std;
56
57 namespace nuspell {
58 inline namespace v5 {
// Platform-specific separators: PATHSEP delimits the entries of a search-path
// list such as DICPATH, DIRSEP joins the components of a single path.
#if defined(_WIN32)
constexpr char PATHSEP = ';';
constexpr char DIRSEP = '\\';
#else
constexpr char PATHSEP = ':';
constexpr char DIRSEP = '/';
#endif
66
67 /**
68 * @brief Append the paths of the default directories to be searched for
69 * dictionaries.
70 * @param paths vector of directory paths to append to
71 */
append_default_dir_paths(std::vector<string> & paths)72 auto append_default_dir_paths(std::vector<string>& paths) -> void
73 {
74 auto dicpath = getenv("DICPATH");
75 if (dicpath && *dicpath)
76 split(dicpath, PATHSEP, paths);
77
78 #ifdef _POSIX_VERSION
79 auto home = getenv("HOME");
80 auto xdg_data_home = getenv("XDG_DATA_HOME");
81 if (xdg_data_home && *xdg_data_home)
82 paths.push_back(xdg_data_home + string("/hunspell"));
83 else if (home)
84 paths.push_back(home + string("/.local/share/hunspell"));
85
86 auto xdg_data_dirs = getenv("XDG_DATA_DIRS");
87 if (xdg_data_dirs && *xdg_data_dirs) {
88 auto data_dirs = string_view(xdg_data_dirs);
89
90 auto i = paths.size();
91 split(data_dirs, PATHSEP, paths);
92 for (; i != paths.size(); ++i)
93 paths[i] += "/hunspell";
94
95 i = paths.size();
96 split(data_dirs, PATHSEP, paths);
97 for (; i != paths.size(); ++i)
98 paths[i] += "/myspell";
99 }
100 else {
101 paths.push_back("/usr/local/share/hunspell");
102 paths.push_back("/usr/share/hunspell");
103 paths.push_back("/usr/local/share/myspell");
104 paths.push_back("/usr/share/myspell");
105 }
106 #if defined(__APPLE__) && defined(__MACH__)
107 auto osx = string("/Library/Spelling");
108 if (home) {
109 paths.push_back(home + osx);
110 }
111 paths.push_back(osx);
112 #endif
113 #endif
114 #ifdef _WIN32
115 auto winpaths = {getenv("LOCALAPPDATA"), getenv("PROGRAMDATA")};
116 for (auto& p : winpaths) {
117 if (p) {
118 paths.push_back(string(p) + "\\hunspell");
119 }
120 }
121 #endif
122 }
123
124 #ifdef _WIN32
125 class FileListerWindows {
126 struct _finddata_t data = {};
127 intptr_t handle = -1;
128 bool goodbit = false;
129
130 public:
FileListerWindows()131 FileListerWindows() {}
FileListerWindows(const char * pattern)132 FileListerWindows(const char* pattern) { first(pattern); }
FileListerWindows(const string & pattern)133 FileListerWindows(const string& pattern) { first(pattern); }
134 FileListerWindows(const FileListerWindows& d) = delete;
135 void operator=(const FileListerWindows& d) = delete;
~FileListerWindows()136 ~FileListerWindows() { close(); }
137
first(const char * pattern)138 auto first(const char* pattern) -> bool
139 {
140 close();
141 handle = _findfirst(pattern, &data);
142 goodbit = handle != -1;
143 return goodbit;
144 }
first(const string & pattern)145 auto first(const string& pattern) -> bool
146 {
147 return first(pattern.c_str());
148 }
149
name() const150 auto name() const -> const char* { return data.name; }
good() const151 auto good() const -> bool { return goodbit; }
next()152 auto next() -> bool
153 {
154 goodbit = _findnext(handle, &data) == 0;
155 return goodbit;
156 }
close()157 auto close() -> void
158 {
159 if (handle == -1)
160 return;
161 _findclose(handle);
162 handle = -1;
163 goodbit = false;
164 }
list_all()165 auto list_all() -> vector<string>
166 {
167 vector<string> ret;
168 for (; good(); next()) {
169 ret.push_back(name());
170 }
171 return ret;
172 }
173 };
174 #endif
175
176 #ifdef _POSIX_VERSION
/**
 * @brief Wrapper that owns a POSIX glob_t and remembers the status of the
 * most recent ::glob() call made on it.
 *
 * The matched paths are exposed as the range [begin(), end()). The glob_t is
 * released with globfree() before every new search and in the destructor.
 * Copying is disabled.
 */
class Globber {
	glob_t g = {};
	int ret = 1; // status of the last ::glob() call, 0 means success

public:
	Globber(const char* pattern) { ret = ::glob(pattern, 0, nullptr, &g); }
	Globber(const std::string& pattern) : Globber(pattern.c_str()) {}
	Globber(const Globber&) = delete;
	void operator=(const Globber&) = delete;
	~Globber() { globfree(&g); }

	/// Discards the previous results and searches for @p pattern.
	/// @return true if ::glob() reported success.
	auto glob(const char* pattern) -> bool
	{
		globfree(&g);
		ret = ::glob(pattern, 0, nullptr, &g);
		return ret == 0;
	}
	auto glob(const std::string& pattern) -> bool
	{
		return glob(pattern.c_str());
	}

	auto begin() -> const char* const* { return g.gl_pathv; }
	auto end() -> const char* const* { return g.gl_pathv + g.gl_pathc; }

	/// Appends the matched paths to @p out, but only when the last
	/// search succeeded.
	auto append_glob_paths_to(std::vector<std::string>& out) -> void
	{
		if (ret != 0)
			return;
		out.insert(out.end(), begin(), end());
	}
};
206 #elif defined(_WIN32)
207 class Globber {
208 vector<string> data;
209
210 public:
Globber(const char * pattern)211 Globber(const char* pattern) { glob(pattern); }
Globber(const string & pattern)212 Globber(const string& pattern) { glob(pattern); }
glob(const char * pattern)213 auto glob(const char* pattern) -> bool { return glob(string(pattern)); }
glob(const string & pattern)214 auto glob(const string& pattern) -> bool
215 {
216 data.clear();
217
218 if (pattern.empty())
219 return false;
220 auto first_two = pattern.substr(0, 2);
221 if (first_two == "\\\\" || first_two == "//" ||
222 first_two == "\\/" || first_two == "//")
223 return false;
224
225 auto q1 = vector<string>();
226 auto q2 = q1;
227 auto v = q1;
228
229 split_on_any_of(pattern, "\\/", v);
230 auto i = v.begin();
231 if (i == v.end())
232 return false;
233
234 FileListerWindows fl;
235
236 if (i->find(':') != i->npos) {
237 // absolute path
238 q1.push_back(*i++);
239 }
240 else if (pattern[0] == '\\' || pattern[0] == '/') {
241 // relative to drive
242 q1.push_back("");
243 }
244 else {
245 // relative
246 q1.push_back(".");
247 }
248 for (; i != v.end(); ++i) {
249 if (i->empty())
250 continue;
251 for (auto& q1e : q1) {
252 auto p = q1e + DIRSEP + *i;
253 // cout << "P " << p << endl;
254 fl.first(p.c_str());
255 for (; fl.good(); fl.next()) {
256
257 if (fl.name() == string(".") ||
258 fl.name() == string(".."))
259 continue;
260 auto n = q1e + DIRSEP + fl.name();
261 q2.push_back(n);
262 // cout << "Q2 " << n << endl;
263 }
264 }
265 q1.clear();
266 q1.swap(q2);
267 }
268 data.insert(data.end(), q1.begin(), q1.end());
269 return true;
270 }
begin()271 auto begin() -> vector<string>::iterator { return data.begin(); }
end()272 auto end() -> vector<string>::iterator { return data.end(); }
append_glob_paths_to(vector<string> & out)273 auto append_glob_paths_to(vector<string>& out) -> void
274 {
275 out.insert(out.end(), begin(), end());
276 }
277 };
278 #else
// Placeholder for platforms that have neither the POSIX nor the Windows
// implementation: every search fails and the result range is always empty.
struct Globber {
	Globber(const char*) {}
	Globber(const std::string&) {}
	auto glob(const char*) -> bool { return false; }
	auto glob(const std::string&) -> bool { return false; }
	auto begin() -> char** { return nullptr; }
	auto end() -> char** { return nullptr; }
	auto append_glob_paths_to(std::vector<std::string>&) -> void {}
};
289 #endif
290
291 /**
292 * @brief Append the paths of the LibreOffice's directories to be searched for
293 * dictionaries.
294 *
295 * @warning This function shall not be called from LibreOffice or modules that
296 * may end up being used by LibreOffice. It is mainly intended to be used by
297 * the CLI tool.
298 *
299 * @param paths vector of directory paths to append to
300 */
append_libreoffice_dir_paths(std::vector<std::string> & paths)301 auto append_libreoffice_dir_paths(std::vector<std::string>& paths) -> void
302 {
303 auto lo_user_glob = string();
304 #ifdef _POSIX_VERSION
305 // add LibreOffice Linux global paths
306 auto prefixes = {"/usr/local/lib/libreoffice", "/usr/lib/libreoffice",
307 "/opt/libreoffice*"};
308 for (auto& prefix : prefixes) {
309 Globber g(string(prefix) + "/share/extensions/dict-*");
310 g.append_glob_paths_to(paths);
311 }
312
313 // add LibreOffice Linux local
314
315 auto home = getenv("HOME");
316 if (home == nullptr)
317 return;
318 lo_user_glob = home;
319 lo_user_glob += "/.config/libreoffice/?/user/uno_packages/cache"
320 "/uno_packages/*/*.oxt/";
321 #elif defined(_WIN32)
322 // add Libreoffice Windows global paths
323 auto prefixes = {getenv("PROGRAMFILES"), getenv("PROGRAMFILES(x86)")};
324 for (auto& prefix : prefixes) {
325 if (prefix == nullptr)
326 continue;
327 Globber g(string(prefix) +
328 "\\LibreOffice ?\\share\\extensions\\dict-*");
329 g.append_glob_paths_to(paths);
330 }
331
332 auto home = getenv("APPDATA");
333 if (home == nullptr)
334 return;
335 lo_user_glob = home;
336 lo_user_glob += "\\libreoffice\\?\\user\\uno_packages\\cache"
337 "\\uno_packages\\*\\*.oxt\\";
338 #else
339 return;
340 #endif
341 // finish adding LibreOffice user path dicts (Linux and Windows)
342 Globber g(lo_user_glob + "dict*");
343 g.append_glob_paths_to(paths);
344
345 g.glob(lo_user_glob + "*.aff");
346 auto path_str = string();
347 for (auto& path : g) {
348 path_str = path;
349 path_str.erase(path_str.rfind(DIRSEP));
350 paths.push_back(path_str);
351 }
352 }
353
354 #if defined(_POSIX_VERSION) || defined(__MINGW32__)
/**
 * @brief Sequential reader of directory entries built on opendir() and
 * readdir().
 *
 * Usage: open() a directory, then call next() until it returns false; after
 * each successful next() the name of the entry is available from
 * entry_name(). The directory stream is closed by close(), by the next open()
 * and by the destructor. Copying is disabled.
 *
 * Calling next() or entry_name() while no directory is open is safe: they
 * return false and nullptr respectively.
 */
class Directory {
	DIR* dp = nullptr;
	struct dirent* ent_p = nullptr;

public:
	Directory() = default;
	Directory(const Directory& d) = delete;
	void operator=(const Directory& d) = delete;
	~Directory() { close(); }

	/// Closes any open directory and opens @p dirname.
	/// @return true if the directory could be opened.
	auto open(const std::string& dirname) -> bool
	{
		close();
		dp = opendir(dirname.c_str());
		return dp != nullptr;
	}

	/// Reads the next entry.
	/// @return true if there was one, false at the end of the directory,
	/// on error or when no directory is open.
	auto next() -> bool
	{
		// readdir() must not be handed a null stream
		ent_p = dp ? readdir(dp) : nullptr;
		return ent_p != nullptr;
	}

	/// Name of the entry read by the last successful next(), or nullptr
	/// if there is no current entry.
	auto entry_name() const -> const char*
	{
		return ent_p ? ent_p->d_name : nullptr;
	}

	/// Closes the directory, if one is open.
	auto close() -> void
	{
		// the entry was obtained from the stream that is about to be
		// closed, so it must not be used afterwards
		ent_p = nullptr;
		if (dp) {
			(void)closedir(dp);
			dp = nullptr;
		}
	}
};
380 #elif defined(_WIN32)
381 class Directory {
382 FileListerWindows fl;
383 bool first = true;
384
385 public:
Directory()386 Directory() {}
387 Directory(const Directory& d) = delete;
388 void operator=(const Directory& d) = delete;
open(const string & dirname)389 auto open(const string& dirname) -> bool
390 {
391 fl.first(dirname + "\\*");
392 first = true;
393 return fl.good();
394 }
next()395 auto next() -> bool
396 {
397 if (first)
398 first = false;
399 else
400 fl.next();
401 return fl.good();
402 }
entry_name() const403 auto entry_name() const -> const char* { return fl.name(); }
close()404 auto close() -> void { fl.close(); }
405 };
406 #else
// Placeholder for platforms without a directory listing implementation:
// nothing can be opened and there are never any entries.
struct Directory {
	Directory() = default;
	Directory(const Directory&) = delete;
	void operator=(const Directory&) = delete;
	auto open(const std::string&) -> bool { return false; }
	auto next() -> bool { return false; }
	auto entry_name() const -> const char* { return nullptr; }
	auto close() -> void {}
};
416 #endif
417
418 /**
419 * @brief Search a directory for dictionaries.
420 *
421 * This function searches the directory for files that represent a dictionary
422 * and for each one found it appends the pair of dictionary name and filepath to
423 * dictionary, both without the filename extension (.aff or .dic).
424 *
425 * For example for the files /dict/dir/en_US.dic and /dict/dir/en_US.aff the
426 * following pair will be appended ("en_US", "/dict/dir/en_US").
427 *
428 * @todo At some point this API should be made to be more strongly typed.
429 * Instead of using that pair of strings to represent the dictionary files, a
430 * new class should be created with three public functions, getters, that would
431 * return the name, the path to the .aff file (with filename extension to avoid
432 * confusions) and the path to the .dic file. The C++ 17 std::filesystem::path
433 * should probably be used. It is unspecified to the public what this class
434 * holds privately, but it should probably hold only one path to the aff file.
435 * For the directory paths, it is simple, just use the type
436 * std::filesystem::path. When this API is created, the same function names
437 * should be used, added as overloads. The old API should be marked as
438 * deprecated. This should be done when we start requiring GCC 9 which supports
439 * C++ 17 filesystem out of the box. GCC 8 has this too, but it is somewhat
440 * experimental and requires manually linking to additional static library.
441 *
442 * @param dir_path path to directory
443 * @param dict_list vector to append the found dictionaries to
444 */
search_dir_for_dicts(const string & dir_path,vector<pair<string,string>> & dict_list)445 auto search_dir_for_dicts(const string& dir_path,
446 vector<pair<string, string>>& dict_list) -> void
447 {
448 Directory d;
449 if (d.open(dir_path) == false)
450 return;
451
452 unordered_set<string> dics;
453 string file_name;
454 while (d.next()) {
455 file_name = d.entry_name();
456 auto sz = file_name.size();
457 if (sz < 4)
458 continue;
459
460 if (file_name.compare(sz - 4, 4, ".dic") == 0) {
461 dics.insert(file_name);
462 file_name.replace(sz - 4, 4, ".aff");
463 }
464 else if (file_name.compare(sz - 4, 4, ".aff") == 0) {
465 dics.insert(file_name);
466 file_name.replace(sz - 4, 4, ".dic");
467 }
468 else {
469 continue;
470 }
471 if (dics.count(file_name)) {
472 file_name.erase(sz - 4);
473 auto full_path = dir_path + DIRSEP + file_name;
474 dict_list.emplace_back(move(file_name),
475 move(full_path));
476 }
477 }
478 }
479
480 /**
481 * @brief Search the directories for dictionaries.
482 *
483 * @see search_dir_for_dicts()
484 *
485 * @param dir_paths list of paths to directories
486 * @param dict_list vector to append the found dictionaries to
487 */
search_dirs_for_dicts(const std::vector<string> & dir_paths,std::vector<std::pair<string,string>> & dict_list)488 auto search_dirs_for_dicts(const std::vector<string>& dir_paths,
489 std::vector<std::pair<string, string>>& dict_list)
490 -> void
491 {
492 for (auto& p : dir_paths)
493 search_dir_for_dicts(p, dict_list);
494 }
495
496 /**
497 * @brief Search the default directories for dictionaries.
498 *
499 * @see append_default_dir_paths()
500 * @see search_dirs_for_dicts()
501 *
502 * @param dict_list vector to append the found dictionaries to
503 */
search_default_dirs_for_dicts(std::vector<std::pair<std::string,std::string>> & dict_list)504 auto search_default_dirs_for_dicts(
505 std::vector<std::pair<std::string, std::string>>& dict_list) -> void
506 {
507 auto dir_paths = vector<string>();
508 append_default_dir_paths(dir_paths);
509 search_dirs_for_dicts(dir_paths, dict_list);
510 }
511
/**
 * @brief Find the path of a dictionary given its name.
 *
 * Performs a linear search and stops at the first element whose name, the
 * first member of the pair, is equal to @p dict_name.
 *
 * @param dict_list vector of pairs with name and paths
 * @param dict_name dictionary name
 * @return iterator of @p dict_list that points to the found dictionary or end
 * if not found.
 */
auto find_dictionary(
    const std::vector<std::pair<std::string, std::string>>& dict_list,
    const std::string& dict_name)
    -> std::vector<std::pair<std::string, std::string>>::const_iterator
{
	auto it = dict_list.begin();
	const auto last = dict_list.end();
	while (it != last && it->first != dict_name)
		++it;
	return it;
}
530
Dict_Finder_For_CLI_Tool()531 Dict_Finder_For_CLI_Tool::Dict_Finder_For_CLI_Tool()
532 {
533 append_default_dir_paths(dir_paths);
534 append_libreoffice_dir_paths(dir_paths);
535 dir_paths.push_back(".");
536 search_dirs_for_dicts(dir_paths, dict_multimap);
537 stable_sort(begin(dict_multimap), end(dict_multimap),
538 [](auto& a, auto& b) { return a.first < b.first; });
539 }
540
541 /**
542 * @internal
543 * @brief Gets the dictionary path.
544 *
545 * If path is given (contains slash) it returns the input argument,
546 * otherwise searches the found dictionaries by their name and returns their
547 * path.
548 *
549 * @param dict name or path of dictionary without the trailing .aff/.dic.
550 * @return the path to dictionary or empty if does not exists.
551 */
get_dictionary_path(const std::string & dict) const552 auto Dict_Finder_For_CLI_Tool::get_dictionary_path(
553 const std::string& dict) const -> std::string
554 {
555 #ifdef _WIN32
556 const auto SEPARATORS = "\\/";
557 #else
558 const auto SEPARATORS = '/';
559 #endif
560 // first check if it is a path
561 if (dict.find_first_of(SEPARATORS) != dict.npos) {
562 // a path
563 return dict;
564 }
565 else {
566 // search list
567 auto x = find_dictionary(dict_multimap, dict);
568 if (x != end(dict_multimap))
569 return x->second;
570 }
571 return {};
572 }
573 } // namespace v5
574 } // namespace nuspell
575