1 /* Copyright 2016-2021 Dimitrij Mijoski
2  *
3  * This file is part of Nuspell.
4  *
5  * Nuspell is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * Nuspell is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with Nuspell.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "finder.hxx"
20 #include "utils.hxx"
21 
22 #include <algorithm>
23 #include <array>
24 #include <iostream>
25 #include <iterator>
26 #include <sstream>
27 #include <unordered_set>
28 #include <utility>
29 
30 #if !defined(_WIN32) &&                                                        \
31     (defined(__unix__) || defined(__unix) ||                                   \
32      (defined(__APPLE__) && defined(__MACH__)) || defined(__HAIKU__))
33 #include <unistd.h>
34 #ifdef _POSIX_VERSION
35 #include <dirent.h>
36 #include <glob.h>
37 #include <sys/stat.h>
38 #include <sys/types.h>
39 #endif
40 
41 #elif defined(_WIN32)
42 
43 #include <io.h>
44 #include <windows.h>
45 
46 #ifdef __MINGW32__
47 #include <dirent.h>
48 //#include <glob.h> //not present in mingw-w64. present in vanilla mingw
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #endif //__MINGW32__
52 
53 #endif
54 
55 using namespace std;
56 
57 namespace nuspell {
58 inline namespace v5 {
// PATHSEP separates the entries of a path-list environment variable,
// DIRSEP separates the components of a single file system path.
#if defined(_WIN32)
constexpr char PATHSEP = ';';
constexpr char DIRSEP = '\\';
#else
constexpr char PATHSEP = ':';
constexpr char DIRSEP = '/';
#endif
66 
67 /**
68  * @brief Append the paths of the default directories to be searched for
69  * dictionaries.
70  * @param paths vector of directory paths to append to
71  */
append_default_dir_paths(std::vector<string> & paths)72 auto append_default_dir_paths(std::vector<string>& paths) -> void
73 {
74 	auto dicpath = getenv("DICPATH");
75 	if (dicpath && *dicpath)
76 		split(dicpath, PATHSEP, paths);
77 
78 #ifdef _POSIX_VERSION
79 	auto home = getenv("HOME");
80 	auto xdg_data_home = getenv("XDG_DATA_HOME");
81 	if (xdg_data_home && *xdg_data_home)
82 		paths.push_back(xdg_data_home + string("/hunspell"));
83 	else if (home)
84 		paths.push_back(home + string("/.local/share/hunspell"));
85 
86 	auto xdg_data_dirs = getenv("XDG_DATA_DIRS");
87 	if (xdg_data_dirs && *xdg_data_dirs) {
88 		auto data_dirs = string_view(xdg_data_dirs);
89 
90 		auto i = paths.size();
91 		split(data_dirs, PATHSEP, paths);
92 		for (; i != paths.size(); ++i)
93 			paths[i] += "/hunspell";
94 
95 		i = paths.size();
96 		split(data_dirs, PATHSEP, paths);
97 		for (; i != paths.size(); ++i)
98 			paths[i] += "/myspell";
99 	}
100 	else {
101 		paths.push_back("/usr/local/share/hunspell");
102 		paths.push_back("/usr/share/hunspell");
103 		paths.push_back("/usr/local/share/myspell");
104 		paths.push_back("/usr/share/myspell");
105 	}
106 #if defined(__APPLE__) && defined(__MACH__)
107 	auto osx = string("/Library/Spelling");
108 	if (home) {
109 		paths.push_back(home + osx);
110 	}
111 	paths.push_back(osx);
112 #endif
113 #endif
114 #ifdef _WIN32
115 	auto winpaths = {getenv("LOCALAPPDATA"), getenv("PROGRAMDATA")};
116 	for (auto& p : winpaths) {
117 		if (p) {
118 			paths.push_back(string(p) + "\\hunspell");
119 		}
120 	}
121 #endif
122 }
123 
#ifdef _WIN32
/**
 * @brief Lists the file names matching a pattern using the C runtime
 * functions `_findfirst`, `_findnext` and `_findclose`.
 *
 * The object owns the search handle: it is released by close() and by the
 * destructor. A handle value of -1 means that no search is open. The class is
 * not copyable.
 */
class FileListerWindows {
	struct _finddata_t data = {};
	intptr_t handle = -1;
	bool goodbit = false;

      public:
	FileListerWindows() = default;
	FileListerWindows(const char* pattern) { first(pattern); }
	FileListerWindows(const string& pattern) { first(pattern); }
	FileListerWindows(const FileListerWindows& d) = delete;
	void operator=(const FileListerWindows& d) = delete;
	~FileListerWindows() { close(); }

	/**
	 * @brief Closes any previous search and starts a new one.
	 * @param pattern file name pattern handed to `_findfirst`
	 * @return true if a first matching entry is available via name()
	 */
	auto first(const char* pattern) -> bool
	{
		close();
		handle = _findfirst(pattern, &data);
		goodbit = handle != -1;
		return goodbit;
	}
	auto first(const string& pattern) -> bool
	{
		return first(pattern.c_str());
	}

	/** @return name of the current entry; meaningful only while good(). */
	auto name() const -> const char* { return data.name; }
	/** @return true if the last first() or next() produced an entry. */
	auto good() const -> bool { return goodbit; }
	/**
	 * @brief Advances to the next matching entry.
	 * @return true if another entry is available, false at the end of the
	 * listing or when no search is open.
	 */
	auto next() -> bool
	{
		// Never hand the invalid handle (-1) to _findnext; a closed or
		// failed lister simply reports that nothing more is available.
		goodbit = handle != -1 && _findnext(handle, &data) == 0;
		return goodbit;
	}
	/** @brief Releases the search handle, if any. Safe to call repeatedly. */
	auto close() -> void
	{
		if (handle == -1)
			return;
		_findclose(handle);
		handle = -1;
		goodbit = false;
	}
	/**
	 * @brief Collects the names of the current and all remaining entries.
	 * @return the collected names, empty if the lister is not good()
	 */
	auto list_all() -> vector<string>
	{
		vector<string> ret;
		for (; good(); next()) {
			ret.push_back(name());
		}
		return ret;
	}
};
#endif
175 
176 #ifdef _POSIX_VERSION
/**
 * @brief Owner of a POSIX `glob_t` that holds the matches of one pattern.
 *
 * The matches of the most recent successful call are exposed as the range
 * [begin(), end()). After a failed call the range is empty, which is the same
 * view that append_glob_paths_to() gives. The `glob_t` is released with
 * `globfree` in the destructor. The class is not copyable.
 */
class Globber {
      private:
	glob_t g = {};
	int ret = 1; // return value of the last ::glob call, 0 means success

      public:
	Globber(const char* pattern) { ret = ::glob(pattern, 0, nullptr, &g); }
	Globber(const std::string& pattern) : Globber(pattern.c_str()) {}
	Globber(const Globber&) = delete;
	auto operator=(const Globber&) -> Globber& = delete;
	/**
	 * @brief Discards the previous matches and matches a new pattern.
	 * @param pattern glob pattern
	 * @return true if `::glob` reported success
	 */
	auto glob(const char* pattern) -> bool
	{
		globfree(&g);
		ret = ::glob(pattern, 0, nullptr, &g);
		return ret == 0;
	}
	auto glob(const std::string& pattern) -> bool
	{
		return glob(pattern.c_str());
	}
	auto begin() -> const char* const* { return g.gl_pathv; }
	auto end() -> const char* const*
	{
		// Do not expose whatever a failed ::glob call left in the
		// glob_t; a failed match is an empty range.
		return ret == 0 ? begin() + g.gl_pathc : begin();
	}
	/**
	 * @brief Appends all matched paths to @p out.
	 * @param out vector to append to; untouched if the last match failed
	 */
	auto append_glob_paths_to(std::vector<std::string>& out) -> void
	{
		out.insert(out.end(), begin(), end());
	}
	~Globber() { globfree(&g); }
};
206 #elif defined(_WIN32)
207 class Globber {
208 	vector<string> data;
209 
210       public:
Globber(const char * pattern)211 	Globber(const char* pattern) { glob(pattern); }
Globber(const string & pattern)212 	Globber(const string& pattern) { glob(pattern); }
glob(const char * pattern)213 	auto glob(const char* pattern) -> bool { return glob(string(pattern)); }
glob(const string & pattern)214 	auto glob(const string& pattern) -> bool
215 	{
216 		data.clear();
217 
218 		if (pattern.empty())
219 			return false;
220 		auto first_two = pattern.substr(0, 2);
221 		if (first_two == "\\\\" || first_two == "//" ||
222 		    first_two == "\\/" || first_two == "//")
223 			return false;
224 
225 		auto q1 = vector<string>();
226 		auto q2 = q1;
227 		auto v = q1;
228 
229 		split_on_any_of(pattern, "\\/", v);
230 		auto i = v.begin();
231 		if (i == v.end())
232 			return false;
233 
234 		FileListerWindows fl;
235 
236 		if (i->find(':') != i->npos) {
237 			// absolute path
238 			q1.push_back(*i++);
239 		}
240 		else if (pattern[0] == '\\' || pattern[0] == '/') {
241 			// relative to drive
242 			q1.push_back("");
243 		}
244 		else {
245 			// relative
246 			q1.push_back(".");
247 		}
248 		for (; i != v.end(); ++i) {
249 			if (i->empty())
250 				continue;
251 			for (auto& q1e : q1) {
252 				auto p = q1e + DIRSEP + *i;
253 				// cout << "P " << p << endl;
254 				fl.first(p.c_str());
255 				for (; fl.good(); fl.next()) {
256 
257 					if (fl.name() == string(".") ||
258 					    fl.name() == string(".."))
259 						continue;
260 					auto n = q1e + DIRSEP + fl.name();
261 					q2.push_back(n);
262 					// cout << "Q2 " << n << endl;
263 				}
264 			}
265 			q1.clear();
266 			q1.swap(q2);
267 		}
268 		data.insert(data.end(), q1.begin(), q1.end());
269 		return true;
270 	}
begin()271 	auto begin() -> vector<string>::iterator { return data.begin(); }
end()272 	auto end() -> vector<string>::iterator { return data.end(); }
append_glob_paths_to(vector<string> & out)273 	auto append_glob_paths_to(vector<string>& out) -> void
274 	{
275 		out.insert(out.end(), begin(), end());
276 	}
277 };
278 #else
// Unimplemented fallback for platforms that are neither POSIX nor Windows:
// every pattern matches nothing and the result range is always empty.
struct Globber {
	Globber(const char* /*pattern*/) {}
	Globber(const std::string& /*pattern*/) {}
	auto glob(const char* /*pattern*/) -> bool { return false; }
	auto glob(const std::string& /*pattern*/) -> bool { return false; }
	auto begin() -> char** { return nullptr; }
	auto end() -> char** { return nullptr; }
	auto append_glob_paths_to(std::vector<std::string>& /*out*/) -> void {}
};
289 #endif
290 
291 /**
292  * @brief Append the paths of the LibreOffice's directories to be searched for
293  * dictionaries.
294  *
295  * @warning This function shall not be called from LibreOffice or modules that
296  * may end up being used by LibreOffice. It is mainly intended to be used by
297  * the CLI tool.
298  *
299  * @param paths vector of directory paths to append to
300  */
append_libreoffice_dir_paths(std::vector<std::string> & paths)301 auto append_libreoffice_dir_paths(std::vector<std::string>& paths) -> void
302 {
303 	auto lo_user_glob = string();
304 #ifdef _POSIX_VERSION
305 	// add LibreOffice Linux global paths
306 	auto prefixes = {"/usr/local/lib/libreoffice", "/usr/lib/libreoffice",
307 	                 "/opt/libreoffice*"};
308 	for (auto& prefix : prefixes) {
309 		Globber g(string(prefix) + "/share/extensions/dict-*");
310 		g.append_glob_paths_to(paths);
311 	}
312 
313 	// add LibreOffice Linux local
314 
315 	auto home = getenv("HOME");
316 	if (home == nullptr)
317 		return;
318 	lo_user_glob = home;
319 	lo_user_glob += "/.config/libreoffice/?/user/uno_packages/cache"
320 	                "/uno_packages/*/*.oxt/";
321 #elif defined(_WIN32)
322 	// add Libreoffice Windows global paths
323 	auto prefixes = {getenv("PROGRAMFILES"), getenv("PROGRAMFILES(x86)")};
324 	for (auto& prefix : prefixes) {
325 		if (prefix == nullptr)
326 			continue;
327 		Globber g(string(prefix) +
328 		          "\\LibreOffice ?\\share\\extensions\\dict-*");
329 		g.append_glob_paths_to(paths);
330 	}
331 
332 	auto home = getenv("APPDATA");
333 	if (home == nullptr)
334 		return;
335 	lo_user_glob = home;
336 	lo_user_glob += "\\libreoffice\\?\\user\\uno_packages\\cache"
337 	                "\\uno_packages\\*\\*.oxt\\";
338 #else
339 	return;
340 #endif
341 	// finish adding LibreOffice user path dicts (Linux and Windows)
342 	Globber g(lo_user_glob + "dict*");
343 	g.append_glob_paths_to(paths);
344 
345 	g.glob(lo_user_glob + "*.aff");
346 	auto path_str = string();
347 	for (auto& path : g) {
348 		path_str = path;
349 		path_str.erase(path_str.rfind(DIRSEP));
350 		paths.push_back(path_str);
351 	}
352 }
353 
354 #if defined(_POSIX_VERSION) || defined(__MINGW32__)
/**
 * @brief Iterates over the entries of a directory using `opendir`,
 * `readdir` and `closedir`.
 *
 * The object owns the directory stream: it is released by close() and by the
 * destructor. The class is not copyable.
 */
class Directory {
	DIR* dp = nullptr;
	struct dirent* ent_p = nullptr;

      public:
	Directory() = default;
	Directory(const Directory& d) = delete;
	void operator=(const Directory& d) = delete;
	/**
	 * @brief Closes any previously opened directory and opens a new one.
	 * @param dirname path of the directory
	 * @return true if the directory was opened
	 */
	auto open(const std::string& dirname) -> bool
	{
		close();
		dp = opendir(dirname.c_str());
		return dp != nullptr;
	}
	/**
	 * @brief Advances to the next directory entry.
	 * @return true if an entry is available through entry_name(), false
	 * at the end of the listing or when no directory is open.
	 */
	auto next() -> bool
	{
		// readdir must not be called with a null stream
		ent_p = dp ? readdir(dp) : nullptr;
		return ent_p != nullptr;
	}
	/**
	 * @return name of the current entry. May only be called after next()
	 * returned true.
	 */
	auto entry_name() const -> const char* { return ent_p->d_name; }
	/** @brief Closes the directory, if open. Safe to call repeatedly. */
	auto close() -> void
	{
		// drop the entry pointer so that it never outlives the stream
		ent_p = nullptr;
		if (dp) {
			(void)closedir(dp);
			dp = nullptr;
		}
	}
	~Directory() { close(); }
};
380 #elif defined(_WIN32)
381 class Directory {
382 	FileListerWindows fl;
383 	bool first = true;
384 
385       public:
Directory()386 	Directory() {}
387 	Directory(const Directory& d) = delete;
388 	void operator=(const Directory& d) = delete;
open(const string & dirname)389 	auto open(const string& dirname) -> bool
390 	{
391 		fl.first(dirname + "\\*");
392 		first = true;
393 		return fl.good();
394 	}
next()395 	auto next() -> bool
396 	{
397 		if (first)
398 			first = false;
399 		else
400 			fl.next();
401 		return fl.good();
402 	}
entry_name() const403 	auto entry_name() const -> const char* { return fl.name(); }
close()404 	auto close() -> void { fl.close(); }
405 };
406 #else
// Unimplemented fallback for platforms without a supported directory listing
// API: no directory can be opened and there are never any entries.
struct Directory {
	Directory() = default;
	Directory(const Directory&) = delete;
	void operator=(const Directory&) = delete;
	auto open(const std::string& /*dirname*/) -> bool { return false; }
	auto next() -> bool { return false; }
	auto entry_name() const -> const char* { return nullptr; }
	auto close() -> void {}
};
416 #endif
417 
418 /**
419  * @brief Search a directory for dictionaries.
420  *
421  * This function searches the directory for files that represent a dictionary
422  * and for each one found it appends the pair of dictionary name and filepath to
423  * dictionary, both without the filename extension (.aff or .dic).
424  *
425  * For example for the files /dict/dir/en_US.dic and /dict/dir/en_US.aff the
426  * following pair will be appended ("en_US", "/dict/dir/en_US").
427  *
428  * @todo At some point this API should be made to be more strongly typed.
429  * Instead of using that pair of strings to represent the dictionary files, a
430  * new class should be created with three public functions, getters, that would
431  * return the name, the path to the .aff file (with filename extension to avoid
432  * confusions) and the path to the .dic file. The C++ 17 std::filesystem::path
433  * should probably be used. It is unspecified to the public what this class
434  * holds privately, but it should probably hold only one path to the aff file.
435  * For the directory paths, it is simple, just use the type
436  * std::filesystem::path. When this API is created, the same function names
437  * should be used, added as overloads. The old API should be marked as
438  * deprecated. This should be done when we start requiring GCC 9 which supports
439  * C++ 17 filesystem out of the box. GCC 8 has this too, but it is somewhat
440  * experimental and requires manually linking to additional static library.
441  *
442  * @param dir_path path to directory
443  * @param dict_list vector to append the found dictionaries to
444  */
search_dir_for_dicts(const string & dir_path,vector<pair<string,string>> & dict_list)445 auto search_dir_for_dicts(const string& dir_path,
446                           vector<pair<string, string>>& dict_list) -> void
447 {
448 	Directory d;
449 	if (d.open(dir_path) == false)
450 		return;
451 
452 	unordered_set<string> dics;
453 	string file_name;
454 	while (d.next()) {
455 		file_name = d.entry_name();
456 		auto sz = file_name.size();
457 		if (sz < 4)
458 			continue;
459 
460 		if (file_name.compare(sz - 4, 4, ".dic") == 0) {
461 			dics.insert(file_name);
462 			file_name.replace(sz - 4, 4, ".aff");
463 		}
464 		else if (file_name.compare(sz - 4, 4, ".aff") == 0) {
465 			dics.insert(file_name);
466 			file_name.replace(sz - 4, 4, ".dic");
467 		}
468 		else {
469 			continue;
470 		}
471 		if (dics.count(file_name)) {
472 			file_name.erase(sz - 4);
473 			auto full_path = dir_path + DIRSEP + file_name;
474 			dict_list.emplace_back(move(file_name),
475 			                       move(full_path));
476 		}
477 	}
478 }
479 
480 /**
481  * @brief Search the directories for dictionaries.
482  *
483  * @see search_dir_for_dicts()
484  *
485  * @param dir_paths list of paths to directories
486  * @param dict_list vector to append the found dictionaries to
487  */
search_dirs_for_dicts(const std::vector<string> & dir_paths,std::vector<std::pair<string,string>> & dict_list)488 auto search_dirs_for_dicts(const std::vector<string>& dir_paths,
489                            std::vector<std::pair<string, string>>& dict_list)
490     -> void
491 {
492 	for (auto& p : dir_paths)
493 		search_dir_for_dicts(p, dict_list);
494 }
495 
496 /**
497  * @brief Search the default directories for dictionaries.
498  *
499  * @see append_default_dir_paths()
500  * @see search_dirs_for_dicts()
501  *
502  * @param dict_list vector to append the found dictionaries to
503  */
search_default_dirs_for_dicts(std::vector<std::pair<std::string,std::string>> & dict_list)504 auto search_default_dirs_for_dicts(
505     std::vector<std::pair<std::string, std::string>>& dict_list) -> void
506 {
507 	auto dir_paths = vector<string>();
508 	append_default_dir_paths(dir_paths);
509 	search_dirs_for_dicts(dir_paths, dict_list);
510 }
511 
/**
 * @brief Find dictionary path given the name.
 *
 * Find the first dictionary whose name matches @p dict_name exactly.
 *
 * @param dict_list vector of pairs with name and paths
 * @param dict_name dictionary name
 * @return iterator of @p dict_list that points to the found dictionary or end
 * if not found.
 */
auto find_dictionary(
    const std::vector<std::pair<std::string, std::string>>& dict_list,
    const std::string& dict_name)
    -> std::vector<std::pair<std::string, std::string>>::const_iterator
{
	auto has_requested_name =
	    [&dict_name](const std::pair<std::string, std::string>& entry) {
		    return entry.first == dict_name;
	    };
	return std::find_if(dict_list.cbegin(), dict_list.cend(),
	                    has_requested_name);
}
530 
Dict_Finder_For_CLI_Tool()531 Dict_Finder_For_CLI_Tool::Dict_Finder_For_CLI_Tool()
532 {
533 	append_default_dir_paths(dir_paths);
534 	append_libreoffice_dir_paths(dir_paths);
535 	dir_paths.push_back(".");
536 	search_dirs_for_dicts(dir_paths, dict_multimap);
537 	stable_sort(begin(dict_multimap), end(dict_multimap),
538 	            [](auto& a, auto& b) { return a.first < b.first; });
539 }
540 
541 /**
542  * @internal
543  * @brief Gets the dictionary path.
544  *
545  * If path is given (contains slash) it returns the input argument,
546  * otherwise searches the found dictionaries by their name and returns their
547  * path.
548  *
549  * @param dict name or path of dictionary without the trailing .aff/.dic.
550  * @return the path to dictionary or empty if does not exists.
551  */
get_dictionary_path(const std::string & dict) const552 auto Dict_Finder_For_CLI_Tool::get_dictionary_path(
553     const std::string& dict) const -> std::string
554 {
555 #ifdef _WIN32
556 	const auto SEPARATORS = "\\/";
557 #else
558 	const auto SEPARATORS = '/';
559 #endif
560 	// first check if it is a path
561 	if (dict.find_first_of(SEPARATORS) != dict.npos) {
562 		// a path
563 		return dict;
564 	}
565 	else {
566 		// search list
567 		auto x = find_dictionary(dict_multimap, dict);
568 		if (x != end(dict_multimap))
569 			return x->second;
570 	}
571 	return {};
572 }
573 } // namespace v5
574 } // namespace nuspell
575