1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * Copyright (C) 2002-2017 Németh László
5  *
6  * The contents of this file are subject to the Mozilla Public License Version
7  * 1.1 (the "License"); you may not use this file except in compliance with
8  * the License. You may obtain a copy of the License at
9  * http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the
14  * License.
15  *
16  * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
17  *
18  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
19  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
20  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
21  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
22  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
23  *
24  * Alternatively, the contents of this file may be used under the terms of
25  * either the GNU General Public License Version 2 or later (the "GPL"), or
26  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27  * in which case the provisions of the GPL or the LGPL are applicable instead
28  * of those above. If you wish to allow use of your version of this file only
29  * under the terms of either the GPL or the LGPL, and not to allow others to
30  * use your version of this file under the terms of the MPL, indicate your
31  * decision by deleting the provisions above and replace them with the notice
32  * and other provisions required by the GPL or the LGPL. If you do not delete
33  * the provisions above, a recipient may use your version of this file under
34  * the terms of any one of the MPL, the GPL or the LGPL.
35  *
36  * ***** END LICENSE BLOCK ***** */
37 /*
38  * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
39  * And Contributors.  All rights reserved.
40  *
41  * Redistribution and use in source and binary forms, with or without
42  * modification, are permitted provided that the following conditions
43  * are met:
44  *
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  *
48  * 2. Redistributions in binary form must reproduce the above copyright
49  *    notice, this list of conditions and the following disclaimer in the
50  *    documentation and/or other materials provided with the distribution.
51  *
52  * 3. All modifications to the source code must be clearly marked as
53  *    such.  Binary redistributions based on modified source code
54  *    must be clearly marked as modified versions in the documentation
55  *    and/or other materials provided with the distribution.
56  *
57  * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
58  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
59  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
60  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
61  * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
62  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
63  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
64  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 #ifndef SUGGESTMGR_HXX_
72 #define SUGGESTMGR_HXX_
73 
// Numeric limits / default counts. Their consumers are not visible in this
// header. NOTE(review): the names suggest caps on candidate roots, words,
// guesses and on n-gram / phonetic / compound suggestions -- confirm at the
// points of use.
#define MAX_ROOTS        100
#define MAX_WORDS        100
#define MAX_GUESS        200
#define MAXNGRAMSUGS     4
#define MAXPHONSUGS      2
#define MAXCOMPOUNDSUGS  3

// Single-bit flags (bits 0..3), so they can be combined with bitwise OR.
// NOTE(review): presumably passed as the `opt` argument of
// SuggestMgr::ngram() -- confirm.
#define NGRAM_LONGER_WORSE  (1 << 0)
#define NGRAM_ANY_MISMATCH  (1 << 1)
#define NGRAM_LOWERING      (1 << 2)
#define NGRAM_WEIGHTED      (1 << 3)
85 
86 #include "atypes.hxx"
87 #include "affixmgr.hxx"
88 #include "hashmgr.hxx"
89 #include "langnum.hxx"
90 
// Three direction codes with the values 0, 1 and 2.
// NOTE(review): presumably the back-pointer markers used by the
// longest-common-subsequence helper (SuggestMgr::lcs) -- confirm in the
// implementation.
enum {
  LCS_UP = 0,
  LCS_LEFT = 1,
  LCS_UPLEFT = 2
};
92 
93 class SuggestMgr {
94  private:
95   SuggestMgr(const SuggestMgr&);
96   SuggestMgr& operator=(const SuggestMgr&);
97 
98  private:
99   char* ckey;
100   size_t ckeyl;
101   std::vector<w_char> ckey_utf;
102 
103   char* ctry;
104   size_t ctryl;
105   std::vector<w_char> ctry_utf;
106   bool lang_with_dash_usage;
107 
108   AffixMgr* pAMgr;
109   unsigned int maxSug;
110   struct cs_info* csconv;
111   int utf8;
112   int langnum;
113   int nosplitsugs;
114   int maxngramsugs;
115   int maxcpdsugs;
116   int complexprefixes;
117 
118  public:
119   SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr);
120   ~SuggestMgr();
121 
122   bool suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug);
123   void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr, int captype);
124 
125   std::string suggest_morph(const std::string& word);
126   std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern);
127 
128  private:
129   void testsug(std::vector<std::string>& wlst,
130                const std::string& candidate,
131                int cpdsuggest,
132                int* timer,
133                clock_t* timelimit);
134   int checkword(const std::string& word, int, int*, clock_t*);
135   int check_forbidden(const char*, int);
136 
137   void capchars(std::vector<std::string>&, const char*, int);
138   int replchars(std::vector<std::string>&, const char*, int);
139   int doubletwochars(std::vector<std::string>&, const char*, int);
140   int forgotchar(std::vector<std::string>&, const char*, int);
141   int swapchar(std::vector<std::string>&, const char*, int);
142   int longswapchar(std::vector<std::string>&, const char*, int);
143   int movechar(std::vector<std::string>&, const char*, int);
144   int extrachar(std::vector<std::string>&, const char*, int);
145   int badcharkey(std::vector<std::string>&, const char*, int);
146   int badchar(std::vector<std::string>&, const char*, int);
147   bool twowords(std::vector<std::string>&, const char*, int, bool);
148 
149   void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int);
150   int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int);
151   int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
152   int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int);
153   int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int);
154   int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
155   int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
156   int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int);
157   int movechar_utf(std::vector<std::string>&, const w_char*, int, int);
158 
159   int mapchars(std::vector<std::string>&, const char*, int);
160   int map_related(const char*,
161                   std::string&,
162                   int,
163                   std::vector<std::string>& wlst,
164                   int,
165                   const std::vector<mapentry>&,
166                   int*,
167                   clock_t*);
168   int ngram(int n, const std::vector<w_char>& su1,
169             const std::vector<w_char>& su2, int opt);
170   int ngram(int n, const std::string& s1, const std::string& s2, int opt);
171   int mystrlen(const char* word);
172   int leftcommonsubstring(const std::vector<w_char>& su1,
173                           const std::vector<w_char>& su2);
174   int leftcommonsubstring(const char* s1, const char* s2);
175   int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
176   void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
177   void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
178   int lcslen(const char* s, const char* s2);
179   int lcslen(const std::string& s, const std::string& s2);
180   std::string suggest_hentry_gen(hentry* rv, const char* pattern);
181 };
182 
183 #endif
184