1 /* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * The contents of this file are subject to the Mozilla Public License Version 5 * 1.1 (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * http://www.mozilla.org/MPL/ 8 * 9 * Software distributed under the License is distributed on an "AS IS" basis, 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 * for the specific language governing rights and limitations under the 12 * License. 13 * 14 * The Original Code is Hunspell, based on MySpell. 15 * 16 * The Initial Developers of the Original Code are 17 * Kevin Hendricks (MySpell) and Németh László (Hunspell). 18 * Portions created by the Initial Developers are Copyright (C) 2002-2005 19 * the Initial Developers. All Rights Reserved. 20 * 21 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, 22 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, 23 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, 24 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, 25 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen 26 * 27 * Alternatively, the contents of this file may be used under the terms of 28 * either the GNU General Public License Version 2 or later (the "GPL"), or 29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 30 * in which case the provisions of the GPL or the LGPL are applicable instead 31 * of those above. If you wish to allow use of your version of this file only 32 * under the terms of either the GPL or the LGPL, and not to allow others to 33 * use your version of this file under the terms of the MPL, indicate your 34 * decision by deleting the provisions above and replace them with the notice 35 * and other provisions required by the GPL or the LGPL. If you do not delete 36 * the provisions above, a recipient may use your version of this file under 37 * the terms of any one of the MPL, the GPL or the LGPL. 38 * 39 * ***** END LICENSE BLOCK ***** */ 40 /* 41 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 42 * And Contributors. All rights reserved. 43 * 44 * Redistribution and use in source and binary forms, with or without 45 * modification, are permitted provided that the following conditions 46 * are met: 47 * 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 51 * 2. Redistributions in binary form must reproduce the above copyright 52 * notice, this list of conditions and the following disclaimer in the 53 * documentation and/or other materials provided with the distribution. 54 * 55 * 3. All modifications to the source code must be clearly marked as 56 * such. Binary redistributions based on modified source code 57 * must be clearly marked as modified versions in the documentation 58 * and/or other materials provided with the distribution. 59 * 60 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 61 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 62 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 63 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 64 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 65 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 66 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 68 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 69 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 70 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 71 * SUCH DAMAGE. 72 */ 73 74 #include "hunvisapi.h" 75 76 #include "hashmgr.hxx" 77 #include "affixmgr.hxx" 78 #include "suggestmgr.hxx" 79 #include "langnum.hxx" 80 #include <vector> 81 82 #define SPELL_XML "<?xml?>" 83 84 #define MAXDIC 20 85 #define MAXSUGGESTION 15 86 #define MAXSHARPS 5 87 88 #define HUNSPELL_OK (1 << 0) 89 #define HUNSPELL_OK_WARN (1 << 1) 90 91 #ifndef _MYSPELLMGR_HXX_ 92 #define _MYSPELLMGR_HXX_ 93 94 class LIBHUNSPELL_DLL_EXPORTED Hunspell { 95 private: 96 Hunspell(const Hunspell&); 97 Hunspell& operator=(const Hunspell&); 98 99 private: 100 AffixMgr* pAMgr; 101 HashMgr* pHMgr[MAXDIC]; 102 int maxdic; 103 SuggestMgr* pSMgr; 104 char* affixpath; 105 char* encoding; 106 struct cs_info* csconv; 107 int langnum; 108 int utf8; 109 int complexprefixes; 110 char** wordbreak; 111 112 public: 113 /* Hunspell(aff, dic) - constructor of Hunspell class 114 * input: path of affix file and dictionary file 115 * 116 * In WIN32 environment, use UTF-8 encoded paths started with the long path 117 * prefix \\\\?\\ to handle system-independent character encoding and very 118 * long path names (without the long path prefix Hunspell will use fopen() 119 * with system-dependent character encoding instead of _wfopen()). 120 */ 121 122 Hunspell(const char* affpath, const char* dpath, const char* key = NULL); 123 ~Hunspell(); 124 125 /* load extra dictionaries (only dic files) */ 126 int add_dic(const char* dpath, const char* key = NULL); 127 128 /* spell(word) - spellcheck word 129 * output: 0 = bad word, not 0 = good word 130 * 131 * plus output: 132 * info: information bit array, fields: 133 * SPELL_COMPOUND = a compound word 134 * SPELL_FORBIDDEN = an explicit forbidden word 135 * root: root (stem), when input is a word with affix(es) 136 */ 137 138 int spell(const char* word, int* info = NULL, char** root = NULL); 139 140 /* suggest(suggestions, word) - search suggestions 141 * input: pointer to an array of strings pointer and the (bad) word 142 * array of strings pointer (here *slst) may not be initialized 143 * output: number of suggestions in string array, and suggestions in 144 * a newly allocated array of strings (*slts will be NULL when number 145 * of suggestion equals 0.) 146 */ 147 148 int suggest(char*** slst, const char* word); 149 150 /* Suggest words from suffix rules 151 * suffix_suggest(suggestions, root_word) 152 * input: pointer to an array of strings pointer and the word 153 * array of strings pointer (here *slst) may not be initialized 154 * output: number of suggestions in string array, and suggestions in 155 * a newly allocated array of strings (*slts will be NULL when number 156 * of suggestion equals 0.) 157 */ 158 int suffix_suggest(char*** slst, const char* root_word); 159 160 /* deallocate suggestion lists */ 161 162 void free_list(char*** slst, int n); 163 164 char* get_dic_encoding(); 165 166 /* morphological functions */ 167 168 /* analyze(result, word) - morphological analysis of the word */ 169 170 int analyze(char*** slst, const char* word); 171 172 /* stem(result, word) - stemmer function */ 173 174 int stem(char*** slst, const char* word); 175 176 /* stem(result, analysis, n) - get stems from a morph. analysis 177 * example: 178 * char ** result, result2; 179 * int n1 = analyze(&result, "words"); 180 * int n2 = stem(&result2, result, n1); 181 */ 182 183 int stem(char*** slst, char** morph, int n); 184 185 /* generate(result, word, word2) - morphological generation by example(s) */ 186 187 int generate(char*** slst, const char* word, const char* word2); 188 189 /* generate(result, word, desc, n) - generation by morph. description(s) 190 * example: 191 * char ** result; 192 * char * affix = "is:plural"; // description depends from dictionaries, too 193 * int n = generate(&result, "word", &affix, 1); 194 * for (int i = 0; i < n; i++) printf("%s\n", result[i]); 195 */ 196 197 int generate(char*** slst, const char* word, char** desc, int n); 198 199 /* functions for run-time modification of the dictionary */ 200 201 /* add word to the run-time dictionary */ 202 203 int add(const char* word); 204 205 /* add word to the run-time dictionary with affix flags of 206 * the example (a dictionary word): Hunspell will recognize 207 * affixed forms of the new word, too. 208 */ 209 210 int add_with_affix(const char* word, const char* example); 211 212 /* remove word from the run-time dictionary */ 213 214 int remove(const char* word); 215 216 /* other */ 217 218 /* get extra word characters definied in affix file for tokenization */ 219 const char* get_wordchars(); 220 const std::vector<w_char>& get_wordchars_utf16(); 221 222 struct cs_info* get_csconv(); 223 const char* get_version(); 224 225 int get_langnum() const; 226 227 /* need for putdic */ 228 int input_conv(const char* word, char* dest, size_t destsize); 229 230 private: 231 void cleanword(std::string& dest, const char*, int* pcaptype, int* pabbrev); 232 size_t cleanword2(std::string& dest, 233 std::vector<w_char>& dest_u, 234 const char*, 235 int* w_len, 236 int* pcaptype, 237 size_t* pabbrev); 238 void mkinitcap(std::string& u8); 239 int mkinitcap2(std::string& u8, std::vector<w_char>& u16); 240 int mkinitsmall2(std::string& u8, std::vector<w_char>& u16); 241 void mkallcap(std::string& u8); 242 int mkallsmall2(std::string& u8, std::vector<w_char>& u16); 243 struct hentry* checkword(const char*, int* info, char** root); 244 std::string sharps_u8_l1(const std::string& source); 245 hentry* 246 spellsharps(std::string& base, size_t start_pos, int, int, int* info, char** root); 247 int is_keepcase(const hentry* rv); 248 int insert_sug(char*** slst, const char* word, int ns); 249 void cat_result(std::string& result, char* st); 250 char* stem_description(const char* desc); 251 int spellml(char*** slst, const char* word); 252 std::string get_xml_par(const char* par); 253 const char* get_xml_pos(const char* s, const char* attr); 254 int get_xml_list(char*** slst, const char* list, const char* tag); 255 int check_xml_par(const char* q, const char* attr, const char* value); 256 }; 257 258 #endif 259