1 // Copyright 2000 by Kevin Atkinson under the terms of the LGPL 2 3 #include "language.hpp" 4 #include "phonetic.hpp" 5 #include "phonet.hpp" 6 7 #include "file_util.hpp" 8 #include "file_data_util.hpp" 9 #include "clone_ptr-t.hpp" 10 11 namespace aspeller { 12 13 class SimpileSoundslike : public Soundslike { 14 private: 15 const Language * lang; 16 char first[256]; 17 char rest[256]; 18 public: SimpileSoundslike(const Language * l)19 SimpileSoundslike(const Language * l) : lang(l) {} 20 setup(Conv &)21 PosibErr<void> setup(Conv &) { 22 memcpy(first, lang->sl_first_, 256); 23 memcpy(rest, lang->sl_rest_, 256); 24 return no_err; 25 } 26 soundslike_chars() const27 String soundslike_chars() const { 28 bool chars_set[256] = {0}; 29 for (int i = 0; i != 256; ++i) 30 { 31 char c = first[i]; 32 if (c) chars_set[static_cast<unsigned char>(c)] = true; 33 c = rest[i]; 34 if (c) chars_set[static_cast<unsigned char>(c)] = true; 35 } 36 String chars_list; 37 for (int i = 0; i != 256; ++i) 38 { 39 if (chars_set[i]) 40 chars_list += static_cast<char>(i); 41 } 42 return chars_list; 43 } 44 to_soundslike(char * res,const char * str,int size) const45 char * to_soundslike(char * res, const char * str, int size) const 46 { 47 char prev, cur = '\0'; 48 49 const char * i = str; 50 while (*i) { 51 cur = first[static_cast<unsigned char>(*i++)]; 52 if (cur) {*res++ = cur; break;} 53 } 54 prev = cur; 55 56 while (*i) { 57 cur = rest[static_cast<unsigned char>(*i++)]; 58 if (cur && cur != prev) *res++ = cur; 59 prev = cur; 60 } 61 *res = '\0'; 62 return res; 63 } 64 name() const65 const char * name () const { 66 return "simple"; 67 } version() const68 const char * version() const { 69 return "2.0"; 70 } 71 }; 72 73 class NoSoundslike : public Soundslike { 74 private: 75 const Language * lang; 76 public: NoSoundslike(const Language * l)77 NoSoundslike(const Language * l) : lang(l) {} 78 setup(Conv &)79 PosibErr<void> setup(Conv &) {return no_err;} 80 soundslike_chars() const81 String soundslike_chars() const { 82 return get_clean_chars(*lang); 83 } 84 to_soundslike(char * res,const char * str,int size) const85 char * to_soundslike(char * res, const char * str, int size) const 86 { 87 return lang->LangImpl::to_clean(res, str); 88 } 89 name() const90 const char * name() const { 91 return "none"; 92 } version() const93 const char * version() const { 94 return "1.0"; 95 } 96 }; 97 98 class StrippedSoundslike : public Soundslike { 99 private: 100 const Language * lang; 101 public: StrippedSoundslike(const Language * l)102 StrippedSoundslike(const Language * l) : lang(l) {} 103 setup(Conv &)104 PosibErr<void> setup(Conv &) {return no_err;} 105 soundslike_chars() const106 String soundslike_chars() const { 107 return get_stripped_chars(*lang); 108 } 109 to_soundslike(char * res,const char * str,int size) const110 char * to_soundslike(char * res, const char * str, int size) const 111 { 112 return lang->LangImpl::to_stripped(res, str); 113 } 114 name() const115 const char * name() const { 116 return "stripped"; 117 } version() const118 const char * version() const { 119 return "1.0"; 120 } 121 }; 122 123 class PhonetSoundslike : public Soundslike { 124 125 const Language * lang; 126 StackPtr<PhonetParms> phonet_parms; 127 128 public: 129 PhonetSoundslike(const Language * l)130 PhonetSoundslike(const Language * l) : lang(l) {} 131 setup(Conv & iconv)132 PosibErr<void> setup(Conv & iconv) { 133 String file; 134 file += lang->data_dir(); 135 file += '/'; 136 file += lang->name(); 137 file += "_phonet.dat"; 138 PosibErr<PhonetParms *> pe = new_phonet(file, iconv, lang); 139 if (pe.has_err()) return pe; 140 phonet_parms.reset(pe); 141 return no_err; 142 } 143 144 soundslike_chars() const145 String soundslike_chars() const 146 { 147 bool chars_set[256] = {0}; 148 String chars_list; 149 for (const char * * i = phonet_parms->rules + 1; 150 *(i-1) != PhonetParms::rules_end; 151 i += 2) 152 { 153 for (const char * j = *i; *j; ++j) 154 { 155 chars_set[static_cast<unsigned char>(*j)] = true; 156 } 157 } 158 for (int i = 0; i != 256; ++i) 159 { 160 if (chars_set[i]) 161 chars_list += static_cast<char>(i); 162 } 163 return chars_list; 164 } 165 to_soundslike(char * res,const char * str,int size) const166 char * to_soundslike(char * res, const char * str, int size) const 167 { 168 int new_size = phonet(str, res, size, *phonet_parms); 169 return res + new_size; 170 } 171 name() const172 const char * name() const 173 { 174 return "phonet"; 175 } version() const176 const char * version() const 177 { 178 return phonet_parms->version.c_str(); 179 } 180 }; 181 182 new_soundslike(ParmString name,Conv & iconv,const Language * lang)183 PosibErr<Soundslike *> new_soundslike(ParmString name, 184 Conv & iconv, 185 const Language * lang) 186 { 187 Soundslike * sl; 188 if (name == "simple" || name == "generic") { 189 sl = new SimpileSoundslike(lang); 190 } else if (name == "stripped") { 191 sl = new StrippedSoundslike(lang); 192 } else if (name == "none") { 193 sl = new NoSoundslike(lang); 194 } else if (name == lang->name()) { 195 sl = new PhonetSoundslike(lang); 196 } else { 197 abort(); // FIXME 198 } 199 PosibErrBase pe = sl->setup(iconv); 200 if (pe.has_err()) { 201 delete sl; 202 return pe; 203 } else { 204 return sl; 205 } 206 } 207 } 208 209