1 // Copyright 2000 by Kevin Atkinson under the terms of the LGPL
2 
3 #include "language.hpp"
4 #include "phonetic.hpp"
5 #include "phonet.hpp"
6 
7 #include "file_util.hpp"
8 #include "file_data_util.hpp"
9 #include "clone_ptr-t.hpp"
10 
11 namespace aspeller {
12 
13   class SimpileSoundslike : public Soundslike {
14   private:
15     const Language * lang;
16     char first[256];
17     char rest[256];
18   public:
SimpileSoundslike(const Language * l)19     SimpileSoundslike(const Language * l) : lang(l) {}
20 
setup(Conv &)21     PosibErr<void> setup(Conv &) {
22       memcpy(first, lang->sl_first_, 256);
23       memcpy(rest,  lang->sl_rest_, 256);
24       return no_err;
25     }
26 
soundslike_chars() const27     String soundslike_chars() const {
28       bool chars_set[256] = {0};
29       for (int i = 0; i != 256; ++i)
30       {
31         char c = first[i];
32         if (c) chars_set[static_cast<unsigned char>(c)] = true;
33         c = rest[i];
34         if (c) chars_set[static_cast<unsigned char>(c)] = true;
35       }
36       String     chars_list;
37       for (int i = 0; i != 256; ++i)
38       {
39         if (chars_set[i])
40           chars_list += static_cast<char>(i);
41       }
42       return chars_list;
43     }
44 
to_soundslike(char * res,const char * str,int size) const45     char * to_soundslike(char * res, const char * str, int size) const
46     {
47       char prev, cur = '\0';
48 
49       const char * i = str;
50       while (*i) {
51         cur = first[static_cast<unsigned char>(*i++)];
52         if (cur) {*res++ = cur; break;}
53       }
54       prev = cur;
55 
56       while (*i) {
57 	cur = rest[static_cast<unsigned char>(*i++)];
58 	if (cur && cur != prev) *res++ = cur;
59 	prev = cur;
60       }
61       *res = '\0';
62       return res;
63     }
64 
name() const65     const char * name () const {
66       return "simple";
67     }
version() const68     const char * version() const {
69       return "2.0";
70     }
71   };
72 
73   class NoSoundslike : public Soundslike {
74   private:
75     const Language * lang;
76   public:
NoSoundslike(const Language * l)77     NoSoundslike(const Language * l) : lang(l) {}
78 
setup(Conv &)79     PosibErr<void> setup(Conv &) {return no_err;}
80 
soundslike_chars() const81     String soundslike_chars() const {
82       return get_clean_chars(*lang);
83     }
84 
to_soundslike(char * res,const char * str,int size) const85     char * to_soundslike(char * res, const char * str, int size) const
86     {
87       return lang->LangImpl::to_clean(res, str);
88     }
89 
name() const90     const char * name() const {
91       return "none";
92     }
version() const93     const char * version() const {
94       return "1.0";
95     }
96   };
97 
98   class StrippedSoundslike : public Soundslike {
99   private:
100     const Language * lang;
101   public:
StrippedSoundslike(const Language * l)102     StrippedSoundslike(const Language * l) : lang(l) {}
103 
setup(Conv &)104     PosibErr<void> setup(Conv &) {return no_err;}
105 
soundslike_chars() const106     String soundslike_chars() const {
107       return get_stripped_chars(*lang);
108     }
109 
to_soundslike(char * res,const char * str,int size) const110     char * to_soundslike(char * res, const char * str, int size) const
111     {
112       return lang->LangImpl::to_stripped(res, str);
113     }
114 
name() const115     const char * name() const {
116       return "stripped";
117     }
version() const118     const char * version() const {
119       return "1.0";
120     }
121   };
122 
123   class PhonetSoundslike : public Soundslike {
124 
125     const Language * lang;
126     StackPtr<PhonetParms> phonet_parms;
127 
128   public:
129 
PhonetSoundslike(const Language * l)130     PhonetSoundslike(const Language * l) : lang(l) {}
131 
setup(Conv & iconv)132     PosibErr<void> setup(Conv & iconv) {
133       String file;
134       file += lang->data_dir();
135       file += '/';
136       file += lang->name();
137       file += "_phonet.dat";
138       PosibErr<PhonetParms *> pe = new_phonet(file, iconv, lang);
139       if (pe.has_err()) return pe;
140       phonet_parms.reset(pe);
141       return no_err;
142     }
143 
144 
soundslike_chars() const145     String soundslike_chars() const
146     {
147       bool chars_set[256] = {0};
148       String     chars_list;
149       for (const char * * i = phonet_parms->rules + 1;
150 	   *(i-1) != PhonetParms::rules_end;
151 	   i += 2)
152       {
153         for (const char * j = *i; *j; ++j)
154         {
155           chars_set[static_cast<unsigned char>(*j)] = true;
156         }
157       }
158       for (int i = 0; i != 256; ++i)
159       {
160         if (chars_set[i])
161           chars_list += static_cast<char>(i);
162       }
163       return chars_list;
164     }
165 
to_soundslike(char * res,const char * str,int size) const166     char * to_soundslike(char * res, const char * str, int size) const
167     {
168       int new_size = phonet(str, res, size, *phonet_parms);
169       return res + new_size;
170     }
171 
name() const172     const char * name() const
173     {
174       return "phonet";
175     }
version() const176     const char * version() const
177     {
178       return phonet_parms->version.c_str();
179     }
180   };
181 
182 
new_soundslike(ParmString name,Conv & iconv,const Language * lang)183   PosibErr<Soundslike *> new_soundslike(ParmString name,
184                                         Conv & iconv,
185                                         const Language * lang)
186   {
187     Soundslike * sl;
188     if (name == "simple" || name == "generic") {
189       sl = new SimpileSoundslike(lang);
190     } else if (name == "stripped") {
191       sl = new StrippedSoundslike(lang);
192     } else if (name == "none") {
193       sl = new NoSoundslike(lang);
194     } else if (name == lang->name()) {
195       sl = new PhonetSoundslike(lang);
196     } else {
197       abort(); // FIXME
198     }
199     PosibErrBase pe = sl->setup(iconv);
200     if (pe.has_err()) {
201       delete sl;
202       return pe;
203     } else {
204       return sl;
205     }
206   }
207 }
208 
209