1 /***************************************************************************
2 * Copyright 1991, 1992, 1993, 1994, 1995, 1996, 2001, 2002 *
3 * David R. Hill, Leonard Manzara, Craig Schock *
4 * *
5 * This program is free software: you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation, either version 3 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program. If not, see <http://www.gnu.org/licenses/>. *
17 ***************************************************************************/
18 // 2014-09
19 // This file was copied from Gnuspeech and modified by Marcelo Y. Matuda.
20
21 #include "en/letter_to_sound/syllabify.h"
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <vector>
27
28 #include "en/letter_to_sound/clusters.h"
29
30
31
/*  LOCAL DEFINES  ***********************************************************/

/*  SIZE, IN BYTES, OF THE FIXED WORKING BUFFERS USED IN THIS FILE  */
#define MAX_LEN 1024

/*  TRUE FOR THE FIVE LOWER-CASE VOWEL LETTERS ONLY.  THE ARGUMENT IS
    EVALUATED MORE THAN ONCE, SO IT MUST NOT HAVE SIDE EFFECTS.  */
#define isvowel(c) ((c)=='a' || (c)=='e' || (c)=='i' || (c)=='o' || (c)=='u' )

/*  MATCH TABLES HANDED TO check_cluster: "LEFT" IS THE TABLE OF CLUSTERS THAT
    MAY BEGIN A SYLLABLE, "RIGHT" THE TABLE OF CLUSTERS THAT MAY END ONE  */
#define LEFT begin_syllable
#define RIGHT end_syllable
37
38
39
40 namespace {
41
/*  DATA TYPES  **************************************************************/

/*  ONE ENTRY OF A CV SIGNATURE: 'c' (CONSONANT), 'v' (VOWEL) OR 0 (END)  */
typedef char phone_type;

/*  LOCAL FUNCTION PROTOTYPES  ***********************************************/

/*  PHONE-STRING HELPERS  */
char *add_1_phone(char *phone_start);
void create_cv_signature(char *word, phone_type *signature);

/*  CONSONANT-CLUSTER HELPERS  */
int next_consonant_cluster(phone_type *signature_pos);
void extract_consonant_cluster(char* cluster_start, phone_type* signature_pos, std::vector<char>& cluster);
int check_cluster(const char* candidate, const char** match_array);
int syllable_break(const char* cluster);
51
52
53
54 /******************************************************************************
55 *
56 * function: syllable_break
57 *
58 * purpose: Returns -2 if could not break the cluster.
59 *
60 *
61 * arguments: cluster
62 *
63 * internal
64 * functions: check_cluster
65 *
66 * library
67 * functions: strlen, strcpy
68 *
69 ******************************************************************************/
70 int
syllable_break(const char * cluster)71 syllable_break(const char* cluster)
72 {
73 const char* left_cluster;
74 const char* right_cluster;
75 char temp[MAX_LEN];
76 int offset, length;
77
78 /* GET LENGTH OF CLUSTER */
79 length = strlen(cluster);
80
81 /* INITIALLY WE SHALL RETURN THE FIRST 'POSSIBLE' MATCH */
82 for (offset = -1; (offset <= length); offset++) {
83 if (offset == -1 || offset == length || cluster[offset] == '_' || cluster[offset] == '.') {
84 strcpy(temp, cluster);
85 if (offset >= 0) {
86 temp[offset] = 0;
87 }
88 left_cluster = (offset < 0 ? temp : offset == length ? temp + length : temp + (offset + 1));
89 /* POINTS TO BEGINNING OR NULL */
90 right_cluster = (offset >= 0 ? temp : temp + length);
91 /* NOW THEY POINT TO EITHER A LEFT/RIGHT HANDED CLUSTER OR A NULL STRING */
92 if (check_cluster(left_cluster, LEFT) && check_cluster(right_cluster, RIGHT)) {
93 /* IF THIS IS A POSSIBLE BREAK */
94 /* TEMPORARY: WILL STORE LIST OF POSSIBLES AND PICK A 'BEST' ONE */
95 return offset;
96 }
97 }
98 }
99
100 /* IF HERE, RETURN ERROR */
101 return -2;
102 }
103
104 /******************************************************************************
105 *
106 * function: create_cv_signature
107 *
108 * purpose:
109 *
110 *
111 * arguments: ptr, arr
112 *
113 * internal
114 * functions: (isvowel), add_1_phone
115 *
116 * library
117 * functions: none
118 *
119 ******************************************************************************/
120 void
create_cv_signature(char * ptr,phone_type * arr)121 create_cv_signature(char *ptr, phone_type *arr)
122 {
123 phone_type *arr_next;
124
125 arr_next = arr;
126 while (*ptr) {
127 *arr_next++ = isvowel(*ptr) ? 'v' : 'c';
128 ptr = add_1_phone(ptr);
129 }
130 *arr_next = 0;
131 }
132
/******************************************************************************
*
*	function:	add_1_phone
*
*	purpose:	Advances past one phone: first over every character up
*			to the next '_' or '.' (or the end of the string), then
*			over the whole run of '_' / '.' characters that follows.
*			Returns a pointer to the resulting position, which is
*			the terminator if the string ends first.
*
*	arguments:	t - position inside a null-terminated phone string
*
*	internal
*	functions:	none
*
*	library
*	functions:	strcspn, strspn
*
******************************************************************************/
char*
add_1_phone(char* t)
{
	static const char separators[] = "_.";

	/*  LENGTH OF THE PHONE ITSELF  */
	t += strcspn(t, separators);

	/*  LENGTH OF THE SEPARATOR RUN BEHIND IT  */
	t += strspn(t, separators);

	return t;
}
160
161 /******************************************************************************
162 *
163 * function: extract_consonant_cluster
164 *
165 ******************************************************************************/
166 void
extract_consonant_cluster(char * ptr,phone_type * type,std::vector<char> & cluster)167 extract_consonant_cluster(char* ptr, phone_type* type, std::vector<char>& cluster)
168 {
169 char* newptr = ptr;
170
171 while (*type == 'c') {
172 type++;
173 newptr = add_1_phone(newptr);
174 }
175
176 cluster.assign(strlen(ptr) + 1, '\0');
177 strcpy(&cluster[0], ptr);
178 int offset = newptr - ptr - 1;
179
180 if (offset >= 0) {
181 cluster[offset] = '\0';
182 } else {
183 fprintf(stderr, "offset error\n"); // what's this??
184 }
185 }
186
187 /******************************************************************************
188 *
189 * function: next_consonant_cluster
190 *
191 * purpose: Takes a pointer to phone_type and returns an integer
192 * offset from that point to the start of the next
193 * consonant cluster (or 0 if there are no vowels between
194 * the pointer and the end of the word, or if this is the
195 * second-last cluster and the word doesn't end with a
196 * vowel. Basically, 0 means to stop.)
197 *
198 * arguments: pt
199 *
200 * internal
201 * functions: none
202 *
203 * library
204 * functions: none
205 *
206 ******************************************************************************/
207 int
next_consonant_cluster(phone_type * pt)208 next_consonant_cluster(phone_type *pt)
209 {
210 phone_type *pt_var, *pt_temp;
211
212 pt_var = pt;
213 while (*pt_var == 'c')
214 pt_var++;
215
216 while (*pt_var == 'v')
217 pt_var++;
218
219 /* CHECK TO SEE IF WE ARE NOW ON THE FINAL CLUSTER OF THE WORD WHICH IS AT
220 THE END OF THE WORD */
221 pt_temp = pt_var;
222
223 while (*pt_temp == 'c')
224 pt_temp++;
225
226 return (*pt_var && *pt_temp ? pt_var - pt : 0);
227 }
228
/******************************************************************************
*
*	function:	check_cluster
*
*	purpose:	Returns 1 if p is the empty string or is exactly equal
*			to one of the strings in match_array, 0 otherwise.
*
*	arguments:	p           - null-terminated candidate cluster
*			match_array - table of strings, ended by a null pointer
*
*	internal
*	functions:	none
*
*	library
*	functions:	strcmp
*
******************************************************************************/
int
check_cluster(const char* p, const char** match_array)
{
	/*  EMPTY COUNTS AS A MATCH  */
	if (*p == '\0') {
		return 1;
	}

	/*  WALK THE TABLE UP TO ITS NULL-POINTER SENTINEL  */
	for (const char** entry = match_array; *entry != 0; ++entry) {
		if (strcmp(*entry, p) == 0) {
			return 1;
		}
	}

	return 0;
}
262
263 } /* namespace */
264
265 //==============================================================================
266
267 namespace GS {
268 namespace En {
269
270 /******************************************************************************
271 *
272 * function: syllabify
273 *
274 * purpose: Steps along until probable syllable beginning is found,
275 * taking the longest possible first; then continues
276 * skipping vowels until a possible syllable end is found
277 * (again taking the longest possible.) Changes '_' to
278 * '.' where it occurs between syllable end and start.
279 *
280 * arguments: word
281 *
282 * internal
283 * functions: create_cv_signature, next_consonant_cluster,
284 * add_1_phone, extract_consonant_cluster, syllable_break
285 *
286 * library
287 * functions: none
288 *
289 ******************************************************************************/
290 int
syllabify(char * word)291 syllabify(char* word)
292 {
293 int i, n, temp, number_of_syllables = 0;
294 phone_type cv_signature[MAX_LEN], *current_type;
295 char *ptr;
296 std::vector<char> cluster;
297
298 /* INITIALIZE THIS ARRAY TO 'c' (CONSONANT), 'v' (VOWEL), 0 (END) */
299 ptr = word;
300 create_cv_signature(ptr, cv_signature);
301 current_type = cv_signature;
302
303 /* WHILE THERE IS ANOTHER CONSONANT CLUSTER (NOT THE LAST) */
304 while ( (temp = next_consonant_cluster(current_type)) ) {
305 number_of_syllables++;
306
307 /* UPDATE CURRENT TYPE POINTER */
308 current_type += temp;
309
310 /* MOVE PTR TO POINT TO THAT CLUSTER */
311 for (i = 0; i < temp; i++) {
312 ptr = add_1_phone(ptr);
313 }
314
315 /* EXTRACT THE CLUSTER INTO A SEPARATE STRING */
316 extract_consonant_cluster(ptr, current_type, cluster);
317
318 /* DETERMINE WHERE THE PERIOD GOES (OFFSET FROM PTR, WHICH COULD BE -1) */
319 n = syllable_break(&cluster[0]);
320
321 /* MARK THE SYLLABLE IF POSSIBLE */
322 if (n != -2) {
323 *(ptr + n) = '.';
324 }
325 }
326
327 /* RETURN NUMBER OF SYLLABLES */
328 return number_of_syllables ? number_of_syllables : 1;
329 }
330
331 } /* namespace En */
332 } /* namespace GS */
333