1 /***************************************************************************
2  *  Copyright 1991, 1992, 1993, 1994, 1995, 1996, 2001, 2002               *
3  *    David R. Hill, Leonard Manzara, Craig Schock                         *
4  *                                                                         *
5  *  This program is free software: you can redistribute it and/or modify   *
6  *  it under the terms of the GNU General Public License as published by   *
7  *  the Free Software Foundation, either version 3 of the License, or      *
8  *  (at your option) any later version.                                    *
9  *                                                                         *
10  *  This program is distributed in the hope that it will be useful,        *
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of         *
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
13  *  GNU General Public License for more details.                           *
14  *                                                                         *
15  *  You should have received a copy of the GNU General Public License      *
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.  *
17  ***************************************************************************/
18 // 2014-09
19 // This file was copied from Gnuspeech and modified by Marcelo Y. Matuda.
20 
21 #include "en/letter_to_sound/syllabify.h"
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <vector>
27 
28 #include "en/letter_to_sound/clusters.h"
29 
30 
31 
/*  LOCAL DEFINES  ***********************************************************/
/*  CAPACITY, IN ELEMENTS, FOR FIXED-SIZE WORKING BUFFERS IN THIS FILE  */
#define MAX_LEN    1024
/*  TRUE IF c IS ONE OF THE LOWERCASE LETTERS a, e, i, o, u.
    NOTE: c IS EVALUATED UP TO FIVE TIMES, SO IT MUST HAVE NO SIDE EFFECTS.  */
#define isvowel(c) ((c)=='a' || (c)=='e' || (c)=='i' || (c)=='o' || (c)=='u' )
/*  MATCH TABLES HANDED TO check_cluster BY syllable_break: LEFT IS USED FOR
    THE PART OF A CLUSTER AFTER A CANDIDATE BREAK, RIGHT FOR THE PART BEFORE
    IT.  (begin_syllable / end_syllable ARE NOT DEFINED IN THIS FILE;
    PRESUMABLY THEY COME FROM clusters.h -- CONFIRM.)  */
#define LEFT       begin_syllable
#define RIGHT      end_syllable
38 
39 
40 namespace {
41 
/*  DATA TYPES  **************************************************************/
/*  ONE ENTRY OF A CONSONANT/VOWEL SIGNATURE: 'c', 'v', OR 0 (END)  */
typedef char phone_type;

/*  FORWARD DECLARATIONS OF THE FILE-LOCAL HELPERS DEFINED BELOW  */
int syllable_break(const char* cluster);
void create_cv_signature(char *ptr, phone_type *arr);
char *add_1_phone(char *t);
void extract_consonant_cluster(char* ptr, phone_type* type, std::vector<char>& cluster);
int next_consonant_cluster(phone_type *pt);
int check_cluster(const char* p, const char** match_array);
51 
52 
53 
54 /******************************************************************************
55 *
56 *	function:	syllable_break
57 *
58 *	purpose:	Returns -2 if could not break the cluster.
59 *
60 *
61 *       arguments:      cluster
62 *
63 *	internal
64 *	functions:	check_cluster
65 *
66 *	library
67 *	functions:	strlen, strcpy
68 *
69 ******************************************************************************/
70 int
syllable_break(const char * cluster)71 syllable_break(const char* cluster)
72 {
73 	const char* left_cluster;
74 	const char* right_cluster;
75 	char temp[MAX_LEN];
76 	int offset, length;
77 
78 	/*  GET LENGTH OF CLUSTER  */
79 	length = strlen(cluster);
80 
81 	/*  INITIALLY WE SHALL RETURN THE FIRST 'POSSIBLE' MATCH  */
82 	for (offset = -1; (offset <= length); offset++) {
83 		if (offset == -1 || offset == length || cluster[offset] == '_' || cluster[offset] == '.') {
84 			strcpy(temp, cluster);
85 			if (offset >= 0) {
86 				temp[offset] = 0;
87 			}
88 			left_cluster = (offset < 0 ? temp : offset == length ? temp + length : temp + (offset + 1));
89 			/*  POINTS TO BEGINNING OR NULL  */
90 			right_cluster = (offset >= 0 ? temp : temp + length);
91 			/*  NOW THEY POINT TO EITHER A LEFT/RIGHT HANDED CLUSTER OR A NULL STRING  */
92 			if (check_cluster(left_cluster, LEFT) && check_cluster(right_cluster, RIGHT)) {
93 				/*  IF THIS IS A POSSIBLE BREAK */
94 				/*  TEMPORARY:  WILL STORE LIST OF POSSIBLES AND PICK A 'BEST' ONE  */
95 				return offset;
96 			}
97 		}
98 	}
99 
100 	/*  IF HERE, RETURN ERROR  */
101 	return -2;
102 }
103 
104 /******************************************************************************
105 *
106 *	function:	create_cv_signature
107 *
108 *	purpose:
109 *
110 *
111 *       arguments:      ptr, arr
112 *
113 *	internal
114 *	functions:	(isvowel), add_1_phone
115 *
116 *	library
117 *	functions:	none
118 *
119 ******************************************************************************/
120 void
create_cv_signature(char * ptr,phone_type * arr)121 create_cv_signature(char *ptr, phone_type *arr)
122 {
123     phone_type         *arr_next;
124 
125     arr_next = arr;
126     while (*ptr) {
127 	*arr_next++ = isvowel(*ptr) ? 'v' : 'c';
128 	ptr = add_1_phone(ptr);
129     }
130     *arr_next = 0;
131 }
132 
/******************************************************************************
*
*	function:	add_1_phone
*
*	purpose:	Advances past one phone.  Starting at t, skips every
*                       character up to the next '_' or '.' (or the end of
*                       the string), then skips the whole run of '_' / '.'
*                       characters that follows.  Returns a pointer to the
*                       first character after that run, which is the
*                       terminating null if nothing is left.
*
*       arguments:      t - position inside a null-terminated string; the
*                           string is not modified.
*
*	internal
*	functions:	none
*
*	library
*	functions:	none
*
******************************************************************************/
char*
add_1_phone(char *t)
{
    char *cursor = t;

    /*  SKIP THE REMAINDER OF THE CURRENT PHONE  */
    for (; *cursor != '\0'; ++cursor) {
        if (*cursor == '_' || *cursor == '.') {
            break;
        }
    }

    /*  SKIP EVERY SEPARATOR THAT FOLLOWS IT  */
    for (; *cursor == '_' || *cursor == '.'; ++cursor) {
    }

    return cursor;
}
160 
161 /******************************************************************************
162 *
163 *	function:	extract_consonant_cluster
164 *
165 ******************************************************************************/
166 void
extract_consonant_cluster(char * ptr,phone_type * type,std::vector<char> & cluster)167 extract_consonant_cluster(char* ptr, phone_type* type, std::vector<char>& cluster)
168 {
169 	char* newptr = ptr;
170 
171 	while (*type == 'c') {
172 		type++;
173 		newptr = add_1_phone(newptr);
174 	}
175 
176 	cluster.assign(strlen(ptr) + 1, '\0');
177 	strcpy(&cluster[0], ptr);
178 	int offset = newptr - ptr - 1;
179 
180 	if (offset >= 0) {
181 		cluster[offset] = '\0';
182 	} else {
183 		fprintf(stderr, "offset error\n");  // what's this??
184 	}
185 }
186 
187 /******************************************************************************
188 *
189 *	function:	next_consonant_cluster
190 *
191 *	purpose:	Takes a pointer to phone_type and returns an integer
192 *                       offset from that point to the start of the next
193 *                       consonant cluster (or 0 if there are no vowels between
194 *                       the pointer and the end of the word, or if this is the
195 *                       second-last cluster and the word doesn't end with a
196 *                       vowel. Basically, 0 means to stop.)
197 *
198 *       arguments:      pt
199 *
200 *	internal
201 *	functions:	none
202 *
203 *	library
204 *	functions:	none
205 *
206 ******************************************************************************/
207 int
next_consonant_cluster(phone_type * pt)208 next_consonant_cluster(phone_type *pt)
209 {
210     phone_type         *pt_var, *pt_temp;
211 
212     pt_var = pt;
213     while (*pt_var == 'c')
214 	pt_var++;
215 
216     while (*pt_var == 'v')
217 	pt_var++;
218 
219    /*  CHECK TO SEE IF WE ARE NOW ON THE FINAL CLUSTER OF THE WORD WHICH IS AT
220        THE END OF THE WORD  */
221     pt_temp = pt_var;
222 
223     while (*pt_temp == 'c')
224 	pt_temp++;
225 
226     return (*pt_var && *pt_temp ? pt_var - pt : 0);
227 }
228 
/******************************************************************************
*
*	function:	check_cluster
*
*	purpose:	Returns 1 if p is the empty string or is exactly equal
*                       to one of the strings in match_array, 0 otherwise.
*
*       arguments:      p           - null-terminated string to look up.
*                       match_array - array of null-terminated strings whose
*                                     end is marked by a null pointer.
*
*	internal
*	functions:	none
*
*	library
*	functions:	strcmp
*
******************************************************************************/
int
check_cluster(const char *p, const char** match_array)
{
	/*  EMPTY COUNTS AS A MATCH  */
	if (*p == '\0') {
		return 1;
	}

	/*  SCAN THE TABLE UP TO ITS NULL-POINTER END MARK  */
	for (const char** entry = match_array; *entry != 0; ++entry) {
		if (strcmp(*entry, p) == 0) {
			return 1;
		}
	}

	return 0;
}
262 
263 } /* namespace */
264 
265 //==============================================================================
266 
267 namespace GS {
268 namespace En {
269 
270 /******************************************************************************
271 *
272 *	function:	syllabify
273 *
274 *	purpose:	Steps along until probable syllable beginning is found,
275 *                       taking the longest possible first; then continues
276 *			skipping vowels until a possible syllable end is found
277 *                       (again taking the longest possible.)  Changes '_' to
278 *                       '.' where it occurs between syllable end and start.
279 *
280 *       arguments:      word
281 *
282 *	internal
283 *	functions:	create_cv_signature, next_consonant_cluster,
284 *                       add_1_phone, extract_consonant_cluster, syllable_break
285 *
286 *	library
287 *	functions:	none
288 *
289 ******************************************************************************/
290 int
syllabify(char * word)291 syllabify(char* word)
292 {
293 	int        i, n, temp, number_of_syllables = 0;
294 	phone_type cv_signature[MAX_LEN], *current_type;
295 	char *ptr;
296 	std::vector<char> cluster;
297 
298 	/*  INITIALIZE THIS ARRAY TO 'c' (CONSONANT), 'v' (VOWEL), 0 (END)  */
299 	ptr = word;
300 	create_cv_signature(ptr, cv_signature);
301 	current_type = cv_signature;
302 
303 	/*  WHILE THERE IS ANOTHER CONSONANT CLUSTER (NOT THE LAST)  */
304 	while ( (temp = next_consonant_cluster(current_type)) ) {
305 		number_of_syllables++;
306 
307 		/*  UPDATE CURRENT TYPE POINTER  */
308 		current_type += temp;
309 
310 		/*  MOVE PTR TO POINT TO THAT CLUSTER  */
311 		for (i = 0; i < temp; i++) {
312 			ptr = add_1_phone(ptr);
313 		}
314 
315 		/*  EXTRACT THE CLUSTER INTO A SEPARATE STRING  */
316 		extract_consonant_cluster(ptr, current_type, cluster);
317 
318 		/*  DETERMINE WHERE THE PERIOD GOES (OFFSET FROM PTR, WHICH COULD BE -1)  */
319 		n = syllable_break(&cluster[0]);
320 
321 		/*  MARK THE SYLLABLE IF POSSIBLE  */
322 		if (n != -2) {
323 			*(ptr + n) = '.';
324 		}
325 	}
326 
327 	/*  RETURN NUMBER OF SYLLABLES  */
328 	return number_of_syllables ? number_of_syllables : 1;
329 }
330 
331 } /* namespace En */
332 } /* namespace GS */
333