1 /***************************************************************************
2  *  Copyright 1991, 1992, 1993, 1994, 1995, 1996, 2001, 2002               *
3  *    David R. Hill, Leonard Manzara, Craig Schock                         *
4  *                                                                         *
5  *  This program is free software: you can redistribute it and/or modify   *
6  *  it under the terms of the GNU General Public License as published by   *
7  *  the Free Software Foundation, either version 3 of the License, or      *
8  *  (at your option) any later version.                                    *
9  *                                                                         *
10  *  This program is distributed in the hope that it will be useful,        *
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of         *
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
13  *  GNU General Public License for more details.                           *
14  *                                                                         *
15  *  You should have received a copy of the GNU General Public License      *
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.  *
17  ***************************************************************************/
18 // 2014-09
19 // This file was copied from Gnuspeech and modified by Marcelo Y. Matuda.
20 
21 /******************************************************************************
22 *
23 *     Routines to return pronunciation of word based on letter-to-sound
24 *     rules.
25 *
26 ******************************************************************************/
27 
28 #include "en/letter_to_sound/letter_to_sound.h"
29 
30 #include <string.h>
31 #include <stdio.h>
32 
33 #include "en/letter_to_sound/word_to_patphone.h"
34 #include "en/letter_to_sound/isp_trans.h"
35 #include "en/letter_to_sound/syllabify.h"
36 #include "en/letter_to_sound/apply_stress.h"
37 #include "en/letter_to_sound/tail.h"
38 
39 
40 
41 /*  LOCAL DEFINES  ***********************************************************/
/*  Word type used when the spelling gives no hint, and for words found to
    have exactly one syllable.  */
#define WORD_TYPE_UNKNOWN          "j"
/*  Character written between the pronunciation and the word type.  */
#define WORD_TYPE_DELIMITER        '%'
/*  Longest word accepted; the working buffer adds room for the two '#'
    markers and the terminating NUL.  */
#define MAX_WORD_LENGTH            1024
/*  Capacity (excluding the terminating NUL) of the pronunciation buffer.  */
#define MAX_PRONUNCIATION_LENGTH   8192
/*  Arguments are fully parenthesized so that expressions containing
    low-precedence operators (e.g. MAX(x & y, z)) evaluate as expected.
    Note that each argument is still evaluated more than once.  */
#define MAX(a,b)                   (((a) > (b)) ? (a) : (b))
/*  True when string is a suffix of word.  The length test comes first so
    that no pointer before the start of word is ever formed when string is
    longer than word.  Both arguments are evaluated more than once.  */
#define WORDEND(word,string)       (strlen(word) >= strlen(string) && !strcmp((word) + strlen(word) - strlen(string), (string)))
48 
49 
50 
51 namespace {
52 
53 const char* word_type(const char* word);
54 
55 
56 
57 /******************************************************************************
58 *
59 *	function:	word_type
60 *
61 *	purpose:	Returns the word type based on the word spelling.
62 *
63 *       arguments:      word
64 *
65 *	internal
66 *	functions:	WORDEND
67 *
68 *	library
69 *	functions:	(strlen, strcmp)
70 *
71 ******************************************************************************/
72 const char*
word_type(const char * word)73 word_type(const char* word)
74 {
75 	const tail_entry* list_ptr;
76 
77 	/*  IF WORD END MATCHES LIST, RETURN CORRESPONDING TYPE  */
78 	for (list_ptr = tail_list; list_ptr->tail; list_ptr++) {
79 		if (WORDEND(word, list_ptr->tail)) {
80 			return list_ptr->type;
81 		}
82 	}
83 
84 	/*  ELSE RETURN UNKNOWN WORD TYPE  */
85 	return WORD_TYPE_UNKNOWN;
86 }
87 
88 } /* namespace */
89 
90 //==============================================================================
91 
92 namespace GS {
93 namespace En {
94 
95 /******************************************************************************
96 *
97 *	function:	letter_to_sound
98 *
99 *	purpose:	Returns pronunciation of word based on letter-to-sound
100 *                       rules.  Returns NULL if any error (rare).
101 *
102 ******************************************************************************/
103 void
letter_to_sound(const char * word,std::vector<char> & pronunciation)104 letter_to_sound(const char* word, std::vector<char>& pronunciation)
105 {
106 	char buffer[MAX_WORD_LENGTH + 3];
107 	int number_of_syllables = 0;
108 
109 	pronunciation.assign(MAX_PRONUNCIATION_LENGTH + 1, '\0');
110 
111 	/*  FORMAT WORD  */
112 	sprintf(buffer, "#%s#", word);
113 
114 	/*  CONVERT WORD TO PRONUNCIATION  */
115 	if (!word_to_patphone(buffer)) {
116 		isp_trans(buffer, &pronunciation[0]);
117 		/*  ATTEMPT TO MARK SYLL/STRESS  */
118 		number_of_syllables = syllabify(&pronunciation[0]);
119 		if (apply_stress(&pronunciation[0], word)) { // error
120 			pronunciation.clear();
121 			return;
122 		}
123 	} else {
124 		strcpy(&pronunciation[0], buffer);
125 	}
126 
127 	/*  APPEND WORD_TYPE_DELIMITER  */
128 	pronunciation[strlen(&pronunciation[0]) - 1] = WORD_TYPE_DELIMITER;
129 
130 	/*  GUESS TYPE OF WORD  */
131 	if (number_of_syllables != 1) {
132 		strcat(&pronunciation[0], word_type(word));
133 	} else {
134 		strcat(&pronunciation[0], WORD_TYPE_UNKNOWN);
135 	}
136 
137 	/*  RETURN RESULTING PRONUNCIATION  */
138 	return;
139 }
140 
141 } /* namespace En */
142 } /* namespace GS */
143