1 /* espeakdata_FileInMemory.cpp
2  *
3  * Copyright (C) David Weenink 2012-2020
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */
19 
20 // The glue between Praat and espeak
21 
22 
23 #include "NUM2.h"
24 #include "espeak_ng.h"
25 #include "FileInMemoryManager.h"
26 #include "speech.h"
27 #include "voice.h"
28 #include <wctype.h>
29 #include "Strings_extensions.h"
30 #include "Table_and_Strings.h"
31 
32 #include "espeakdata_FileInMemory.h"
33 
34 #if 0
35 static integer Table_getRownumberOfStringInColumn (Table me, conststring32 string, integer icol) {
36 	integer row = 0;
37 	if (icol > 0 && icol <= my numberOfColumns) {
38 		for (integer irow = 1; irow <= my rows.size; irow ++) {
39 			const TableRow myRow = my rows.at [irow];
40 			if (Melder_equ (myRow -> cells [icol]. string.get(), string)) {
41 				return irow;
42 			}
43 		}
44 	}
45 	return row;
46 }
47 #endif
espeakdata_praat_init()48 void espeakdata_praat_init () {
49 	try {
50 		espeak_ng_FileInMemoryManager = create_espeak_ng_FileInMemoryManager ();
51 		espeakdata_languages_propertiesTable = Table_createAsEspeakLanguagesProperties ();
52 		espeakdata_voices_propertiesTable = Table_createAsEspeakVoicesProperties ();
53 		espeakdata_languages_names = Table_column_to_Strings (espeakdata_languages_propertiesTable.get(), 2);
54 		espeakdata_voices_names = Table_column_to_Strings (espeakdata_voices_propertiesTable.get(), 2);
55 		const int test = 1;
56 		if (* ((char *) & test) != 1) { // (too?) simple endian test
57 			espeak_ng_data_to_bigendian ();
58 		}
59 	} catch (MelderError) {
60 		Melder_throw (U"Espeakdata initialization not performed.");
61 	}
62 }
63 
64 #define ESPEAK_ISSPACE(c) (c == ' ' || c == '\t' || c == '\r' || c == '\n')
65 
66 // imitates fgets_strip for file in memory
espeakdata_get_voicedata(const char * data,integer ndata,char * buf,integer nbuf,integer * index)67 const char * espeakdata_get_voicedata (const char *data, integer ndata, char *buf, integer nbuf, integer *index) {
68 	if (ndata <= 0 || nbuf <= 0 || *index >= ndata)
69 		return 0;
70 
71 	integer i = 0;
72 	while (i < nbuf && *index < ndata && ((buf [i] = data [i]) != '\n')) {
73 		i ++;
74 		(*index) ++;
75 	}
76 	(*index) ++;   // ppgb 20151020 fix
77 	const integer idata = i + 1;
78 	buf [i] = '\0';
79 	while (-- i >= 0 && ESPEAK_ISSPACE (buf [i]))
80 		buf [i] = 0;
81 
82 	char *p = strstr (buf, "//");
83 	if (p)
84 		*p = '\0';
85 	return & data [idata];
86 }
87 
88 
get_wordAfterPrecursor_u8(constvector<unsigned char> const & text8,conststring32 precursor)89 static conststring32 get_wordAfterPrecursor_u8 (constvector<unsigned char> const& text8, conststring32 precursor) {
90 	static char32 word [100];
91 	/*
92 		1. Find (first occurrence of) 'precursor' at the start of a line (with optional leading whitespace).
93 		2. Get the words after 'precursor' (skip leading and trailing whitespace).
94 	*/
95 	autoMelderString regex;
96 	const conststring32 text = Melder_peek8to32 (reinterpret_cast<const char *> (text8.asArgumentToFunctionThatExpectsZeroBasedArray()));
97 	MelderString_append (& regex, U"^\\s*", precursor, U"\\s+");
98 	char32 *p = nullptr;
99 	const char32 *pmatch = strstr_regexp (text, regex.string);
100 	if (pmatch) {
101 		pmatch += str32len (precursor); // skip 'precursor'
102 		while (*pmatch == U' ' || *pmatch == U'\t')
103 			pmatch ++; // skip whitespace after 'precursor'
104 		p = word;
105 		char32 *p_end = p + 99;
106 		while ((*p = *pmatch ++) && *p != U' ' && *p != U'\t' && *p != U'\n' && *p != U'\r' && p < p_end)
107 			p ++;
108 		*p = U'\0';
109 		p = word;
110 	}
111 	return p;
112 }
113 
get_stringAfterPrecursor_u8(constvector<unsigned char> const & text8,conststring32 precursor)114 static conststring32 get_stringAfterPrecursor_u8 (constvector<unsigned char> const& text8, conststring32 precursor) {
115 	static char32 word [100];
116 	/*
117 		1. Find (first occurrence of) 'precursor' at the start of a line (with optional leading whitespace).
118 		2. Get the words after 'precursor' (skip leading and trailing whitespace).
119 	*/
120 	autoMelderString regex;
121 	const conststring32 text = Melder_peek8to32 (reinterpret_cast<const char *> (text8.asArgumentToFunctionThatExpectsZeroBasedArray()));
122 	MelderString_append (& regex, U"^\\s*", precursor, U"\\s+");
123 	char32 *p = nullptr;
124 	const char32 *pmatch = strstr_regexp (text, regex.string);
125 	if (pmatch) {
126 		pmatch += str32len (precursor); // skip 'precursor'
127 		while (*pmatch == U' ' || *pmatch == U'\t')
128 			pmatch ++; // skip whitespace after 'precursor'
129 		//pmatch --;
130 		p = word;
131 		char32 *p_end = p + 99;
132 		// also discard text after comment '//'
133 		while ((*p = *pmatch ++) && *p != U'\n' && *p != U'\r' && *p != U'/' && *(p+1) != U'/' && p < p_end)
134 			p ++; // copy to end of line
135 		while (*p == U' ' || *p == U'\t' || *p == U'\n' || *p == U'\r')
136 			p --; // remove trailing white space
137 		*(++ p) = U'\0';
138 		p = word;
139 	}
140 	return p;
141 }
142 
Table_createAsEspeakVoicesProperties()143 autoTable Table_createAsEspeakVoicesProperties () {
144 	try {
145 		constexpr conststring32 criterion = U"/voices/!v/";
146 		FileInMemorySet me = espeak_ng_FileInMemoryManager -> files.get();
147 		const integer numberOfMatches = FileInMemorySet_findNumberOfMatches_path (me, kMelder_string :: CONTAINS, criterion);
148 		const conststring32 columnNames [] = { U"id", U"name", U"index", U"gender", U"age", U"variant" };
149 		autoTable thee = Table_createWithColumnNames (numberOfMatches, ARRAY_TO_STRVEC (columnNames));
150 		integer irow = 0;
151 		for (integer ifile = 1; ifile <= my size; ifile ++) {
152 			const FileInMemory fim = (FileInMemory) my at [ifile];
153 			if (Melder_stringMatchesCriterion (fim -> d_path.get(), kMelder_string :: CONTAINS, criterion, true)) {
154 				irow ++;
155 				Table_setStringValue (thee.get(), irow, 1, fim -> d_id.get());
156 				const char32 *name = get_stringAfterPrecursor_u8 (fim -> d_data.get(), U"name");
157 				// The first character of name must be upper case
158 				if (name) {
159 					autoMelderString capitalFirst;
160 					MelderString_copy (& capitalFirst, name); // we cannot modify original
161 					const char32 capital = Melder_toUpperCase (*name);
162 					*(capitalFirst. string) = capital;
163 					Table_setStringValue (thee.get(), irow, 2, capitalFirst. string);
164 				} else {
165 					Table_setStringValue (thee.get(), irow, 2, fim -> d_id.get());
166 				}
167 				Table_setNumericValue (thee.get(), irow, 3, ifile);
168 				conststring32 word = get_wordAfterPrecursor_u8 (fim -> d_data.get(), U"gender");
169 				Table_setStringValue (thee.get(), irow, 4, (word ? word : U"0"));
170 				word = get_wordAfterPrecursor_u8 (fim -> d_data.get(), U"age");
171 				Table_setStringValue (thee.get(), irow, 5, (word ? word : U"0"));
172 				word = get_stringAfterPrecursor_u8 (fim -> d_data.get(), U"variant");
173 				Table_setStringValue (thee.get(), irow, 6, (word ? word : U"0"));
174 			}
175 		}
176 		Melder_assert (irow == numberOfMatches);
177 		Table_sortRows (thee.get(),
178 				autoSTRVEC ({ U"name" }).get());
179 		return thee;
180 	} catch (MelderError) {
181 		Melder_throw (U"Table with espeak-ng voice properties not created.");
182 	}
183 }
184 
Table_createAsEspeakLanguagesProperties()185 autoTable Table_createAsEspeakLanguagesProperties () {
186 	try {
187 		constexpr conststring32 criterion = U"/lang/";
188 		FileInMemorySet me = espeak_ng_FileInMemoryManager -> files.get();
189 		const integer numberOfMatches = FileInMemorySet_findNumberOfMatches_path (me, kMelder_string :: CONTAINS, criterion);
190 		const conststring32 columnNames [] = { U"id", U"name", U"index" };
191 		autoTable thee = Table_createWithColumnNames (numberOfMatches, ARRAY_TO_STRVEC (columnNames)); // old: Default English
192 		integer irow = 0;
193 		for (integer ifile = 1; ifile <= my size; ifile ++) {
194 			const FileInMemory fim = (FileInMemory) my at [ifile];
195 			if (Melder_stringMatchesCriterion (fim -> d_path.get(), kMelder_string :: CONTAINS, criterion, true)) {
196 				irow ++;
197 				Table_setStringValue (thee.get(), irow, 1, fim -> d_id.get());
198 				const char32 *word = get_stringAfterPrecursor_u8 (fim -> d_data.get(), U"name");
199 				Table_setStringValue (thee.get(), irow, 2, ( word ? word : fim -> d_id.get() ));
200 				Table_setNumericValue (thee.get(), irow, 3, ifile);
201 			}
202 		}
203 		Melder_assert (irow == numberOfMatches);
204 		Table_sortRows (thee.get(),
205 				autoSTRVEC ({ U"name" }).get());
206 		return thee;
207 	} catch (MelderError) {
208 		Melder_throw (U"Table with espeak-ng languages not created.");
209 	}
210 }
211 
espeakdata_getIndices(conststring32 language_string,conststring32 voice_string,int * p_languageIndex,int * p_voiceIndex)212 void espeakdata_getIndices (conststring32 language_string, conststring32 voice_string, int *p_languageIndex, int *p_voiceIndex) {
213 	if (p_languageIndex) {
214 		integer languageIndex = Strings_findString (espeakdata_languages_names.get(), language_string);
215 		if (languageIndex == 0) {
216 			if (Melder_equ (language_string, U"Default") || Melder_equ (language_string, U"English")) {
217 				languageIndex = Strings_findString (espeakdata_languages_names.get(), U"English (Great Britain)");
218 				Melder_casual (U"Language \"", language_string, U"\" is deprecated. Please use \"",
219 					espeakdata_languages_names -> strings [languageIndex].get(), U"\".");
220 			} else {
221 				languageIndex = Table_searchColumn (espeakdata_languages_propertiesTable.get(), 1, language_string);
222 				if (languageIndex == 0) {
223 					Melder_throw (U"Language \"", language_string, U" is not a valid option.");
224 				}
225 			}
226 		}
227 		*p_languageIndex = languageIndex;
228 	}
229 	if (p_voiceIndex) {
230 		integer voiceIndex = Strings_findString (espeakdata_voices_names.get(), voice_string);
231 		*p_voiceIndex = voiceIndex;
232 		if (voiceIndex == 0) {
233 			if (Melder_equ (voice_string, U"default")) {
234 				voiceIndex = Strings_findString (espeakdata_voices_names.get(), U"Male1");
235 			} else if (Melder_equ (voice_string, U"f1")) {
236 				voiceIndex = Strings_findString (espeakdata_voices_names.get(), U"Female1");
237 			} else {
238 				// Try the bare file names
239 				voiceIndex = Table_searchColumn (espeakdata_voices_propertiesTable.get(), 1, voice_string);
240 				if (voiceIndex == 0) {
241 					Melder_throw (U"Voice variant ", voice_string, U" is not a valid option.");
242 				}
243 			}
244 		}
245 		if (voiceIndex != *p_voiceIndex) {
246 			*p_voiceIndex = voiceIndex;
247 			Melder_casual (U"Voice \"", voice_string, U"\" is deprecated. Please use \"",
248 				espeakdata_voices_names -> strings [*p_voiceIndex].get(), U"\".");
249 		} else {
250 			// unknown voice, handled by interface
251 		}
252 	}
253 }
254 
255 /* End of file espeakdata_FileInMemory.cpp */
256