1 /* espeakdata_FileInMemory.cpp
2 *
3 * Copyright (C) David Weenink 2012-2020
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at
8 * your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19
20 // The glue between Praat and espeak
21
22
23 #include "NUM2.h"
24 #include "espeak_ng.h"
25 #include "FileInMemoryManager.h"
26 #include "speech.h"
27 #include "voice.h"
28 #include <wctype.h>
29 #include "Strings_extensions.h"
30 #include "Table_and_Strings.h"
31
32 #include "espeakdata_FileInMemory.h"
33
34 #if 0
35 static integer Table_getRownumberOfStringInColumn (Table me, conststring32 string, integer icol) {
36 integer row = 0;
37 if (icol > 0 && icol <= my numberOfColumns) {
38 for (integer irow = 1; irow <= my rows.size; irow ++) {
39 const TableRow myRow = my rows.at [irow];
40 if (Melder_equ (myRow -> cells [icol]. string.get(), string)) {
41 return irow;
42 }
43 }
44 }
45 return row;
46 }
47 #endif
espeakdata_praat_init()48 void espeakdata_praat_init () {
49 try {
50 espeak_ng_FileInMemoryManager = create_espeak_ng_FileInMemoryManager ();
51 espeakdata_languages_propertiesTable = Table_createAsEspeakLanguagesProperties ();
52 espeakdata_voices_propertiesTable = Table_createAsEspeakVoicesProperties ();
53 espeakdata_languages_names = Table_column_to_Strings (espeakdata_languages_propertiesTable.get(), 2);
54 espeakdata_voices_names = Table_column_to_Strings (espeakdata_voices_propertiesTable.get(), 2);
55 const int test = 1;
56 if (* ((char *) & test) != 1) { // (too?) simple endian test
57 espeak_ng_data_to_bigendian ();
58 }
59 } catch (MelderError) {
60 Melder_throw (U"Espeakdata initialization not performed.");
61 }
62 }
63
// True if 'c' is a blank, a tab, a carriage return or a newline.
// The parameter is parenthesized so that compound arguments (e.g. a conditional expression)
// expand with the intended precedence. The argument can be evaluated up to four times,
// so it should not have side effects.
#define ESPEAK_ISSPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\r' || (c) == '\n')
65
66 // imitates fgets_strip for file in memory
espeakdata_get_voicedata(const char * data,integer ndata,char * buf,integer nbuf,integer * index)67 const char * espeakdata_get_voicedata (const char *data, integer ndata, char *buf, integer nbuf, integer *index) {
68 if (ndata <= 0 || nbuf <= 0 || *index >= ndata)
69 return 0;
70
71 integer i = 0;
72 while (i < nbuf && *index < ndata && ((buf [i] = data [i]) != '\n')) {
73 i ++;
74 (*index) ++;
75 }
76 (*index) ++; // ppgb 20151020 fix
77 const integer idata = i + 1;
78 buf [i] = '\0';
79 while (-- i >= 0 && ESPEAK_ISSPACE (buf [i]))
80 buf [i] = 0;
81
82 char *p = strstr (buf, "//");
83 if (p)
84 *p = '\0';
85 return & data [idata];
86 }
87
88
get_wordAfterPrecursor_u8(constvector<unsigned char> const & text8,conststring32 precursor)89 static conststring32 get_wordAfterPrecursor_u8 (constvector<unsigned char> const& text8, conststring32 precursor) {
90 static char32 word [100];
91 /*
92 1. Find (first occurrence of) 'precursor' at the start of a line (with optional leading whitespace).
93 2. Get the words after 'precursor' (skip leading and trailing whitespace).
94 */
95 autoMelderString regex;
96 const conststring32 text = Melder_peek8to32 (reinterpret_cast<const char *> (text8.asArgumentToFunctionThatExpectsZeroBasedArray()));
97 MelderString_append (& regex, U"^\\s*", precursor, U"\\s+");
98 char32 *p = nullptr;
99 const char32 *pmatch = strstr_regexp (text, regex.string);
100 if (pmatch) {
101 pmatch += str32len (precursor); // skip 'precursor'
102 while (*pmatch == U' ' || *pmatch == U'\t')
103 pmatch ++; // skip whitespace after 'precursor'
104 p = word;
105 char32 *p_end = p + 99;
106 while ((*p = *pmatch ++) && *p != U' ' && *p != U'\t' && *p != U'\n' && *p != U'\r' && p < p_end)
107 p ++;
108 *p = U'\0';
109 p = word;
110 }
111 return p;
112 }
113
get_stringAfterPrecursor_u8(constvector<unsigned char> const & text8,conststring32 precursor)114 static conststring32 get_stringAfterPrecursor_u8 (constvector<unsigned char> const& text8, conststring32 precursor) {
115 static char32 word [100];
116 /*
117 1. Find (first occurrence of) 'precursor' at the start of a line (with optional leading whitespace).
118 2. Get the words after 'precursor' (skip leading and trailing whitespace).
119 */
120 autoMelderString regex;
121 const conststring32 text = Melder_peek8to32 (reinterpret_cast<const char *> (text8.asArgumentToFunctionThatExpectsZeroBasedArray()));
122 MelderString_append (& regex, U"^\\s*", precursor, U"\\s+");
123 char32 *p = nullptr;
124 const char32 *pmatch = strstr_regexp (text, regex.string);
125 if (pmatch) {
126 pmatch += str32len (precursor); // skip 'precursor'
127 while (*pmatch == U' ' || *pmatch == U'\t')
128 pmatch ++; // skip whitespace after 'precursor'
129 //pmatch --;
130 p = word;
131 char32 *p_end = p + 99;
132 // also discard text after comment '//'
133 while ((*p = *pmatch ++) && *p != U'\n' && *p != U'\r' && *p != U'/' && *(p+1) != U'/' && p < p_end)
134 p ++; // copy to end of line
135 while (*p == U' ' || *p == U'\t' || *p == U'\n' || *p == U'\r')
136 p --; // remove trailing white space
137 *(++ p) = U'\0';
138 p = word;
139 }
140 return p;
141 }
142
Table_createAsEspeakVoicesProperties()143 autoTable Table_createAsEspeakVoicesProperties () {
144 try {
145 constexpr conststring32 criterion = U"/voices/!v/";
146 FileInMemorySet me = espeak_ng_FileInMemoryManager -> files.get();
147 const integer numberOfMatches = FileInMemorySet_findNumberOfMatches_path (me, kMelder_string :: CONTAINS, criterion);
148 const conststring32 columnNames [] = { U"id", U"name", U"index", U"gender", U"age", U"variant" };
149 autoTable thee = Table_createWithColumnNames (numberOfMatches, ARRAY_TO_STRVEC (columnNames));
150 integer irow = 0;
151 for (integer ifile = 1; ifile <= my size; ifile ++) {
152 const FileInMemory fim = (FileInMemory) my at [ifile];
153 if (Melder_stringMatchesCriterion (fim -> d_path.get(), kMelder_string :: CONTAINS, criterion, true)) {
154 irow ++;
155 Table_setStringValue (thee.get(), irow, 1, fim -> d_id.get());
156 const char32 *name = get_stringAfterPrecursor_u8 (fim -> d_data.get(), U"name");
157 // The first character of name must be upper case
158 if (name) {
159 autoMelderString capitalFirst;
160 MelderString_copy (& capitalFirst, name); // we cannot modify original
161 const char32 capital = Melder_toUpperCase (*name);
162 *(capitalFirst. string) = capital;
163 Table_setStringValue (thee.get(), irow, 2, capitalFirst. string);
164 } else {
165 Table_setStringValue (thee.get(), irow, 2, fim -> d_id.get());
166 }
167 Table_setNumericValue (thee.get(), irow, 3, ifile);
168 conststring32 word = get_wordAfterPrecursor_u8 (fim -> d_data.get(), U"gender");
169 Table_setStringValue (thee.get(), irow, 4, (word ? word : U"0"));
170 word = get_wordAfterPrecursor_u8 (fim -> d_data.get(), U"age");
171 Table_setStringValue (thee.get(), irow, 5, (word ? word : U"0"));
172 word = get_stringAfterPrecursor_u8 (fim -> d_data.get(), U"variant");
173 Table_setStringValue (thee.get(), irow, 6, (word ? word : U"0"));
174 }
175 }
176 Melder_assert (irow == numberOfMatches);
177 Table_sortRows (thee.get(),
178 autoSTRVEC ({ U"name" }).get());
179 return thee;
180 } catch (MelderError) {
181 Melder_throw (U"Table with espeak-ng voice properties not created.");
182 }
183 }
184
Table_createAsEspeakLanguagesProperties()185 autoTable Table_createAsEspeakLanguagesProperties () {
186 try {
187 constexpr conststring32 criterion = U"/lang/";
188 FileInMemorySet me = espeak_ng_FileInMemoryManager -> files.get();
189 const integer numberOfMatches = FileInMemorySet_findNumberOfMatches_path (me, kMelder_string :: CONTAINS, criterion);
190 const conststring32 columnNames [] = { U"id", U"name", U"index" };
191 autoTable thee = Table_createWithColumnNames (numberOfMatches, ARRAY_TO_STRVEC (columnNames)); // old: Default English
192 integer irow = 0;
193 for (integer ifile = 1; ifile <= my size; ifile ++) {
194 const FileInMemory fim = (FileInMemory) my at [ifile];
195 if (Melder_stringMatchesCriterion (fim -> d_path.get(), kMelder_string :: CONTAINS, criterion, true)) {
196 irow ++;
197 Table_setStringValue (thee.get(), irow, 1, fim -> d_id.get());
198 const char32 *word = get_stringAfterPrecursor_u8 (fim -> d_data.get(), U"name");
199 Table_setStringValue (thee.get(), irow, 2, ( word ? word : fim -> d_id.get() ));
200 Table_setNumericValue (thee.get(), irow, 3, ifile);
201 }
202 }
203 Melder_assert (irow == numberOfMatches);
204 Table_sortRows (thee.get(),
205 autoSTRVEC ({ U"name" }).get());
206 return thee;
207 } catch (MelderError) {
208 Melder_throw (U"Table with espeak-ng languages not created.");
209 }
210 }
211
espeakdata_getIndices(conststring32 language_string,conststring32 voice_string,int * p_languageIndex,int * p_voiceIndex)212 void espeakdata_getIndices (conststring32 language_string, conststring32 voice_string, int *p_languageIndex, int *p_voiceIndex) {
213 if (p_languageIndex) {
214 integer languageIndex = Strings_findString (espeakdata_languages_names.get(), language_string);
215 if (languageIndex == 0) {
216 if (Melder_equ (language_string, U"Default") || Melder_equ (language_string, U"English")) {
217 languageIndex = Strings_findString (espeakdata_languages_names.get(), U"English (Great Britain)");
218 Melder_casual (U"Language \"", language_string, U"\" is deprecated. Please use \"",
219 espeakdata_languages_names -> strings [languageIndex].get(), U"\".");
220 } else {
221 languageIndex = Table_searchColumn (espeakdata_languages_propertiesTable.get(), 1, language_string);
222 if (languageIndex == 0) {
223 Melder_throw (U"Language \"", language_string, U" is not a valid option.");
224 }
225 }
226 }
227 *p_languageIndex = languageIndex;
228 }
229 if (p_voiceIndex) {
230 integer voiceIndex = Strings_findString (espeakdata_voices_names.get(), voice_string);
231 *p_voiceIndex = voiceIndex;
232 if (voiceIndex == 0) {
233 if (Melder_equ (voice_string, U"default")) {
234 voiceIndex = Strings_findString (espeakdata_voices_names.get(), U"Male1");
235 } else if (Melder_equ (voice_string, U"f1")) {
236 voiceIndex = Strings_findString (espeakdata_voices_names.get(), U"Female1");
237 } else {
238 // Try the bare file names
239 voiceIndex = Table_searchColumn (espeakdata_voices_propertiesTable.get(), 1, voice_string);
240 if (voiceIndex == 0) {
241 Melder_throw (U"Voice variant ", voice_string, U" is not a valid option.");
242 }
243 }
244 }
245 if (voiceIndex != *p_voiceIndex) {
246 *p_voiceIndex = voiceIndex;
247 Melder_casual (U"Voice \"", voice_string, U"\" is deprecated. Please use \"",
248 espeakdata_voices_names -> strings [*p_voiceIndex].get(), U"\".");
249 } else {
250 // unknown voice, handled by interface
251 }
252 }
253 }
254
255 /* End of file espeakdata_FileInMemory.cpp */
256