1 /* espeak_io.cpp
2  *
3 //  * Copyright (C) 2017 David Weenink
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20 	djmw 20171024
21 */
22 
#include "espeakdata_FileInMemory.h"
#include "espeak_ng.h"
#include "speech.h"
#include "synthesize.h"
#include <errno.h>
#include <string.h>
28 
29 extern autoFileInMemoryManager espeak_ng_FileInMemoryManager;
30 #define ESPEAK_FILEINMEMORYMANAGER espeak_ng_FileInMemoryManager.get()
31 
espeak_io_fopen(const char * filename,const char * mode)32 FILE *espeak_io_fopen (const char * filename, const char * mode) {
33 	return FileInMemoryManager_fopen (ESPEAK_FILEINMEMORYMANAGER, filename, mode);
34 }
35 
espeak_io_rewind(FILE * stream)36 void espeak_io_rewind (FILE *stream) {
37 	FileInMemoryManager_rewind (ESPEAK_FILEINMEMORYMANAGER, stream);
38 }
39 
espeak_io_fclose(FILE * stream)40 int espeak_io_fclose (FILE *stream) {
41 	return FileInMemoryManager_fclose (ESPEAK_FILEINMEMORYMANAGER, stream);
42 }
43 
espeak_io_feof(FILE * stream)44 int espeak_io_feof (FILE *stream) {
45 	return FileInMemoryManager_feof (ESPEAK_FILEINMEMORYMANAGER, stream);
46 }
47 
espeak_io_ftell(FILE * stream)48 long espeak_io_ftell (FILE *stream) {
49 	return FileInMemoryManager_ftell (ESPEAK_FILEINMEMORYMANAGER, stream);
50 }
51 
espeak_io_fseek(FILE * stream,long offset,int origin)52 int espeak_io_fseek (FILE *stream, long offset, int origin) {
53 	return FileInMemoryManager_fseek (ESPEAK_FILEINMEMORYMANAGER, stream, offset, origin);
54 }
55 
espeak_io_fgets(char * str,int num,FILE * stream)56 char *espeak_io_fgets (char *str, int num, FILE *stream) {
57 	return FileInMemoryManager_fgets (ESPEAK_FILEINMEMORYMANAGER, str, num, stream);
58 }
59 
espeak_io_fread(void * ptr,size_t size,size_t count,FILE * stream)60 size_t espeak_io_fread (void *ptr, size_t size, size_t count, FILE *stream) {
61 	return FileInMemoryManager_fread (ESPEAK_FILEINMEMORYMANAGER, ptr, size, count, stream);
62 }
63 
espeak_io_fgetc(FILE * stream)64 int espeak_io_fgetc (FILE *stream) {
65 	return FileInMemoryManager_fgetc (ESPEAK_FILEINMEMORYMANAGER, stream);
66 }
67 
espeak_io_fprintf(FILE * stream,...)68 int espeak_io_fprintf (FILE * stream, ... ) {
69 	va_list arg;
70 	va_start (arg, stream);
71 	char *format = static_cast<char *> (va_arg (arg, void*));
72 	int result = FileInMemoryManager_fprintf (ESPEAK_FILEINMEMORYMANAGER, stream, format, arg);
73 	va_end (arg);
74 	return result;
75 }
76 
espeak_io_ungetc(int character,FILE * stream)77 int espeak_io_ungetc (int character, FILE * stream) {
78 	return FileInMemoryManager_ungetc (ESPEAK_FILEINMEMORYMANAGER, character,stream);
79 }
80 /* This mimics GetFileLength of espeak-ng */
FileInMemoryManager_GetFileLength(FileInMemoryManager me,const char * filename)81 int FileInMemoryManager_GetFileLength (FileInMemoryManager me, const char *filename) {
82 		integer index = FileInMemorySet_lookUp (my files.get(), Melder_peek8to32(filename));
83 		if (index > 0) {
84 			FileInMemory fim = static_cast<FileInMemory> (my files -> at [index]);
85 			return fim -> d_numberOfBytes;
86 		}
87 		// Directory ??
88 		if (FileInMemorySet_hasDirectory (my files.get(), Melder_peek8to32(filename))) {
89 			return -EISDIR;
90 		}
91 		return -1;
92 }
93 
94 /*
95 	espeak_io_GetFileLength: mimics GetFileLength of espeak-ng
96 	Returns the number of bytes in the file.
97 	If the filename is a directory it return -EISDIR
98 */
espeak_io_GetFileLength(const char * filename)99 int espeak_io_GetFileLength (const char *filename) {
100 	FileInMemorySet me = ESPEAK_FILEINMEMORYMANAGER -> files.get();
101 	integer index = FileInMemorySet_lookUp (me, Melder_peek8to32(filename));
102 	if (index > 0) {
103 		FileInMemory fim = static_cast<FileInMemory> (my at [index]);
104 		return fim -> d_numberOfBytes;
105 	}
106 	// Directory ??
107 	if (FileInMemorySet_hasDirectory (me, Melder_peek8to32(filename))) {
108 		return -EISDIR;
109 	}
110 	return -1;
111 }
112 
113 /*
114 	espeak_io_GetVoices: mimics GetVoices of espeak-ng
115 	If is_languange_file == 0 then /voices/ else /lang/
116 	We know our voices are in /voices/ and our languages in /lang/
117 */
espeak_io_GetVoices(const char * path,int len_path_voices,int is_language_file)118 void espeak_io_GetVoices (const char *path, int len_path_voices, int is_language_file) {
119 	(void) path;
120 	/*
121 		if is_languange_file == 0 then /voices/ else /lang/
122 		We know our voices are in /voices/!v/ and our languages in /lang/
123 	*/
124 	FileInMemoryManager me = ESPEAK_FILEINMEMORYMANAGER;
125 	conststring32 criterion = is_language_file ? U"/lang/" : U"/voices/";
126 	autoFileInMemorySet fileList = FileInMemorySet_listFiles (my files.get(), kMelder_string :: CONTAINS, criterion);
127 	for (long ifile = 1; ifile <= fileList -> size; ifile ++) {
128 		FileInMemory fim = static_cast<FileInMemory> (fileList -> at [ifile]);
129 		FILE *f_voice = FileInMemoryManager_fopen (me, Melder_peek32to8 (fim -> d_path.get()), "r");
130 		conststring8 fname = Melder_peek32to8 (fim -> d_path.get());
131 		espeak_VOICE *voice_data = ReadVoiceFile (f_voice, fname + len_path_voices, is_language_file);
132 		FileInMemoryManager_fclose (me, f_voice);
133 		if (voice_data) {
134 			voices_list [n_voices_list ++] = voice_data;
135 		} /*else {
136 			Melder_warning (U"Voice data for ", fname, U" could not be gathered.");
137 		}*/
138 	}
139 }
140 
/*
	get_int32_le: interprets the 4 bytes at `ch` as a little-endian 32-bit integer
	(ch [0] is the least significant byte) and returns its value.
*/
int get_int32_le (char *ch) {
	const unsigned char *bytes = reinterpret_cast<const unsigned char *> (ch);
	const int byte0 = bytes [0] << 0;
	const int byte1 = bytes [1] << 8;
	const int byte2 = bytes [2] << 16;
	const int byte3 = bytes [3] << 24;
	return byte0 | byte1 | byte2 | byte3;
}
144 
/*
	get_int16_le: interprets the 2 bytes at `ch` as a little-endian 16-bit integer
	(ch [0] is the least significant byte) and returns its value as a short.
*/
short get_int16_le (char *ch) {
	const unsigned char *bytes = reinterpret_cast<const unsigned char *> (ch);
	const int lowByte = bytes [0] << 0;
	const int highByte = bytes [1] << 8;
	return lowByte | highByte;
}
148 
/*
	get_set_int32_le: interprets the 4 bytes at `ch` as a little-endian 32-bit integer,
	writes that value back to `ch` in the byte order of the machine, and returns it.

	The value is stored with memcpy instead of through an `int *`, because `ch` is
	not guaranteed to be suitably aligned for an int and a char buffer may not be
	accessed through an int lvalue.
*/
int get_set_int32_le (char *ch) {
	const unsigned char *bytes = reinterpret_cast<const unsigned char *> (ch);
	const unsigned int bits =
		(static_cast<unsigned int> (bytes [0]) << 0) |
		(static_cast<unsigned int> (bytes [1]) << 8) |
		(static_cast<unsigned int> (bytes [2]) << 16) |
		(static_cast<unsigned int> (bytes [3]) << 24);
	const int i32 = static_cast<int> (bits);
	memcpy (ch, & i32, sizeof i32);
	return i32;
}
155 
156 /*
157 	The espeak-ng data files have been written with little endian byte order. To be able to use these files on big endian hardware we have to change these files as if they were written on a big endian machine.
158 	The following routines were modeled after espeak-phonemedata.c by Jonathan Duddington.
159 	A serious bug in his code for the phontab_to_bigendian procedure has been corrected.
160 	A better solution would be:
161 		espeak-ng should read a little endian int32 as 4 unsigned bytes:
162 			int32 i = (ch[0]<<0) | (ch[1]<<8) | (ch[2]<<16) | (ch[3]<<24);
		an int16 (short) as 2 unsigned bytes:
164 			int16 i = (ch[0]<<0) | (ch[1]<<8);
165 		Then no conversion of data files would be necessary.
166 
167 */
168 
/*
	SWAP_2 (i1) and SWAP_4 (i1): copy the 2 (or 4) bytes that start at position i1 from
	`my d_data` to `thy d_data` in reversed order. They need local variables `me` and `thee`
	in scope and are used without a trailing semicolon.
	On builds that are not big-endian both macros expand to nothing.

	The condition tests that both predefined macros exist: in an #if an undefined
	identifier counts as 0, so without the `defined` tests a compiler that knows neither
	macro would compare 0 == 0 and enable the swapping.
	The argument is evaluated once into a local whose name cannot clash with a variable
	of the caller.
*/
#if defined (__BYTE_ORDER__) && defined (__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
	#define SWAP_2(i1) { const integer swap_p1_ = (i1), swap_p2_ = swap_p1_ + 1; \
		thy d_data [swap_p1_] = my d_data [swap_p2_]; \
		thy d_data [swap_p2_] = my d_data [swap_p1_]; }

	#define SWAP_4(i1) { const integer swap_p1_ = (i1), swap_p2_ = swap_p1_ + 1, \
			swap_p3_ = swap_p1_ + 2, swap_p4_ = swap_p1_ + 3; \
		thy d_data [swap_p1_] = my d_data [swap_p4_]; \
		thy d_data [swap_p2_] = my d_data [swap_p3_]; \
		thy d_data [swap_p3_] = my d_data [swap_p2_]; \
		thy d_data [swap_p4_] = my d_data [swap_p1_]; }
#else
	#define SWAP_2(i1)
	#define SWAP_4(i1)
#endif
183 
phondata_to_bigendian(FileInMemory me,FileInMemory manifest)184 static autoFileInMemory phondata_to_bigendian (FileInMemory me, FileInMemory manifest) {
185 	try {
186 		autoFileInMemory thee = Data_copy (me);
187 		FILE *phondataf = fopen (Melder_peek32to8 (my d_path.get()), "r");
188 		FILE *manifestf = fopen (Melder_peek32to8 (manifest -> d_path.get()), "r");
189 		char line [1024];
190 		// copy 4 bytes: version number
191 		// copy 4 bytes: sample rate
192 		while (fgets (line, sizeof (line), manifestf)) {
193 			if (! isupper (line [0])) continue;
194 			unsigned int index;
195 			sscanf(& line [2], "%x", & index);
196 			fseek (phondataf, index, SEEK_SET);
197 			integer i1 = index;
198 			if (line [0] == 'S') { //
199 				/*
200 					typedef struct {
201 						short length;
202 						unsigned char n_frames;
203 						unsigned char sqflags;
204 						frame_t frame[N_SEQ_FRAMES];
205 					} SPECT_SEQ;
206 				*/
207 
208 				SWAP_2 (i1)
209 				index += 2; // skip the short length
210 				integer numberOfFrames = my d_data [index]; // unsigned char n_frames
211 				index += 2; // skip the 2 unsigned char's n_frames & sqflags
212 
213 				for (integer n = 1; n <= numberOfFrames; n ++) {
214 					/*
215 						typedef struct { //64 bytes
216 							short frflags;
217 							short ffreq[7];
218 							unsigned char length;
219 							unsigned char rms;
220 							unsigned char fheight[8];
221 							unsigned char fwidth[6];          // width/4  f0-5
222 							unsigned char fright[3];          // width/4  f0-2
223 							unsigned char bw[4];        // Klatt bandwidth BNZ /2, f1,f2,f3
224 							unsigned char klattp[5];    // AV, FNZ, Tilt, Aspr, Skew
225 							unsigned char klattp2[5];   // continuation of klattp[],  Avp, Fric, FricBP, Turb
226 							unsigned char klatt_ap[7];  // Klatt parallel amplitude
227 							unsigned char klatt_bp[7];  // Klatt parallel bandwidth  /2
228 							unsigned char spare;        // pad to multiple of 4 bytes
229 						} frame_t;   //  with extra Klatt parameters for parallel resonators
230 
231 						typedef struct {  // 44 bytes
232 							short frflags;
233 							short ffreq[7];
234 							unsigned char length;
235 							unsigned char rms;
236 							unsigned char fheight[8];
237 							unsigned char fwidth[6];          // width/4  f0-5
238 							unsigned char fright[3];          // width/4  f0-2
239 							unsigned char bw[4];        // Klatt bandwidth BNZ /2, f1,f2,f3
240 							unsigned char klattp[5];    // AV, FNZ, Tilt, Aspr, Skew
241 						} frame_t2;
242 						Both frame_t and frame_t2 start with 8 short's.
243 					*/
244 					i1 = index;
245 					for (integer i = 1; i <= 8; i ++) {
246 						SWAP_2 (i1)
247 						i1 += 2;
248 					}
249 					//
250 					#define FRFLAG_KLATT 0x01
251 					index += (thy d_data [i1] & FRFLAG_KLATT) ? sizeof (frame_t) : sizeof (frame_t2); // thy is essential!
252 				}
253 			} else if (line [0] == 'W') { // Wave data
254 				int length = my d_data [i1 + 1] * 256 + my d_data [i1];
255 				index += 4;
256 
257 				index += length; // char wavedata[length]
258 
259 				index += index % 3;
260 
261 			} else if (line [0] == 'E') {
262 
263 				index += 128; // Envelope: skip 128 bytes
264 
265 
266 			} else if (line [0] == 'Q') {
267 				unsigned int length = my d_data [index + 2] << 8 + my d_data [index + 3];
268 				length *= 4;
269 
270 				index += length;
271 			}
272 			Melder_require (index <= my d_numberOfBytes, U"Position ", index, U"is larger than file length (", my d_numberOfBytes, U").");
273 		}
274 		return thee;
275 	} catch (MelderError) {
276 		Melder_throw (U"phondata not converted to bigendian.");
277 	}
278 }
279 
phontab_to_bigendian(FileInMemory me)280 static autoFileInMemory phontab_to_bigendian (FileInMemory me) {
281 	try {
282 		autoFileInMemory thee = Data_copy (me);
283 		integer numberOfPhonemeTables = my d_data [0];
284 		integer index = 4; // skip first 4 bytes
285 		for (integer itab = 1; itab <= numberOfPhonemeTables; itab ++) {
286 			integer numberOfPhonemes = thy d_data [index];
287 
288 			index += 4; // This is 8 (incorrect) in the original code of espeak.
289 
290 			index += N_PHONEME_TAB_NAME; // skip the name
291 			integer phonemeTableSizes = numberOfPhonemes * sizeof (PHONEME_TAB);
292 			Melder_require (index + phonemeTableSizes <= my d_numberOfBytes, U"Too many tables to process. (table ", itab, U" from ", numberOfPhonemeTables, U").");
293 			for (integer j = 1; j <= numberOfPhonemes; j ++) {
294 				/*
295 					typedef struct { // 16 bytes
296 						unsigned int  mnemonic;      // 1st char is in the l.s.byte
297 						unsigned int  phflags;       // bits 16-19 place of articulation
298 						unsigned short program;
299 						unsigned char  code;         // the phoneme number
300 						unsigned char  type;         // phVOWEL, phPAUSE, phSTOP etc
301 						unsigned char  start_type;
302 						unsigned char  end_type;
303 						unsigned char  std_length;   // for vowels, in mS/2;  for phSTRESS, the stress/tone type
304 						unsigned char  length_mod;   // a length_mod group number, used to access length_mod_tab
305 					} PHONEME_TAB;
306 				*/
307 				integer i1 = index;
308 				SWAP_4 (i1)
309 				i1 += 4;
310 				SWAP_4 (i1);
311 				i1 += 4;
312 				SWAP_2 (i1)
313 				index += sizeof (PHONEME_TAB);
314 			}
315 			Melder_require (index <= my d_numberOfBytes, U"Position ", index, U" is larger than file length (", my d_numberOfBytes, U").");
316 		}
317 		return thee;
318 	} catch (MelderError) {
319 		Melder_throw (U"phontab not converted to bigendian.");
320 	}
321 }
322 
phonindex_to_bigendian(FileInMemory me)323 static autoFileInMemory phonindex_to_bigendian (FileInMemory me) {
324 	try {
325 		autoFileInMemory thee = Data_copy (me);
326 		integer numberOfShorts = (my d_numberOfBytes - 4 - 1) / 2;
327 		integer index = 4; // skip first 4 bytes
328 		for (integer i = 0; i < numberOfShorts; i ++) {
329 			SWAP_2 (index)
330 			index += 2;
331 			Melder_require (index <= my d_numberOfBytes, U"Position ", index, U" is larger than file length (", my d_numberOfBytes, U").");
332 		}
333 		return thee;
334 	} catch (MelderError) {
335 		Melder_throw (U"phonindex not converted to bigendian.");
336 	}
337 }
338 
espeak_ng_data_to_bigendian()339 void espeak_ng_data_to_bigendian () {
340 	FileInMemoryManager me = ESPEAK_FILEINMEMORYMANAGER;
341 	autoMelderString file;
342 
343 	MelderString_append (& file, Melder_peek8to32 (PATH_ESPEAK_DATA), U"/phondata-manifest");
344 	integer index = FileInMemorySet_lookUp (my files.get(), file.string);
345 	Melder_require (index > 0, U"phondata-manifest not present.");
346 	FileInMemory manifest = (FileInMemory) my files -> at [index];
347 
348 	MelderString_empty (& file);
349 	MelderString_append (& file, Melder_peek8to32 (PATH_ESPEAK_DATA), U"/phondata");
350 	index = FileInMemorySet_lookUp (my files.get(), file.string);
351 	Melder_require (index > 0, U"phondata not present.");
352 	FileInMemory phondata = (FileInMemory) my files -> at [index];
353 
354 	autoFileInMemory phondata_new = phondata_to_bigendian (phondata, manifest);
355 	my files -> replaceItem_move (phondata_new.move(), index);
356 
357 	MelderString_empty (& file);
358 	MelderString_append (& file, Melder_peek8to32 (PATH_ESPEAK_DATA), U"/phontab");
359 	index = FileInMemorySet_lookUp (my files.get(), file.string);
360 	Melder_require (index > 0, U"phonindex not present.");
361 	FileInMemory phontab = (FileInMemory) my files -> at [index];
362 
363 	autoFileInMemory phontab_new = phontab_to_bigendian (phontab);
364 	my files -> replaceItem_move (phontab_new.move(), index);
365 
366 	MelderString_empty (& file);
367 	MelderString_append (& file, Melder_peek8to32 (PATH_ESPEAK_DATA), U"/phonindex");
368 	index = FileInMemorySet_lookUp (my files.get(), file.string);
369 	Melder_require (index > 0, U"phonindex not present.");
370 	FileInMemory phonindex = (FileInMemory) my files -> at [index];
371 
372 	autoFileInMemory phonindex_new = phonindex_to_bigendian (phonindex);
373 	my files -> replaceItem_move (phonindex_new.move(), index);
374 }
375 
376 /* End of file espeak_io.cpp */
377