1 /* Data.cpp
2  *
3  * Copyright (C) 1992-2018,2021 Paul Boersma
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13  * See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "Collection.h"
20 
/*
	Implements the class Daata with parent class Thing, through the project's Thing_implement macro.
	NOTE(review): the third argument (0) is presumably the class version number — confirm in Thing.h.
*/
Thing_implement (Daata, Thing, 0);
22 
/*
	structDaata's own implementation of v_copy(): it copies nothing into the target object.
	_Data_copy() calls this method on the source object, passing the freshly created target.
*/
void structDaata :: v_copy (Daata /* thee */) {
}
25 
/*
	structDaata's own implementation of v_equal(): it always reports equality.
	Data_equal() falls back on this method when the shallow byte comparison found a difference.
*/
bool structDaata :: v_equal (Daata /* thee */) {
	return true;
}   // names of "identical" objects are allowed to be different
29 
/*
	structDaata's own implementation of v_canWriteAsEncoding():
	it accepts every encoding, i.e. it returns true whatever `encoding` is.
*/
bool structDaata :: v_canWriteAsEncoding (int /* encoding */) {
	return true;
}
33 
/*
	structDaata's own implementation of v_writeText(): it writes nothing to the open file.
*/
void structDaata :: v_writeText (MelderFile /* openFile */) {
}
36 
/*
	structDaata's own implementation of v_readText(): it reads nothing from the text
	and ignores the format version.
*/
void structDaata :: v_readText (MelderReadText, int /* formatVersion */) {
}
39 
/*
	structDaata's own implementation of v_writeBinary(): it writes nothing to the stream.
*/
void structDaata :: v_writeBinary (FILE *) {
}
42 
/*
	structDaata's own implementation of v_readBinary(): it reads nothing from the stream
	and ignores the format version.
*/
void structDaata :: v_readBinary (FILE *, int /*formatVersion*/) {
}
45 
_Data_copy(Daata me)46 autoDaata _Data_copy (Daata me) {
47 	try {
48 		if (! me) return autoDaata();
49 		autoDaata thee = Thing_newFromClass (my classInfo).static_cast_move <structDaata> ();
50 		my v_copy (thee.get());
51 		Thing_setName (thee.get(), my name.get());
52 		return thee;
53 	} catch (MelderError) {
54 		Melder_throw (me, U": not copied.");
55 	}
56 }
57 
Data_equal(Daata me,Daata thee)58 bool Data_equal (Daata me, Daata thee) {
59 	if (my classInfo != thy classInfo) return false;   // different class: not equal
60 	int offset = sizeof (struct structDaata);   // we already compared the methods, and are going to skip the names
61 	if (! memcmp ((char *) me + offset, (char *) thee + offset, my classInfo -> size - offset))   // BUG: not necessarily portable
62 		return true;   // no shallow differences
63 	return my v_equal (thee);
64 }
65 
Data_canWriteAsEncoding(Daata me,int encoding)66 bool Data_canWriteAsEncoding (Daata me, int encoding) {
67 	return my v_canWriteAsEncoding (encoding);
68 }
69 
Data_canWriteText(Daata me)70 bool Data_canWriteText (Daata me) {
71 	return my v_writable ();
72 }
73 
Data_writeText(Daata me,MelderFile openFile)74 void Data_writeText (Daata me, MelderFile openFile) {
75 	my v_writeText (openFile);
76 	if (ferror (openFile -> filePointer))
77 		Melder_throw (U"I/O error.");
78 }
79 
Data_createTextFile(Daata me,MelderFile file,bool verbose)80 MelderFile Data_createTextFile (Daata me, MelderFile file, bool verbose) {
81 	autoMelderFile mfile = MelderFile_create (file);
82 	#if defined (_WIN32)
83 		file -> requiresCRLF = true;
84 	#endif
85 	file -> verbose = verbose;
86 	file -> outputEncoding = (int) Melder_getOutputEncoding ();
87 	if (file -> outputEncoding == (int) kMelder_textOutputEncoding::ASCII_THEN_UTF16)
88 		file -> outputEncoding = Data_canWriteAsEncoding (me, kMelder_textOutputEncoding_ASCII) ?
89 			kMelder_textOutputEncoding_ASCII : (int) kMelder_textOutputEncoding::UTF16;
90 	else if (file -> outputEncoding == (int) kMelder_textOutputEncoding::ISO_LATIN1_THEN_UTF16)
91 		file -> outputEncoding = Data_canWriteAsEncoding (me, kMelder_textOutputEncoding_ISO_LATIN1) ?
92 			kMelder_textOutputEncoding_ISO_LATIN1 : (int) kMelder_textOutputEncoding::UTF16;
93 	if (file -> outputEncoding == (int) kMelder_textOutputEncoding::UTF16) {
94 		binputu16 (0xfeff, file -> filePointer);
95 	}
96 	return mfile.transfer();
97 }
98 
_Data_writeToTextFile(Daata me,MelderFile file,bool verbose)99 static void _Data_writeToTextFile (Daata me, MelderFile file, bool verbose) {
100 	try {
101 		if (! Data_canWriteText (me))
102 			Melder_throw (U"Objects of class ", my classInfo -> className, U" cannot be written to a text file.");
103 		autoMelderFile mfile = Data_createTextFile (me, file, verbose);
104 		#ifndef _WIN32
105 			flockfile (file -> filePointer);   // BUG
106 		#endif
107 		MelderFile_write (file, U"File type = \"ooTextFile\"\nObject class = \"", my classInfo -> className);
108 		if (my classInfo -> version > 0)
109 			MelderFile_write (file, U" ", my classInfo -> version);
110 		MelderFile_write (file, U"\"\n");
111 		Data_writeText (me, file);
112 		MelderFile_writeCharacter (file, U'\n');
113 		#ifndef _WIN32
114 			if (file -> filePointer) funlockfile (file -> filePointer);
115 		#endif
116 		mfile.close ();
117 	} catch (MelderError) {
118 		#ifndef _WIN32
119 			if (file -> filePointer) funlockfile (file -> filePointer);   // the file pointer is null before Data_createTextFile() and after mfile.close()
120 		#endif
121 		throw;
122 	}
123 }
124 
Data_writeToTextFile(Daata me,MelderFile file)125 void Data_writeToTextFile (Daata me, MelderFile file) {
126 	try {
127 		_Data_writeToTextFile (me, file, true);
128 	} catch (MelderError) {
129 		Melder_throw (me, U": not written to text file ", file, U".");
130 	}
131 }
132 
Data_writeToShortTextFile(Daata me,MelderFile file)133 void Data_writeToShortTextFile (Daata me, MelderFile file) {
134 	try {
135 		_Data_writeToTextFile (me, file, false);
136 	} catch (MelderError) {
137 		Melder_throw (me, U": not written to short text file ", file, U".");
138 	}
139 }
140 
Data_canWriteBinary(Daata me)141 bool Data_canWriteBinary (Daata me) {
142 	return my v_writable ();
143 }
144 
Data_writeBinary(Daata me,FILE * f)145 void Data_writeBinary (Daata me, FILE *f) {
146 	my v_writeBinary (f);
147 	if (ferror (f))
148 		Melder_throw (U"I/O error.");
149 }
150 
Data_writeToBinaryFile(Daata me,MelderFile file)151 void Data_writeToBinaryFile (Daata me, MelderFile file) {
152 	try {
153 		if (! Data_canWriteBinary (me))
154 			Melder_throw (U"Objects of class ", my classInfo -> className, U" cannot be written to a generic binary file.");
155 		autoMelderFile mfile = MelderFile_create (file);
156 		if (fprintf (file -> filePointer, "ooBinaryFile") < 0)
157 			Melder_throw (U"Cannot write first bytes of file.");
158 		binputw8 (
159 			my classInfo -> version > 0 ?
160 				Melder_cat (my classInfo -> className, U" ", my classInfo -> version) :
161 				my classInfo -> className,
162 			file -> filePointer);
163 		Data_writeBinary (me, file -> filePointer);
164 		mfile.close ();
165 	} catch (MelderError) {
166 		Melder_throw (me, U": not written to binary file ", file, U".");
167 	}
168 }
169 
Data_canReadText(Daata me)170 bool Data_canReadText (Daata me) {
171 	return my v_writable ();
172 }
173 
Data_readText(Daata me,MelderReadText text,int formatVersion)174 void Data_readText (Daata me, MelderReadText text, int formatVersion) {
175 	try {
176 		my v_readText (text, formatVersion);
177 		my v_repair ();
178 	} catch (MelderError) {
179 		Melder_throw (Thing_className (me), U" not read.");
180 	}
181 }
182 
Data_readFromTextFile(MelderFile file)183 autoDaata Data_readFromTextFile (MelderFile file) {
184 	try {
185 		autoMelderReadText text = MelderReadText_createFromFile (file);
186 		const mutablestring32 line = MelderReadText_readLine (text.get());
187 		if (! line)
188 			Melder_throw (U"No lines.");
189 		/*
190 			Allow for a future version of text files (we have no plans).
191 			This check was written on 2017-09-10.
192 			See below at `Data_readFromBinaryFile` for a more serious proposal.
193 		*/
194 		if (str32str (line, U"ooText2File"))
195 			Melder_throw (U"This Praat version cannot read this Praat file. Please download a newer version of Praat.");
196 		char32 *end = str32str (line, U"ooTextFile");   // oo format?
197 		autoDaata me;
198 		int formatVersion;
199 		if (end) {
200 			autostring32 klas = texgetw16 (text.get());
201 			me = Thing_newFromClassName (klas.get(), & formatVersion).static_cast_move <structDaata> ();
202 		} else {
203 			end = str32str (line, U"TextFile");
204 			if (! end)
205 				Melder_throw (U"Not an old-type text file; should not occur.");
206 			*end = U'\0';
207 			me = Thing_newFromClassName (line, nullptr).static_cast_move <structDaata> ();
208 			formatVersion = -1;   // old version
209 		}
210 		MelderFile_getParentDir (file, & Data_directoryBeingRead);
211 		Data_readText (me.get(), text.get(), formatVersion);
212 		file -> format = structMelderFile :: Format :: text;
213 		return me;
214 	} catch (MelderError) {
215 		Melder_throw (U"Data not read from text file ", file, U".");
216 	}
217 }
218 
Data_canReadBinary(Daata me)219 bool Data_canReadBinary (Daata me) {
220 	return my v_writable ();
221 }
222 
Data_readBinary(Daata me,FILE * f,int formatVersion)223 void Data_readBinary (Daata me, FILE *f, int formatVersion) {
224 	try {
225 		my v_readBinary (f, formatVersion);
226 		if (feof (f))
227 			Melder_throw (U"Early end of file.");
228 		if (ferror (f))
229 			Melder_throw (U"I/O error.");
230 		my v_repair ();
231 	} catch (MelderError) {
232 		Melder_throw (Thing_className (me), U" not read.");
233 	}
234 }
235 
Data_readFromBinaryFile(MelderFile file)236 autoDaata Data_readFromBinaryFile (MelderFile file) {
237 	try {
238 		autofile f = Melder_fopen (file, "rb");
239 		char line [200];
240 		size_t n = fread (line, 1, 199, f); line [n] = '\0';
241 		/*
242 			Allow for a future version of binary files, which can handle 64-bit integers
243 			and are perhaps written in little-endian format.
244 			This check was written on 2017-09-10, and should stay for at least a year;
245 			ooBinary2 files can therefore be implemented from some moment after 2018-09-10.
246 			Please compare with `Data_readFromTextFile` above.
247 		*/
248 		if (strstr (line, "ooBinary2File"))
249 			Melder_throw (U"This Praat version cannot read this Praat file. Please download a newer version of Praat.");
250 		char *end = strstr (line, "ooBinaryFile");
251 		autoDaata me;
252 		int formatVersion;
253 		if (end) {
254 			fseek (f, strlen ("ooBinaryFile"), 0);
255 			autostring8 klas = bingets8 (f);
256 			me = Thing_newFromClassName (Melder_peek8to32 (klas.get()), & formatVersion).static_cast_move <structDaata> ();
257 		} else {
258 			end = strstr (line, "BinaryFile");
259 			if (! end) {
260 				Melder_throw (U"File ", file, U" is not a Data binary file.");
261 			}
262 			*end = '\0';
263 			me = Thing_newFromClassName (Melder_peek8to32 (line), nullptr).static_cast_move <structDaata> ();
264 			formatVersion = -1;   // old version: override version number, which was set to 0 by newFromClassName
265 			rewind (f);
266 			fread (line, 1, (size_t) (end - line) + strlen ("BinaryFile"), f);
267 		}
268 		MelderFile_getParentDir (file, & Data_directoryBeingRead);
269 		Data_readBinary (me.get(), f, formatVersion);
270 		file -> format = structMelderFile :: Format :: binary;
271 		f.close (file);
272 		return me;
273 	} catch (MelderError) {
274 		Melder_throw (U"Data not read from binary file ", file, U".");
275 	}
276 }
277 
/*
	The publish procedure that is in effect as long as no other one has been installed
	with Data_setPublishProc(): it ignores the object and returns 0.
*/
static int defaultPublish (autoDaata /* me */) {
	return 0;   // nothing published
}

/*
	The publish procedure currently in effect; called by Data_publish().
*/
static int (*thePublish) (autoDaata) = defaultPublish;
283 
Data_publish(autoDaata me)284 int Data_publish (autoDaata me) {
285 	return thePublish (me.move());
286 }
287 
Data_setPublishProc(int (* publish)(autoDaata))288 void Data_setPublishProc (int (*publish) (autoDaata)) {
289 	thePublish = publish ? publish : defaultPublish;
290 }
291 
292 /* Generic reading. */
293 
294 static int numFileTypeRecognizers = 0;
295 static Data_FileTypeRecognizer fileTypeRecognizers [100];
Data_recognizeFileType(Data_FileTypeRecognizer recognizer)296 void Data_recognizeFileType (Data_FileTypeRecognizer recognizer) {
297 	Melder_assert (numFileTypeRecognizers < 100);
298 	fileTypeRecognizers [++ numFileTypeRecognizers] = recognizer;
299 }
300 
Data_readFromFile(MelderFile file)301 autoDaata Data_readFromFile (MelderFile file) {
302 	char header [513];
303 	autofile f = Melder_fopen (file, "rb");
304 	size_t nread_u = fread (& header [0], 1, 512, f);
305 	integer nread = (integer) nread_u;   // we know it cannot be more than 512
306 	f.close (file);
307 	header [nread] = 0;
308 
309 	/*
310 		Possibility 1: is this file a text file as defined in Data.cpp?
311 	*/
312 
313 	if (nread > 11) {
314 		int numberOfBytesInFileType = 0;
315 		char *p = strstr (header, "TextFile");
316 		if (p) {
317 			numberOfBytesInFileType = 8;
318 		} else {
319 			p = strstr (header, "Text2File");   // future version?
320 			numberOfBytesInFileType = 9;
321 		}
322 		if (p && p - header < nread - numberOfBytesInFileType && p - header < 40)
323 			return Data_readFromTextFile (file);
324 	}
325 	if (nread > 22) {
326 		char headerCopy [101];
327 		memcpy (headerCopy, header, 100);
328 		headerCopy [100] = '\0';
329 		for (int i = 0; i < 100; i ++)
330 			if (headerCopy [i] == '\0')
331 				headerCopy [i] = '\001';
332 		char *p = strstr (headerCopy, "T\001e\001x\001t\001F\001i\001l\001e");
333 		if (p && p - headerCopy < nread - 15 && p - headerCopy < 80)
334 			return Data_readFromTextFile (file);
335 	}
336 
337 	/*
338 		Possibility 2: is this file a binary file as defined in Data.cpp?
339 	*/
340 
341 	if (nread > 13) {
342 		int numberOfBytesInFileType = 0;
343 		char *p = strstr (header, "BinaryFile");
344 		if (p) {
345 			numberOfBytesInFileType = 10;
346 		} else {
347 			p = strstr (header, "Binary2File");   // future version
348 			numberOfBytesInFileType = 11;
349 		}
350 		if (p && p - header < nread - numberOfBytesInFileType && p - header < 40)
351 			return Data_readFromBinaryFile (file);
352 	}
353 
354 	/*
355 		Possibility 3: is this file of a type for which a recognizer has been installed?
356 	*/
357 
358 	MelderFile_getParentDir (file, & Data_directoryBeingRead);
359 	for (int i = 1; i <= numFileTypeRecognizers; i ++) {
360 		autoDaata object = fileTypeRecognizers [i] (nread, header, file);
361 		if (object) {
362 			if (object -> classInfo == classDaata)   // dummy object? the recognizer could have had a side effect, such as drawing a picture
363 				return autoDaata ();   // a null return on recognized non-data!
364 			return object;
365 		}
366 	}
367 
368 	/*
369 		Possibility 4: is this a common text file?
370 	*/
371 
372 	int i = 0;
373 	for (; i < nread; i ++)
374 		if (header [i] < 32 || header [i] > 126)   // not ASCII? (note: this expression happens to work correctly for both signed and unsigned char)
375 			break;
376 	if (i >= nread)
377 		return Data_readFromTextFile (file);
378 
379 	Melder_throw (U"File ", file, U" not recognized.");
380 }
381 
382 /* Recursive routines for working with struct members. */
383 
Data_Description_countMembers(Data_Description structDescription)384 int Data_Description_countMembers (Data_Description structDescription) {
385 	int count = 0;
386 	for (Data_Description desc = structDescription; desc -> name; desc ++)
387 		count ++;
388 	if (structDescription [0]. type == inheritwa) {
389 		Data_Description parentDescription = ((Daata) _Thing_dummyObject ((ClassInfo) structDescription [0]. tagType)) -> v_description ();
390 		if (parentDescription)
391 			return count + Data_Description_countMembers (parentDescription);
392 	}
393 	return count;
394 }
395 
Data_Description_findMatch(Data_Description structDescription,conststring32 name)396 Data_Description Data_Description_findMatch (Data_Description structDescription, conststring32 name) {
397 	for (Data_Description desc = structDescription; desc -> name; desc ++)
398 		if (str32equ (name, desc -> name)) return desc;
399 	if (structDescription [0]. type == inheritwa) {
400 		Data_Description parentDescription = ((Daata) _Thing_dummyObject ((ClassInfo) structDescription [0]. tagType)) -> v_description ();
401 		if (parentDescription)
402 			return Data_Description_findMatch (parentDescription, name);
403 	}
404 	return nullptr;   // not found
405 }
406 
Data_Description_findNumberUse(Data_Description structDescription,conststring32 string)407 Data_Description Data_Description_findNumberUse (Data_Description structDescription, conststring32 string) {
408 	for (Data_Description desc = structDescription; desc -> name; desc ++) {
409 		if (desc -> max1 && str32equ (desc -> max1, string)) return desc;
410 		if (desc -> max2 && str32equ (desc -> max2, string)) return desc;
411 	}
412 	if (structDescription [0]. type == inheritwa) {
413 		Data_Description parentDescription = ((Daata) _Thing_dummyObject ((ClassInfo) structDescription [0]. tagType)) -> v_description ();
414 		if (parentDescription)
415 			return Data_Description_findNumberUse (parentDescription, string);
416 	}
417 	return nullptr;
418 }
419 
420 /* Retrieving data from object + description. */
421 
Data_Description_integer(void * address,Data_Description description)422 int64 Data_Description_integer (void *address, Data_Description description) {
423 	switch (description -> type) {
424 		case bytewa:           return * (signed char *)      ((char *) address + description -> offset);
425 		case int16wa:          return * (int16 *)            ((char *) address + description -> offset);
426 		case intwa:            return * (int *)              ((char *) address + description -> offset);
427 		case integerwa:        return * (integer *)          ((char *) address + description -> offset);
428 		case ubytewa:          return * (unsigned char *)    ((char *) address + description -> offset);
429 		case uintwa:           return * (unsigned int *)     ((char *) address + description -> offset);
430 		case uintegerwa:       return (int64) * (uinteger *) ((char *) address + description -> offset);   // ignore numbers above 2^63 - 1
431 		case questionwa:       return * (bool *)             ((char *) address + description -> offset);
432 		case objectwa:         return (* (Collection *)      ((char *) address + description -> offset))->size;   // FIXME: alignment not guaranteed
433 		case collectionofwa:   return (  (Collection)        ((char *) address + description -> offset))->size;   // FIXME: alignment not guaranteed
434 		case collectionwa:     return (* (Collection *)      ((char *) address + description -> offset))->size;   // FIXME: alignment not guaranteed
435 		default: return 0;
436 	}
437 }
438 
Data_Description_evaluateInteger(void * structAddress,Data_Description structDescription,conststring32 formula,integer * result)439 int Data_Description_evaluateInteger (void *structAddress, Data_Description structDescription,
440 	conststring32 formula, integer *result)
441 {
442 	if (! formula) {   // this was a VECTOR_FROM array
443 		*result = 1;
444 		return 1;
445 	}
446 	if (formula [0] >= U'a' && formula [0] <= U'z') {
447 		char32 buffer [100], *minus1, *plus1, *psize;
448 		Data_Description sizeDescription;
449 		str32cpy (buffer, formula);
450 		if ((minus1 = str32str (buffer, U" - 1")) != nullptr)
451 			*minus1 = U'\0';   // strip trailing " - 1", but remember
452 		if ((plus1 = str32str (buffer, U" + 1")) != nullptr)
453 			*plus1 = U'\0';   // strip trailing " + 1", but remember
454 		if ((psize = str32str (buffer, U" -> size")) != nullptr)
455 			*psize = U'\0';   // strip trailing " -> size"
456 		if (! (sizeDescription = Data_Description_findMatch (structDescription, buffer))) {
457 			*result = 0;
458 			return 0 /*Melder_error ("Cannot find member \"%ls\".", buffer)*/;
459 		}
460 		*result = Data_Description_integer (structAddress, sizeDescription);
461 		if (minus1) *result -= 1;
462 		if (plus1) *result += 1;
463 	} else {
464 		*result = Melder_atoi (formula);
465 	}
466 	return 1;
467 }
468 
469 /* End of file Data.cpp */
470