1 /* melder_files.cpp
2  *
3  * Copyright (C) 1992-2008,2010-2020 Paul Boersma, 2013 Tom Naughton
4  *
5  * This code is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This code is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13  * See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this work. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 /*
20  * pb 2002/03/07 GPL
21  * rvs&pb 2002/03/07 url support
22  * pb 2002/03/10 Mach compatibility
23  * pb 2003/09/12 MelderFile_getMacType
24  * pb 2003/09/14 MelderDir_relativePathToFile
25  * pb 2004/09/25 use /tmp as temporary directory
26  * pb 2004/10/16 C++ compatible structs
27  * pb 2005/11/07 Windows: use %USERPROFILE% rather than %HOMESHARE%%HOMEPATH%
28  * rvs&pb 2005/11/18 url support
29  * pb 2006/01/21 MelderFile_writeText does not create temporary file
30  * pb 2006/08/03 openForWriting
31  * rvs 2006/08/12 curl: do not fail on error
32  * pb 2006/08/12 check whether unicodeName exists
33  * pb 2006/10/28 erased MacOS 9 stuff
34  * Erez Volk 2007/05/14 FLAC support
35  * pb 2007/05/28 wchar
36  * pb 2007/06/09 more wchar
37  * pb 2007/08/12 more wchar
38  * pb 2007/10/05 FSFindFolder
39  * pb 2008/11/01 warn after finding final tabs (not just spaces) in file names
40  * pb 2010/12/14 more high Unicode compatibility
41  * pb 2011/04/05 C++
42  */
43 
44 #if defined (UNIX)
45 	#include <unistd.h>
46 	#include <sys/stat.h>
47 #endif
48 #if defined (CURLPRESENT)
49 	#include <curl/curl.h>
50 #endif
51 #ifdef _WIN32
52 	#include <windows.h>
53 #endif
54 #if defined (macintosh)
55 	#include "macport_on.h"
56 	#include <CoreFoundation/CoreFoundation.h>
57 	#include "macport_off.h"
58 #endif
59 #include <errno.h>
60 #include "melder.h"
61 #include "../kar/UnicodeData.h"
62 
63 //#include "flac_FLAC_stream_encoder.h"
64 extern "C" int  FLAC__stream_encoder_finish (FLAC__StreamEncoder *);
65 extern "C" void FLAC__stream_encoder_delete (FLAC__StreamEncoder *);
66 
67 #if defined (macintosh)
68 	#include <sys/stat.h>
69 	#define UNIX
70 	#include <unistd.h>
71 #endif
72 
73 static char32 theShellDirectory [kMelder_MAXPATH+1];
Melder_rememberShellDirectory()74 void Melder_rememberShellDirectory () {
75 	structMelderDir shellDir { };
76 	Melder_getDefaultDir (& shellDir);
77 	str32cpy (theShellDirectory, Melder_dirToPath (& shellDir));
78 }
Melder_getShellDirectory()79 conststring32 Melder_getShellDirectory () {
80 	return & theShellDirectory [0];
81 }
82 
83 #if defined (UNIX)
Melder_8bitFileRepresentationToStr32_inplace(const char * path8,char32 * path32)84 void Melder_8bitFileRepresentationToStr32_inplace (const char *path8, char32 *path32) {
85 	#if defined (macintosh)
86 		CFStringRef cfpath = CFStringCreateWithCString (nullptr, path8, kCFStringEncodingUTF8);
87 		if (! cfpath) {
88 			/*
89 				Probably something wrong, like a disk was disconnected in the meantime.
90 			*/
91 			try {
92 				Melder_8to32_inplace (path8, path32, kMelder_textInputEncoding::UTF8);
93 			} catch (MelderError) {
94 				Melder_8to32_inplace (path8, path32, kMelder_textInputEncoding::MACROMAN);   // cannot fail
95 				Melder_throw (U"Unusual error finding or creating file <<", path32, U">> (MacRoman).");
96 			}
97 			Melder_throw (U"Unusual error finding or creating file ", path32, U".");
98 		}
99 		CFMutableStringRef cfpath2 = CFStringCreateMutableCopy (nullptr, 0, cfpath);
100 		CFRelease (cfpath);
101 		CFStringNormalize (cfpath2, kCFStringNormalizationFormC);   // Praat requires composed characters
102 		integer n_utf16 = CFStringGetLength (cfpath2);
103 		integer n_utf32 = 0;
104 		for (integer i = 0; i < n_utf16; i ++) {
105 			char32 kar1 = CFStringGetCharacterAtIndex (cfpath2, i);
106 			if (kar1 >= 0x00'D800 && kar1 <= 0x00'DBFF) {
107 				char32 kar2 = (char32) CFStringGetCharacterAtIndex (cfpath2, ++ i);   // convert up
108 				if (kar2 >= 0x00'DC00 && kar2 <= 0x00'DFFF) {
109 					kar1 = (((kar1 & 0x3FF) << 10) | (kar2 & 0x3FF)) + 0x01'0000;
110 				} else {
111 					kar1 = UNICODE_REPLACEMENT_CHARACTER;
112 				}
113 			}
114 			path32 [n_utf32 ++] = kar1;
115 		}
116 		path32 [n_utf32] = U'\0';
117 		CFRelease (cfpath2);
118 	#else
119 		Melder_8to32_inplace (path8, path32, kMelder_textInputEncoding::UTF8);
120 	#endif
121 }
122 #endif
123 
MelderFile_name(MelderFile file)124 conststring32 MelderFile_name (MelderFile file) {
125 	#if defined (UNIX)
126 		char32 *slash = str32rchr (file -> path, U'/');
127 		return slash ? slash + 1 : file -> path;
128 	#elif defined (_WIN32)
129 		char32 *backslash = str32rchr (file -> path, U'\\');
130 		return backslash ? backslash + 1 : file -> path;
131 	#else
132 		return nullptr;
133 	#endif
134 }
135 
MelderDir_name(MelderDir dir)136 conststring32 MelderDir_name (MelderDir dir) {
137 	#if defined (UNIX)
138 		char32 *slash = str32rchr (dir -> path, U'/');
139 		return slash ? slash + 1 : dir -> path;
140 	#elif defined (_WIN32)
141 		char32 *backslash = str32rchr (dir -> path, U'\\');
142 		return backslash ? backslash + 1 : dir -> path;
143 	#else
144 		return nullptr;
145 	#endif
146 }
147 
Melder_pathToDir(conststring32 path,MelderDir dir)148 void Melder_pathToDir (conststring32 path, MelderDir dir) {
149 	Melder_sprint (dir -> path,kMelder_MAXPATH+1, path);
150 }
151 
Melder_pathToFile(conststring32 path,MelderFile file)152 void Melder_pathToFile (conststring32 path, MelderFile file) {
153 	/*
154 	 * This handles complete path names only.
155 	 *
156 	 * Used if we know for sure that we have a complete path name,
157 	 * i.e. if the program determined the name (fileselector, printing, prefs).
158 	 */
159 	Melder_sprint (file -> path,kMelder_MAXPATH+1, path);
160 }
161 
/*
	Converts a complete or partial path name into a file path.

	path: the path as typed by the user; may start with "~/", may be relative,
		may be "<stdout>", and may be a URL (contains "://").
	file: receives the resulting path in file -> path.

	A partial path is completed with the directory obtained from Melder_getDefaultDir().
*/
void Melder_relativePathToFile (conststring32 path, MelderFile file) {
	/*
	 * This handles complete and partial path names,
	 * and translates slashes to native directory separators.
	 *
	 * Used if we do not know for sure that we have a complete path name,
	 * i.e. if the user determined the name (scripting).
	 */
	#if defined (UNIX)
		/*
		 * We assume that Unix complete path names start with a slash.
		 */
		if (path [0] == U'~' && path [1] == U'/') {
			// replace the tilde with the value of the HOME environment variable; the slash after the tilde is kept
			Melder_sprint (file -> path,kMelder_MAXPATH+1, Melder_peek8to32 (getenv ("HOME")), & path [1]);
		} else if (path [0] == U'/' || str32equ (path, U"<stdout>") || str32str (path, U"://")) {
			// complete path, standard output, or URL: copy unchanged
			Melder_sprint (file -> path,kMelder_MAXPATH+1, path);
		} else {
			// partial path: prepend the default directory, taking care not to produce a double slash for the root
			structMelderDir dir { };
			Melder_getDefaultDir (& dir);   // BUG
			if (dir. path [0] == U'/' && dir. path [1] == U'\0') {
				Melder_sprint (file -> path,kMelder_MAXPATH+1, U"/", path);
			} else {
				Melder_sprint (file -> path,kMelder_MAXPATH+1, dir. path, U"/", path);
			}
		}
	#elif defined (_WIN32)
		/*
		 * We assume that Win32 complete path names look like:
		 *    C:\WINDOWS\CTRL32.DLL
		 *    LPT1:
		 *    \\host\path
		 */
		structMelderDir dir { };
		if (path [0] == U'~' && path [1] == U'/') {
			// replace the tilde with the home directory, then turn all slashes into backslashes
			Melder_getHomeDir (& dir);
			Melder_sprint (file -> path,kMelder_MAXPATH+1, dir. path, & path [1]);
			for (;;) {
				char32 *slash = str32chr (file -> path, U'/');
				if (! slash)
					break;
				*slash = U'\\';
			}
			return;
		}
		if (str32chr (path, U'/') && ! str32str (path, U"://")) {
			// not a URL, but it contains slashes: convert them to backslashes and start again with the converted path
			char32 winPath [kMelder_MAXPATH+1];
			Melder_sprint (winPath,kMelder_MAXPATH+1, path);
			for (;;) {
				char32 *slash = str32chr (winPath, U'/');
				if (! slash) break;
				*slash = U'\\';
			}
			Melder_relativePathToFile (winPath, file);
			return;
		}
		if (str32chr (path, U':') || path [0] == U'\\' && path [1] == U'\\' || str32equ (path, U"<stdout>")) {
			// anything with a colon, anything starting with two backslashes, and standard output: copy unchanged
			Melder_sprint (file -> path,kMelder_MAXPATH+1, path);
		} else {
			// partial path: prepend the default directory, inserting a backslash unless it already ends in one
			Melder_getDefaultDir (& dir);   // BUG
			Melder_sprint (file -> path,kMelder_MAXPATH+1,
				dir. path,
				dir. path [0] != U'\0' && dir. path [str32len (dir. path) - 1] == U'\\' ? U"" : U"\\",
				path);
		}
	#endif
}
228 
Melder_dirToPath(MelderDir dir)229 conststring32 Melder_dirToPath (MelderDir dir) {
230 	return & dir -> path [0];
231 }
232 
Melder_fileToPath(MelderFile file)233 conststring32 Melder_fileToPath (MelderFile file) {
234 	return & file -> path [0];
235 }
236 
MelderFile_copy(MelderFile file,MelderFile copy)237 void MelderFile_copy (MelderFile file, MelderFile copy) {
238 	str32cpy (copy -> path, file -> path);
239 }
240 
MelderDir_copy(MelderDir dir,MelderDir copy)241 void MelderDir_copy (MelderDir dir, MelderDir copy) {
242 	str32cpy (copy -> path, dir -> path);
243 }
244 
MelderFile_equal(MelderFile file1,MelderFile file2)245 bool MelderFile_equal (MelderFile file1, MelderFile file2) {
246 	return str32equ (file1 -> path, file2 -> path);
247 }
248 
MelderDir_equal(MelderDir dir1,MelderDir dir2)249 bool MelderDir_equal (MelderDir dir1, MelderDir dir2) {
250 	return str32equ (dir1 -> path, dir2 -> path);
251 }
252 
MelderFile_setToNull(MelderFile file)253 void MelderFile_setToNull (MelderFile file) {
254 	file -> path [0] = U'\0';
255 }
256 
MelderFile_isNull(MelderFile file)257 bool MelderFile_isNull (MelderFile file) {
258 	return ! file || file -> path [0] == U'\0';
259 }
260 
MelderDir_setToNull(MelderDir dir)261 void MelderDir_setToNull (MelderDir dir) {
262 	dir -> path [0] = U'\0';
263 }
264 
MelderDir_isNull(MelderDir dir)265 bool MelderDir_isNull (MelderDir dir) {
266 	return dir -> path [0] == U'\0';
267 }
268 
MelderDir_getFile(MelderDir parent,conststring32 fileName,MelderFile file)269 void MelderDir_getFile (MelderDir parent, conststring32 fileName, MelderFile file) {
270 	#if defined (UNIX)
271 		if (parent -> path [0] == U'/' && parent -> path [1] == U'\0') {
272 			Melder_sprint (file -> path,kMelder_MAXPATH+1, U"/", fileName);
273 		} else {
274 			Melder_sprint (file -> path,kMelder_MAXPATH+1, parent -> path, U"/", fileName);
275 		}
276 	#elif defined (_WIN32)
277 		if (str32rchr (file -> path, U'\\') - file -> path == str32len (file -> path) - 1) {
278 			Melder_sprint (file -> path,kMelder_MAXPATH+1, parent -> path, fileName);
279 		} else {
280 			Melder_sprint (file -> path,kMelder_MAXPATH+1, parent -> path, U"\\", fileName);
281 		}
282 	#endif
283 }
284 
MelderDir_relativePathToFile(MelderDir dir,conststring32 path,MelderFile file)285 void MelderDir_relativePathToFile (MelderDir dir, conststring32 path, MelderFile file) {
286 	autoMelderSetDefaultDir saveDir (dir);
287 	Melder_relativePathToFile (path, file);
288 }
289 
290 #ifndef UNIX
Melder_getDesktop(MelderDir dir)291 static void Melder_getDesktop (MelderDir dir) {
292 	dir -> path [0] = U'\0';
293 }
294 #endif
295 
/*
	Determines the directory that contains `file`.

	file: the file whose path is inspected (it is not modified).
	parent: receives the path of the containing directory;
		the file's path is copied into it and then cut off at the appropriate separator.

	NOTE(review): on Unix-like systems, a file path without any slash
	is copied to `parent` unchanged; check whether callers can pass such paths.
*/
void MelderFile_getParentDir (MelderFile file, MelderDir parent) {
	#if defined (UNIX)
		/*
			The parent of /usr/hello.txt is /usr.
			The parent of /hello.txt is /.
		*/
		str32cpy (parent -> path, file -> path);
		char32 *slash = str32rchr (parent -> path, U'/');
		if (slash)
			*slash = U'\0';
		if (parent -> path [0] == U'\0')   // we cut off at the leading slash, so the parent is the root
			str32cpy (parent -> path, U"/");
	#elif defined (_WIN32)
		/*
			The parent of C:\WINDOWS\CTRL.DLL is C:\WINDOWS.
			The parent of E:\Praat.exe is E:\.
			The parent of \\Swine\Apps\init.txt is \\Swine\Apps.
			The parent of \\Swine\init.txt is \\Swine\.   (BUG ?)
		*/
		str32cpy (parent -> path, file -> path);
		char32 *colon = str32chr (parent -> path, U':');
		if (colon) {
			char32 *backslash = str32rchr (parent -> path, U'\\');
			if (backslash) {   //   C:\WINDOWS\CTRL.DLL or C:\AUTOEXEC.BAT
				if (backslash - colon == 1) {   //   C:\AUTOEXEC.BAT
					* (backslash + 1) = U'\0';   //   C:\   -   !!! aargh this was a bug after converting this line to line comments
				} else {   //   C:\WINDOWS\CTRL.DLL
					*backslash = U'\0';   //   C:\WINDOWS
				}
			} else {   /* ??? */
				Melder_getDesktop (parent);   // empty string
			}
		} else if (parent -> path [0] == U'\\' && parent -> path [1] == U'\\') {
			// network path: look for backslashes only after the two leading ones
			char32 *backslash = str32rchr (parent -> path + 2, U'\\');
			if (backslash) {   //   \\Swine\Apps\init.txt or \\Swine\init.txt
				char32 *leftBackslash = str32chr (parent -> path + 2, U'\\');
				if (backslash - leftBackslash == 0) {   //   \\Swine\init.txt
					* (backslash + 1) = U'\0';   //   \\Swine\   -   !!! dear developer, don't delete this hyphen, lest the line ends in a backslash
				} else {   //   \\Swine\Apps\init.txt
					*backslash = U'\0';   //   \\Swine\Apps
				}
			} else {   //   \\init.txt   ???
				Melder_getDesktop (parent);   // empty string
			}
		} else {   // unknown path type
			Melder_getDesktop (parent);   // empty string
		}
	#endif
}
345 
/*
	Determines the directory that contains the directory `dir`.

	dir: the directory whose path is inspected (it is not modified).
	parent: receives the path of the containing directory;
		the path of `dir` is copied into it and then cut off at the appropriate separator.
		An empty path is stored if there is no parent or the path type is not recognized.
*/
void MelderDir_getParentDir (MelderDir dir, MelderDir parent) {
	#if defined (UNIX)
		/*
			The parent of /usr/local is /usr.
			The parent of /usr is /.
			The parent of / is "".
		*/
		str32cpy (parent -> path, dir -> path);
		char32 *slash = str32rchr (parent -> path, U'/');
		if (slash) {
			if (slash - parent -> path == 0) {   // the only slash is the leading one
				if (slash [1] == U'\0') {   // child is "/"
					parent -> path [0] = U'\0';   // parent is ""
				} else {   // child is "/usr"
					slash [1] = '\0';   // parent is "/"
				}
			} else {   // child is "/usr/local"
				*slash = U'\0';   // parent is "/usr"
			}
		} else {
			parent -> path [0] = U'\0';   // some failure; desktop
		}
	#elif defined (_WIN32)
		/*
			The parent of C:\WINDOWS is C:\.
			The parent of E:\ is the desktop.
			The parent of \\Swine\ is the desktop.   (BUG ?)
		*/
		str32cpy (parent -> path, dir -> path);
		char32 *colon = str32chr (parent -> path, U':');
		if (colon) {
			int length = str32len (parent -> path);
			char32 *backslash = str32rchr (parent -> path, U'\\');
			if (backslash) {   //   C:\WINDOWS\FONTS or C:\WINDOWS or C:\   - (cannot add a line comment with a backslash)
				if (backslash - parent -> path == length - 1) {   //   C:\   -
					Melder_getDesktop (parent);   // empty string
				} else if (backslash - colon == 1) {   //   C:\WINDOWS
					* (backslash + 1) = U'\0';   //   C:\   -
				} else {   //   C:\WINDOWS\FONTS
					*backslash = U'\0';   //   C:\WINDOWS
				}
			} else {   //   LPT1:   ???
				Melder_getDesktop (parent);   // empty string
			}
		} else if (parent -> path [0] == U'\\' && parent -> path [1] == U'\\') {
			// network path: look for backslashes only after the two leading ones
			int length = str32len (parent -> path);
			char32 *backslash = str32rchr (parent -> path + 2, U'\\');
			if (backslash) {   //   \\Swine\Apps\Praats or \\Swine\Apps or \\Swine\   -
				if (backslash - parent -> path == length - 1) {   //   \\Swine\   -
					Melder_getDesktop (parent);   // empty string
				} else {   //   \\Swine\Apps\Praats or \\Swine\Apps
					char32 *leftBackslash = str32chr (parent -> path + 2, U'\\');
					if (backslash - leftBackslash == 0) {   //   \\Swine\Apps
						* (backslash + 1) = U'\0';   //   \\Swine\   -
					} else {   //   \\Swine\Apps\Praats
						*backslash = U'\0';   //   \\Swine\Apps
					}
				}
			} else {   //   \\Swine   ???
				Melder_getDesktop (parent);   // empty string
			}
		} else {   // unknown path type.
			Melder_getDesktop (parent);   // empty string
		}
	#endif
}
412 
MelderDir_isDesktop(MelderDir dir)413 bool MelderDir_isDesktop (MelderDir dir) {
414 	return dir -> path [0] == U'\0';
415 }
416 
MelderDir_getSubdir(MelderDir parent,conststring32 subdirName,MelderDir subdir)417 void MelderDir_getSubdir (MelderDir parent, conststring32 subdirName, MelderDir subdir) {
418 	#if defined (UNIX)
419 		if (parent -> path [0] == U'/' && parent -> path [1] == U'\0') {
420 			Melder_sprint (subdir -> path,kMelder_MAXPATH+1, U"/", subdirName);
421 		} else {
422 			Melder_sprint (subdir -> path,kMelder_MAXPATH+1, parent -> path, U"/", subdirName);
423 		}
424 	#elif defined (_WIN32)
425 		int length = str32len (parent -> path);
426 		char32 *backslash = str32rchr (parent -> path, U'\\');
427 		if (backslash && backslash - parent -> path == length - 1) {   //   C:\ or \\Swine\   -
428 			Melder_sprint (subdir -> path, kMelder_MAXPATH+1, parent -> path, subdirName);
429 		} else {   //   C:\WINDOWS or \\Swine\Apps or even C:
430 			Melder_sprint (subdir -> path,kMelder_MAXPATH+1, parent -> path, U"\\", subdirName);
431 		}
432 	#endif
433 }
434 
Melder_getHomeDir(MelderDir homeDir)435 void Melder_getHomeDir (MelderDir homeDir) {
436 	#if defined (UNIX)
437 		char *home = getenv ("HOME");
438 		Melder_sprint (homeDir -> path,kMelder_MAXPATH+1, home ? Melder_peek8to32 (home) : U"/");
439 	#elif defined (_WIN32)
440 		WCHAR driveW [kMelder_MAXPATH+1], pathW [kMelder_MAXPATH+1];
441 		DWORD n = GetEnvironmentVariableW (L"USERPROFILE", pathW, kMelder_MAXPATH+1);
442 		if (n > kMelder_MAXPATH) Melder_throw (U"Home directory name too long.");
443 		if (n > 0) {
444 			Melder_sprint (homeDir -> path,kMelder_MAXPATH+1, Melder_peekWto32 (pathW));
445 			return;
446 		}
447 		n = GetEnvironmentVariableW (L"HOMEDRIVE", driveW, kMelder_MAXPATH+1);
448 		if (n > kMelder_MAXPATH)
449 			Melder_throw (U"Home drive name too long.");
450 		if (n > 0) {
451 			GetEnvironmentVariable (L"HOMEPATH", pathW, kMelder_MAXPATH+1);
452 			Melder_sprint (homeDir -> path,kMelder_MAXPATH+1, Melder_peekWto32 (driveW), Melder_peekWto32 (pathW));
453 			return;
454 		}
455 		MelderDir_setToNull (homeDir);   // Windows 95 and 98: alas
456 	#endif
457 }
458 
Melder_getPrefDir(MelderDir prefDir)459 void Melder_getPrefDir (MelderDir prefDir) {
460 	#if defined (macintosh)
461 		structMelderDir homeDir { };
462 		Melder_getHomeDir (& homeDir);
463 		Melder_sprint (prefDir -> path,kMelder_MAXPATH+1, homeDir. path, U"/Library/Preferences");
464 	#elif defined (UNIX)
465 		/*
466 		 * Preferences files go into the home directory.
467 		 */
468 		Melder_getHomeDir (prefDir);
469 	#elif defined (_WIN32)
470 		/*
471 		 * On Windows 95, preferences files went in the Windows directory.
472 		 * On shared systems (NT, 2000, XP), preferences files go into the home directory.
473 		 * TODO: at some point, these files should be moved to HOME\AppData\Roaming\Praat.
474 		 */
475 		Melder_getHomeDir (prefDir);
476 	#endif
477 }
478 
Melder_getTempDir(MelderDir tempDir)479 void Melder_getTempDir (MelderDir tempDir) {
480 	#if defined (macintosh)
481 		Melder_sprint (tempDir -> path,kMelder_MAXPATH+1, Melder_peek8to32 (getenv ("TMPDIR")));   // or append /TemporaryItems
482 		// confstr with _CS_DARWIN_USER_TEMP_DIR
483 	#else
484 		(void) tempDir;
485 	#endif
486 }
487 
488 #ifdef CURLPRESENT
/* Nonzero once curl_global_init() has been called; reset by Melder_files_cleanUp(). */
static int curl_initialized = 0;

/*
	Callback for CURLOPT_WRITEFUNCTION: writes the received data to a stream.

	buffer: the received data.
	size, nmemb: element size and element count, as in fwrite().
	userp: the FILE* that was registered for the transfer.

	Returns the result of fwrite(), i.e. the number of elements written.
*/
static size_t write_URL_data_to_file (void *buffer, size_t size, size_t nmemb, void *userp) {
	FILE *stream = (FILE *) userp;   // C++ does not convert void* to FILE* implicitly
	return fwrite (buffer, size, nmemb, stream);
}
/*
	Callback for CURLOPT_READFUNCTION: reads the data to be sent from a stream.

	buffer: receives the data.
	size, nmemb: element size and element count, as in fread().
	userp: the FILE* that was registered for the transfer.

	Returns the result of fread(), i.e. the number of elements read.
*/
static size_t read_URL_data_from_file (void *buffer, size_t size, size_t nmemb, void *userp) {
	FILE *stream = (FILE *) userp;   // C++ does not convert void* to FILE* implicitly
	return fread (buffer, size, nmemb, stream);
}
496 #endif
497 
Melder_fopen(MelderFile file,const char * type)498 FILE * Melder_fopen (MelderFile file, const char *type) {
499 	if (MelderFile_isNull (file)) Melder_throw (U"Cannot open null file.");
500 	if (! Melder_isTracing)
501 		Melder_assert (str32equ (Melder_double (1.5), U"1.5"));   // check locale settings; because of the required file portability Praat cannot stand "1,5"
502 	/*
503 	 * On the Unix-like systems (including MacOS), the path has to be converted to 8-bit characters in UTF-8 encoding.
504 	 * On MacOS, the characters also have to be decomposed.
505 	 * On Windows, the characters have to be precomposed.
506 	 */
507 	char utf8path [kMelder_MAXPATH+1];
508 	Melder_32to8_fileSystem_inplace (file -> path, utf8path);
509 	FILE *f;
510 	file -> openForWriting = ( type [0] == 'w' || type [0] == 'a' || strchr (type, '+') );
511 	if (str32equ (file -> path, U"<stdout>") && file -> openForWriting) {
512 		f = stdout;
513 	#ifdef CURLPRESENT
514 	} else if (strstr (utf8path, "://") && file -> openForWriting) {
515 		Melder_assert (type [0] == 'w');   // reject "append" and "random" access
516 		f = tmpfile ();   // open a temporary file for writing
517 	} else if (strstr (utf8path, "://") && ! file -> openForWriting) {
518 		CURLcode CURLreturn;
519 		CURL *CURLhandle;
520 		char errorbuffer [CURL_ERROR_SIZE] = "";
521 		f = tmpfile ();   // open a temporary file for writing
522 		if (! curl_initialized) {
523 			CURLreturn = curl_global_init (CURL_GLOBAL_ALL);
524 			curl_initialized = 1;
525 		};
526 		CURLhandle = curl_easy_init ();   // initialize session
527 		/*
528 		 * Set up the connection parameters.
529 		 */
530 		/* Debugging: Verbose messages */
531 		/* CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_VERBOSE, 1); */
532 		/* Do not fail on error. */
533 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_FAILONERROR, 0);
534 		/* Store error messages in a buffer. */
535 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_ERRORBUFFER, errorbuffer);
536 		/* The file stream to store the URL. */
537 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_FILE, f);
538 		/* The function to write to the file, necessary for Win32.	*/
539 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_WRITEFUNCTION, write_URL_data_to_file);
540 		/* The actual URL to handle.	*/
541 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_URL, utf8path);
542 		/* Get the URL and write it to the given file. */
543 		CURLreturn = curl_easy_perform (CURLhandle);
544 		/* Handle errors. */
545 		if (CURLreturn) {
546 			Melder_appendError (Melder_peek8to32 (errorbuffer));
547 			f = nullptr;
548 		};
549 		/* Clean up session. */
550 		curl_easy_cleanup (CURLhandle);
551 		/* Do something with the file. Why? */
552 		if (f) rewind (f);
553 	#endif
554 	} else {
555 		#if defined (_WIN32) && ! defined (__CYGWIN__)
556 			f = _wfopen (Melder_peek32toW_fileSystem (file -> path), Melder_peek32toW (Melder_peek8to32 (type)));
557 		#else
558 			f = fopen ((char *) utf8path, type);
559 		#endif
560 	}
561 	if (! f) {
562 		char32 *path = file -> path;
563 		Melder_appendError (
564 			( errno == EPERM ? U"No permission to " : U"Cannot " ),
565 			( type [0] == 'r' ? U"open" : type [0] == 'a' ? U"append to" : U"create" ),
566 			U" file ", file, U"."
567 		);
568 		if (path [0] == U'\0')
569 			Melder_appendError (U"Hint: empty file name.");
570 		else if (path [0] == U' ' || path [0] == U'\t')
571 			Melder_appendError (U"Hint: file name starts with a space or tab.");
572 		else if (path [str32len (path) - 1] == U' ' || path [str32len (path) - 1] == U'\t')
573 			Melder_appendError (U"Hint: file name ends in a space or tab.");
574 		else if (str32chr (path, U'\n'))
575 			Melder_appendError (U"Hint: file name contains a newline symbol.");
576 		throw MelderError ();
577 		return nullptr;
578 	}
579 	return f;
580 }
581 
Melder_fclose(MelderFile file,FILE * f)582 void Melder_fclose (MelderFile file, FILE *f) {
583 	if (! f) return;
584 	#if defined (CURLPRESENT)
585  	if (str32str (file -> wpath, U"://") && file -> openForWriting) {
586 		unsigned char utf8path [kMelder_MAXPATH+1];
587 		Melder_32to8_fileSystem_inplace (file -> path, utf8path);
588 		/* Rewind the file. */
589 		if (f) rewind (f);
590 		CURLcode CURLreturn;
591 		CURL *CURLhandle;
592 		char errorbuffer [CURL_ERROR_SIZE] = "";
593 		/* Start global init (necessary only ONCE). */
594 		if (! curl_initialized) {
595 			CURLreturn = curl_global_init (CURL_GLOBAL_ALL);
596 			curl_initialized = 1;
597 		};
598 		CURLhandle = curl_easy_init ();   /* Initialize session. */
599 		/*
600 		 * Set up the connection parameters.
601 		 */
602 		/* Debugging: Verbose messages */
603 		/* CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_VERBOSE, 1); */
604         /* Catch FILE: protocol errors. No solution yet */
605 		if (str32str (file -> path, U"file://") || str32str (file -> path, U"FILE://")) {
606 			CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_VERBOSE, 1);
607 		}
608 		/* Do not return Error pages, just fail. */
609 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_FAILONERROR, 1);
610 		/* Store error messages in a buffer. */
611 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_ERRORBUFFER, errorbuffer);
612 		/* Send header. */
613 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_HEADER, 1);
614 		/* Upload. */
615 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_UPLOAD, 1);
616 		/* The actual URL to handle. */
617 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_URL, utf8path);
618 		/* The function to write to the peer, necessary for Win32. */
619 	    CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_READFUNCTION, read_URL_data_from_file);
620 		CURLreturn = curl_easy_setopt (CURLhandle, CURLOPT_READDATA, f);
621 		/* Get the URL and write the file to it. */
622 		CURLreturn = curl_easy_perform (CURLhandle);
623 		/* Handle errors. */
624 		if (CURLreturn) {
625 			curl_easy_cleanup (CURLhandle);
626 			f = nullptr;
627 			Melder_throw (Melder_peek8to32 (errorbuffer), U"\n");
628 	    };
629 		/* Clean up session */
630 		curl_easy_cleanup (CURLhandle);
631     }
632 	#endif
633 	if (f != stdout && fclose (f) == EOF)
634 		Melder_throw (U"Error closing file ", file, U".");
635 }
636 
/*
	Releases the global curl state, if CURLPRESENT is defined and curl has been initialized
	by Melder_fopen() or Melder_fclose(). Otherwise this function does nothing.
*/
void Melder_files_cleanUp () {
	#if defined (CURLPRESENT)
		if (! curl_initialized)
			return;
		curl_global_cleanup ();
		curl_initialized = 0;
	#endif
}
645 
MelderFile_exists(MelderFile file)646 bool MelderFile_exists (MelderFile file) {
647 	#if defined (UNIX)
648 		struct stat statistics;
649 		return ! stat (Melder_peek32to8_fileSystem (file -> path), & statistics);
650 	#else
651 		try {
652 			autofile f = Melder_fopen (file, "rb");
653 			f.close (file);
654 			return true;
655 		} catch (MelderError) {
656 			Melder_clearError ();
657 			return false;
658 		}
659 	#endif
660 }
661 
MelderFile_readable(MelderFile file)662 bool MelderFile_readable (MelderFile file) {
663 	try {
664 		autofile f = Melder_fopen (file, "rb");
665 		f.close (file);
666 		return true;
667 	} catch (MelderError) {
668 		Melder_clearError ();
669 		return false;
670 	}
671 }
672 
Melder_tryToWriteFile(MelderFile file)673 bool Melder_tryToWriteFile (MelderFile file) {
674 	try {
675 		autofile f = Melder_fopen (file, "wb");
676 		f.close (file);
677 		return true;
678 	} catch (MelderError) {
679 		Melder_clearError ();
680 		return false;
681 	}
682 }
683 
Melder_tryToAppendFile(MelderFile file)684 bool Melder_tryToAppendFile (MelderFile file) {
685 	try {
686 		autofile f = Melder_fopen (file, "ab");
687 		f.close (file);
688 		return true;
689 	} catch (MelderError) {
690 		Melder_clearError ();
691 		return false;
692 	}
693 }
694 
MelderFile_length(MelderFile file)695 integer MelderFile_length (MelderFile file) {
696 	#if defined (UNIX)
697 		struct stat statistics;
698 		if (stat (Melder_peek32to8_fileSystem (file -> path), & statistics) != 0)
699 			return -1;
700 		return statistics. st_size;
701 	#else
702 		try {
703 			autofile f = Melder_fopen (file, "r");
704 			fseek (f, 0, SEEK_END);
705 			integer length = ftell (f);
706 			f.close (file);
707 			return length;
708 		} catch (MelderError) {
709 			Melder_clearError ();
710 			return -1;
711 		}
712 	#endif
713 }
714 
MelderFile_delete(MelderFile file)715 void MelderFile_delete (MelderFile file) {
716 	if (! file) return;
717 	#if defined (UNIX)
718 		remove (Melder_peek32to8_fileSystem (file -> path));
719 	#elif defined (_WIN32)
720 		DeleteFile (Melder_peek32toW_fileSystem (file -> path));
721 	#endif
722 }
723 
Melder_peekExpandBackslashes(conststring32 message)724 char32 * Melder_peekExpandBackslashes (conststring32 message) {
725 	static char32 names [11] [kMelder_MAXPATH+1];
726 	static int index = 0;
727 	if (++ index == 11) index = 0;
728 	char32 *to = & names [index] [0];
729 	for (const char32 *from = & message [0]; *from != '\0'; from ++, to ++) {
730 		*to = *from;
731 		if (*from == U'\\') {
732 			* ++ to = U'b';
733 			* ++ to = U's';
734 		}
735 	}
736 	*to = U'\0';
737 	return & names [index] [0];
738 }
739 
MelderFile_messageName(MelderFile file)740 conststring32 MelderFile_messageName (MelderFile file) {
741 	return Melder_cat (U"“", file -> path, U"”");   // BUG: is cat allowed here?
742 }
743 
#if defined (UNIX)
	/*
		From macOS 10.15 Catalina on, getcwd() has failed if a part of the path
		is inaccessible, such as when you open a script that is attached to an email message.

		We therefore keep our own copy of the default directory:
		Melder_setDefaultDir() stores the path here,
		and Melder_getDefaultDir() falls back on it when getcwd() fails with EPERM.
	*/
	static structMelderDir theDefaultDir;
#endif
751 
Melder_getDefaultDir(MelderDir dir)752 void Melder_getDefaultDir (MelderDir dir) {
753 	#if defined (UNIX)
754 		char path [kMelder_MAXPATH+1];
755 		char *pathResult = getcwd (path, kMelder_MAXPATH+1);
756 		if (pathResult)
757 			Melder_8bitFileRepresentationToStr32_inplace (path, dir -> path);
758 		else if (errno == EPERM)
759 			str32cpy (dir -> path, theDefaultDir. path);
760 		else
761 			Melder_throw (Melder_peek8to32 (strerror (errno)));
762 		Melder_assert (str32len (dir -> path) <= kMelder_MAXPATH);
763 	#elif defined (_WIN32)
764 		static WCHAR dirPathW [kMelder_MAXPATH+1];
765 		GetCurrentDirectory (kMelder_MAXPATH+1, dirPathW);
766 		Melder_sprint (dir -> path,kMelder_MAXPATH+1, Melder_peekWto32 (dirPathW));
767 	#endif
768 }
769 
Melder_setDefaultDir(MelderDir dir)770 void Melder_setDefaultDir (MelderDir dir) {
771 	#if defined (UNIX)
772 		chdir (Melder_peek32to8 (dir -> path));
773 		str32cpy (theDefaultDir. path, dir -> path);
774 	#elif defined (_WIN32)
775 		SetCurrentDirectory (Melder_peek32toW_fileSystem (dir -> path));
776 	#endif
777 }
778 
MelderFile_setDefaultDir(MelderFile file)779 void MelderFile_setDefaultDir (MelderFile file) {
780 	structMelderDir dir { };
781 	MelderFile_getParentDir (file, & dir);
782 	Melder_setDefaultDir (& dir);
783 }
784 
Melder_createDirectory(MelderDir parent,conststring32 dirName,int mode)785 void Melder_createDirectory (MelderDir parent, conststring32 dirName, int mode) {
786 #if defined (UNIX)
787 	structMelderFile file { };
788 	if (dirName [0] == U'/') {
789 		Melder_sprint (file. path,kMelder_MAXPATH+1, dirName);   // absolute path
790 	} else if (parent -> path [0] == U'/' && parent -> path [1] == U'\0') {
791 		Melder_sprint (file. path,kMelder_MAXPATH+1, U"/", dirName);   // relative path in root directory
792 	} else {
793 		Melder_sprint (file. path,kMelder_MAXPATH+1, parent -> path, U"/", dirName);   // relative path
794 	}
795 	if (mkdir (Melder_peek32to8_fileSystem (file. path), mode) == -1 && errno != EEXIST)   // ignore if directory already exists
796 		Melder_throw (U"Cannot create directory ", & file, U".");
797 #elif defined (_WIN32)
798 	structMelderFile file { };
799 	SECURITY_ATTRIBUTES sa;
800 	(void) mode;
801 	sa. nLength = sizeof (SECURITY_ATTRIBUTES);
802 	sa. lpSecurityDescriptor = nullptr;
803 	sa. bInheritHandle = false;
804 	if (str32chr (dirName, U':') || dirName [0] == U'/' && dirName [1] == U'/') {
805 		Melder_sprint (file. path,kMelder_MAXPATH+1, dirName);   // absolute path
806 	} else {
807 		Melder_sprint (file. path,kMelder_MAXPATH+1, parent -> path, U"/", dirName);   // relative path
808 	}
809 	if (! CreateDirectoryW (Melder_peek32toW_fileSystem (file. path), & sa) && GetLastError () != ERROR_ALREADY_EXISTS)   // ignore if directory already exists
810 		Melder_throw (U"Cannot create directory ", & file, U".");
811 #else
812 	//#error Unsupported operating system.
813 #endif
814 }
815 
/*
	Read `numberOfBytes` bytes from `f` into `buffer`,
	asking fread() for no more than 10^9 bytes per call.
	Returns the number of bytes actually read,
	which is less than `numberOfBytes` if any of the fread() calls comes up short.
*/
static size_t fread_multi (char *buffer, size_t numberOfBytes, FILE *f) {
	const size_t maximumChunkSize = 1000000000;
	char *cursor = buffer;
	size_t remaining = numberOfBytes;
	size_t total = 0;
	for (;;) {
		const bool isLastChunk = ( remaining <= maximumChunkSize );
		const size_t requested = ( isLastChunk ? remaining : maximumChunkSize );
		const size_t received = fread (cursor, sizeof (char), requested, f);
		total += received;
		if (isLastChunk || received < requested)
			return total;   // either everything has been asked for, or the stream has run dry
		remaining -= requested;
		cursor += requested;
	}
}
832 
MelderFile_readText(MelderFile file,autostring8 * string8)833 autostring32 MelderFile_readText (MelderFile file, autostring8 *string8) {
834 	try {
835 		int type = 0;   // 8-bit
836 		autostring32 text;
837 		autofile f = Melder_fopen (file, "rb");
838 		if (fseeko (f, 0, SEEK_END) < 0)
839 			Melder_throw (U"Cannot count the bytes in the file.");
840 		Melder_assert (sizeof (off_t) >= 8);
841 		int64 length = ftello (f);
842 		rewind (f);
843 		if (length >= 2) {
844 			int firstByte = fgetc (f), secondByte = fgetc (f);
845 			if (firstByte == 0xFE && secondByte == 0xFF) {
846 				type = 1;   // big-endian 16-bit
847 			} else if (firstByte == 0xFF && secondByte == 0xFE) {
848 				type = 2;   // little-endian 16-bit
849 			} else if (firstByte == 0xEF && secondByte == 0xBB && length >= 3) {
850 				int thirdByte = fgetc (f);
851 				if (thirdByte == 0xBF)
852 					type = -1;   // UTF-8 with BOM
853 			}
854 		}
855 		if (type <= 0) {
856 			if (type == -1) {
857 				length -= 3;
858 				fseeko (f, 3, SEEK_SET);
859 			} else {
860 				rewind (f);   // length and type already set correctly.
861 			}
862 			autostring8 text8bit (length);
863 			Melder_assert (text8bit);
864 			size_t numberOfBytesRead = fread_multi (text8bit.get(), (size_t) length, f);
865 			Melder_require ((int64) numberOfBytesRead == length,
866 				U"The file contains ", length, U" bytes",
867 				type == -1 ? U" after the byte-order mark" : U"",
868 				U", but we could read only ", numberOfBytesRead, U" of them."
869 			);
870 			text8bit [length] = '\0';
871 			/*
872 				Count and repair null bytes.
873 			*/
874 			if (length > 0) {
875 				int64 numberOfNullBytes = 0;
876 				char *q = & text8bit [0];
877 				for (integer i = 0; i < length; i ++)
878 					if (text8bit [i] != '\0')
879 						* (q ++) = text8bit [i];
880 					else
881 						numberOfNullBytes ++;
882 				*q = '\0';
883 				if (numberOfNullBytes > 0)
884 					Melder_warning (U"Ignored ", numberOfNullBytes, U" null bytes in text file ", file, U".");
885 			}
886 			if (string8) {
887 				*string8 = text8bit.move();
888 				(void) Melder_killReturns_inplace (string8->get());
889 				return autostring32();   // OK
890 			} else {
891 				text = Melder_8to32 (text8bit.get(), kMelder_textInputEncoding::UNDEFINED);
892 			}
893 		} else {
894 			length = length / 2 - 1;   // Byte Order Mark subtracted. Length = number of UTF-16 codes
895 			text = autostring32 (length + 1);
896 			if (type == 1) {
897 				for (int64 i = 0; i < length; i ++) {
898 					char16 kar1 = bingetu16 (f);
899 					if (kar1 < 0xD800) {
900 						text [i] = (char32) kar1;   // convert up without sign extension
901 					} else if (kar1 < 0xDC00) {
902 						length --;
903 						char16 kar2 = bingetu16 (f);
904 						if (kar2 >= 0xDC00 && kar2 <= 0xDFFF) {
905 							text [i] = (char32) (0x010000 +
906 									(char32) (((char32) kar1 & 0x0003FF) << 10) +
907 									(char32)  ((char32) kar2 & 0x0003FF));
908 						} else {
909 							text [i] = UNICODE_REPLACEMENT_CHARACTER;
910 						}
911 					} else if (kar1 < 0xE000) {
912 						text [i] = UNICODE_REPLACEMENT_CHARACTER;
913 					} else {
914 						text [i] = (char32) kar1;   // convert up without sign extension
915 					}
916 				}
917 			} else {
918 				for (int64 i = 0; i < length; i ++) {
919 					char16 kar1 = bingetu16LE (f);
920 					if (kar1 < 0xD800) {
921 						text [i] = (char32) kar1;   // convert up without sign extension
922 					} else if (kar1 < 0xDC00) {
923 						length --;
924 						char16 kar2 = bingetu16LE (f);
925 						if (kar2 >= 0xDC00 && kar2 <= 0xDFFF) {
926 							text [i] = (char32) (0x01'0000 +
927 								(char32) (((char32) kar1 & 0x00'03FF) << 10) +
928 								(char32)  ((char32) kar2 & 0x00'03FF));
929 						} else {
930 							text [i] = UNICODE_REPLACEMENT_CHARACTER;
931 						}
932 					} else if (kar1 < 0xE000) {
933 						text [i] = UNICODE_REPLACEMENT_CHARACTER;
934 					} else if (kar1 <= 0xFFFF) {
935 						text [i] = (char32) kar1;   // convert up without sign extension
936 					} else {
937 						Melder_fatal (U"MelderFile_readText: unsigned short greater than 0xFFFF: should not occur.");
938 					}
939 				}
940 			}
941 			text [length] = U'\0';
942 			(void) Melder_killReturns_inplace (text.get());
943 		}
944 		f.close (file);
945 		return text;
946 	} catch (MelderError) {
947 		Melder_throw (U"Error reading file ", file, U".");
948 	}
949 }
950 
Melder_fwrite32to8(conststring32 string,FILE * f)951 void Melder_fwrite32to8 (conststring32 string, FILE *f) {
952 	/*
953 	 * Precondition:
954 	 *    the string's encoding is UTF-32.
955 	 * Failure:
956 	 *    if the precondition does not hold, we don't crash,
957 	 *    but the characters that are written may be incorrect.
958 	 */
959 	for (const char32* p = string; *p != U'\0'; p ++) {
960 		char32 kar = *p;
961 		if (kar <= 0x00'007F) {
962 			#ifdef _WIN32
963 				if (kar == U'\n')
964 					fputc (13, f);
965 			#endif
966 			fputc ((int) kar, f);   // because fputc wants an int instead of an uint8 (guarded conversion)
967 		} else if (kar <= 0x00'07FF) {
968 			fputc (0xC0 | (kar >> 6), f);
969 			fputc (0x80 | (kar & 0x00'003F), f);
970 		} else if (kar <= 0x00FFFF) {
971 			fputc (0xE0 | (kar >> 12), f);
972 			fputc (0x80 | ((kar >> 6) & 0x00'003F), f);
973 			fputc (0x80 | (kar & 0x00'003F), f);
974 		} else {
975 			fputc (0xF0 | (kar >> 18), f);
976 			fputc (0x80 | ((kar >> 12) & 0x00'003F), f);
977 			fputc (0x80 | ((kar >> 6) & 0x00'003F), f);
978 			fputc (0x80 | (kar & 0x00'003F), f);
979 		}
980 	}
981 }
982 
MelderFile_writeText(MelderFile file,conststring32 text,kMelder_textOutputEncoding outputEncoding)983 void MelderFile_writeText (MelderFile file, conststring32 text, kMelder_textOutputEncoding outputEncoding) {
984 	if (! text)
985 		text = U"";
986 	autofile f = Melder_fopen (file, "wb");
987 	if (outputEncoding == kMelder_textOutputEncoding::UTF8) {
988 		Melder_fwrite32to8 (text, f);
989 	} else if ((outputEncoding == kMelder_textOutputEncoding::ASCII_THEN_UTF16 && Melder_isValidAscii (text)) ||
990 		(outputEncoding == kMelder_textOutputEncoding::ISO_LATIN1_THEN_UTF16 && Melder_isEncodable (text, kMelder_textOutputEncoding_ISO_LATIN1)))
991 	{
992 		#ifdef _WIN32
993 			#define flockfile(f)  (void) 0
994 			#define funlockfile(f)  (void) 0
995 			#define putc_unlocked  putc
996 		#endif
997 		flockfile (f);
998 		integer n = str32len (text);
999 		for (integer i = 0; i < n; i ++) {
1000 			char32 kar = text [i];
1001 			#ifdef _WIN32
1002 				if (kar == U'\n')
1003 					putc_unlocked (13, f);
1004 			#endif
1005 			putc_unlocked (kar, f);
1006 		}
1007 		funlockfile (f);
1008 	} else {
1009 		binputu16 (0xFEFF, f);   // Byte Order Mark
1010 		integer n = str32len (text);
1011 		for (integer i = 0; i < n; i ++) {
1012 			char32 kar = text [i];
1013 			#ifdef _WIN32
1014 				if (kar == U'\n')
1015 					binputu16 (13, f);
1016 			#endif
1017 			if (kar <= 0x00'FFFF) {
1018 				binputu16 ((char16) kar, f);   // guarded conversion down
1019 			} else if (kar <= 0x10'FFFF) {
1020 				kar -= 0x010000;
1021 				binputu16 (0xD800 | (uint16) (kar >> 10), f);
1022 				binputu16 (0xDC00 | (uint16) ((char16) kar & 0x3ff), f);
1023 			} else {
1024 				binputu16 (UNICODE_REPLACEMENT_CHARACTER, f);
1025 			}
1026 		}
1027 	}
1028 	f.close (file);
1029 }
1030 
MelderFile_appendText(MelderFile file,conststring32 text)1031 void MelderFile_appendText (MelderFile file, conststring32 text) {
1032 	if (! text) text = U"";
1033 	autofile f1;
1034 	try {
1035 		f1.reset (Melder_fopen (file, "rb"));
1036 	} catch (MelderError) {
1037 		Melder_clearError ();   // it's OK if the file didn't exist yet...
1038 		MelderFile_writeText (file, text, Melder_getOutputEncoding ());   // because then we just "write"
1039 		return;
1040 	}
1041 	/*
1042 	 * The file already exists and is open. Determine its type.
1043 	 */
1044 	int firstByte = fgetc (f1), secondByte = fgetc (f1);
1045 	f1.close (file);
1046 	int type = 0;
1047 	if (firstByte == 0xfe && secondByte == 0xff) {
1048 		type = 1;   // big-endian 16-bit
1049 	} else if (firstByte == 0xff && secondByte == 0xfe) {
1050 		type = 2;   // little-endian 16-bit
1051 	}
1052 	if (type == 0) {
1053 		kMelder_textOutputEncoding outputEncoding = Melder_getOutputEncoding ();
1054 		if (outputEncoding == kMelder_textOutputEncoding::UTF8) {   // TODO: read as file's encoding
1055 			autofile f2 = Melder_fopen (file, "ab");
1056 			Melder_fwrite32to8 (text, f2);
1057 			f2.close (file);
1058 		} else if ((outputEncoding == kMelder_textOutputEncoding::ASCII_THEN_UTF16 && Melder_isEncodable (text, kMelder_textOutputEncoding_ASCII))
1059 		    || (outputEncoding == kMelder_textOutputEncoding::ISO_LATIN1_THEN_UTF16 && Melder_isEncodable (text, kMelder_textOutputEncoding_ISO_LATIN1)))
1060 		{
1061 			/*
1062 			 * Append ASCII or ISOLatin1 text to ASCII or ISOLatin1 file.
1063 			 */
1064 			autofile f2 = Melder_fopen (file, "ab");
1065 			int64 n = str32len (text);
1066 			for (int64 i = 0; i < n; i ++) {
1067 				char32 kar = text [i];
1068 				#ifdef _WIN32
1069 					if (kar == U'\n')
1070 						fputc (13, f2);
1071 				#endif
1072 				fputc ((char8) kar, f2);
1073 			}
1074 			f2.close (file);
1075 		} else {
1076 			/*
1077 			 * Convert to wide character file.
1078 			 */
1079 			autostring32 oldText = MelderFile_readText (file);
1080 			autofile f2 = Melder_fopen (file, "wb");
1081 			binputu16 (0xfeff, f2);
1082 			int64 n = str32len (oldText.get());
1083 			for (int64 i = 0; i < n; i ++) {
1084 				char32 kar = oldText [i];
1085 				#ifdef _WIN32
1086 					if (kar == U'\n')
1087 						binputu16 (13, f2);
1088 				#endif
1089 				if (kar <= 0x00'FFFF) {
1090 					binputu16 ((uint16) kar, f2);   // guarded conversion down
1091 				} else if (kar <= 0x10'FFFF) {
1092 					kar -= 0x01'0000;
1093 					binputu16 ((uint16) (0x00'D800 | (kar >> 10)), f2);
1094 					binputu16 ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
1095 				} else {
1096 					binputu16 (UNICODE_REPLACEMENT_CHARACTER, f2);
1097 				}
1098 			}
1099 			n = str32len (text);
1100 			for (int64 i = 0; i < n; i ++) {
1101 				char32 kar = text [i];
1102 				#ifdef _WIN32
1103 					if (kar == U'\n')
1104 						binputu16 (13, f2);
1105 				#endif
1106 				if (kar <= 0x00FFFF) {
1107 					binputu16 ((uint16) kar, f2);   // guarded conversion down
1108 				} else if (kar <= 0x10'FFFF) {
1109 					kar -= 0x01'0000;
1110 					binputu16 ((uint16) (0x00'D800 | (kar >> 10)), f2);
1111 					binputu16 ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
1112 				} else {
1113 					binputu16 (UNICODE_REPLACEMENT_CHARACTER, f2);
1114 				}
1115 			}
1116 			f2.close (file);
1117 		}
1118 	} else {
1119 		autofile f2 = Melder_fopen (file, "ab");
1120 		int64 n = str32len (text);
1121 		for (int64 i = 0; i < n; i ++) {
1122 			if (type == 1) {
1123 				char32 kar = text [i];
1124 				#ifdef _WIN32
1125 					if (kar == U'\n')
1126 						binputu16 (13, f2);
1127 				#endif
1128 				if (kar <= 0x00'FFFF) {
1129 					binputu16 ((uint16) kar, f2);   // guarded conversion down
1130 				} else if (kar <= 0x10'FFFF) {
1131 					kar -= 0x01'0000;
1132 					binputu16 ((uint16) (0x00'D800 | (kar >> 10)), f2);
1133 					binputu16 ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
1134 				} else {
1135 					binputu16 (UNICODE_REPLACEMENT_CHARACTER, f2);
1136 				}
1137 			} else {
1138 				char32 kar = text [i];
1139 				#ifdef _WIN32
1140 					if (kar == U'\n')
1141 						binputu16LE (13, f2);
1142 				#endif
1143 				if (kar <= 0x00'FFFF) {
1144 					binputu16LE ((uint16) kar, f2);   // guarded conversion down
1145 				} else if (kar <= 0x10FFFF) {
1146 					kar -= 0x01'0000;
1147 					binputu16LE ((uint16) (0x00'D800 | (kar >> 10)), f2);
1148 					binputu16LE ((uint16) (0x00'DC00 | (kar & 0x00'03ff)), f2);
1149 				} else {
1150 					binputu16LE (UNICODE_REPLACEMENT_CHARACTER, f2);
1151 				}
1152 			}
1153 		}
1154 		f2.close (file);
1155 	}
1156 }
1157 
1158 /* End of file melder_files.cpp */
1159