1 #ifndef _melder_textencoding_h_
2 #define _melder_textencoding_h_
3 /* melder_textencoding.h
4  *
5  * Copyright (C) 1992-2020 Paul Boersma
6  *
7  * This code is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or (at
10  * your option) any later version.
11  *
12  * This code is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
15  * See the GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this work. If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 /**
22  * Text encodings.
23  */
/*
 * Hook for the text-encoding preferences. Takes no arguments and returns nothing.
 * NOTE(review): what it registers or restores lives in the implementation file -- confirm there.
 */
24 void Melder_textEncoding_prefs ();
/*
 * Set / get the input encoding as a kMelder_textInputEncoding value.
 * NOTE(review): presumably a program-wide setting; the storage is not visible in this header.
 */
25 void Melder_setInputEncoding (kMelder_textInputEncoding encoding);
26 kMelder_textInputEncoding Melder_getInputEncoding ();
/*
 * Set / get the output encoding as a kMelder_textOutputEncoding value
 * (same caveat: the storage is not visible in this header).
 */
27 void Melder_setOutputEncoding (kMelder_textOutputEncoding encoding);
28 kMelder_textOutputEncoding Melder_getOutputEncoding ();
29 
30 /*
31  * Some other encodings. Although not used in the above set/get functions,
32  * these constants should stay separate from the above encoding constants
33  * because they occur in the same fields of struct MelderFile.
34  */
/*
 * Each value is a four-byte tag. Reading the bytes from most to least significant
 * as ASCII codes gives the spelling noted next to each constant.
 */
35 constexpr uint32 kMelder_textOutputEncoding_ASCII = 0x4153'4349;   // 'A' 'S' 'C' 'I'
36 constexpr uint32 kMelder_textOutputEncoding_ISO_LATIN1 = 0x4C41'5401;   // 'L' 'A' 'T' followed by the byte 0x01
37 constexpr uint32 kMelder_textOutputEncoding_FLAC = 0x464C'4143;   // 'F' 'L' 'A' 'C'
38 
/*
 * Predicates on text; each returns a bool.
 * Melder_isValidAscii takes 32-bit text, Melder_str8IsValidUtf8 takes a plain `const char *`.
 * NOTE(review): the exact acceptance rules are in the implementation -- confirm there.
 */
39 bool Melder_isValidAscii (conststring32 string);
40 bool Melder_str8IsValidUtf8 (const char *string);
/*
 * `outputEncoding` is a plain int rather than kMelder_textOutputEncoding.
 * NOTE(review): presumably so that it can also carry the uint32 tag constants
 * declared in this header -- verify against the callers.
 */
41 bool Melder_isEncodable (conststring32 string, int outputEncoding);
/*
 * 256-entry tables of char32 values, defined in another translation unit.
 * NOTE(review): presumably indexed by a byte value in the named 8-bit encoding -- verify against the definition.
 */
42 extern char32 Melder_decodeMacRoman [256];
43 extern char32 Melder_decodeWindowsLatin1 [256];
44 
45 /**
46 	 Replace all bare returns (old Mac) or return-plus-linefeed sequences (Win) with bare linefeeds
47 	 (generic: Unix and modern Mac).
48 	 Return new length of string (equal to or less than old length).
49 */
// Two overloads of the same in-place operation, selected by the argument type:
// one for mutablestring32 text and one for mutablestring8 text. Both return the new length as an `integer`.
50 integer Melder_killReturns_inplace (mutablestring32 text);
51 integer Melder_killReturns_inplace (mutablestring8 text);
52 
/*
 * Length computations for 32-bit text, one for a UTF-8 rendering and one for a UTF-16 rendering (going by the names).
 * NOTE(review): whether the result counts bytes or code units, whether a terminator is included,
 * and exactly how `nativizeNewlines` affects the count are not visible here -- check the definitions.
 */
53 size_t str32len_utf8  (conststring32 string, bool nativizeNewlines);
54 size_t str32len_utf16 (conststring32 string, bool nativizeNewlines);
55 
/*
 * 8-bit -> 32-bit text conversions.
 *
 * Melder_peek8to32 has C linkage and returns a conststring32 (not an autostring32).
 * NOTE(review): the "peek" name suggests the result points into storage owned by the callee,
 * so the caller should not free it and should not rely on it across calls -- confirm the lifetime in the implementation.
 */
56 extern "C" conststring32 Melder_peek8to32 (conststring8 string);
// Converts `source` into the caller-supplied `target`, interpreting the bytes according to `inputEncoding`.
// NOTE(review): the capacity required of `target` is not stated in this header.
57 void Melder_8to32_inplace (conststring8 source, mutablestring32 target, kMelder_textInputEncoding inputEncoding);
58 	// errors: Text is not valid UTF-8.
// Returns the converted text as an autostring32, with an explicit input encoding.
59 autostring32 Melder_8to32 (conststring8 string, kMelder_textInputEncoding inputEncoding);
60 	// errors: Out of memory; Text is not valid UTF-8.
// Overload without an explicit encoding.
// NOTE(review): presumably uses the encoding set via Melder_setInputEncoding -- verify.
61 autostring32 Melder_8to32 (conststring8 string);
62 	// errors: Out of memory; Text is not valid UTF-8.
63 
/*
 * 16-bit -> 32-bit text conversions: a "peek" variant returning a conststring32
 * and a variant returning an autostring32.
 * NOTE(review): presumably the peek result is borrowed and the autostring32 is owned by the caller -- confirm.
 */
64 conststring32 Melder_peek16to32 (conststring16 text);
65 autostring32 Melder_16to32 (conststring16 text);
66 
/*
 * 32-bit -> 8-bit and 32-bit -> 16-bit text conversions.
 *
 * Melder_peek32to8 has C linkage and returns a conststring8.
 * NOTE(review): as with the other "peek" functions, the result is presumably borrowed -- confirm the lifetime.
 */
67 extern "C" conststring8 Melder_peek32to8 (conststring32 string);
// Writes the converted text into the caller-supplied `utf8` buffer.
// NOTE(review): the capacity required of `utf8` is not stated in this header.
68 void Melder_32to8_inplace (conststring32 string, mutablestring8 utf8);
// Variants that return the converted text as an autostring8 / autostring16.
69 autostring8 Melder_32to8 (conststring32 string);
70 autostring16 Melder_32to16 (conststring32 string);
71 	// errors: Out of memory.
72 
/*
 * Two overloads of Melder_peek32to16. Only the one-parameter form is declared extern "C":
 * C++ allows at most one function of a given name to have C linkage,
 * so the two-parameter form keeps ordinary C++ linkage.
 * NOTE(review): how the one-parameter form treats newlines is not visible here -- check the definition.
 */
73 conststring16 Melder_peek32to16 (conststring32 text, bool nativizeNewlines);
74 extern "C" conststring16 Melder_peek32to16 (conststring32 string);
75 
76 #ifdef _WIN32
	/*
	 * Windows-only conversions between 32-bit text and the wide-string types
	 * conststringW / autostringW.
	 *
	 * The three inline wrappers do no conversion work of their own: they cast between
	 * the W string type and the 16-bit string type and forward to
	 * Melder_peek32to16, Melder_peek16to32 and Melder_16to32 respectively.
	 * NOTE(review): the casts rely on the W character type and the 16-bit character type
	 * having the same representation (wchar_t is 16 bits wide on Windows) -- confirm the typedefs.
	 *
	 * The *_fileSystem variants are declared only; their definitions are elsewhere.
	 * NOTE(review): presumably they apply file-name-specific handling -- verify.
	 */
77 	inline conststringW Melder_peek32toW (conststring32 string) { return (conststringW) Melder_peek32to16 (string); }
78 	conststringW Melder_peek32toW_fileSystem (conststring32 string);
79 	autostringW Melder_32toW (conststring32 string);
80 	autostringW Melder_32toW_fileSystem (conststring32 string);
81 	inline conststring32 Melder_peekWto32 (conststringW string) { return Melder_peek16to32 ((conststring16) string); }
82 	inline autostring32 Melder_Wto32 (conststringW string) { return Melder_16to32 ((conststring16) string); }
83 #endif
84 
/*
 * File-system flavoured 32-bit -> 8-bit conversions: one writes into the caller-supplied
 * `utf8` buffer, the other returns a conststring8.
 * NOTE(review): how these differ from Melder_32to8_inplace / Melder_peek32to8 is not visible
 * in this header, nor is the required capacity of `utf8` -- check the definitions.
 */
85 void Melder_32to8_fileSystem_inplace (conststring32 string, mutablestring8 utf8);
86 conststring8 Melder_peek32to8_fileSystem (conststring32 string);
87 
// Converts the 8-bit text `utf8` into the caller-supplied 32-bit buffer `string`.
// NOTE(review): the required capacity of `string` and the exact "file representation" rules are defined elsewhere.
88 void Melder_8bitFileRepresentationToStr32_inplace (conststring8 utf8, mutablestring32 string);
// Returns an untyped pointer for the given 32-bit text.
// NOTE(review): presumably a CFStringRef kept as `const void *` so that this header needs no CoreFoundation types -- confirm.
89 const void * Melder_peek32toCfstring (conststring32 string);
90 
91 /* End of file melder_textencoding.h */
92 #endif
93