1 /* AbiSource Program Utilities
2  * Copyright (C) 1998,1999 AbiSource, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301 USA.
18  */
19 
20 
21 #ifndef UT_STRING_H
22 #define UT_STRING_H
23 
24 #include <map>
25 #include <string>
26 #include <string.h>
27 
28 /* pre-emptive dismissal; ut_types.h is needed by just about everything,
29  * so even if it's commented out in-file that's still a lot of work for
30  * the preprocessor to do...
31  */
32 #ifndef UT_TYPES_H
33 #include "ut_types.h"
34 #endif
35 
36 class UT_GrowBuf;
37 
38 G_BEGIN_DECLS
39 
40 // Allocates new memory for the result string and hands it back through rszDest (NOTE(review): presumably szSource with '&' removed, caller frees -- confirm)
41 ABI_EXPORT bool  UT_XML_cloneNoAmpersands(gchar *& rszDest, const gchar * szSource);
42 // Replaces &X -> _X; allocates the result buffer, handed back through rszDest
43 ABI_EXPORT bool  UT_XML_cloneConvAmpersands(gchar *& rszDest, const gchar * szSource);
44 // Uses a static buffer for the translation (NOTE(review): the result is presumably overwritten by the next call -- confirm)
45 ABI_EXPORT const gchar *  UT_XML_transNoAmpersands(const gchar * szSource);
46 
47 ABI_EXPORT void  UT_decodeUTF8string(const gchar * p, UT_uint32 len, UT_GrowBuf * pResult); // NOTE(review): presumably decodes len bytes of UTF-8 at p into pResult -- confirm
48 
49 ABI_EXPORT bool  UT_isValidXML(const char *s); // read-only check: s is const
50 ABI_EXPORT bool  UT_validXML(char * s); // NOTE(review): takes a mutable buffer, so it may rewrite s in place -- confirm, and confirm what the bool means
51 
52 /* ABI_EXPORT gchar *  UT_decodeXMLstring(gchar *pcIn);
53  * This has moved to ut_xml.cpp as UT_XML::decode ()
54  */
55 
56 ABI_EXPORT bool  UT_isSmartQuotableCharacter(UT_UCSChar c); // NOTE(review): presumably true for plain quote characters eligible for smart-quote replacement -- confirm
57 ABI_EXPORT bool  UT_isSmartQuotedCharacter(UT_UCSChar c); // NOTE(review): presumably true for characters that already are smart quotes -- confirm
58 
59 ////////////////////////////////////////////////////////////////////////
60 //
61 //  UCS-2 string (UT_UCS2Char)
62 //
63 //  String is built of 16-bit units (words)
64 //
65 //  TODO: Is this really UCS-2 or UTF-16?
66 //  TODO:  meaning, does it support surrogates or is it intended to
67 //  TODO:  support them at any time in the future?
68 //  TODO: Correctly, UCS-2 does not support surrogates and UTF-16 does.
69 //  TODO: BUT Microsoft calls their native Unicode encoding UCS-2
70 //  TODO:  while it supports surrogates and is thus really UTF-16.
71 //  TODO: Surrogate pairs encode Unicode characters with codepoints
72 //  TODO:  above 65535, which cannot fit into a single 2-byte word.
73 //  TODO: This means that TRUE UCS-2 is a single-word encoding and
74 //  TODO:  UTF-16 is a multi-word encoding.
75 //
76 //  NOTE: We shouldn't actually need 16-bit strings anymore since
77 //  NOTE:  AbiWord is now fully converted to using 32-bit Unicode
78 //  NOTE:  internally. The only possible need for this is for
79 //  NOTE:  Windows GUI, filesystem and API functions where applicable,
80 //  NOTE:  and perhaps some file formats or external libraries.
81 //
82 ////////////////////////////////////////////////////////////////////////
83 
84 #ifdef ENABLE_UCS2_STRINGS
85 
86 #define UT_UCS2_isdigit(x)	(((x) >= '0') && ((x) <= '9'))  // ASCII '0'..'9' only, evaluates x twice; TODO: make Unicode-aware
87 
88 /* these are Unicode-safe (per the original author) */
89 ABI_EXPORT bool  UT_UCS2_isupper(UT_UCS2Char c);
90 ABI_EXPORT bool  UT_UCS2_islower(UT_UCS2Char c);
91 ABI_EXPORT bool  UT_UCS2_isalpha(UT_UCS2Char c);
92 ABI_EXPORT bool	 UT_UCS2_isSentenceSeparator(UT_UCS2Char c);
93 #define UT_UCS2_isalnum(x)	(UT_UCS2_isalpha(x) || UT_UCS2_isdigit(x)) // HACK: not Unicode-safe; may evaluate x more than once
94 ABI_EXPORT bool UT_UCS2_isspace(UT_UCS2Char c);
95 #define UT_UCS2_ispunct(x)   ((!UT_UCS2_isspace(x)  &&  !UT_UCS2_isalnum(x)  &&  (x)>' '))  // HACK: not Unicode-safe; may evaluate x several times
96 
97 // The naming convention below deviates from the one above: it is a
98 // hybrid of the libc and C++ naming conventions.
99 ABI_EXPORT UT_UCS2Char * 	 UT_UCS2_strstr(const UT_UCS2Char * phaystack, const UT_UCS2Char * pneedle);
100 ABI_EXPORT UT_sint32 		 UT_UCS2_strcmp(const UT_UCS2Char* left, const UT_UCS2Char* right);
101 ABI_EXPORT UT_UCS2Char * 	 UT_UCS2_stristr(const UT_UCS2Char * phaystack, const UT_UCS2Char * pneedle);
102 ABI_EXPORT UT_UCS2Char * 	 UT_UCS2_strcpy(UT_UCS2Char * dest, const UT_UCS2Char * src);
103 ABI_EXPORT UT_UCS2Char * 	 UT_UCS2_strcpy_char(UT_UCS2Char * dest, const char * src);
104 ABI_EXPORT char *			 UT_UCS2_strcpy_to_char(char * dest, const UT_UCS2Char * src);
105 ABI_EXPORT bool			 UT_UCS2_cloneString(UT_UCS2Char ** dest, const UT_UCS2Char * src); // NOTE(review): *dest presumably receives a newly allocated copy -- confirm who frees it
106 ABI_EXPORT bool			 UT_UCS2_cloneString_char(UT_UCS2Char ** dest, const char * src); // NOTE(review): as above, but from a char string -- confirm
107 ABI_EXPORT UT_UCS2Char *     UT_UCS2_strncpy(UT_UCS2Char * dest, const UT_UCS2Char * src, UT_uint32 n);
108 ABI_EXPORT UT_UCS2Char *     UT_UCS2_strnrev(UT_UCS2Char * dest, UT_uint32 n);
109 
110 ABI_EXPORT UT_UCS2Char		 UT_UCS2_tolower(UT_UCS2Char c);
111 ABI_EXPORT UT_UCS2Char       UT_UCS2_toupper(UT_UCS2Char c);
112 
113 #endif /* ENABLE_UCS2_STRINGS */
114 
115 // Declared outside the ENABLE_UCS2_STRINGS guard on purpose: the MSWord importer uses it
116 
117 ABI_EXPORT UT_uint32 		 UT_UCS2_strlen(const UT_UCS2Char * string);
118 
119 ////////////////////////////////////////////////////////////////////////
120 //
121 //  UCS-4 string (UT_UCS4Char)
122 //
123 //  String is built of 32-bit units (longs)
124 //
125 //  NOTE: Ambiguity between UCS-2 and UTF-16 above makes no difference
126 //  NOTE:  in the case of UCS-4 and UTF-32 since they really are
127 //  NOTE:  identical
128 //
129 ////////////////////////////////////////////////////////////////////////
130 
131 /* these are Unicode-safe (per the original author) */
132 ABI_EXPORT bool  UT_UCS4_isupper(UT_UCS4Char c);
133 ABI_EXPORT bool  UT_UCS4_islower(UT_UCS4Char c);
134 ABI_EXPORT bool  UT_UCS4_isalpha(UT_UCS4Char c);
135 ABI_EXPORT bool	 UT_UCS4_isSentenceSeparator(UT_UCS4Char c);
136 ABI_EXPORT bool  UT_UCS4_isdigit(UT_UCS4Char c);
137 #define UT_UCS4_isalnum(x)	(UT_UCS4_isalpha(x) || UT_UCS4_isdigit(x)) // HACK: not Unicode-safe; may evaluate x twice
138 ABI_EXPORT bool UT_UCS4_isspace(UT_UCS4Char c);
139 #define UT_UCS4_ispunct(x)   ((!UT_UCS4_isspace(x)  &&  !UT_UCS4_isalnum(x)  &&  (x)>' '))  // HACK: not Unicode-safe; may evaluate x several times
140 
141 // The naming convention below deviates from the one above: it is a
142 // hybrid of the libc and C++ naming conventions.
143 ABI_EXPORT UT_sint32 		 UT_UCS4_strcmp(const UT_UCS4Char* left, const UT_UCS4Char* right);
144 ABI_EXPORT UT_UCS4Char * 	 UT_UCS4_strstr(const UT_UCS4Char * phaystack, const UT_UCS4Char * pneedle);
145 ABI_EXPORT UT_UCS4Char * 	 UT_UCS4_stristr(const UT_UCS4Char * phaystack, const UT_UCS4Char * pneedle);
146 ABI_EXPORT UT_uint32 		 UT_UCS4_strlen(const UT_UCS4Char * string);
147 ABI_EXPORT UT_uint32		 UT_UCS4_strlen_as_char(const UT_UCS4Char * string);
148 ABI_EXPORT UT_UCS4Char * 	 UT_UCS4_strcpy(UT_UCS4Char * dest, const UT_UCS4Char * src);
149 ABI_EXPORT UT_UCS4Char * 	 UT_UCS4_strcpy_char(UT_UCS4Char * dest, const char * src);
150 ABI_EXPORT UT_UCS4Char * 	 UT_UCS4_strncpy_char(UT_UCS4Char * dest, const char * src, int); // NOTE(review): the unnamed int is presumably a character limit -- confirm and name it
151 ABI_EXPORT UT_UCS4Char * 	 UT_UCS4_strcpy_utf8_char(UT_UCS4Char * dest, const char * src);
152 ABI_EXPORT char *			 UT_UCS4_strcpy_to_char(char * dest, const UT_UCS4Char * src);
153 ABI_EXPORT char *			 UT_UCS4_strncpy_to_char(char * dest, const UT_UCS4Char * src, int); // NOTE(review): the unnamed int is presumably a character limit -- confirm and name it
154 ABI_EXPORT bool			     UT_UCS4_cloneString(UT_UCS4Char ** dest, const UT_UCS4Char * src); // NOTE(review): *dest presumably receives a newly allocated copy -- confirm who frees it
155 ABI_EXPORT bool			     UT_UCS4_cloneString_char(UT_UCS4Char ** dest, const char * src); // NOTE(review): as above, but from a char string -- confirm
156 ABI_EXPORT UT_UCS4Char *     UT_UCS4_strncpy(UT_UCS4Char * dest, const UT_UCS4Char * src, UT_uint32 n);
157 ABI_EXPORT UT_UCS4Char *     UT_UCS4_strnrev(UT_UCS4Char * dest, UT_uint32 n);
158 
159 ABI_EXPORT UT_UCS4Char		 UT_UCS4_tolower(UT_UCS4Char c);
160 ABI_EXPORT UT_UCS4Char       UT_UCS4_toupper(UT_UCS4Char c);
161 
162 
163 ABI_EXPORT void UT_parse_attributes(const char * attributes,
164 									std::map<std::string, std::string> & map); // non-const reference; NOTE(review): presumably receives the parsed name/value pairs -- confirm
165 ABI_EXPORT void UT_parse_properties(const char * props,
166 									std::map<std::string, std::string> & map); // non-const reference; NOTE(review): presumably receives the parsed name/value pairs -- confirm
167 
168 // Implemented in UT_strptime.cpp; see strptime(), which is not available on Windows.
169 extern "C" {
170 ABI_EXPORT char *UT_strptime (const char *buf, const char *format, struct tm *tm);
171 }
172 
173 
174 #ifdef WIN32
175 #define snprintf _snprintf // NOTE(review): _snprintf may leave the buffer unterminated on truncation, and newer MSVC runtimes provide snprintf -- confirm this alias is still wanted
176 
177 #define _(String) (String) // identity macro: expands to its argument unchanged under WIN32
178 #define N_(String) (String) // identity macro: expands to its argument unchanged under WIN32
179 
180 #endif /* WIN32 */
181 
182 #if defined (SNPRINTF_MISSING)
183   extern int snprintf(char *str, size_t size, const  char  *format, ...); // fallback prototype, only declared when the build defines SNPRINTF_MISSING
184 #endif /* SNPRINTF_MISSING */
185 
186 /*
187  Formats a floating point value using a dot as the fractional separator,
188  independent of the current locale's settings.
189 */
190 ABI_EXPORT const char*  std_size_string(float f); // NOTE(review): lifetime/ownership of the returned string is not visible here -- confirm before storing it
191 
192 
193 #include <fribidi.h>
194 
195 typedef FriBidiCharType UT_BidiCharType; // project-local name for FriBidi's character-type value
196 // Each UT_BIDI_* name below is a direct alias for the FRIBIDI_TYPE_* constant with the same suffix.
197 #define UT_BIDI_LTR FRIBIDI_TYPE_LTR
198 #define UT_BIDI_RTL FRIBIDI_TYPE_RTL
199 #define UT_BIDI_WS FRIBIDI_TYPE_WS
200 #define UT_BIDI_EN FRIBIDI_TYPE_EN
201 #define UT_BIDI_ES FRIBIDI_TYPE_ES
202 #define UT_BIDI_ET FRIBIDI_TYPE_ET
203 #define UT_BIDI_AN FRIBIDI_TYPE_AN
204 #define UT_BIDI_CS FRIBIDI_TYPE_CS
205 #define UT_BIDI_BS FRIBIDI_TYPE_BS
206 #define UT_BIDI_SS FRIBIDI_TYPE_SS
207 #define UT_BIDI_AL FRIBIDI_TYPE_AL
208 #define UT_BIDI_NSM FRIBIDI_TYPE_NSM
209 #define UT_BIDI_RLE FRIBIDI_TYPE_RLE
210 #define UT_BIDI_LRE FRIBIDI_TYPE_LRE
211 #define UT_BIDI_LRO FRIBIDI_TYPE_LRO
212 #define UT_BIDI_RLO FRIBIDI_TYPE_RLO
213 #define UT_BIDI_PDF FRIBIDI_TYPE_PDF
214 #define UT_BIDI_ON FRIBIDI_TYPE_ON
215 
216 
217 #define UT_BIDI_UNSET FRIBIDI_TYPE_UNSET
218 #define UT_BIDI_IGNORE FRIBIDI_TYPE_IGNORE
219 // Predicate aliases: each UT_BIDI_IS_* name expands to the FRIBIDI_IS_* macro with the same suffix, except UT_BIDI_IS_NSM.
220 #define UT_BIDI_IS_STRONG FRIBIDI_IS_STRONG
221 #define UT_BIDI_IS_WEAK FRIBIDI_IS_WEAK
222 #define UT_BIDI_IS_NUMBER FRIBIDI_IS_NUMBER
223 #define UT_BIDI_IS_RTL FRIBIDI_IS_RTL
224 #define UT_BIDI_IS_NEUTRAL FRIBIDI_IS_NEUTRAL
225 #define UT_BIDI_IS_LETTER FRIBIDI_IS_LETTER
226 #define UT_BIDI_IS_NSM(x) ((x) & FRIBIDI_MASK_NSM) // defined locally: nonzero when x has any FRIBIDI_MASK_NSM bit set
227 
228 // NOTE(review): presumably returns the bidi character type of c -- confirm.
229 ABI_EXPORT UT_BidiCharType UT_bidiGetCharType(UT_UCS4Char c);
230 // NOTE(review): pL2V / pV2L look like logical->visual and visual->logical index maps and pEmbed per-character embedding levels, each of len entries -- confirm.
231 ABI_EXPORT bool            UT_bidiMapLog2Vis(const UT_UCS4Char * pStrIn, UT_uint32 len, UT_BidiCharType baseDir,
232 											 UT_uint32 *pL2V, UT_uint32 * pV2L, UT_Byte * pEmbed);
233 // NOTE(review): presumably writes the visually reordered form of pStrIn into pStrOut -- confirm the required size of pStrOut.
234 ABI_EXPORT bool            UT_bidiReorderString(const UT_UCS4Char * pStrIn, UT_uint32 len, UT_BidiCharType baseDir,
235 												UT_UCS4Char * pStrOut);
236 
237 // NOTE(review): mc presumably receives the mirrored counterpart of c -- confirm its value when the function returns false.
238 ABI_EXPORT bool            UT_bidiGetMirrorChar(UT_UCS4Char c, UT_UCS4Char &mc);
239 
240 G_END_DECLS
241 
242 #endif /* UT_STRING_H */
243