1 /* AbiSource Program Utilities 2 * Copyright (C) 1998,1999 AbiSource, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 2 7 * of the License, or (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 * 02110-1301 USA. 18 */ 19 20 21 #ifndef UT_STRING_H 22 #define UT_STRING_H 23 24 #include <map> 25 #include <string> 26 #include <string.h> 27 28 /* pre-emptive dismissal; ut_types.h is needed by just about everything, 29 * so even if it's commented out in-file that's still a lot of work for 30 * the preprocessor to do... 31 */ 32 #ifndef UT_TYPES_H 33 #include "ut_types.h" 34 #endif 35 36 class UT_GrowBuf; 37 38 G_BEGIN_DECLS 39 40 // this function allocates (and returns a pointer to) new memory for the new string 41 ABI_EXPORT bool UT_XML_cloneNoAmpersands(gchar *& rszDest, const gchar * szSource); 42 // replaces &X -> _X; allocates buffer 43 ABI_EXPORT bool UT_XML_cloneConvAmpersands(gchar *& rszDest, const gchar * szSource); 44 // This function uses a static buffer to do the translation 45 ABI_EXPORT const gchar * UT_XML_transNoAmpersands(const gchar * szSource); 46 47 ABI_EXPORT void UT_decodeUTF8string(const gchar * p, UT_uint32 len, UT_GrowBuf * pResult); 48 49 ABI_EXPORT bool UT_isValidXML(const char *s); 50 ABI_EXPORT bool UT_validXML(char * s); 51 52 /* ABI_EXPORT gchar * UT_decodeXMLstring(gchar *pcIn); 53 * This has moved to ut_xml.cpp as UT_XML::decode () 54 */ 55 56 ABI_EXPORT bool UT_isSmartQuotableCharacter(UT_UCSChar c); 57 ABI_EXPORT bool UT_isSmartQuotedCharacter(UT_UCSChar c); 58 59 //////////////////////////////////////////////////////////////////////// 60 // 61 // UCS-2 string (UT_UCS2Char) 62 // 63 // String is built of 16-bit units (words) 64 // 65 // TODO: Is this really UCS-2 or UTF-16? 66 // TODO: meaning, does it support surrogates or is it intended to 67 // TODO: support them at any time in the future? 68 // TODO: Correctly, UCS-2 does not support surrogates and UTF-16 does. 69 // TODO: BUT Microsoft calls their native Unicode encoding UCS-2 70 // TODO: while it supports surrogates and is thus really UTF-16. 71 // TODO: Surrogates are Unicode characters with codepoints above 72 // TODO: 65535 which cannot therefore fit into a 2-byte word. 73 // TODO: This means that TRUE UCS-2 is a single-word encoding and 74 // TODO: UTF-16 is a multi-word encoding. 75 // 76 // NOTE: We shouldn't actually need 16-bit strings anymore since 77 // NOTE: AbiWord is now fully converted to using 32-bit Unicode 78 // NOTE: internally. The only possible needs for this is for 79 // NOTE: Windows GUI, filesystem and API functions where applicable; 80 // NOTE: and perhaps some file formats or external libraries 81 // 82 //////////////////////////////////////////////////////////////////////// 83 84 #ifdef ENABLE_UCS2_STRINGS 85 86 #define UT_UCS2_isdigit(x) (((x) >= '0') && ((x) <= '9')) // TODO: make UNICODE-wise 87 88 /*these are unicode-safe*/ 89 ABI_EXPORT bool UT_UCS2_isupper(UT_UCS2Char c); 90 ABI_EXPORT bool UT_UCS2_islower(UT_UCS2Char c); 91 ABI_EXPORT bool UT_UCS2_isalpha(UT_UCS2Char c); 92 ABI_EXPORT bool UT_UCS2_isSentenceSeparator(UT_UCS2Char c); 93 #define UT_UCS2_isalnum(x) (UT_UCS2_isalpha(x) || UT_UCS2_isdigit(x)) // HACK: not UNICODE-safe 94 ABI_EXPORT bool UT_UCS2_isspace(UT_UCS2Char c); 95 #define UT_UCS2_ispunct(x) ((!UT_UCS2_isspace(x) && !UT_UCS2_isalnum(x) && (x)>' ')) // HACK: not UNICODE safe 96 97 // the naming convention has deviated from the above. it's kind 98 // of a mutant libc/C++ naming convention. 99 ABI_EXPORT UT_UCS2Char * UT_UCS2_strstr(const UT_UCS2Char * phaystack, const UT_UCS2Char * pneedle); 100 ABI_EXPORT UT_sint32 UT_UCS2_strcmp(const UT_UCS2Char* left, const UT_UCS2Char* right); 101 ABI_EXPORT UT_UCS2Char * UT_UCS2_stristr(const UT_UCS2Char * phaystack, const UT_UCS2Char * pneedle); 102 ABI_EXPORT UT_UCS2Char * UT_UCS2_strcpy(UT_UCS2Char * dest, const UT_UCS2Char * src); 103 ABI_EXPORT UT_UCS2Char * UT_UCS2_strcpy_char(UT_UCS2Char * dest, const char * src); 104 ABI_EXPORT char * UT_UCS2_strcpy_to_char(char * dest, const UT_UCS2Char * src); 105 ABI_EXPORT bool UT_UCS2_cloneString(UT_UCS2Char ** dest, const UT_UCS2Char * src); 106 ABI_EXPORT bool UT_UCS2_cloneString_char(UT_UCS2Char ** dest, const char * src); 107 ABI_EXPORT UT_UCS2Char * UT_UCS2_strncpy(UT_UCS2Char * dest, const UT_UCS2Char * src, UT_uint32 n); 108 ABI_EXPORT UT_UCS2Char * UT_UCS2_strnrev(UT_UCS2Char * dest, UT_uint32 n); 109 110 ABI_EXPORT UT_UCS2Char UT_UCS2_tolower(UT_UCS2Char c); 111 ABI_EXPORT UT_UCS2Char UT_UCS2_toupper(UT_UCS2Char c); 112 113 #endif 114 115 // Don't ifdef this one out since MSWord importer uses it 116 117 ABI_EXPORT UT_uint32 UT_UCS2_strlen(const UT_UCS2Char * string); 118 119 //////////////////////////////////////////////////////////////////////// 120 // 121 // UCS-4 string (UT_UCS4Char) 122 // 123 // String is built of 32-bit units (longs) 124 // 125 // NOTE: Ambiguity between UCS-2 and UTF-16 above makes no difference 126 // NOTE: in the case of UCS-4 and UTF-32 since they really are 127 // NOTE: identical 128 // 129 //////////////////////////////////////////////////////////////////////// 130 131 /*these are unicode-safe*/ 132 ABI_EXPORT bool UT_UCS4_isupper(UT_UCS4Char c); 133 ABI_EXPORT bool UT_UCS4_islower(UT_UCS4Char c); 134 ABI_EXPORT bool UT_UCS4_isalpha(UT_UCS4Char c); 135 ABI_EXPORT bool UT_UCS4_isSentenceSeparator(UT_UCS4Char c); 136 ABI_EXPORT bool UT_UCS4_isdigit(UT_UCS4Char c); 137 #define UT_UCS4_isalnum(x) (UT_UCS4_isalpha(x) || UT_UCS4_isdigit(x)) // HACK: not UNICODE-safe 138 ABI_EXPORT bool UT_UCS4_isspace(UT_UCS4Char c); 139 #define UT_UCS4_ispunct(x) ((!UT_UCS4_isspace(x) && !UT_UCS4_isalnum(x) && (x)>' ')) // HACK: not UNICODE safe 140 141 // the naming convention has deviated from the above. it's kind 142 // of a mutant libc/C++ naming convention. 143 ABI_EXPORT UT_sint32 UT_UCS4_strcmp(const UT_UCS4Char* left, const UT_UCS4Char* right); 144 ABI_EXPORT UT_UCS4Char * UT_UCS4_strstr(const UT_UCS4Char * phaystack, const UT_UCS4Char * pneedle); 145 ABI_EXPORT UT_UCS4Char * UT_UCS4_stristr(const UT_UCS4Char * phaystack, const UT_UCS4Char * pneedle); 146 ABI_EXPORT UT_uint32 UT_UCS4_strlen(const UT_UCS4Char * string); 147 ABI_EXPORT UT_uint32 UT_UCS4_strlen_as_char(const UT_UCS4Char * string); 148 ABI_EXPORT UT_UCS4Char * UT_UCS4_strcpy(UT_UCS4Char * dest, const UT_UCS4Char * src); 149 ABI_EXPORT UT_UCS4Char * UT_UCS4_strcpy_char(UT_UCS4Char * dest, const char * src); 150 ABI_EXPORT UT_UCS4Char * UT_UCS4_strncpy_char(UT_UCS4Char * dest, const char * src, int); 151 ABI_EXPORT UT_UCS4Char * UT_UCS4_strcpy_utf8_char(UT_UCS4Char * dest, const char * src); 152 ABI_EXPORT char * UT_UCS4_strcpy_to_char(char * dest, const UT_UCS4Char * src); 153 ABI_EXPORT char * UT_UCS4_strncpy_to_char(char * dest, const UT_UCS4Char * src, int); 154 ABI_EXPORT bool UT_UCS4_cloneString(UT_UCS4Char ** dest, const UT_UCS4Char * src); 155 ABI_EXPORT bool UT_UCS4_cloneString_char(UT_UCS4Char ** dest, const char * src); 156 ABI_EXPORT UT_UCS4Char * UT_UCS4_strncpy(UT_UCS4Char * dest, const UT_UCS4Char * src, UT_uint32 n); 157 ABI_EXPORT UT_UCS4Char * UT_UCS4_strnrev(UT_UCS4Char * dest, UT_uint32 n); 158 159 ABI_EXPORT UT_UCS4Char UT_UCS4_tolower(UT_UCS4Char c); 160 ABI_EXPORT UT_UCS4Char UT_UCS4_toupper(UT_UCS4Char c); 161 162 163 ABI_EXPORT void UT_parse_attributes(const char * attributes, 164 std::map<std::string, std::string> & map); 165 ABI_EXPORT void UT_parse_properties(const char * props, 166 std::map<std::string, std::string> & map); 167 168 // implemented in UT_strptime.cpp - see strptime() as it is not avail on win. 169 extern "C" { 170 ABI_EXPORT char *UT_strptime (const char *buf, const char *format, struct tm *tm); 171 } 172 173 174 #ifdef WIN32 175 #define snprintf _snprintf 176 177 #define _(String) (String) 178 #define N_(String) (String) 179 180 #endif /* WIN32 */ 181 182 #if defined (SNPRINTF_MISSING) 183 extern int snprintf(char *str, size_t size, const char *format, ...); 184 #endif 185 186 /* 187 this one prints floating point value but using dot as fractional serparator 188 independent of the current locale's settings. 189 */ 190 ABI_EXPORT const char* std_size_string(float f); 191 192 193 #include <fribidi.h> 194 195 typedef FriBidiCharType UT_BidiCharType; 196 197 #define UT_BIDI_LTR FRIBIDI_TYPE_LTR 198 #define UT_BIDI_RTL FRIBIDI_TYPE_RTL 199 #define UT_BIDI_WS FRIBIDI_TYPE_WS 200 #define UT_BIDI_EN FRIBIDI_TYPE_EN 201 #define UT_BIDI_ES FRIBIDI_TYPE_ES 202 #define UT_BIDI_ET FRIBIDI_TYPE_ET 203 #define UT_BIDI_AN FRIBIDI_TYPE_AN 204 #define UT_BIDI_CS FRIBIDI_TYPE_CS 205 #define UT_BIDI_BS FRIBIDI_TYPE_BS 206 #define UT_BIDI_SS FRIBIDI_TYPE_SS 207 #define UT_BIDI_AL FRIBIDI_TYPE_AL 208 #define UT_BIDI_NSM FRIBIDI_TYPE_NSM 209 #define UT_BIDI_RLE FRIBIDI_TYPE_RLE 210 #define UT_BIDI_LRE FRIBIDI_TYPE_LRE 211 #define UT_BIDI_LRO FRIBIDI_TYPE_LRO 212 #define UT_BIDI_RLO FRIBIDI_TYPE_RLO 213 #define UT_BIDI_PDF FRIBIDI_TYPE_PDF 214 #define UT_BIDI_ON FRIBIDI_TYPE_ON 215 216 217 #define UT_BIDI_UNSET FRIBIDI_TYPE_UNSET 218 #define UT_BIDI_IGNORE FRIBIDI_TYPE_IGNORE 219 220 #define UT_BIDI_IS_STRONG FRIBIDI_IS_STRONG 221 #define UT_BIDI_IS_WEAK FRIBIDI_IS_WEAK 222 #define UT_BIDI_IS_NUMBER FRIBIDI_IS_NUMBER 223 #define UT_BIDI_IS_RTL FRIBIDI_IS_RTL 224 #define UT_BIDI_IS_NEUTRAL FRIBIDI_IS_NEUTRAL 225 #define UT_BIDI_IS_LETTER FRIBIDI_IS_LETTER 226 #define UT_BIDI_IS_NSM(x) ((x) & FRIBIDI_MASK_NSM) 227 228 229 ABI_EXPORT UT_BidiCharType UT_bidiGetCharType(UT_UCS4Char c); 230 231 ABI_EXPORT bool UT_bidiMapLog2Vis(const UT_UCS4Char * pStrIn, UT_uint32 len, UT_BidiCharType baseDir, 232 UT_uint32 *pL2V, UT_uint32 * pV2L, UT_Byte * pEmbed); 233 234 ABI_EXPORT bool UT_bidiReorderString(const UT_UCS4Char * pStrIn, UT_uint32 len, UT_BidiCharType baseDir, 235 UT_UCS4Char * pStrOut); 236 237 238 ABI_EXPORT bool UT_bidiGetMirrorChar(UT_UCS4Char c, UT_UCS4Char &mc); 239 240 G_END_DECLS 241 242 #endif /* UT_STRING_H */ 243