1 /*************************************************************************** 2 begin : Sat Jun 28 2003 3 copyright : (C) 2003 by Martin Preuss 4 email : martin@libchipcard.de 5 6 *************************************************************************** 7 * * 8 * This library is free software; you can redistribute it and/or * 9 * modify it under the terms of the GNU Lesser General Public * 10 * License as published by the Free Software Foundation; either * 11 * version 2.1 of the License, or (at your option) any later version. * 12 * * 13 * This library is distributed in the hope that it will be useful, * 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * 16 * Lesser General Public License for more details. * 17 * * 18 * You should have received a copy of the GNU Lesser General Public * 19 * License along with this library; if not, write to the Free Software * 20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * 21 * MA 02111-1307 USA * 22 * * 23 ***************************************************************************/ 24 25 26 27 #ifndef GWENHYWFAR_TEXT_H 28 #define GWENHYWFAR_TEXT_H 29 30 #include <gwenhywfar/gwenhywfarapi.h> 31 #include <gwenhywfar/types.h> 32 #include <gwenhywfar/logger.h> 33 #include <gwenhywfar/buffer.h> 34 #include <stdio.h> 35 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #define GWEN_TEXT_FUZZY_SHIFT 10 42 43 44 #define GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS 0x00000001 45 #define GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS 0x00000002 46 #define GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS 0x00000004 47 #define GWEN_TEXT_FLAGS_NEED_DELIMITER 0x00000008 48 #define GWEN_TEXT_FLAGS_NULL_IS_DELIMITER 0x00000010 49 #define GWEN_TEXT_FLAGS_DEL_QUOTES 0x00000020 50 #define GWEN_TEXT_FLAGS_CHECK_BACKSLASH 0x00000040 51 52 53 /** 54 * This function cuts out a word from a given string. 55 * @return address of the new word, 0 on error 56 * @param src pointer to the beginning of the source string 57 * @param delims pointer to a string containing all delimiters 58 * @param buffer pointer to the destination buffer 59 * @param maxsize length of the buffer. Actually up to this number of 60 * characters are copied to the buffer. If after this number of chars no 61 * delimiter follows the string will be terminated. You will have to check 62 * whether there is a delimiter directly after the copied string 63 * @param flags defines how the source string is to be processed 64 * @param next pointer to a pointer to receive the address up to which the 65 * source string has been handled. You can use this to continue with the 66 * source string behind the word we've just cut out. This variable is only 67 * modified upon successfull return 68 */ 69 GWENHYWFAR_API 70 char *GWEN_Text_GetWord(const char *src, 71 const char *delims, 72 char *buffer, 73 unsigned int maxsize, 74 uint32_t flags, 75 const char **next); 76 77 GWENHYWFAR_API 78 int GWEN_Text_GetWordToBuffer(const char *src, 79 const char *delims, 80 GWEN_BUFFER *buf, 81 uint32_t flags, 82 const char **next); 83 84 85 /** 86 * This function does escaping like it is used for HTTP URL encoding. 87 * All characters which are not alphanumeric are escaped by %XX where 88 * XX ist the hexadecimal code of the character. 89 */ 90 GWENHYWFAR_API 91 char *GWEN_Text_Escape(const char *src, 92 char *buffer, 93 unsigned int maxsize); 94 95 GWENHYWFAR_API 96 char *GWEN_Text_Unescape(const char *src, 97 char *buffer, 98 unsigned int maxsize); 99 100 GWENHYWFAR_API 101 char *GWEN_Text_UnescapeN(const char *src, 102 unsigned int srclen, 103 char *buffer, 104 unsigned int maxsize); 105 106 GWENHYWFAR_API 107 char *GWEN_Text_EscapeTolerant(const char *src, 108 char *buffer, 109 unsigned int maxsize); 110 111 GWENHYWFAR_API 112 char *GWEN_Text_UnescapeTolerant(const char *src, 113 char *buffer, 114 unsigned int maxsize); 115 116 GWENHYWFAR_API 117 char *GWEN_Text_UnescapeTolerantN(const char *src, 118 unsigned int srclen, 119 char *buffer, 120 unsigned int maxsize); 121 122 123 GWENHYWFAR_API 124 int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf); 125 126 GWENHYWFAR_API 127 int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf); 128 129 /** 130 * Does the same as @ref GWEN_Text_EscapeToBuffer does, but this version 131 * here does not escape some characters generally accepted within strings 132 * (such as space, comma, decimal point etc). 133 */ 134 GWENHYWFAR_API 135 int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf); 136 137 GWENHYWFAR_API 138 int GWEN_Text_EscapeToBufferTolerant2(GWEN_BUFFER *src, GWEN_BUFFER *buf); 139 140 141 /** 142 * This function does the same as @ref GWEN_Text_UnescapeToBuffer but it 143 * doesn't complain about unescaped characters in the source string. 144 */ 145 GWENHYWFAR_API 146 int GWEN_Text_UnescapeToBufferTolerant(const char *src, GWEN_BUFFER *buf); 147 148 149 GWENHYWFAR_API 150 char *GWEN_Text_ToHex(const char *src, unsigned l, char *buffer, 151 unsigned int maxsize); 152 153 /** 154 * Writes the given binary data as a hex string to the destination buffer. 155 * @param groupsize if !=0 then after this many characters in the destination 156 * buffer the delimiter is inserted 157 * @param delimiter character to write after groupsize characters 158 * @param skipLeadingZeroes if !=0 then leading zeroes are suppressed 159 */ 160 GWENHYWFAR_API 161 int GWEN_Text_ToHexBuffer(const char *src, unsigned l, 162 GWEN_BUFFER *buf, 163 unsigned int groupsize, 164 char delimiter, 165 int skipLeadingZeroes); 166 167 /** 168 * Converts a string to Hex. After "groupsize" bytes the "delimiter" is 169 * inserted. 170 */ 171 GWENHYWFAR_API 172 char *GWEN_Text_ToHexGrouped(const char *src, 173 unsigned l, 174 char *buffer, 175 unsigned maxsize, 176 unsigned int groupsize, 177 char delimiter, 178 int skipLeadingZeros); 179 180 GWENHYWFAR_API 181 int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize); 182 183 /** 184 * Reads hex bytes and stores them in the given buffer. 185 */ 186 GWENHYWFAR_API 187 int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf); 188 189 190 /** 191 * Reads bcd bytes and stores them in the given buffer. 192 */ 193 GWENHYWFAR_API 194 int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf); 195 196 197 /** 198 * Writes the given BCD data as a hex string to the destination buffer. 199 * @param groupsize if !=0 then after this many characters in the destination 200 * buffer the delimiter is inserted 201 * @param delimiter character to write after groupsize characters 202 * @param skipLeadingZeroes if !=0 then leading zeroes are suppressed 203 */ 204 GWENHYWFAR_API 205 int GWEN_Text_ToBcdBuffer(const char *src, unsigned l, 206 GWEN_BUFFER *buf, 207 unsigned int groupsize, 208 char delimiter, 209 int skipLeadingZeroes); 210 211 212 /** 213 * @return number of bytes in the buffer (-1 on error) 214 * @param fillchar if 0 then no filling takes place, positive values 215 * extend to the right, negative values to the left. 216 */ 217 GWENHYWFAR_API 218 int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize, 219 int fillchar); 220 221 /** 222 * This functions transforms a string into a double float value. 223 * It always uses a decimal point (".") regardless of the current locale settings. 224 * This makes sure that a value can always be parsed regardless of the 225 * country settings of the producer of that string. 226 */ 227 GWENHYWFAR_API 228 int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf); 229 230 /** 231 * This functions transforms a double float value into a string. 232 * It always uses a decimal point (".") regardless of the current locale settings. 233 * This makes sure that a value can always be parsed regardless of the 234 * country settings of the producer of that string. 235 */ 236 GWENHYWFAR_API 237 int GWEN_Text_StringToDouble(const char *s, double *num); 238 239 240 /** 241 * Compares two strings. If either of them is given but empty, that string 242 * will be treaten as not given. This way a string NOT given equals a string 243 * which is given but empty. 244 * @param ign set to !=0 to ignore cases 245 */ 246 GWENHYWFAR_API 247 int GWEN_Text_Compare(const char *s1, const char *s2, int ign); 248 249 250 /** 251 * This function provides the functionality of strcasestr() which is not available 252 * on some systems. 253 */ 254 GWENHYWFAR_API 255 const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle); 256 257 258 /** 259 * This function provides the functionality of strndup() which is not 260 * available on Windows (uses strndup() on all other systems). 261 */ 262 GWENHYWFAR_API 263 char *GWEN_Text_strndup(const char *s, size_t n); 264 265 266 /** 267 * This function compares two string and returns the number of matches or 268 * -1 on error. 269 * @param w string to compare 270 * @param p pattern to compare against 271 * @param sensecase if 0 then cases are ignored 272 */ 273 GWENHYWFAR_API 274 int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase); 275 276 277 /** 278 * This is used for debugging purposes and it shows the given data as a 279 * classical hexdump. 280 */ 281 GWENHYWFAR_API 282 void GWEN_Text_DumpString(const char *s, unsigned int l, 283 unsigned int insert); 284 285 286 GWENHYWFAR_API 287 void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l, 288 GWEN_BUFFER *mbuf, 289 unsigned int insert); 290 291 GWENHYWFAR_API 292 void GWEN_Text_LogString(const char *s, unsigned int l, 293 const char *logDomain, 294 GWEN_LOGGER_LEVEL lv); 295 296 297 /** 298 * Condenses a buffer containing chars. 299 * This means removing unnecessary spaces. 300 */ 301 GWENHYWFAR_API 302 void GWEN_Text_CondenseBuffer(GWEN_BUFFER *buf); 303 304 305 /** 306 * This function counts the number of characters in the given UTF-8 buffer. 307 * @param s pointer to a buffer which contains UTF-8 characters 308 * @param len number of bytes (if 0 then all bytes up to a zero byte are 309 * counted) 310 */ 311 GWENHYWFAR_API 312 int GWEN_Text_CountUtf8Chars(const char *s, int len); 313 314 315 /** 316 * Replaces special characters which are used by XML (like "<", "&" etc) 317 * by known sequences (like "&"). 318 */ 319 GWENHYWFAR_API 320 int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf); 321 322 /** 323 * Replaces special character sequences to their coresponding character. 324 */ 325 GWENHYWFAR_API 326 int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf); 327 328 329 /** 330 * Compares two strings and returns the percentage of their equality. 331 * It is calculated by this formula: 332 * matches*100 / ((length of s1)+(length of s2)) 333 * Each match is weight like this: 334 * <ul> 335 * <li>*s1==*s2: 2</li> 336 * <li>toupper(*s1)==toupper(*s2): 2 if ign, 1 otherwise</li> 337 * <li>isalnum(*s1)==isalnum(*s2): 1 338 * </ul> 339 * @return percentage of equality between both strings 340 * @param s1 1st of two strings to compare 341 * @param s2 2nd of two strings to compare 342 * @param ign if !=0 then the cases are ignored 343 */ 344 GWENHYWFAR_API 345 double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign); 346 347 348 /** 349 * This function converts a given text from one charset to another one. 350 * Currently, iconv is used for this conversion. 351 * 352 * @param fromCharset charset of the source text 353 * @param toCharset charset of the source text 354 * @param text text to convert 355 * @param len length of the text (excluding trailing 0) 356 * @param tbuf destination buffer for the converted text 357 */ 358 GWENHYWFAR_API 359 int GWEN_Text_ConvertCharset(const char *fromCharset, 360 const char *toCharset, 361 const char *text, int len, 362 GWEN_BUFFER *tbuf); 363 364 365 366 367 368 typedef int GWENHYWFAR_CB(*GWEN_TEXT_REPLACE_VARS_CB)(void *cbPtr, const char *name, int index, int maxLen, 369 GWEN_BUFFER *dstBuf); 370 371 372 /** 373 * Read the given string and replace every variable in that string ( noted as "$(varname)") with the 374 * value of the mentioned variable. Uses a callback to actually retrieve the data. 375 */ 376 GWENHYWFAR_API 377 int GWEN_Text_ReplaceVars(const char *s, GWEN_BUFFER *dbuf, GWEN_TEXT_REPLACE_VARS_CB fn, void *ptr); 378 379 380 381 #ifdef __cplusplus 382 } 383 #endif 384 385 386 #endif 387 388 389 390