1 /***************************************************************************
2  begin       : Sat Jun 28 2003
3  copyright   : (C) 2003 by Martin Preuss
4  email       : martin@libchipcard.de
5 
6  ***************************************************************************
7  *                                                                         *
8  *   This library is free software; you can redistribute it and/or         *
9  *   modify it under the terms of the GNU Lesser General Public            *
10  *   License as published by the Free Software Foundation; either          *
11  *   version 2.1 of the License, or (at your option) any later version.    *
12  *                                                                         *
13  *   This library is distributed in the hope that it will be useful,       *
14  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
15  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
16  *   Lesser General Public License for more details.                       *
17  *                                                                         *
18  *   You should have received a copy of the GNU Lesser General Public      *
19  *   License along with this library; if not, write to the Free Software   *
20  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston,                 *
21  *   MA  02111-1307  USA                                                   *
22  *                                                                         *
23  ***************************************************************************/
24 
25 
26 
27 #ifndef GWENHYWFAR_TEXT_H
28 #define GWENHYWFAR_TEXT_H
29 
30 #include <gwenhywfar/gwenhywfarapi.h>
31 #include <gwenhywfar/types.h>
32 #include <gwenhywfar/logger.h>
33 #include <gwenhywfar/buffer.h>
34 #include <stdio.h>
35 
36 
37 #ifdef __cplusplus
38 extern "C" {
39 #endif
40 
/* NOTE(review): no declaration in this header uses GWEN_TEXT_FUZZY_SHIFT;
 * its meaning has to be taken from the implementation -- confirm. */
41 #define GWEN_TEXT_FUZZY_SHIFT               10
42 
43 
/*
 * Flag constants. Each one sets a different single bit, so they can be
 * combined with bitwise OR.
 * NOTE(review): presumably these are the values accepted by the "flags"
 * argument of GWEN_Text_GetWord()/GWEN_Text_GetWordToBuffer() -- confirm.
 */
44 #define GWEN_TEXT_FLAGS_DEL_LEADING_BLANKS  0x00000001
45 #define GWEN_TEXT_FLAGS_DEL_TRAILING_BLANKS 0x00000002
46 #define GWEN_TEXT_FLAGS_DEL_MULTIPLE_BLANKS 0x00000004
47 #define GWEN_TEXT_FLAGS_NEED_DELIMITER      0x00000008
48 #define GWEN_TEXT_FLAGS_NULL_IS_DELIMITER   0x00000010
49 #define GWEN_TEXT_FLAGS_DEL_QUOTES          0x00000020
50 #define GWEN_TEXT_FLAGS_CHECK_BACKSLASH     0x00000040
51 
52 
53 /**
54  * This function cuts out a word from a given string.
55  * @return address of the new word, 0 on error
56  * @param src pointer to the beginning of the source string
57  * @param delims pointer to a string containing all delimiters
58  * @param buffer pointer to the destination buffer
59  * @param maxsize length of the buffer. Actually up to this number of
60  * characters are copied to the buffer. If after this number of chars no
61  * delimiter follows the string will be terminated. You will have to check
62  * whether there is a delimiter directly after the copied string
63  * @param flags defines how the source string is to be processed
64  * @param next pointer to a pointer to receive the address up to which the
65  * source string has been handled. You can use this to continue with the
66  * source string behind the word we've just cut out. This variable is only
67  * modified upon successful return
68  */
69 GWENHYWFAR_API
70 char *GWEN_Text_GetWord(const char *src,
71                         const char *delims,
72                         char *buffer,
73                         unsigned int maxsize,
74                         uint32_t flags,
75                         const char **next);
76 
/**
 * NOTE(review): undocumented in this header. Judging by the signature this is
 * the GWEN_BUFFER counterpart of GWEN_Text_GetWord(): it takes a GWEN_BUFFER
 * instead of a buffer/maxsize pair and returns an int instead of a pointer.
 * Confirm the semantics and the return convention against the implementation.
 */
77 GWENHYWFAR_API
78 int GWEN_Text_GetWordToBuffer(const char *src,
79                               const char *delims,
80                               GWEN_BUFFER *buf,
81                               uint32_t flags,
82                               const char **next);
83 
84 
85 /**
86  * This function does escaping like it is used for HTTP URL encoding.
87  * All characters which are not alphanumeric are escaped by %XX where
88  * XX is the hexadecimal code of the character.
 * NOTE(review): the roles of buffer/maxsize and the meaning of the returned
 * pointer are not documented here -- confirm against the implementation.
89  */
90 GWENHYWFAR_API
91 char *GWEN_Text_Escape(const char *src,
92                        char *buffer,
93                        unsigned int maxsize);
94 
/**
 * NOTE(review): undocumented in this header; presumably the inverse of
 * GWEN_Text_Escape() (decodes %XX sequences) -- confirm.
 */
95 GWENHYWFAR_API
96 char *GWEN_Text_Unescape(const char *src,
97                          char *buffer,
98                          unsigned int maxsize);
99 
/**
 * NOTE(review): undocumented in this header; presumably like
 * GWEN_Text_Unescape() but reading at most srclen bytes of src -- confirm.
 */
100 GWENHYWFAR_API
101 char *GWEN_Text_UnescapeN(const char *src,
102                           unsigned int srclen,
103                           char *buffer,
104                           unsigned int maxsize);
105 
/**
 * NOTE(review): undocumented in this header; presumably the char-buffer
 * counterpart of GWEN_Text_EscapeToBufferTolerant() -- confirm.
 */
106 GWENHYWFAR_API
107 char *GWEN_Text_EscapeTolerant(const char *src,
108                                char *buffer,
109                                unsigned int maxsize);
110 
/**
 * NOTE(review): undocumented in this header; presumably the char-buffer
 * counterpart of GWEN_Text_UnescapeToBufferTolerant() -- confirm.
 */
111 GWENHYWFAR_API
112 char *GWEN_Text_UnescapeTolerant(const char *src,
113                                  char *buffer,
114                                  unsigned int maxsize);
115 
/**
 * NOTE(review): undocumented in this header; presumably like
 * GWEN_Text_UnescapeTolerant() but reading at most srclen bytes of src --
 * confirm.
 */
116 GWENHYWFAR_API
117 char *GWEN_Text_UnescapeTolerantN(const char *src,
118                                   unsigned int srclen,
119                                   char *buffer,
120                                   unsigned int maxsize);
121 
122 
/**
 * NOTE(review): undocumented in this header; presumably performs the same
 * escaping as GWEN_Text_Escape() with a GWEN_BUFFER as destination. The
 * return convention is not documented -- confirm.
 */
123 GWENHYWFAR_API
124 int GWEN_Text_EscapeToBuffer(const char *src, GWEN_BUFFER *buf);
125 
/**
 * NOTE(review): undocumented in this header; presumably the inverse of
 * GWEN_Text_EscapeToBuffer(). The return convention is not documented --
 * confirm.
 */
126 GWENHYWFAR_API
127 int GWEN_Text_UnescapeToBuffer(const char *src, GWEN_BUFFER *buf);
128 
129 /**
130  * Does the same as @ref GWEN_Text_EscapeToBuffer, but this version
131  * does not escape some characters generally accepted within strings
132  * (such as space, comma, decimal point etc).
133  */
134 GWENHYWFAR_API
135 int GWEN_Text_EscapeToBufferTolerant(const char *src, GWEN_BUFFER *buf);
136 
/**
 * Variant taking the source as a GWEN_BUFFER rather than a C string.
 * NOTE(review): otherwise undocumented; presumably behaves like
 * GWEN_Text_EscapeToBufferTolerant() -- confirm.
 */
137 GWENHYWFAR_API
138 int GWEN_Text_EscapeToBufferTolerant2(GWEN_BUFFER *src, GWEN_BUFFER *buf);
139 
140 
141 /**
142  * This function does the same as @ref GWEN_Text_UnescapeToBuffer, but it
143  * does not complain about unescaped characters in the source string.
144  */
145 GWENHYWFAR_API
146 int GWEN_Text_UnescapeToBufferTolerant(const char *src, GWEN_BUFFER *buf);
147 
148 
/**
 * NOTE(review): undocumented in this header; presumably the char-buffer
 * counterpart of GWEN_Text_ToHexBuffer() without grouping (l = number of
 * source bytes, maxsize = size of buffer) -- confirm.
 */
149 GWENHYWFAR_API
150 char *GWEN_Text_ToHex(const char *src, unsigned l, char *buffer,
151                       unsigned int maxsize);
152 
153 /**
154  * Writes the given binary data as a hex string to the destination buffer.
 * @param src binary data to convert
 * @param l NOTE(review): presumably the number of bytes at src -- confirm
 * @param buf destination buffer
155  * @param groupsize if !=0 then after this many characters in the destination
156  *   buffer the delimiter is inserted
157  * @param delimiter character to write after groupsize characters
158  * @param skipLeadingZeroes if !=0 then leading zeroes are suppressed
 * NOTE(review): the return convention is not documented here -- confirm.
159  */
160 GWENHYWFAR_API
161 int GWEN_Text_ToHexBuffer(const char *src, unsigned l,
162                           GWEN_BUFFER *buf,
163                           unsigned int groupsize,
164                           char delimiter,
165                           int skipLeadingZeroes);
166 
167 /**
168  * Converts a string to hex. After "groupsize" bytes the "delimiter" is
169  * inserted.
 * NOTE(review): skipLeadingZeros presumably has the same meaning as the
 * skipLeadingZeroes parameter of GWEN_Text_ToHexBuffer() -- confirm.
170  */
171 GWENHYWFAR_API
172 char *GWEN_Text_ToHexGrouped(const char *src,
173                              unsigned l,
174                              char *buffer,
175                              unsigned maxsize,
176                              unsigned int groupsize,
177                              char delimiter,
178                              int skipLeadingZeros);
179 
/**
 * NOTE(review): undocumented in this header; presumably the char-buffer
 * counterpart of GWEN_Text_FromHexBuffer(). The return convention is not
 * documented -- confirm.
 */
180 GWENHYWFAR_API
181 int GWEN_Text_FromHex(const char *src, char *buffer, unsigned maxsize);
182 
183 /**
184  * Reads hex bytes and stores them in the given buffer.
 * NOTE(review): the return convention is not documented here -- confirm.
185  */
186 GWENHYWFAR_API
187 int GWEN_Text_FromHexBuffer(const char *src, GWEN_BUFFER *buf);
188 
189 
190 /**
191  * Reads bcd bytes and stores them in the given buffer.
 * NOTE(review): the return convention is not documented here -- confirm.
192  */
193 GWENHYWFAR_API
194 int GWEN_Text_FromBcdBuffer(const char *src, GWEN_BUFFER *buf);
195 
196 
197 /**
198  * Writes the given BCD data as a hex string to the destination buffer.
 * NOTE(review): this sentence mirrors the one on GWEN_Text_ToHexBuffer();
 * confirm that "hex string" really describes the output of this function.
199  * @param groupsize if !=0 then after this many characters in the destination
200  *   buffer the delimiter is inserted
201  * @param delimiter character to write after groupsize characters
202  * @param skipLeadingZeroes if !=0 then leading zeroes are suppressed
203  */
204 GWENHYWFAR_API
205 int GWEN_Text_ToBcdBuffer(const char *src, unsigned l,
206                           GWEN_BUFFER *buf,
207                           unsigned int groupsize,
208                           char delimiter,
209                           int skipLeadingZeroes);
210 
211 
212 /**
 * NOTE(review): presumably writes the text representation of num into
 * buffer (of size bufsize) -- confirm.
213  * @return number of bytes in the buffer (-1 on error)
214  * @param fillchar if 0 then no filling takes place, positive values
215  * extend to the right, negative values to the left.
216  */
217 GWENHYWFAR_API
218 int GWEN_Text_NumToString(int num, char *buffer, unsigned int bufsize,
219                           int fillchar);
220 
221 /**
222  * This function transforms a double float value into a string.
223  * It always uses a decimal point (".") regardless of the current locale settings.
224  * This makes sure that a value can always be parsed regardless of the
225  * country settings of the producer of that string.
226  */
227 GWENHYWFAR_API
228 int GWEN_Text_DoubleToBuffer(double num, GWEN_BUFFER *buf);
229 
230 /**
231  * This function transforms a string into a double float value.
232  * It always expects a decimal point (".") regardless of the current locale settings.
233  * This makes sure that a value can always be parsed regardless of the
234  * country settings of the producer of that string.
235  */
236 GWENHYWFAR_API
237 int GWEN_Text_StringToDouble(const char *s, double *num);
238 
239 
240 /**
241  * Compares two strings. If either of them is given but empty, that string
242  * will be treated as not given. This way a string NOT given equals a string
243  * which is given but empty.
244  * @param ign set to !=0 to ignore case
 * NOTE(review): the meaning of the return value is not documented here --
 * confirm.
245  */
246 GWENHYWFAR_API
247 int GWEN_Text_Compare(const char *s1, const char *s2, int ign);
248 
249 
250 /**
251  * This function provides the functionality of strcasestr() which is not available
252  * on some systems.
 * @param haystack string to search in (as for strcasestr())
 * @param needle string to search for (as for strcasestr())
253  */
254 GWENHYWFAR_API
255 const char *GWEN_Text_StrCaseStr(const char *haystack, const char *needle);
256 
257 
258 /**
259  * This function provides the functionality of strndup() which is not
260  * available on Windows (uses strndup() on all other systems).
 * NOTE(review): as with strndup(), the returned string is presumably
 * allocated and must be released by the caller -- confirm.
261  */
262 GWENHYWFAR_API
263 char *GWEN_Text_strndup(const char *s, size_t n);
264 
265 
266 /**
267  * This function compares two strings and returns the number of matches or
268  * -1 on error.
269  * @param w string to compare
270  * @param p pattern to compare against
271  * @param sensecase if 0 then case is ignored
272  */
273 GWENHYWFAR_API
274 int GWEN_Text_ComparePattern(const char *w, const char *p, int sensecase);
275 
276 
277 /**
278  * This is used for debugging purposes and it shows the given data as a
279  * classical hexdump.
 * NOTE(review): l is presumably the number of bytes at s, and insert
 * presumably the indentation of the output -- confirm.
280  */
281 GWENHYWFAR_API
282 void GWEN_Text_DumpString(const char *s, unsigned int l,
283                           unsigned int insert);
284 
285 
/**
 * NOTE(review): undocumented in this header; presumably produces the same
 * hexdump as GWEN_Text_DumpString() but writes it into mbuf -- confirm.
 */
286 GWENHYWFAR_API
287 void GWEN_Text_DumpString2Buffer(const char *s, unsigned int l,
288                                  GWEN_BUFFER *mbuf,
289                                  unsigned int insert);
290 
/**
 * NOTE(review): undocumented in this header; presumably sends a dump of the
 * l bytes at s to the logger for logDomain at level lv -- confirm.
 */
291 GWENHYWFAR_API
292 void GWEN_Text_LogString(const char *s, unsigned int l,
293                          const char *logDomain,
294                          GWEN_LOGGER_LEVEL lv);
295 
296 
297 /**
298  * Condenses a buffer containing chars.
299  * This means removing unnecessary spaces.
 * NOTE(review): which spaces count as unnecessary is not specified here --
 * confirm against the implementation.
300  */
301 GWENHYWFAR_API
302 void GWEN_Text_CondenseBuffer(GWEN_BUFFER *buf);
303 
304 
305 /**
306  * This function counts the number of characters in the given UTF-8 buffer.
307  * @param s pointer to a buffer which contains UTF-8 characters
308  * @param len number of bytes (if 0 then all bytes up to a zero byte are
309  *  counted)
 * NOTE(review): the return value is presumably the character count; error
 * values are not documented here -- confirm.
310  */
311 GWENHYWFAR_API
312 int GWEN_Text_CountUtf8Chars(const char *s, int len);
313 
314 
315 /**
316  * Replaces special characters which are used by XML (like "<", "&" etc)
317  * by known sequences (like "&amp;").
 * NOTE(review): the return convention is not documented here -- confirm.
318  */
319 GWENHYWFAR_API
320 int GWEN_Text_EscapeXmlToBuffer(const char *src, GWEN_BUFFER *buf);
321 
322 /**
323  * Replaces special character sequences with their corresponding character.
324  */
325 GWENHYWFAR_API
326 int GWEN_Text_UnescapeXmlToBuffer(const char *src, GWEN_BUFFER *buf);
327 
328 
329 /**
330  * Compares two strings and returns the percentage of their equality.
331  * It is calculated by this formula:
332  *  matches*100 / ((length of s1)+(length of s2))
333  * Each match is weighted like this:
334  *  <ul>
335  *   <li>*s1==*s2: 2</li>
336  *   <li>toupper(*s1)==toupper(*s2): 2 if ign, 1 otherwise</li>
337  *   <li>isalnum(*s1)==isalnum(*s2): 1</li>
338  *  </ul>
339  * @return percentage of equality between both strings
340  * @param s1 1st of two strings to compare
341  * @param s2 2nd of two strings to compare
342  * @param ign if !=0 then case is ignored
343  */
344 GWENHYWFAR_API
345 double GWEN_Text_CheckSimilarity(const char *s1, const char *s2, int ign);
346 
347 
348 /**
349  * This function converts a given text from one charset to another one.
350  * Currently, iconv is used for this conversion.
351  *
352  * @param fromCharset charset of the source text
353  * @param toCharset charset of the converted (destination) text
354  * @param text text to convert
355  * @param len length of the text (excluding trailing 0)
356  * @param tbuf destination buffer for the converted text
357  */
358 GWENHYWFAR_API
359 int GWEN_Text_ConvertCharset(const char *fromCharset,
360                              const char *toCharset,
361                              const char *text, int len,
362                              GWEN_BUFFER *tbuf);
363 
364 
365 
366 
367 
/**
 * Callback type taken by the "fn" parameter of GWEN_Text_ReplaceVars().
 * NOTE(review): presumably called for a variable "name" and expected to
 * write that variable's value into dstBuf; the meaning of index and maxLen
 * and the return convention are not documented here -- confirm.
 */
368 typedef int GWENHYWFAR_CB(*GWEN_TEXT_REPLACE_VARS_CB)(void *cbPtr, const char *name, int index, int maxLen,
369                                                       GWEN_BUFFER *dstBuf);
370 
371 
372 /**
373  * Read the given string and replace every variable in that string (written as "$(varname)") with the
374  * value of the mentioned variable. Uses a callback to actually retrieve the data.
 * NOTE(review): ptr is presumably handed to fn as its cbPtr argument, and
 * the result presumably goes to dbuf -- confirm.
375  */
376 GWENHYWFAR_API
377 int GWEN_Text_ReplaceVars(const char *s, GWEN_BUFFER *dbuf, GWEN_TEXT_REPLACE_VARS_CB fn, void *ptr);
378 
379 
380 
381 #ifdef __cplusplus
382 }
383 #endif
384 
385 
386 #endif
387 
388 
389 
390