1 /************************************************************************/
2 /*! \file
3
4 \brief Functions for manipulating strings.
5
6 Various functions for manipulating strings. Some of these functions
7 provide new functionality, whereas others are drop-in replacements
8 of standard functions (but with enhanced functionality).
9
10 \date Started 11/1/99
11 \author George
12 \version $Id: string.c 10711 2011-08-31 22:23:04Z karypis $
13 */
14 /************************************************************************/
15
16 #include <GKlib.h>
17
18
19
20 /************************************************************************/
21 /*! \brief Replaces certain characters in a string.
22
23 This function takes a string and replaces all the characters in the
24 \c fromlist with the corresponding characters from the \c tolist.
25 That is, each occurrence of <tt>fromlist[i]</tt> is replaced by
26 <tt>tolist[i]</tt>.
27 If the \c tolist is shorter than \c fromlist, then the corresponding
28 characters are deleted. The modifications on \c str are done in place.
29 It tries to provide a functionality similar to Perl's \b tr// function.
30
31 \param str is the string whose characters will be replaced.
32 \param fromlist is the set of characters to be replaced.
33 \param tolist is the set of replacement characters.
34 \returns A pointer to \c str itself.
35 */
36 /************************************************************************/
char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
{
  size_t nfrom = strlen(fromlist);
  size_t nto   = strlen(tolist);
  char *rd;          /* next character to examine */
  char *wr = str;    /* next position to write; never runs ahead of rd */

  for (rd = str; *rd != '\0'; rd++) {
    size_t pos = 0;

    /* Locate the first entry of fromlist that equals the current character */
    while (pos < nfrom && fromlist[pos] != *rd)
      pos++;

    if (pos == nfrom)
      *wr++ = *rd;            /* not listed: keep the character as it is */
    else if (pos < nto)
      *wr++ = tolist[pos];    /* listed and has a counterpart: translate */
    /* otherwise listed without a counterpart in tolist: the character is dropped */
  }
  *wr = '\0';

  return str;
}
61
62
63
64 /************************************************************************/
65 /*! \brief Regex-based search-and-replace function
66
67 This function is a C implementation of Perl's <tt> s//</tt> regular-expression
68 based substitution function.
69
70 \param str
71 is the input string on which the operation will be performed.
72 \param pattern
73 is the regular expression for the pattern to be matched for substitution.
74 \param replacement
75 is the replacement string, in which the possible captured pattern substrings
76 are referred to as $1, $2, ..., $9. The entire matched pattern is referred
77 to as $0.
78 \param options
79 is a string specifying options for the substitution operation. Currently the
80 <tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global substitution) are
81 supported.
82 \param new_str
83 is a reference to a pointer that will store a pointer to the newly created
84 string that results from the substitutions. This string is allocated via
85 gk_malloc() and needs to be freed using gk_free(). The string is returned
86 even if no substitutions were performed.
87 \returns
88 If successful, it returns 1 + the number of substitutions that were performed.
89 Thus, if no substitutions were performed, the returned value will be 1.
90 Otherwise it returns 0. In case of error, a meaningful error message is
91 returned in <tt>new_str</tt>, which also needs to be freed afterwards.
92 */
93 /************************************************************************/
gk_strstr_replace(char * str,char * pattern,char * replacement,char * options,char ** new_str)94 int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
95 char **new_str)
96 {
97 gk_idx_t i;
98 int j, rc, flags, global, nmatches;
99 size_t len, rlen, nlen, offset, noffset;
100 regex_t re;
101 regmatch_t matches[10];
102
103
104 /* Parse the options */
105 flags = REG_EXTENDED;
106 if (strchr(options, 'i') != NULL)
107 flags = flags | REG_ICASE;
108 global = (strchr(options, 'g') != NULL ? 1 : 0);
109
110
111 /* Compile the regex */
112 if ((rc = regcomp(&re, pattern, flags)) != 0) {
113 len = regerror(rc, &re, NULL, 0);
114 *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
115 regerror(rc, &re, *new_str, len);
116 return 0;
117 }
118
119 /* Prepare the output string */
120 len = strlen(str);
121 nlen = 2*len;
122 noffset = 0;
123 *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");
124
125
126 /* Get into the matching-replacing loop */
127 rlen = strlen(replacement);
128 offset = 0;
129 nmatches = 0;
130 do {
131 rc = regexec(&re, str+offset, 10, matches, 0);
132
133 if (rc == REG_ESPACE) {
134 gk_free((void **)new_str, LTERM);
135 *new_str = gk_strdup("regexec ran out of memory.");
136 regfree(&re);
137 return 0;
138 }
139 else if (rc == REG_NOMATCH) {
140 if (nlen-noffset < len-offset) {
141 nlen += (len-offset) - (nlen-noffset);
142 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
143 }
144 strcpy(*new_str+noffset, str+offset);
145 noffset += (len-offset);
146 break;
147 }
148 else { /* A match was found! */
149 nmatches++;
150
151 /* Copy the left unmatched portion of the string */
152 if (matches[0].rm_so > 0) {
153 if (nlen-noffset < matches[0].rm_so) {
154 nlen += matches[0].rm_so - (nlen-noffset);
155 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
156 }
157 strncpy(*new_str+noffset, str+offset, matches[0].rm_so);
158 noffset += matches[0].rm_so;
159 }
160
161 /* Go and append the replacement string */
162 for (i=0; i<rlen; i++) {
163 switch (replacement[i]) {
164 case '\\':
165 if (i+1 < rlen) {
166 if (nlen-noffset < 1) {
167 nlen += nlen + 1;
168 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
169 }
170 *new_str[noffset++] = replacement[++i];
171 }
172 else {
173 gk_free((void **)new_str, LTERM);
174 *new_str = gk_strdup("Error in replacement string. Missing character following '\'.");
175 regfree(&re);
176 return 0;
177 }
178 break;
179
180 case '$':
181 if (i+1 < rlen) {
182 j = (int)(replacement[++i] - '0');
183 if (j < 0 || j > 9) {
184 gk_free((void **)new_str, LTERM);
185 *new_str = gk_strdup("Error in captured subexpression specification.");
186 regfree(&re);
187 return 0;
188 }
189
190 if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) {
191 nlen += nlen + (matches[j].rm_eo-matches[j].rm_so);
192 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
193 }
194
195 strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo);
196 noffset += matches[j].rm_eo-matches[j].rm_so;
197 }
198 else {
199 gk_free((void **)new_str, LTERM);
200 *new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'.");
201 regfree(&re);
202 return 0;
203 }
204 break;
205
206 default:
207 if (nlen-noffset < 1) {
208 nlen += nlen + 1;
209 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
210 }
211 (*new_str)[noffset++] = replacement[i];
212 }
213 }
214
215 /* Update the offset of str for the next match */
216 offset += matches[0].rm_eo;
217
218 if (!global) {
219 /* Copy the right portion of the string if no 'g' option */
220 if (nlen-noffset < len-offset) {
221 nlen += (len-offset) - (nlen-noffset);
222 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
223 }
224 strcpy(*new_str+noffset, str+offset);
225 noffset += (len-offset);
226 }
227 }
228 } while (global);
229
230 (*new_str)[noffset] = '\0';
231
232 regfree(&re);
233 return nmatches + 1;
234
235 }
236
237
238
239 /************************************************************************/
240 /*! \brief Prunes characters from the end of the string.
241
242 This function removes any trailing characters that are included in the
243 \c rmlist. The trimming stops at the last character (i.e., first character
244 from the end) that is not in \c rmlist.
245 This function can be used to remove trailing spaces, newlines, etc.
246 This is a destructive operation as it modifies the string.
247
248 \param str is the string that will be trimmed.
249 \param rmlist contains the set of characters that will be removed.
250 \returns A pointer to \c str itself.
251 \sa gk_strhprune()
252 */
253 /*************************************************************************/
char *gk_strtprune(char *str, char *rmlist)
{
  size_t keep = strlen(str);   /* number of leading characters that survive */

  /* Walk back from the end while the last surviving character is in rmlist */
  while (keep > 0 && strchr(rmlist, str[keep-1]) != NULL)
    keep--;

  str[keep] = '\0';

  return str;
}
274
275
276 /************************************************************************/
277 /*! \brief Prunes characters from the beginning of the string.
278
279 This function removes any starting characters that are included in the
280 \c rmlist. The trimming stops at the first character that is not in
281 \c rmlist.
282 This function can be used to remove leading spaces, tabs, etc.
283 This is a destructive operation as it modifies the string.
284
285 \param str is the string that will be trimmed.
286 \param rmlist contains the set of characters that will be removed.
287 \returns A pointer to \c str itself.
288 \sa gk_strtprune()
289 */
290 /*************************************************************************/
char *gk_strhprune(char *str, char *rmlist)
{
  /* Length of the leading run made up only of characters from rmlist */
  size_t skip = strspn(str, rmlist);

  /* Slide the remainder, including its terminating NUL, to the front */
  if (skip > 0)
    memmove(str, str+skip, strlen(str+skip)+1);

  return str;
}
315
316
317 /************************************************************************/
318 /*! \brief Converts a string to upper case.
319
320 This function converts a string to upper case. This operation modifies the
321 string itself.
322
323 \param str is the string whose case will be changed.
324 \returns A pointer to \c str itself.
325 \sa gk_strtolower()
326 */
327 /*************************************************************************/
char *gk_strtoupper(char *str)
{
  char *p;

  /* toupper() is only defined for EOF and values representable as unsigned
     char, so each character is converted before the call; a plain (possibly
     negative) char would be undefined behaviour. */
  for (p = str; *p != '\0'; p++)
    *p = (char)toupper((unsigned char)*p);

  return str;
}
335
336
337 /************************************************************************/
338 /*! \brief Converts a string to lower case.
339
340 This function converts a string to lower case. This operation modifies the
341 string itself.
342
343 \param str is the string whose case will be changed.
344 \returns A pointer to \c str itself.
345 \sa gk_strtoupper()
346 */
347 /*************************************************************************/
char *gk_strtolower(char *str)
{
  char *p;

  /* tolower() is only defined for EOF and values representable as unsigned
     char, so each character is converted before the call; a plain (possibly
     negative) char would be undefined behaviour. */
  for (p = str; *p != '\0'; p++)
    *p = (char)tolower((unsigned char)*p);

  return str;
}
355
356
357 /************************************************************************/
358 /*! \brief Duplicates a string
359
360 This function is a replacement for C's standard <em>strdup()</em> function.
361 The key differences between the two are that gk_strdup():
362 - uses the dynamic memory allocation routines of \e GKlib.
363 - it correctly handles NULL input strings.
364
365 The string that is returned must be freed by gk_free().
366
367 \param orgstr is the string that will be duplicated.
368 \returns A pointer to the newly created string.
369 \sa gk_free()
370 */
371 /*************************************************************************/
char *gk_strdup(char *orgstr)
{
  size_t nbytes;   /* size_t, so the length of a very long string is not truncated */
  char *copy;

  /* A NULL input yields a NULL result instead of a crash */
  if (orgstr == NULL)
    return NULL;

  nbytes = strlen(orgstr)+1;   /* include the terminating NUL */
  copy = gk_malloc(nbytes*sizeof(char), "gk_strdup: str");

  /* NOTE(review): guards against gk_malloc() handing back NULL; whether it
     can do so depends on its error handling, which is not visible here. */
  if (copy != NULL)
    memcpy(copy, orgstr, nbytes);

  return copy;
}
385
386
387 /************************************************************************/
388 /*! \brief Case insensitive string comparison.
389
390 This function compares two strings for equality by ignoring the case of the
391 strings.
392
393 \warning This function is \b not equivalent to a case-insensitive
394 <em>strcmp()</em> function, as it does not return ordering
395 information.
396
397 \todo Remove the above warning.
398
399 \param s1 is the first string to be compared.
400 \param s2 is the second string to be compared.
401 \retval 1 if the strings are identical,
402 \retval 0 otherwise.
403 */
404 /*************************************************************************/
int gk_strcasecmp(char *s1, char *s2)
{
  /* Read the strings as unsigned char: tolower() is undefined for negative
     values other than EOF, which a plain char may hold. */
  const unsigned char *a = (const unsigned char *)s1;
  const unsigned char *b = (const unsigned char *)s2;

  /* One pass over both strings; stop at the first difference or at the end
     of the shorter string. */
  for (; *a != '\0' && *b != '\0'; a++, b++) {
    if (tolower(*a) != tolower(*b))
      return 0;
  }

  /* Equal only if both strings ended at the same position */
  return (*a == *b);
}
420
421
422 /************************************************************************/
423 /*! \brief Compare two strings in reverse order
424
425 This function is similar to strcmp but it performs the comparison as
426 if the two strings were reversed.
427
428 \param s1 is the first string to be compared.
429 \param s2 is the second string to be compared.
430 \returns A negative value, 0, or a positive value if the reversed s1 is less than, equal to, or greater than the reversed s2.
431 */
432 /*************************************************************************/
int gk_strrcmp(char *s1, char *s2)
{
  size_t left1 = strlen(s1);   /* characters of s1 not yet compared */
  size_t left2 = strlen(s2);   /* characters of s2 not yet compared */

  /* Compare from the last character towards the first */
  for (; left1 > 0 && left2 > 0; left1--, left2--) {
    if (s1[left1-1] != s2[left2-1])
      return (s1[left1-1] - s2[left2-1]);
  }

  /* One string is a suffix of the other (or they are equal): the one that
     ran out first orders before the other. */
  if (left1 < left2)
    return -1;
  if (left1 > left2)
    return 1;
  return 0;
}
453
454
455
456 /************************************************************************/
457 /*! \brief Converts a time_t time into a string
458
459 This function takes a time_t-specified time and returns a string-formatted
460 representation of the corresponding time. The format of the string is
461 <em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
462
463 \param time is the time to be converted.
464 \return It returns a pointer to a statically allocated string that is
465 over-written in successive calls of this function. If the
466 conversion failed, it returns NULL.
467
468 */
469 /*************************************************************************/
char *gk_time2str(time_t time)
{
  static char datestr[128];   /* shared result buffer; overwritten by every call */
  struct tm *tm;

  /* localtime() returns NULL when the value cannot be broken down; passing
     that on to strftime() would be undefined behaviour. */
  tm = localtime(&time);
  if (tm == NULL)
    return NULL;

  /* strftime() returns 0 when the formatted text does not fit in the buffer */
  if (strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
    return NULL;

  return datestr;
}
482
483
484
485 #if !defined(WIN32) && !defined(__MINGW32__)
486 /************************************************************************/
487 /*! \brief Converts a date/time string into its equivalent time_t value
488
489 This function takes a date and/or time specification and converts it to
490 the equivalent time_t representation. The conversion is done using the
491 strptime() function. The format that gk_str2time() understands is
492 <em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in military time.
493
494 \param str is the date/time string to be converted.
495 \return If the conversion was successful it returns the time, otherwise
496 it returns -1.
497 */
498 /*************************************************************************/
time_t gk_str2time(char *str)
{
  struct tm parsed;
  time_t result;

  /* Start from an all-zero structure so fields strptime() leaves alone are defined */
  memset(&parsed, '\0', sizeof(parsed));

  /* A string that does not follow mm/dd/yyyy hh:mm:ss is reported as -1 */
  if (strptime(str, "%m/%d/%Y %H:%M:%S", &parsed) == NULL)
    return -1;

  /* Negative results from mktime() are clamped to 0 */
  result = mktime(&parsed);
  if (result < 0)
    return 0;

  return result;
}
512 #endif
513
514
515 /*************************************************************************
516 * This function returns the ID of a particular string based on the
517 * supplied StringMap array
518 **************************************************************************/
int gk_GetStringID(gk_StringMap_t *strmap, char *key)
{
  gk_StringMap_t *entry;

  /* The table ends at the first entry whose name is NULL */
  for (entry = strmap; entry->name != NULL; entry++) {
    /* gk_strcasecmp() returns non-zero when the names match, ignoring case */
    if (gk_strcasecmp(key, entry->name))
      return entry->id;
  }

  /* key is not in the table */
  return -1;
}
530