1 /************************************************************************/
2 /*! \file
3 
4 \brief Functions for manipulating strings.
5 
6 Various functions for manipulating strings. Some of these functions
7 provide new functionality, whereas others are drop-in replacements
8 of standard functions (but with enhanced functionality).
9 
10 \date Started 11/1/99
11 \author George
12 \version $Id: string.c 10711 2011-08-31 22:23:04Z karypis $
13 */
14 /************************************************************************/
15 
16 #include <GKlib.h>
17 
18 
19 
/************************************************************************/
/*! \brief Translates or deletes individual characters of a string.

Every character of \c str that appears in \c fromlist is rewritten in
place. A character found at position \c k of \c fromlist is replaced by
<tt>tolist[k]</tt>; when \c tolist has no position \c k (i.e., it is
shorter than \c fromlist), the character is removed from \c str instead.
Characters that do not appear in \c fromlist are kept unchanged. If a
character occurs more than once in \c fromlist, its first occurrence
decides the outcome. The behavior is modeled after Perl's \b tr//.

\param str is the string that is modified in place.
\param fromlist is the set of characters to be replaced.
\param tolist is the set of replacement characters.
\returns A pointer to \c str itself.
*/
/************************************************************************/
char *gk_strchr_replace(char *str, char *fromlist, char *tolist)
{
  size_t nfrom = strlen(fromlist);
  size_t nto   = strlen(tolist);
  char *rd, *wr;
  const char *hit;
  size_t pos;

  /* wr never runs ahead of rd, so nothing is overwritten before it is read */
  for (rd = wr = str; *rd != '\0'; rd++) {
    hit = memchr(fromlist, *rd, nfrom);

    if (hit == NULL) {
      /* not listed: keep the character as is */
      *wr++ = *rd;
      continue;
    }

    /* listed: translate it if a counterpart exists, otherwise drop it */
    pos = (size_t)(hit - fromlist);
    if (pos < nto)
      *wr++ = tolist[pos];
  }
  *wr = '\0';

  return str;
}
61 
62 
63 
64 /************************************************************************/
65 /*! \brief Regex-based search-and-replace function
66 
67 This function is a C implementation of Perl's <tt> s//</tt> regular-expression
68 based substitution function.
69 
70 \param str
71   is the input string on which the operation will be performed.
72 \param pattern
73   is the regular expression for the pattern to be matched for substitution.
74 \param replacement
75   is the replacement string, in which the possible captured pattern substrings
76   are referred to as $1, $2, ..., $9. The entire matched pattern is refered
77   to as $0.
78 \param options
79   is a string specified options for the substitution operation. Currently the
80   <tt>"i"</tt> (case insensitive) and <tt>"g"</tt> (global substitution) are
81   supported.
82 \param new_str
83   is a reference to a pointer that will store a pointer to the newly created
84   string that results from the substitutions. This string is allocated via
85   gk_malloc() and needs to be freed using gk_free(). The string is returned
86   even if no substitutions were performed.
87 \returns
88   If successful, it returns 1 + the number of substitutions that were performed.
89   Thus, if no substitutions were performed, the returned value will be 1.
90   Otherwise it returns 0. In case of error, a meaningful error message is
91   returned in <tt>newstr</tt>, which also needs to be freed afterwards.
92 */
93 /************************************************************************/
gk_strstr_replace(char * str,char * pattern,char * replacement,char * options,char ** new_str)94 int gk_strstr_replace(char *str, char *pattern, char *replacement, char *options,
95       char **new_str)
96 {
97   gk_idx_t i;
98   int j, rc, flags, global, nmatches;
99   size_t len, rlen, nlen, offset, noffset;
100   regex_t re;
101   regmatch_t matches[10];
102 
103 
104   /* Parse the options */
105   flags = REG_EXTENDED;
106   if (strchr(options, 'i') != NULL)
107     flags = flags | REG_ICASE;
108   global = (strchr(options, 'g') != NULL ? 1 : 0);
109 
110 
111   /* Compile the regex */
112   if ((rc = regcomp(&re, pattern, flags)) != 0) {
113     len = regerror(rc, &re, NULL, 0);
114     *new_str = gk_cmalloc(len, "gk_strstr_replace: new_str");
115     regerror(rc, &re, *new_str, len);
116     return 0;
117   }
118 
119   /* Prepare the output string */
120   len = strlen(str);
121   nlen = 2*len;
122   noffset = 0;
123   *new_str = gk_cmalloc(nlen+1, "gk_strstr_replace: new_str");
124 
125 
126   /* Get into the matching-replacing loop */
127   rlen = strlen(replacement);
128   offset = 0;
129   nmatches = 0;
130   do {
131     rc = regexec(&re, str+offset, 10, matches, 0);
132 
133     if (rc == REG_ESPACE) {
134       gk_free((void **)new_str, LTERM);
135       *new_str = gk_strdup("regexec ran out of memory.");
136       regfree(&re);
137       return 0;
138     }
139     else if (rc == REG_NOMATCH) {
140       if (nlen-noffset < len-offset) {
141         nlen += (len-offset) - (nlen-noffset);
142         *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
143       }
144       strcpy(*new_str+noffset, str+offset);
145       noffset += (len-offset);
146       break;
147     }
148     else { /* A match was found! */
149       nmatches++;
150 
151       /* Copy the left unmatched portion of the string */
152       if (matches[0].rm_so > 0) {
153         if (nlen-noffset < matches[0].rm_so) {
154           nlen += matches[0].rm_so - (nlen-noffset);
155           *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
156         }
157         strncpy(*new_str+noffset, str+offset, matches[0].rm_so);
158         noffset += matches[0].rm_so;
159       }
160 
161       /* Go and append the replacement string */
162       for (i=0; i<rlen; i++) {
163         switch (replacement[i]) {
164           case '\\':
165             if (i+1 < rlen) {
166               if (nlen-noffset < 1) {
167                 nlen += nlen + 1;
168                 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
169               }
170               *new_str[noffset++] = replacement[++i];
171             }
172             else {
173               gk_free((void **)new_str, LTERM);
174               *new_str = gk_strdup("Error in replacement string. Missing character following '\'.");
175               regfree(&re);
176               return 0;
177             }
178             break;
179 
180           case '$':
181             if (i+1 < rlen) {
182               j = (int)(replacement[++i] - '0');
183               if (j < 0 || j > 9) {
184                 gk_free((void **)new_str, LTERM);
185                 *new_str = gk_strdup("Error in captured subexpression specification.");
186                 regfree(&re);
187                 return 0;
188               }
189 
190               if (nlen-noffset < matches[j].rm_eo-matches[j].rm_so) {
191                 nlen += nlen + (matches[j].rm_eo-matches[j].rm_so);
192                 *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
193               }
194 
195               strncpy(*new_str+noffset, str+offset+matches[j].rm_so, matches[j].rm_eo);
196               noffset += matches[j].rm_eo-matches[j].rm_so;
197             }
198             else {
199               gk_free((void **)new_str, LTERM);
200               *new_str = gk_strdup("Error in replacement string. Missing subexpression number folloing '$'.");
201               regfree(&re);
202               return 0;
203             }
204             break;
205 
206           default:
207             if (nlen-noffset < 1) {
208               nlen += nlen + 1;
209               *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
210             }
211             (*new_str)[noffset++] = replacement[i];
212         }
213       }
214 
215       /* Update the offset of str for the next match */
216       offset += matches[0].rm_eo;
217 
218       if (!global) {
219         /* Copy the right portion of the string if no 'g' option */
220         if (nlen-noffset < len-offset) {
221           nlen += (len-offset) - (nlen-noffset);
222           *new_str = (char *)gk_realloc(*new_str, (nlen+1)*sizeof(char), "gk_strstr_replace: new_str");
223         }
224         strcpy(*new_str+noffset, str+offset);
225         noffset += (len-offset);
226       }
227     }
228   } while (global);
229 
230   (*new_str)[noffset] = '\0';
231 
232   regfree(&re);
233   return nmatches + 1;
234 
235 }
236 
237 
238 
/************************************************************************/
/*! \brief Prunes characters from the end of the string.

Starting from the last character of \c str and moving backwards, this
function discards every character that is a member of \c rmlist and stops
at the first one that is not. The string is shortened by writing the
terminating null character right after the last retained character.
Typical use is stripping trailing spaces, newlines, etc.
This is a destructive operation as it modifies the string.

\param str is the string that will be trimmed.
\param rmlist contains the set of characters that will be removed.
\returns A pointer to \c str itself.
\sa gk_strhprune()
*/
/*************************************************************************/
char *gk_strtprune(char *str, char *rmlist)
{
  size_t nrm  = strlen(rmlist);
  size_t keep = strlen(str);   /* number of leading characters retained */

  while (keep > 0 && memchr(rmlist, str[keep-1], nrm) != NULL)
    keep--;

  str[keep] = '\0';

  return str;
}
274 
275 
/************************************************************************/
/*! \brief Prunes characters from the beginning of the string.

This function discards the leading run of characters of \c str that are
members of \c rmlist. The remaining characters (and the terminating null
character) are shifted to the front of the buffer, so the returned pointer
is still the start of \c str.
Typical use is stripping leading spaces, tabs, etc.
This is a destructive operation as it modifies the string.

\param str is the string that will be trimmed.
\param rmlist contains the set of characters that will be removed.
\returns A pointer to \c str itself.
\sa gk_strtprune()
*/
/*************************************************************************/
char *gk_strhprune(char *str, char *rmlist)
{
  /* length of the leading run made up solely of rmlist characters */
  size_t nskip = strspn(str, rmlist);

  /* source and destination overlap, hence memmove; +1 moves the '\0' too */
  if (nskip > 0)
    memmove(str, str+nskip, strlen(str+nskip)+1);

  return str;
}
315 
316 
/************************************************************************/
/*! \brief Converts a string to upper case.

This function converts every character of the string to upper case using
toupper(). This operation modifies the string itself.

\param str is the string whose case will be changed.
\returns A pointer to \c str itself.
\sa gk_strtolower()
*/
/*************************************************************************/
char *gk_strtoupper(char *str)
{
  char *p;

  /* toupper() is only defined for values representable as unsigned char
     (or EOF), so a possibly negative plain char must be converted first */
  for (p = str; *p != '\0'; p++)
    *p = (char)toupper((unsigned char)*p);

  return str;
}
335 
336 
/************************************************************************/
/*! \brief Converts a string to lower case.

This function converts every character of the string to lower case using
tolower(). This operation modifies the string itself.

\param str is the string whose case will be changed.
\returns A pointer to \c str itself.
\sa gk_strtoupper()
*/
/*************************************************************************/
char *gk_strtolower(char *str)
{
  char *p;

  /* tolower() is only defined for values representable as unsigned char
     (or EOF), so a possibly negative plain char must be converted first */
  for (p = str; *p != '\0'; p++)
    *p = (char)tolower((unsigned char)*p);

  return str;
}
355 
356 
/************************************************************************/
/*! \brief Duplicates a string

This function is a replacement for C's standard <em>strdup()</em> function.
The key differences between the two are that gk_strdup():
  - uses the dynamic memory allocation routines of \e GKlib.
  - it accepts a NULL input string, in which case it returns NULL.

The string that is returned must be freed by gk_free().

\param orgstr is the string that will be duplicated (may be NULL).
\returns A pointer to the newly created string, or NULL if \c orgstr
         is NULL.
\sa gk_free()
*/
/*************************************************************************/
char *gk_strdup(char *orgstr)
{
  size_t nbytes;
  char *str=NULL;

  if (orgstr != NULL) {
    /* size_t, not int: the length of a long string must not be truncated
       before it is used to size the allocation */
    nbytes = strlen(orgstr)+1;
    str = gk_malloc(nbytes*sizeof(char), "gk_strdup: str");

    /* NOTE(review): guards against gk_malloc() handing back NULL; whether
       it can do so depends on GKlib's error handling - confirm */
    if (str != NULL)
      memcpy(str, orgstr, nbytes);
  }

  return str;
}
385 
386 
/************************************************************************/
/*! \brief Case insensitive string comparison.

This function compares two strings for equality by ignoring the case of the
strings. Characters are compared after being mapped through tolower().

\warning This function is \b not equivalent to a case-insensitive
         <em>strcmp()</em> function, as it does not return ordering
         information.

\todo Remove the above warning.

\param s1 is the first string to be compared.
\param s2 is the second string to be compared.
\retval 1 if the strings are identical,
\retval 0 otherwise.
*/
/*************************************************************************/
int gk_strcasecmp(char *s1, char *s2)
{
  /* Walk both strings together; tolower() requires an unsigned char value,
     so a possibly negative plain char is converted first */
  for (; *s1 != '\0' && *s2 != '\0'; s1++, s2++) {
    if (tolower((unsigned char)*s1) != tolower((unsigned char)*s2))
      return 0;
  }

  /* At least one string has ended; they are equal only if both have */
  return (*s1 == *s2);
}
420 
421 
/************************************************************************/
/*! \brief Compare two strings in reverse order

This function is similar to strcmp but it performs the comparison as
if the two strings were reversed: characters are compared starting from
the last one of each string and moving towards the front. If one string
is a proper suffix of the other, the shorter string is the smaller one.

\param s1 is the first string to be compared.
\param s2 is the second string to be compared.
\retval -1, 0, 1, if the s1 < s2, s1 == s2, or s1 > s2.
*/
/*************************************************************************/
int gk_strrcmp(char *s1, char *s2)
{
  /* number of not-yet-compared characters at the front of each string */
  size_t n1 = strlen(s1);
  size_t n2 = strlen(s2);

  while (n1 > 0 && n2 > 0) {
    n1--;
    n2--;
    if (s1[n1] != s2[n2])
      return (s1[n1] < s2[n2] ? -1 : 1);
  }

  /* n1 == 0 and/or n2 == 0: the string with characters left is larger */
  if (n1 < n2)
    return -1;
  if (n1 > n2)
    return 1;
  return 0;
}
453 
454 
455 
/************************************************************************/
/*! \brief Converts a time_t time into a string

This function takes a time_t-specified time and returns a string-formatted
representation of the corresponding local time. The format of the string is
<em>mm/dd/yyyy hh:mm:ss</em>, in which the hours are in 24-hour (military)
time.

\param time is the time to be converted.
\return It returns a pointer to a statically allocated string that is
        over-written in successive calls of this function. If the
        conversion failed, it returns NULL.

*/
/*************************************************************************/
char *gk_time2str(time_t time)
{
  static char datestr[128];
  struct tm *tm;

  /* localtime() returns NULL when the value cannot be broken down; that
     must not be handed to strftime() */
  tm = localtime(&time);
  if (tm == NULL)
    return NULL;

  if (strftime(datestr, sizeof(datestr), "%m/%d/%Y %H:%M:%S", tm) == 0)
    return NULL;

  return datestr;
}
482 
483 
484 
485 #if !defined(WIN32) && !defined(__MINGW32__)
/************************************************************************/
/*! \brief Converts a date/time string into its equivalent time_t value

This function takes date and/or time specification and converts it in
the equivalent time_t representation. The string is parsed with the
strptime() function and interpreted as local time by mktime(). The format
that gk_str2time() understands is <em>mm/dd/yyyy hh:mm:ss</em>, in which
the hours are in 24-hour (military) time.

\param str is the date/time string to be converted.
\return If the string could be parsed it returns the time, otherwise
        it returns -1. A negative result from mktime() is reported as 0.
*/
/*************************************************************************/
time_t gk_str2time(char *str)
{
  struct tm time;
  time_t rtime;

  memset(&time, '\0', sizeof(time));

  if (strptime(str, "%m/%d/%Y %H:%M:%S", &time) == NULL)
    return -1;

  /* The string carries no daylight-saving information. A negative
     tm_isdst asks mktime() to work it out for the given local time;
     leaving it at 0 would shift times that fall in DST by the DST offset. */
  time.tm_isdst = -1;

  rtime = mktime(&time);
  return (rtime < 0 ? 0 : rtime);
}
512 #endif
513 
514 
515 /*************************************************************************
516 * This function returns the ID of a particular string based on the
517 * supplied StringMap array
518 **************************************************************************/
gk_GetStringID(gk_StringMap_t * strmap,char * key)519 int gk_GetStringID(gk_StringMap_t *strmap, char *key)
520 {
521   int i;
522 
523   for (i=0; strmap[i].name; i++) {
524     if (gk_strcasecmp(key, strmap[i].name))
525       return strmap[i].id;
526   }
527 
528   return -1;
529 }
530