1 /**
2  * Various string functions mainly used for controlling text output
3 
4  * Copyright (C) 2003  Shawn Betts
5  * Copyright (C) 2004, 2007, 2008, 2009  Sylvain Beucler
6 
7  * This file is part of GNU FreeDink
8 
9  * GNU FreeDink is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 3 of the
12  * License, or (at your option) any later version.
13 
14  * GNU FreeDink is distributed in the hope that it will be useful, but
15  * WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * General Public License for more details.
18 
19  * You should have received a copy of the GNU General Public License
20  * along with this program.  If not, see
21  * <http://www.gnu.org/licenses/>.
22  */
23 
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 #include <ctype.h>
29 #include <string.h>
30 #include <strings.h> /* strcasecmp */
31 #include <stdlib.h> /* free */
32 #include <stdarg.h> /* va_start */
33 #include <stdio.h> /* vasprintf */
34 #include <xalloc.h>
35 #include "str_util.h"
36 #include "log.h"
37 
/**
 * Convert every character of 's' to upper case, in place.
 *
 * 's' must be a writable NUL-terminated string. The conversion is
 * done by toupper() and hence follows the current C locale.
 */
void
strtoupper (char *s)
{
  for (; *s != '\0'; s++)
    {
      /* <ctype.h> functions only accept values representable as
         unsigned char (or EOF). A plain 'char' may be negative for
         bytes >= 0x80, so convert before the call. */
      *s = (char) toupper ((unsigned char) *s);
    }
}
47 
/**
 * Copy the NUL-terminated string 'src' to 'dst', even when the two
 * regions overlap (plain strcpy() is undefined in that case).
 *
 * 'dst' must have room for strlen(src) + 1 bytes.
 */
void strcpy_nooverlap(char *dst, char* src)
{
  /* memmove() is specified to handle overlapping regions, so no
     temporary copy - and no allocation that could fail - is needed.
     The length is measured before any byte is moved. */
  memmove(dst, src, strlen(src) + 1);
}
57 
58 
/**
 * Utility - same as asprintf, but appends the formatted text to the
 * string '*strp' instead of creating a new one.
 *
 * If 'strp' points to a NULL pointer, a new buffer is allocated.
 * In all cases '*strp' is a heap buffer on success and the caller
 * has to free() it.
 *
 * Returns the number of characters appended (terminating '\0'
 * excluded), or -1 if formatting or allocation failed; on failure
 * '*strp' is left untouched.
 */
int asprintf_append(char **strp, const char* fmt, ...)
{
  va_list ap;

  /* First pass: only measure the formatted text */
  va_start(ap, fmt);
  int add_len = vsnprintf(NULL, 0, fmt, ap);
  va_end(ap);
  if (add_len < 0)
    return -1;

  size_t old_len = (*strp != NULL) ? strlen(*strp) : 0;

  /* Build the result in a fresh buffer rather than realloc'ing
     '*strp': the old string stays valid while formatting, so it may
     safely appear among the arguments, and nothing is lost if we
     fail. */
  char *joined = malloc(old_len + (size_t) add_len + 1);
  if (joined == NULL)
    return -1;
  if (old_len > 0)
    memcpy(joined, *strp, old_len);

  /* Second pass: format right after the existing text (this also
     writes the terminating '\0') */
  va_start(ap, fmt);
  int written = vsnprintf(joined + old_len, (size_t) add_len + 1, fmt, ap);
  va_end(ap);
  if (written < 0)
    {
      free(joined);
      return -1;
    }

  free(*strp);
  *strp = joined;
  return written;
}
85 
/**
 * Reverse the NUL-terminated string 'st' in place.
 *
 * Works for strings of any length: characters are swapped pairwise
 * from both ends, so no intermediate buffer is involved.
 */
void reverse(char *st)
{
  size_t len = strlen(st);
  if (len < 2)
    return; /* empty or single-char string is its own reverse */

  char *left = st;
  char *right = st + len - 1;
  while (left < right)
    {
      char swap = *left;
      *left = *right;
      *right = swap;
      left++;
      right--;
    }
}
98 
/**
 * strcat() for a single character: write 'ch' over the terminating
 * '\0' of 'string' and put a new terminator right after it.
 *
 * 'string' must have room for one more byte.
 */
void strchar(char *string, char ch)
{
  char *tail = string + strlen(string);
  tail[0] = ch;
  tail[1] = '\0';
}
109 
/**
 * Cut 'line' into fields delimited by _one_ 'sep' each and return a
 * newly allocated copy of field number 'num' (counting from 1), with
 * any "\r" and "\n" removed. 'line' itself is not modified.
 *
 * Consecutive separators delimit empty fields (they are not
 * collapsed, unlike in 'get_word(...)'). If 'line' holds fewer than
 * 'num' fields, a newly allocated empty string is returned.
 */
char* separate_string (char* line, int num, char sep)
{
  int total = strlen(line);
  int field = 1;  /* 1-based number of the field being scanned */
  int begin = 0;  /* offset of the first char of that field */
  int pos = 0;

  while (pos < total)
    {
      if (line[pos] == sep)
        {
          /* This separator closes the field we were asked for */
          if (field == num)
            break;
          field++;
          begin = pos + 1;
        }
      pos++;
    }

  /* Ran out of fields before reaching 'num' */
  if (field < num)
    return strdup("");

  /* 'pos' now sits on the closing separator, or on the end of line */
  int width = pos - begin;
  char* word = xmalloc(width + 1);
  memcpy(word, line + begin, width);
  word[width] = '\0';

  /* Strip end-of-line characters */
  replace_norealloc("\r", "", word);
  replace_norealloc("\n", "", word);
  return word;
}
157 
/**
 * Return a newly allocated copy of word number 'word' of 'line'.
 * Words are separated by one _or more_ spaces and are counted from 1
 * (i.e. not 0). If there is no such word, the result is an empty
 * string.
 *
 * Note that word 1 always starts at the very beginning of 'line', so
 * it is empty when 'line' begins with a space.
 */
char* get_word(char* line, int word)
{
  char* cursor = line;
  int index = 1;

  /* Walk forward until we stand on the first char of the requested
     word, or on the end of the string */
  while (*cursor != '\0' && index != word)
    {
      if (*cursor == ' ')
        {
          /* A whole run of spaces is a single word boundary */
          index++;
          cursor += strspn(cursor, " ");
        }
      else
        {
          /* Jump over the word we are not interested in */
          cursor += strcspn(cursor, " ");
        }
    }

  /* The word extends up to the next space or the end of string; at
     end of string this gives an empty word */
  size_t width = strcspn(cursor, " ");
  char* copy = xmalloc(width + 1);
  memcpy(copy, cursor, width);
  copy[width] = '\0';

  return copy;
}
200 
201 
/**
 * Case-insensitive equality test: return 1 if 'orig' and 'comp' are
 * the same string according to strcasecmp(), 0 otherwise.
 */
/*bool*/int compare(char *orig, char *comp)
{
  if (strcasecmp(orig, comp) != 0)
    return 0;
  return 1;
}
210 
/**
 * Slide the tail of 'line' - from index 'start' up to and including
 * the terminating '\0' - 'shift' positions towards the beginning of
 * the string. 'start - shift' must not be negative.
 */
static void shift_left(char* line, int start, int shift)
{
  int last = strlen(line); /* index of the terminating '\0' */
  int src = start;
  int dst = start - shift;

  /* Copy front-to-back: the destination is always behind the source,
     so no unread char gets overwritten */
  while (src <= last)
    line[dst++] = line[src++];
}
223 
/**
 * Slide the tail of 'line' - from index 'start' up to and including
 * the terminating '\0' - 'shift' positions towards the end of the
 * string. The buffer must have room for 'shift' more bytes.
 */
static void shift_right(char* line, int start, int shift)
{
  /* Copy back-to-front, starting at the terminating '\0': the
     destination is always ahead of the source, so no unread char
     gets overwritten */
  int src = strlen(line);
  while (src >= start)
    {
      line[src + shift] = line[src];
      src--;
    }
}
235 
/**
 * Tell whether the 'len' first chars of 'text' and 'word' are equal,
 * ignoring case. Both must hold at least 'len' chars.
 */
static int replace_match_at(const char* text, const char* word, int len)
{
  int i;
  for (i = 0; i < len; i++)
    {
      /* <ctype.h> functions need an unsigned char value; passing a
         plain (possibly negative) char is undefined behavior */
      if (toupper((unsigned char) text[i]) != toupper((unsigned char) word[i]))
        return 0;
    }
  return 1;
}

/**
 * Replace word 'find' by word 'repl' in '*line_p', as many times as
 * possible. The search is case-insensitive, goes from left to right,
 * and resumes right after each replacement (replaced text is not
 * searched again). An empty 'find' never matches.
 *
 * Note:
 *
 * - '*line_p' is xrealloc'd only if strlen(repl) > strlen(find), to
 *   make the calling code easier.
 */
void replace(const char* find, const char* repl, char** line_p)
{
  int len_find = strlen(find);
  int len_repl = strlen(repl);
  char* line = *line_p;
  int len_line = strlen(line);
  int pos = 0;

  if (len_find == 0)
    return;

  /* Try every starting position in turn, so that an occurrence
     beginning in the middle of a failed partial match (e.g. "ab" in
     "aab") is not missed */
  while (pos + len_find <= len_line)
    {
      if (!replace_match_at(line + pos, find, len_find))
        {
          pos++;
          continue;
        }

      /* Move what's after the match, growing the buffer first when
         the replacement is longer. Only change '*line_p' if there's
         need to. */
      if (len_repl > len_find)
        {
          line = xrealloc(line, len_line + len_repl - len_find + 1);
          *line_p = line;
          shift_right(line, pos + len_find, len_repl - len_find);
        }
      else if (len_repl < len_find)
        {
          shift_left(line, pos + len_find, len_find - len_repl);
        }

      /* Actually replace (the terminating '\0' was moved above) */
      memcpy(line + pos, repl, len_repl);

      /* Prepare for next loop: continue after the inserted text */
      len_line += len_repl - len_find;
      pos += len_repl;
    }
}
292 
/**
 * Variant of 'replace' for callers that hold 'line' by value: the
 * buffer is guaranteed not to be moved (i.e. realloc'd). This is
 * only possible when the replacement is no longer than the search
 * term; otherwise a fatal error is logged and the program exits.
 */
void replace_norealloc(const char* find, const char* repl, char* line)
{
  size_t find_len = strlen(find);
  size_t repl_len = strlen(repl);

  if (repl_len <= find_len)
    {
      /* 'replace' never reallocates in this case, so passing the
         address of our local copy of the pointer is enough */
      replace(find, repl, &line);
      return;
    }

  log_fatal("Internal error: invalid string substitution");
  exit(EXIT_FAILURE);
}
307 
308 
/**
 * Convert Latin-1-encoded 'source' to UTF-8. The result is a newly
 * allocated buffer of exactly the needed size and is always
 * NUL-terminated.
 *
 * Bytes below 128 are copied as is; each byte from 128 to 255
 * becomes a two-byte UTF-8 sequence.
 */
char* latin1_to_utf8(char* source)
{
  const unsigned char* src = (const unsigned char*) source;
  const unsigned char* pcs;

  /* First pass: compute the exact output size, so that the writing
     loop below cannot run past the end of the buffer */
  size_t out_len = 0;
  for (pcs = src; *pcs != '\0'; pcs++)
    out_len += (*pcs < 128) ? 1 : 2;

  unsigned char* dest = xmalloc(out_len + 1);
  unsigned char* pcd = dest;

  /* Second pass: convert */
  for (pcs = src; *pcs != '\0'; pcs++)
    {
      if (*pcs < 128)
        {
          *pcd++ = *pcs;
        }
      else
        {
          /* Lead byte is 0xc2 for 128..191 and 0xc3 for 192..255,
             followed by a continuation byte with the 6 low bits */
          *pcd++ = 0xc2 + ((*pcs - 128) / 64);
          *pcd++ = 0x80 + ((*pcs - 128) % 64);
        }
    }
  *pcd = '\0';
  return (char*) dest;
}
348 
349 /* Here's a small Python script to explain the above formula: */
350 
351 // # Static charset conversion table from Latin-1 to UTF-8:
352 // print 'unsigned char conv[][2] = {\n', ',\n'.join(
353 //     ['\/*%d:\*/ {%s}' % (
354 //         c,
355 //         ', '.join(
356 //             [hex(ord(i)) for i in chr(c).decode('ISO-8859-1').encode('utf-8')]
357 //             )
358 //         ) for c in range(128,256)]
359 //     ), '};'
360 //
361 // # Test computed (!= static) table:
362 // for c in range(128,256):
363 //     method1 = [ord(i) for i in chr(c).decode('ISO-8859-1').encode('utf-8')]
364 //     method2 = [0xc2 + ((c - 128) / 64), 0x80 + ((c - 128) % 64)]
365 //     #print method1, method2
366 //     if method1[0] != method2[0] or method1[1] != method2[1]:
//         print "Mismatch at %d" % c
368