1 /**
2 * Various string functions mainly used for controlling text output
3
4 * Copyright (C) 2003 Shawn Betts
5 * Copyright (C) 2004, 2007, 2008, 2009 Sylvain Beucler
6
7 * This file is part of GNU FreeDink
8
9 * GNU FreeDink is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 3 of the
12 * License, or (at your option) any later version.
13
14 * GNU FreeDink is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18
19 * You should have received a copy of the GNU General Public License
20 * along with this program. If not, see
21 * <http://www.gnu.org/licenses/>.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <ctype.h>
29 #include <string.h>
30 #include <strings.h> /* strcasecmp */
31 #include <stdlib.h> /* free */
32 #include <stdarg.h> /* va_start */
33 #include <stdio.h> /* vasprintf */
34 #include <xalloc.h>
35 #include "str_util.h"
36 #include "log.h"
37
/**
 * Convert the NUL-terminated string 's' to upper case, in place.
 *
 * Each byte is mapped through toupper(), so the result follows the
 * current C locale; bytes that have no upper-case form are left
 * untouched.
 */
void
strtoupper (char *s)
{
  for (; *s != '\0'; s++)
    {
      /* <ctype.h> functions are only defined for values representable
         as 'unsigned char' (or EOF); plain 'char' may be negative. */
      *s = (char) toupper ((unsigned char) *s);
    }
}
47
/**
 * Copy the NUL-terminated string 'src' (terminator included) to
 * 'dst'. Unlike strcpy(), the two areas are allowed to overlap.
 *
 * 'dst' must have room for strlen(src) + 1 bytes.
 */
void strcpy_nooverlap(char *dst, char* src)
{
  /* memmove() is specified to behave as if the bytes went through a
     temporary buffer, so no heap copy (which could fail) is needed. */
  memmove(dst, src, strlen(src) + 1);
}
57
58
/**
 * Utility - same as asprintf, but appends the formatted text to the
 * heap-allocated string '*strp' instead of creating a new one.
 *
 * If 'strp' points to a NULL pointer, a new buffer is allocated; in
 * every case the caller eventually has to free() '*strp'.
 *
 * The variadic arguments may reference the current content of
 * '*strp': the new text is fully formatted before '*strp' is resized.
 *
 * Returns the number of characters appended (terminator excluded),
 * or -1 if formatting or memory allocation failed, in which case
 * '*strp' is left untouched.
 */
int asprintf_append(char **strp, const char* fmt, ...)
{
  va_list ap;

  /* First pass: only measure the formatted text */
  va_start(ap, fmt);
  int added = vsnprintf(NULL, 0, fmt, ap);
  va_end(ap);
  if (added < 0)
    return -1;

  /* Second pass: format into a scratch buffer */
  char *piece = malloc((size_t) added + 1);
  if (piece == NULL)
    return -1;
  va_start(ap, fmt);
  vsnprintf(piece, (size_t) added + 1, fmt, ap);
  va_end(ap);

  /* Grow the destination; realloc(NULL, n) behaves like malloc(n).
     Keep the old pointer until we know the resize succeeded. */
  size_t cur_len = (*strp != NULL) ? strlen(*strp) : 0;
  char *grown = realloc(*strp, cur_len + (size_t) added + 1);
  if (grown == NULL)
    {
      free(piece);
      return -1;
    }

  memcpy(grown + cur_len, piece, (size_t) added + 1);
  free(piece);
  *strp = grown;
  return added;
}
85
/**
 * Reverse the NUL-terminated string 'st' in place.
 *
 * Works for any length: characters are swapped pairwise from both
 * ends, so no intermediate buffer is involved.
 */
void reverse(char *st)
{
  size_t len = strlen(st);
  if (len < 2)
    return; /* nothing to swap */

  char *lo = st;
  char *hi = st + len - 1;
  while (lo < hi)
    {
      char tmp = *lo;
      *lo++ = *hi;
      *hi-- = tmp;
    }
}
98
/**
 * Append the single character 'ch' to the NUL-terminated 'string',
 * like strcat() would do with a one-character string, and write a
 * fresh terminator right after it.
 *
 * 'string' must have room for one more byte.
 */
void strchar(char *string, char ch)
{
  char *tail = string + strlen(string); /* current terminator */
  tail[0] = ch;
  tail[1] = '\0';
}
109
/**
 * Split 'line' in fields separated by exactly _one_ 'sep', and return
 * a newly allocated copy of field number 'num' (counting from 1).
 * 'line' itself is not altered. Consecutive separators enclose empty
 * fields (i.e. separators are not collapsed, unlike 'get_word(...)').
 *
 * Any "\r" and "\n" are stripped from the returned field. If 'line'
 * has less than 'num' fields, an empty string is returned.
 *
 * The result is always allocated with xmalloc(); the caller has to
 * free() it.
 */
char* separate_string (char* line, int num, char sep)
{
  int line_len = strlen(line);
  int field = 1; /* number of the field being scanned */
  int start = 0; /* offset of that field's first character */
  int k;

  /* When the loop stops, 'k' is the offset just past the current
     field: either its closing separator or the end of 'line'. */
  for (k = 0; k < line_len; k++)
    {
      if (line[k] != sep)
        continue;
      if (field == num)
        break;
      field++;
      start = k + 1;
    }

  if (field < num)
    {
      /* less than 'num' fields */
      char* empty = xmalloc(1);
      empty[0] = '\0';
      return empty;
    }

  int size = k - start;
  char* result = xmalloc(size + 1);
  memcpy(result, line + start, size);
  result[size] = '\0';

  replace_norealloc("\r", "", result); /* take the \r off it */
  replace_norealloc("\n", "", result); /* take the \n off it */
  return result;
}
157
/**
 * Return a newly allocated copy of word number 'word' in 'line'.
 * Words are delimited by one _or more_ spaces and are numbered from 1
 * (i.e. not 0). When there is no such word, the result is an empty
 * string. The caller has to free() the result.
 */
char* get_word(char* line, int word)
{
  char* cursor = line;
  int index = 1;

  /* Hop over words and runs of spaces until the requested word (or
     the end of the line) is reached; each run of spaces starts a new
     word. */
  while (*cursor != '\0' && index != word)
    {
      if (*cursor == ' ')
        {
          index++;
          cursor += strspn(cursor, " ");
        }
      else
        {
          cursor += strcspn(cursor, " ");
        }
    }

  /* Either 'cursor' is on the requested word, or it is on a space or
     on the terminator, in which case the measured length is 0. */
  size_t len = strcspn(cursor, " ");
  char* result = xmalloc(len + 1);
  memcpy(result, cursor, len);
  result[len] = '\0';

  return result;
}
200
201
/**
 * Case-insensitive string equality: return 1 when 'orig' and 'comp'
 * hold the same text regardless of letter case, 0 otherwise.
 */
/*bool*/int compare(char *orig, char *comp)
{
  if (strcasecmp(orig, comp) != 0)
    return 0;
  return 1;
}
210
/**
 * Slide the tail of 'line' - from offset 'start' up to and including
 * the trailing '\0' - 'shift' positions towards the beginning of the
 * string.
 */
static void shift_left(char* line, int start, int shift)
{
  int last = strlen(line); /* offset of the terminator */
  int src;

  /* Go upwards: each destination slot lies before its source, so a
     character is never overwritten before it has been moved. */
  for (src = start; src <= last; src++)
    {
      int dst = src - shift;
      line[dst] = line[src];
    }
}
223
/**
 * Slide the tail of 'line' - from offset 'start' up to and including
 * the trailing '\0' - 'shift' positions towards the end of the
 * buffer. The buffer must be large enough for the longer string.
 */
static void shift_right(char* line, int start, int shift)
{
  int src = strlen(line); /* begin with the terminator */

  /* Go downwards: each destination slot lies after its source, so a
     character is never overwritten before it has been moved. */
  while (src >= start)
    {
      line[src + shift] = line[src];
      src--;
    }
}
235
/**
 * Replace every occurrence of 'find' by 'repl' in '*line_p'. The
 * search is case-insensitive, goes left to right, and resumes right
 * after each replacement (the inserted text is never searched
 * again). An empty 'find' matches nothing.
 *
 * Note:
 *
 * - '*line_p' is xrealloc'd only if strlen(repl) > strlen(find), to
 *   make the calling code easier. Otherwise the string is modified in
 *   place and '*line_p' keeps its value.
 */
void replace(const char* find, const char* repl, char** line_p)
{
  int len_find = strlen(find);
  int len_repl = strlen(repl);
  int delta = len_repl - len_find; /* length change per replacement */
  char* line = *line_p;
  int len_line = strlen(line);
  int pos = 0;

  if (len_find == 0)
    return;

  while (pos + len_find <= len_line)
    {
      /* Compare 'find' with the text at 'pos'. Going through
         'unsigned char' keeps toupper() defined for bytes >= 0x80. */
      int matched = 0;
      while (matched < len_find
             && toupper((unsigned char) line[pos + matched])
                == toupper((unsigned char) find[matched]))
        matched++;

      if (matched < len_find)
        {
          /* Not here: retry from the very next character, so that
             occurrences overlapping a partial match are not missed */
          pos++;
          continue;
        }

      /* Make room for (or close the gap after) the replacement */
      if (delta > 0)
        {
          /* Only change line_p if there's need to */
          line = xrealloc(line, len_line + delta + 1);
          *line_p = line;
          shift_right(line, pos + len_find, delta);
        }
      else if (delta < 0)
        {
          shift_left(line, pos + len_find, -delta);
        }

      /* Actually replace (no terminator: the tail is already there) */
      memcpy(line + pos, repl, len_repl);

      /* Prepare for next loop */
      len_line += delta;
      pos += len_repl;
    }
}
292
/**
 * Variant of 'replace' for callers that cannot accept a moved buffer:
 * 'line' is edited in place and is never realloc'd. This only makes
 * sense when the replacement is not longer than the search term; any
 * other request is reported as an internal error and terminates the
 * program.
 */
void replace_norealloc(const char* find, const char* repl, char* line)
{
  if (strlen(repl) <= strlen(find))
    {
      /* 'replace' only reallocates when the text has to grow, which
         cannot happen here, so the local 'line' copy stays valid. */
      replace(find, repl, &line);
      return;
    }

  log_fatal("Internal error: invalid string substitution");
  exit(EXIT_FAILURE);
}
307
308
/**
 * Convert Latin-1-encoded 'source' to UTF-8. The result is a newly
 * allocated, always NUL-terminated string that the caller has to
 * free().
 *
 * Bytes below 128 are copied as-is. Bytes 128..255 become two bytes:
 * 0xC2 or 0xC3 (for the lower and upper half of that range) followed
 * by 0x80..0xBF.
 */
char* latin1_to_utf8(char* source)
{
  const unsigned char* in = (const unsigned char*) source;

  /* Every input byte yields at most 2 output bytes, so sizing the
     buffer for the worst case up front means no write can ever run
     past its end, and no growth logic is needed. */
  unsigned char* dest = xmalloc(2 * strlen(source) + 1);
  unsigned char* out = dest;

  for (; *in != '\0'; in++)
    {
      if (*in < 128)
        {
          *out++ = *in;
        }
      else
        {
          *out++ = 0xc2 + ((*in - 128) / 64);
          *out++ = 0x80 + ((*in - 128) % 64);
        }
    }
  *out = '\0';
  return (char*) dest;
}
348
349 /* Here's a small Python script to explain the above formula: */
350
351 // # Static charset conversion table from Latin-1 to UTF-8:
352 // print 'unsigned char conv[][2] = {\n', ',\n'.join(
353 // ['\/*%d:\*/ {%s}' % (
354 // c,
355 // ', '.join(
356 // [hex(ord(i)) for i in chr(c).decode('ISO-8859-1').encode('utf-8')]
357 // )
358 // ) for c in range(128,256)]
359 // ), '};'
360 //
361 // # Test computed (!= static) table:
362 // for c in range(128,256):
363 // method1 = [ord(i) for i in chr(c).decode('ISO-8859-1').encode('utf-8')]
364 // method2 = [0xc2 + ((c - 128) / 64), 0x80 + ((c - 128) % 64)]
365 // #print method1, method2
366 // if method1[0] != method2[0] or method1[1] != method2[1]:
367 // print "Mismatch at %c"
368