1 /*
2  * Claws Mail -- a GTK+ based, lightweight, and fast e-mail client
3  * Copyright (C) 2005 Andrej Kacian <andrej@kacian.sk>
4  *
5  * - a strreplace function (something like sed's s/foo/bar/g)
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20  */
21 
22 #ifdef HAVE_CONFIG_H
23 #  include "config.h"
24 #endif
25 
26 /* Global includes */
27 #include <glib.h>
28 #include <stdlib.h>
29 #include <ctype.h>
30 
31 /* Claws Mail includes */
32 #include <common/utils.h>
33 #include <entity.h>
34 
35 /* Local includes */
36 /* (shouldn't be any) */
37 
/* rssyl_strreplace()
 * Returns a newly allocated copy of 'source' in which every non-overlapping
 * occurrence of 'pattern' (searched left to right, compared byte-wise) has
 * been replaced by 'replacement' - roughly sed's s/pattern/replacement/g
 * without regular expressions.
 *
 * - 'source' and 'pattern' must be valid UTF-8; if any argument is NULL or
 *   fails validation, a warning is emitted through g_return_val_if_fail()
 *   and an unmodified copy of 'source' is returned (NULL if 'source' itself
 *   is NULL).
 * - An empty 'pattern' matches nothing; a plain copy of 'source' is returned.
 * - The return value is allocated with the GLib allocator and needs to be
 *   freed with g_free() by the caller.
 */
gchar *rssyl_strreplace(gchar *source, gchar *pattern,
		gchar *replacement)
{
	const gchar *src, *hit;
	gchar *result, *dst;
	size_t count = 0;
	size_t len_source, len_pattern, len_replacement;

	g_return_val_if_fail(source != NULL, g_strdup(source));
	g_return_val_if_fail(pattern != NULL, g_strdup(source));
	g_return_val_if_fail(replacement != NULL, g_strdup(source));

	g_return_val_if_fail(g_utf8_validate(source, -1, NULL), g_strdup(source));
	g_return_val_if_fail(g_utf8_validate(pattern, -1, NULL), g_strdup(source));

	len_pattern = strlen(pattern);

	/* An empty pattern would "match" at every position without ever
	 * advancing the search, so treat it as "nothing to replace". */
	if (len_pattern == 0)
		return g_strdup(source);

	len_source = strlen(source);
	len_replacement = strlen(replacement);

	/* First pass: count the matches so the result can be sized exactly. */
	for (src = source; (hit = strstr(src, pattern)) != NULL;
			src = hit + len_pattern)
		count++;

	if (count == 0)
		return g_strdup(source);

	result = g_malloc(len_source
			- (count * len_pattern)
			+ (count * len_replacement)
			+ 1);

	/* Second pass: copy the text between matches verbatim and emit the
	 * replacement in place of each match. */
	dst = result;
	for (src = source; (hit = strstr(src, pattern)) != NULL;
			src = hit + len_pattern) {
		size_t gap = (size_t)(hit - src);

		memcpy(dst, src, gap);
		dst += gap;
		memcpy(dst, replacement, len_replacement);
		dst += len_replacement;
	}

	/* Whatever follows the last match, including the terminating NUL. */
	strcpy(dst, src);

	return result;
}
107 
108 typedef struct _RSSyl_HTMLSymbol RSSyl_HTMLSymbol;
109 struct _RSSyl_HTMLSymbol
110 {
111 	gchar *const key;
112 	gchar *const val;
113 };
114 
115 static RSSyl_HTMLSymbol tag_list[] = {
116 	{ "<cite>", "\"" },
117 	{ "</cite>", "\"" },
118 	{ "<i>", "" },
119 	{ "</i>", "" },
120 	{ "<em>", "" },
121 	{ "</em>", "" },
122 	{ "<b>", "" },
123 	{ "</b>", "" },
124 	{ "<nobr>", "" },
125 	{ "</nobr>", "" },
126 	{ "<wbr>", "" },
127 	{ "<sub>", "" },
128 	{ "</sub>", "" },
129 	{ NULL, NULL }
130 };
131 
rssyl_replace_chrefs(gchar * string)132 static gchar *rssyl_replace_chrefs(gchar *string)
133 {
134 	char *new = g_malloc0(strlen(string) + 1), *ret;
135 	gchar *entity;
136 	int i, ii;
137 
138 	/* &xx; */
139 	ii = 0;
140 	for (i = 0; i < strlen(string); ++i) {
141 		if (string[i] == '&') {
142 			entity = entity_decode(&(string[i]));
143 			if (entity != NULL) {
144 				g_strlcat(new, entity, strlen(string));
145 				ii += strlen(entity);
146 				g_free(entity);
147 				entity = NULL;
148 				while (string[++i] != ';');
149 				--i; /* loop will inc it again */
150 			} else {
151 				new[ii++] = string[i];
152 			}
153 		} else {
154 			new[ii++] = string[i];
155 		}
156 	}
157 
158 	ret = g_strdup(new);
159 	g_free(new);
160 	return ret;
161 }
162 
/* rssyl_replace_html_stuff()
 * Returns a newly allocated, cleaned-up copy of 'text':
 * - if 'symbols' is set, the text is first run through
 *   rssyl_replace_chrefs();
 * - if 'tags' is set, every entry of tag_list whose key occurs in the
 *   original 'text' is then replaced by its value.
 * Returns NULL (with a warning) if 'text' is NULL.
 */
gchar *rssyl_replace_html_stuff(gchar *text,
		gboolean symbols, gboolean tags)
{
	gchar *result = NULL;
	RSSyl_HTMLSymbol *sym;

	g_return_val_if_fail(text != NULL, NULL);

	result = symbols ? rssyl_replace_chrefs(text) : g_strdup(text);

	if( !tags )
		return result;

	/* TODO: rewrite this part to work similarly to rssyl_replace_chrefs() */
	for( sym = tag_list; sym->key != NULL; sym++ ) {
		gchar *replaced;

		/* The presence test looks at the caller's original text, not at
		 * the working copy. */
		if( g_strstr_len(text, strlen(text), sym->key) == NULL )
			continue;

		replaced = rssyl_strreplace(result, sym->key, sym->val);
		g_free(result);
		result = replaced;
	}

	return result;
}
190 
rssyl_sanitize_string(gchar * str,gboolean strip_nl)191 static gchar *rssyl_sanitize_string(gchar *str, gboolean strip_nl)
192 {
193 	gchar *new = NULL, *c = str, *n = NULL;
194 
195 	if( str == NULL )
196 		return NULL;
197 
198 	n = new = malloc(strlen(str) + 1);
199 	memset(new, '\0', strlen(str) + 1);
200 
201 	while( *c != '\0' ) {
202 		if( !isspace(*c) || *c == ' ' || (!strip_nl && *c == '\n') ) {
203 			*n = *c;
204 			n++;
205 		}
206 		c++;
207 	}
208 
209 	return new;
210 }
211 
212 /* rssyl_format_string()
213  * - return value needs to be freed
214  */
rssyl_format_string(gchar * str,gboolean replace_html,gboolean strip_nl)215 gchar *rssyl_format_string(gchar *str, gboolean replace_html,
216 		gboolean strip_nl)
217 {
218 	gchar *res = NULL, *tmp = NULL;
219 
220 	g_return_val_if_fail(str != NULL, NULL);
221 
222 	if (replace_html)
223 		tmp = rssyl_replace_html_stuff(str, TRUE, TRUE);
224 	else
225 		tmp = g_strdup(str);
226 
227 	res = rssyl_sanitize_string(tmp, strip_nl);
228 	g_free(tmp);
229 
230 	g_strstrip(res);
231 
232 	return res;
233 }
234 
235 /* this functions splits a string into an array of string, by
236  * returning an array of pointers to positions of the delimiter
237  * in the original string and replacing this delimiter with a
238  * NULL. It does not duplicate memory, hence you should only
239  * free the array and not its elements, and you should not
240  * free the original string before you're done with the array.
241  * maybe could be part of the core (utils.c).
242  */
strsplit_no_copy(gchar * str,char delimiter)243 gchar **strsplit_no_copy(gchar *str, char delimiter)
244 {
245 	gchar **array = g_new(gchar *, 1);
246 	int i = 0;
247 	gchar *cur = str, *next;
248 
249 	array[i] = cur;
250 	i++;
251 	while ((next = strchr(cur, delimiter)) != NULL) {
252 		*(next) = '\0';
253 		array = g_realloc(array, (sizeof(gchar *)) * (i + 1));
254 		array[i] = next + 1;
255 		cur = next + 1;
256 		i++;
257 	}
258 	array = g_realloc(array, (sizeof(gchar *)) * (i + 1));
259 	array[i] = NULL;
260 	return array;
261 }
262 
263 /* This is a very dumb function - it just strips <, > and everything between
264  * them. */
strip_html(gchar * str)265 void strip_html(gchar *str)
266 {
267 	gchar *p = str;
268 	gboolean intag = FALSE;
269 
270 	while (*p) {
271 		if (*p == '<')
272 			intag = TRUE;
273 		else if (*p == '>')
274 			intag = FALSE;
275 
276 		if (*p == '<' || *p == '>' || intag)
277 			memmove(p, p + 1, strlen(p));
278 		else
279 			p++;
280 	}
281 }
282 
my_normalize_url(const gchar * url)283 gchar *my_normalize_url(const gchar *url)
284 {
285 	gchar *myurl = NULL;
286 
287 	if (!strncmp(url, "feed://", 7))
288 		myurl = g_strdup(url+7);
289 	else if (!strncmp(url, "feed:", 5))
290 		myurl = g_strdup(url+5);
291 	else
292 		myurl = g_strdup(url);
293 
294 	return g_strstrip(myurl);
295 }
296