1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * gedit-convert.c
4  * This file is part of gedit
5  *
6  * Copyright (C) 2003 - Paolo Maggi
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  */
23 
24 /*
25  * Modified by the gedit Team, 2003. See the AUTHORS file for a
26  * list of people on the gedit Team.
27  * See the ChangeLog files for a list of changes.
28  */
29 
30 #include <string.h>
31 #include <stdio.h>
32 
33 #include <glib/gi18n.h>
34 
35 #include "anjuta-convert.h"
36 
37 GQuark
anjuta_convert_error_quark(void)38 anjuta_convert_error_quark (void)
39 {
40 	static GQuark quark;
41 	if (!quark)
42 		quark = g_quark_from_static_string ("anjuta_convert_error");
43 
44 	return quark;
45 }
46 
47 static gchar *
anjuta_convert_to_utf8_from_charset(const gchar * content,gsize len,const gchar * charset,gsize * new_len,GError ** error)48 anjuta_convert_to_utf8_from_charset (const gchar  *content,
49 				    gsize         len,
50 				    const gchar  *charset,
51 				    gsize        *new_len,
52 				    GError 	**error)
53 {
54 	gchar *utf8_content = NULL;
55 	GError *conv_error = NULL;
56 	gchar* converted_contents = NULL;
57 	gsize bytes_read;
58 
59 	g_return_val_if_fail (content != NULL, NULL);
60 	g_return_val_if_fail (len > 0, NULL);
61 	g_return_val_if_fail (charset != NULL, NULL);
62 
63 	if (strcmp (charset, "UTF-8") == 0)
64 	{
65 		if (g_utf8_validate (content, len, NULL))
66 		{
67 			if (new_len != NULL)
68 				*new_len = len;
69 
70 			return g_strndup (content, len);
71 		}
72 		else
73 		{
74 			g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
75 				     _("The file you are trying to open contains an invalid byte sequence."));
76 
77 			return NULL;
78 		}
79 	}
80 
81 	converted_contents = g_convert (content,
82 					len,
83 					"UTF-8",
84 					charset,
85 					&bytes_read,
86 					new_len,
87 					&conv_error);
88 
89 	/* There is no way we can avoid to run 	g_utf8_validate on the converted text.
90 	 *
91 	 * <paolo> hmmm... but in that case g_convert should fail
92 	 * <owen> paolo: g_convert() doesn't necessarily have the same definition
93          * <owen> GLib just uses the system's iconv
94          * <owen> paolo: I think we've explained what's going on.
95          * I have to define it as NOTABUG since g_convert() isn't going to
96          * start post-processing or checking what iconv() does and
97          * changing g_utf8_valdidate() wouldn't be API compatible even if I
98          * thought it was right
99 	 */
100 	if ((conv_error != NULL) ||
101 	    !g_utf8_validate (converted_contents, *new_len, NULL) ||
102 	    (bytes_read != len))
103 	{
104 
105 		if (converted_contents != NULL)
106 			g_free (converted_contents);
107 
108 		if (conv_error != NULL)
109 			g_propagate_error (error, conv_error);
110 		else
111 		{
112 			g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
113 				     _("The file you are trying to open contains an invalid byte sequence."));
114 		}
115 	}
116 	else
117 	{
118 		g_return_val_if_fail (converted_contents != NULL, NULL);
119 
120 		utf8_content = converted_contents;
121 	}
122 
123 	return utf8_content;
124 }
125 
126 gchar *
anjuta_convert_to_utf8(const gchar * content,gsize len,const AnjutaEncoding ** encoding,gsize * new_len,GError ** error)127 anjuta_convert_to_utf8 (const gchar          *content,
128 		       gsize                 len,
129 		       const AnjutaEncoding **encoding,
130 		       gsize                *new_len,
131 		       GError              **error)
132 {
133 	g_return_val_if_fail (content != NULL, NULL);
134 	g_return_val_if_fail (encoding != NULL, NULL);
135 
136 	if (len < 0)
137 		len = strlen (content);
138 
139 	if (*encoding != NULL)
140 	{
141 		const gchar* charset;
142 
143 		charset = anjuta_encoding_get_charset (*encoding);
144 
145 		g_return_val_if_fail (charset != NULL, NULL);
146 
147 		return anjuta_convert_to_utf8_from_charset (content,
148 							   len,
149 							   charset,
150 							   new_len,
151 							   error);
152 	}
153 	else
154 	{
155 		/* Automatically detect the encoding used */
156 	GSList *encodings;
157 	GSList *start;
158 	gchar *ret = NULL;
159 	if (g_utf8_validate (content, len, NULL))
160 		{
161 			if (new_len != NULL)
162 				*new_len = len;
163 
164 			return g_strndup (content, len);
165 		}
166 		else
167 		{
168 			g_set_error (error, ANJUTA_CONVERT_ERROR,
169 				     ANJUTA_CONVERT_ERROR_AUTO_DETECTION_FAILED,
170 			 	     _("Anjuta was not able to automatically determine "
171 				     "the encoding of the file you want to open."));
172 			return NULL;
173 		}
174 
175 		start = encodings;
176 
177 		while (encodings != NULL)
178 		{
179 			const AnjutaEncoding *enc;
180 			const gchar *charset;
181 			gchar *utf8_content;
182 
183 			enc = (const AnjutaEncoding *)encodings->data;
184 
185 			charset = anjuta_encoding_get_charset (enc);
186 			g_return_val_if_fail (charset != NULL, NULL);
187 
188 			utf8_content = anjuta_convert_to_utf8_from_charset (content,
189 									   len,
190 									   charset,
191 									   new_len,
192 									   NULL);
193 
194 			if (utf8_content != NULL)
195 			{
196 				*encoding = enc;
197 				ret = utf8_content;
198 
199 				break;
200 			}
201 
202 			encodings = g_slist_next (encodings);
203 		}
204 
205 		if (ret == NULL)
206 		{
207 			g_set_error (error, ANJUTA_CONVERT_ERROR,
208 				     ANJUTA_CONVERT_ERROR_AUTO_DETECTION_FAILED,
209 			 	     _("Anjuta was not able to automatically determine "
210 				     "the encoding of the file you want to open."));
211 		}
212 
213 		g_slist_free (start);
214 
215 		return ret;
216 	}
217 
218 	g_return_val_if_reached (NULL);
219 }
220 
221 gchar *
anjuta_convert_from_utf8(const gchar * content,gsize len,const AnjutaEncoding * encoding,gsize * new_len,GError ** error)222 anjuta_convert_from_utf8 (const gchar          *content,
223 		         gsize                 len,
224 		         const AnjutaEncoding  *encoding,
225 			 gsize                *new_len,
226 			 GError 	     **error)
227 {
228 	GError *conv_error         = NULL;
229 	gchar  *converted_contents = NULL;
230 	gsize   bytes_written = 0;
231 
232 	g_return_val_if_fail (content != NULL, NULL);
233 	g_return_val_if_fail (g_utf8_validate (content, len, NULL), NULL);
234 	g_return_val_if_fail (encoding != NULL, NULL);
235 
236 	if (len < 0)
237 		len = strlen (content);
238 
239 	if (encoding == anjuta_encoding_get_utf8 ())
240 		return g_strndup (content, len);
241 
242 	converted_contents = g_convert (content,
243 					len,
244 					anjuta_encoding_get_charset (encoding),
245 					"UTF-8",
246 					NULL,
247 					&bytes_written,
248 					&conv_error);
249 
250 	if (conv_error != NULL)
251 	{
252 		if (converted_contents != NULL)
253 		{
254 			g_free (converted_contents);
255 			converted_contents = NULL;
256 		}
257 
258 		g_propagate_error (error, conv_error);
259 	}
260 	else
261 	{
262 		if (new_len != NULL)
263 			*new_len = bytes_written;
264 	}
265 
266 	return converted_contents;
267 }
268 
269