1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * gedit-convert.c
4 * This file is part of gedit
5 *
6 * Copyright (C) 2003 - Paolo Maggi
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 */
23
24 /*
25 * Modified by the gedit Team, 2003. See the AUTHORS file for a
26 * list of people on the gedit Team.
27 * See the ChangeLog files for a list of changes.
28 */
29
30 #include <string.h>
31 #include <stdio.h>
32
33 #include <glib/gi18n.h>
34
35 #include "anjuta-convert.h"
36
37 GQuark
anjuta_convert_error_quark(void)38 anjuta_convert_error_quark (void)
39 {
40 static GQuark quark;
41 if (!quark)
42 quark = g_quark_from_static_string ("anjuta_convert_error");
43
44 return quark;
45 }
46
47 static gchar *
anjuta_convert_to_utf8_from_charset(const gchar * content,gsize len,const gchar * charset,gsize * new_len,GError ** error)48 anjuta_convert_to_utf8_from_charset (const gchar *content,
49 gsize len,
50 const gchar *charset,
51 gsize *new_len,
52 GError **error)
53 {
54 gchar *utf8_content = NULL;
55 GError *conv_error = NULL;
56 gchar* converted_contents = NULL;
57 gsize bytes_read;
58
59 g_return_val_if_fail (content != NULL, NULL);
60 g_return_val_if_fail (len > 0, NULL);
61 g_return_val_if_fail (charset != NULL, NULL);
62
63 if (strcmp (charset, "UTF-8") == 0)
64 {
65 if (g_utf8_validate (content, len, NULL))
66 {
67 if (new_len != NULL)
68 *new_len = len;
69
70 return g_strndup (content, len);
71 }
72 else
73 {
74 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
75 _("The file you are trying to open contains an invalid byte sequence."));
76
77 return NULL;
78 }
79 }
80
81 converted_contents = g_convert (content,
82 len,
83 "UTF-8",
84 charset,
85 &bytes_read,
86 new_len,
87 &conv_error);
88
89 /* There is no way we can avoid to run g_utf8_validate on the converted text.
90 *
91 * <paolo> hmmm... but in that case g_convert should fail
92 * <owen> paolo: g_convert() doesn't necessarily have the same definition
93 * <owen> GLib just uses the system's iconv
94 * <owen> paolo: I think we've explained what's going on.
95 * I have to define it as NOTABUG since g_convert() isn't going to
96 * start post-processing or checking what iconv() does and
97 * changing g_utf8_valdidate() wouldn't be API compatible even if I
98 * thought it was right
99 */
100 if ((conv_error != NULL) ||
101 !g_utf8_validate (converted_contents, *new_len, NULL) ||
102 (bytes_read != len))
103 {
104
105 if (converted_contents != NULL)
106 g_free (converted_contents);
107
108 if (conv_error != NULL)
109 g_propagate_error (error, conv_error);
110 else
111 {
112 g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
113 _("The file you are trying to open contains an invalid byte sequence."));
114 }
115 }
116 else
117 {
118 g_return_val_if_fail (converted_contents != NULL, NULL);
119
120 utf8_content = converted_contents;
121 }
122
123 return utf8_content;
124 }
125
126 gchar *
anjuta_convert_to_utf8(const gchar * content,gsize len,const AnjutaEncoding ** encoding,gsize * new_len,GError ** error)127 anjuta_convert_to_utf8 (const gchar *content,
128 gsize len,
129 const AnjutaEncoding **encoding,
130 gsize *new_len,
131 GError **error)
132 {
133 g_return_val_if_fail (content != NULL, NULL);
134 g_return_val_if_fail (encoding != NULL, NULL);
135
136 if (len < 0)
137 len = strlen (content);
138
139 if (*encoding != NULL)
140 {
141 const gchar* charset;
142
143 charset = anjuta_encoding_get_charset (*encoding);
144
145 g_return_val_if_fail (charset != NULL, NULL);
146
147 return anjuta_convert_to_utf8_from_charset (content,
148 len,
149 charset,
150 new_len,
151 error);
152 }
153 else
154 {
155 /* Automatically detect the encoding used */
156 GSList *encodings;
157 GSList *start;
158 gchar *ret = NULL;
159 if (g_utf8_validate (content, len, NULL))
160 {
161 if (new_len != NULL)
162 *new_len = len;
163
164 return g_strndup (content, len);
165 }
166 else
167 {
168 g_set_error (error, ANJUTA_CONVERT_ERROR,
169 ANJUTA_CONVERT_ERROR_AUTO_DETECTION_FAILED,
170 _("Anjuta was not able to automatically determine "
171 "the encoding of the file you want to open."));
172 return NULL;
173 }
174
175 start = encodings;
176
177 while (encodings != NULL)
178 {
179 const AnjutaEncoding *enc;
180 const gchar *charset;
181 gchar *utf8_content;
182
183 enc = (const AnjutaEncoding *)encodings->data;
184
185 charset = anjuta_encoding_get_charset (enc);
186 g_return_val_if_fail (charset != NULL, NULL);
187
188 utf8_content = anjuta_convert_to_utf8_from_charset (content,
189 len,
190 charset,
191 new_len,
192 NULL);
193
194 if (utf8_content != NULL)
195 {
196 *encoding = enc;
197 ret = utf8_content;
198
199 break;
200 }
201
202 encodings = g_slist_next (encodings);
203 }
204
205 if (ret == NULL)
206 {
207 g_set_error (error, ANJUTA_CONVERT_ERROR,
208 ANJUTA_CONVERT_ERROR_AUTO_DETECTION_FAILED,
209 _("Anjuta was not able to automatically determine "
210 "the encoding of the file you want to open."));
211 }
212
213 g_slist_free (start);
214
215 return ret;
216 }
217
218 g_return_val_if_reached (NULL);
219 }
220
221 gchar *
anjuta_convert_from_utf8(const gchar * content,gsize len,const AnjutaEncoding * encoding,gsize * new_len,GError ** error)222 anjuta_convert_from_utf8 (const gchar *content,
223 gsize len,
224 const AnjutaEncoding *encoding,
225 gsize *new_len,
226 GError **error)
227 {
228 GError *conv_error = NULL;
229 gchar *converted_contents = NULL;
230 gsize bytes_written = 0;
231
232 g_return_val_if_fail (content != NULL, NULL);
233 g_return_val_if_fail (g_utf8_validate (content, len, NULL), NULL);
234 g_return_val_if_fail (encoding != NULL, NULL);
235
236 if (len < 0)
237 len = strlen (content);
238
239 if (encoding == anjuta_encoding_get_utf8 ())
240 return g_strndup (content, len);
241
242 converted_contents = g_convert (content,
243 len,
244 anjuta_encoding_get_charset (encoding),
245 "UTF-8",
246 NULL,
247 &bytes_written,
248 &conv_error);
249
250 if (conv_error != NULL)
251 {
252 if (converted_contents != NULL)
253 {
254 g_free (converted_contents);
255 converted_contents = NULL;
256 }
257
258 g_propagate_error (error, conv_error);
259 }
260 else
261 {
262 if (new_len != NULL)
263 *new_len = bytes_written;
264 }
265
266 return converted_contents;
267 }
268
269