1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; coding: utf-8 -*- */
2 /* test-buffer-output-stream.c
3  * This file is part of GtkSourceView
4  *
5  * Copyright (C) 2010 - Ignacio Casal Quinteiro
6  * Copyright (C) 2014 - Sébastien Wilmet
7  *
8  * GtkSourceView is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * GtkSourceView is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21  */
22 
23 #include <glib.h>
24 #include <glib/gprintf.h>
25 #include <gtk/gtk.h>
26 #include <string.h>
27 #include <gtksourceview/gtksource.h>
28 #include "gtksourceview/gtksourcebufferoutputstream.h"
29 
30 static void
test_consecutive_write(const gchar * inbuf,const gchar * outbuf,gsize write_chunk_len,GtkSourceNewlineType newline_type)31 test_consecutive_write (const gchar          *inbuf,
32 			const gchar          *outbuf,
33 			gsize                 write_chunk_len,
34 			GtkSourceNewlineType  newline_type)
35 {
36 	GtkSourceBuffer *source_buffer;
37 	GtkSourceBufferOutputStream *out;
38 	gsize len;
39 	gssize n, w;
40 	GError *err = NULL;
41 	gchar *b;
42 	GtkSourceNewlineType type;
43 	GSList *encodings = NULL;
44 
45 	source_buffer = gtk_source_buffer_new (NULL);
46 	encodings = g_slist_prepend (encodings, (gpointer)gtk_source_encoding_get_utf8 ());
47 	out = gtk_source_buffer_output_stream_new (source_buffer, encodings, TRUE);
48 
49 	n = 0;
50 
51 	do
52 	{
53 		len = MIN (write_chunk_len, strlen (inbuf + n));
54 		w = g_output_stream_write (G_OUTPUT_STREAM (out), inbuf + n, len, NULL, &err);
55 		g_assert_cmpint (w, >=, 0);
56 		g_assert_no_error (err);
57 
58 		n += w;
59 	} while (w != 0);
60 
61 	g_output_stream_flush (G_OUTPUT_STREAM (out), NULL, &err);
62 
63 	g_assert_no_error (err);
64 
65 	type = gtk_source_buffer_output_stream_detect_newline_type (out);
66 	g_assert (type == newline_type);
67 
68 	g_output_stream_close (G_OUTPUT_STREAM (out), NULL, &err);
69 	g_assert_no_error (err);
70 
71 	g_object_get (G_OBJECT (source_buffer), "text", &b, NULL);
72 
73 	g_assert_cmpstr (outbuf, ==, b);
74 	g_free (b);
75 
76 	g_assert (gtk_text_buffer_get_modified (GTK_TEXT_BUFFER (source_buffer)) == FALSE);
77 
78 	g_object_unref (source_buffer);
79 	g_object_unref (out);
80 }
81 
82 static void
test_empty(void)83 test_empty (void)
84 {
85 	test_consecutive_write ("", "", 10, GTK_SOURCE_NEWLINE_TYPE_DEFAULT);
86 	test_consecutive_write ("\r\n", "", 10, GTK_SOURCE_NEWLINE_TYPE_CR_LF);
87 	test_consecutive_write ("\r", "", 10, GTK_SOURCE_NEWLINE_TYPE_CR);
88 	test_consecutive_write ("\n", "", 10, GTK_SOURCE_NEWLINE_TYPE_LF);
89 }
90 
91 static void
test_consecutive(void)92 test_consecutive (void)
93 {
94 	test_consecutive_write ("hello\nhow\nare\nyou", "hello\nhow\nare\nyou", 2,
95 				GTK_SOURCE_NEWLINE_TYPE_LF);
96 	test_consecutive_write ("hello\rhow\rare\ryou", "hello\rhow\rare\ryou", 2,
97 				GTK_SOURCE_NEWLINE_TYPE_CR);
98 	test_consecutive_write ("hello\r\nhow\r\nare\r\nyou", "hello\r\nhow\r\nare\r\nyou", 2,
99 				GTK_SOURCE_NEWLINE_TYPE_CR_LF);
100 }
101 
102 static void
test_consecutive_tnewline(void)103 test_consecutive_tnewline (void)
104 {
105 	test_consecutive_write ("hello\nhow\nare\nyou\n", "hello\nhow\nare\nyou", 2,
106 				GTK_SOURCE_NEWLINE_TYPE_LF);
107 	test_consecutive_write ("hello\rhow\rare\ryou\r", "hello\rhow\rare\ryou", 2,
108 				GTK_SOURCE_NEWLINE_TYPE_CR);
109 	test_consecutive_write ("hello\r\nhow\r\nare\r\nyou\r\n", "hello\r\nhow\r\nare\r\nyou", 2,
110 				GTK_SOURCE_NEWLINE_TYPE_CR_LF);
111 }
112 
113 static void
test_big_char(void)114 test_big_char (void)
115 {
116 	test_consecutive_write ("\343\203\200\343\203\200", "\343\203\200\343\203\200", 2,
117 				GTK_SOURCE_NEWLINE_TYPE_LF);
118 }
119 
120 static void
test_boundary(void)121 test_boundary (void)
122 {
123 	GtkSourceBuffer *source_buffer;
124 	GtkSourceBufferOutputStream *out;
125 	gint line_count;
126 	GError *err = NULL;
127 	GSList *encodings = NULL;
128 
129 	source_buffer = gtk_source_buffer_new (NULL);
130 	encodings = g_slist_prepend (encodings, (gpointer)gtk_source_encoding_get_utf8 ());
131 	out = gtk_source_buffer_output_stream_new (source_buffer, encodings, TRUE);
132 
133 	g_output_stream_write (G_OUTPUT_STREAM (out), "\r", 1, NULL, NULL);
134 	g_output_stream_write (G_OUTPUT_STREAM (out), "\n", 1, NULL, NULL);
135 
136 	g_output_stream_flush (G_OUTPUT_STREAM (out), NULL, &err);
137 	g_assert_no_error (err);
138 
139 	line_count = gtk_text_buffer_get_line_count (GTK_TEXT_BUFFER (source_buffer));
140 
141 	g_assert_cmpint (line_count, ==, 2);
142 
143 	g_output_stream_close (G_OUTPUT_STREAM (out), NULL, &err);
144 	g_assert_no_error (err);
145 
146 	g_object_unref (source_buffer);
147 	g_object_unref (out);
148 }
149 
#if 0
/*
 * Disabled: this test broke after
 * https://bugzilla.gnome.org/show_bug.cgi?id=694669 and needs input that is
 * actually invalid UTF-8 before it can be re-enabled.
 *
 * Each case pairs an input with the expected buffer text, in which the
 * offending bytes appear as "\EF\BF\BE" escapes.
 */
static void
test_invalid_utf8 (void)
{
	static const struct
	{
		const gchar *input;
		const gchar *expected;
	} cases[] = {
		{ "foobar\n\xef\xbf\xbe",           "foobar\n\\EF\\BF\\BE" },
		{ "foobar\n\xef\xbf\xbezzzzzz\n",   "foobar\n\\EF\\BF\\BEzzzzzz" },
		{ "\xef\xbf\xbezzzzzz\n",           "\\EF\\BF\\BEzzzzzz" }
	};
	guint i;

	for (i = 0; i < G_N_ELEMENTS (cases); i++)
	{
		test_consecutive_write (cases[i].input, cases[i].expected, 10,
		                        GTK_SOURCE_NEWLINE_TYPE_LF);
	}
}
#endif
162 
163 /* SMART CONVERSION */
164 
/* ASCII-only sample text, used by test_utf8_utf8(). */
#define TEXT_TO_CONVERT "this is some text to make the tests"
/* Sample text containing one 3-byte UTF-8 character (bytes E6 96 87), used
 * as the source text in test_guessed(). */
#define TEXT_TO_GUESS "hello \xe6\x96\x87 world"
167 
168 static gchar *
get_encoded_text(const gchar * text,gint nread,const GtkSourceEncoding * to,const GtkSourceEncoding * from,gsize * bytes_written_aux,gboolean care_about_error)169 get_encoded_text (const gchar             *text,
170 		  gint                     nread,
171 		  const GtkSourceEncoding *to,
172 		  const GtkSourceEncoding *from,
173 		  gsize                   *bytes_written_aux,
174 		  gboolean                 care_about_error)
175 {
176 	GCharsetConverter *converter;
177 	gchar *out, *out_aux;
178 	gsize bytes_read, bytes_read_aux;
179 	gsize bytes_written;
180 	GConverterResult res;
181 	GError *err;
182 
183 	converter = g_charset_converter_new (gtk_source_encoding_get_charset (to),
184 					     gtk_source_encoding_get_charset (from),
185 					     NULL);
186 
187 	out = g_malloc (200);
188 	out_aux = g_malloc (200);
189 	err = NULL;
190 	bytes_read_aux = 0;
191 	*bytes_written_aux = 0;
192 
193 	if (nread == -1)
194 	{
195 		nread = strlen (text);
196 	}
197 
198 	do
199 	{
200 		res = g_converter_convert (G_CONVERTER (converter),
201 		                           text + bytes_read_aux,
202 		                           nread,
203 		                           out_aux,
204 		                           200,
205 		                           G_CONVERTER_INPUT_AT_END,
206 		                           &bytes_read,
207 		                           &bytes_written,
208 		                           &err);
209 		memcpy (out + *bytes_written_aux, out_aux, bytes_written);
210 		bytes_read_aux += bytes_read;
211 		*bytes_written_aux += bytes_written;
212 		nread -= bytes_read;
213 	} while (res != G_CONVERTER_FINISHED && res != G_CONVERTER_ERROR);
214 
215 	if (care_about_error)
216 	{
217 		g_assert_no_error (err);
218 	}
219 	else if (err)
220 	{
221 		g_printf ("** You don't care, but there was an error: %s", err->message);
222 		return NULL;
223 	}
224 
225 	out[*bytes_written_aux] = '\0';
226 
227 	if (!g_utf8_validate (out, *bytes_written_aux, NULL) && !care_about_error)
228 	{
229 		if (!care_about_error)
230 		{
231 			return NULL;
232 		}
233 		else
234 		{
235 			g_assert_not_reached ();
236 		}
237 	}
238 
239 	return out;
240 }
241 
242 static gchar *
do_test(const gchar * inbuf,const gchar * enc,GSList * encodings,gsize len,gsize write_chunk_len,const GtkSourceEncoding ** guessed)243 do_test (const gchar              *inbuf,
244 	 const gchar              *enc,
245 	 GSList                   *encodings,
246 	 gsize                     len,
247 	 gsize                     write_chunk_len,
248 	 const GtkSourceEncoding **guessed)
249 {
250 	GtkSourceBuffer *source_buffer;
251 	GtkSourceBufferOutputStream *out;
252 	GError *err = NULL;
253 	GtkTextIter start, end;
254 	gchar *text;
255 	gsize to_write;
256 	gssize n, w;
257 
258 	if (enc != NULL)
259 	{
260 		encodings = NULL;
261 		encodings = g_slist_prepend (encodings, (gpointer)gtk_source_encoding_get_from_charset (enc));
262 	}
263 
264 	source_buffer = gtk_source_buffer_new (NULL);
265 	out = gtk_source_buffer_output_stream_new (source_buffer, encodings, TRUE);
266 
267 	n = 0;
268 
269 	do
270 	{
271 		to_write = MIN (len, write_chunk_len);
272 		w = g_output_stream_write (G_OUTPUT_STREAM (out), inbuf + n, to_write, NULL, &err);
273 		g_assert_cmpint (w, >=, 0);
274 		g_assert_no_error (err);
275 
276 		len -= w;
277 		n += w;
278 	} while (len != 0);
279 
280 	g_output_stream_flush (G_OUTPUT_STREAM (out), NULL, &err);
281 	g_assert_no_error (err);
282 
283 	g_output_stream_close (G_OUTPUT_STREAM (out), NULL, &err);
284 	g_assert_no_error (err);
285 
286 	if (guessed != NULL)
287 	{
288 		*guessed = gtk_source_buffer_output_stream_get_guessed (out);
289 	}
290 
291 	gtk_text_buffer_get_bounds (GTK_TEXT_BUFFER (source_buffer), &start, &end);
292 	text = gtk_text_buffer_get_text (GTK_TEXT_BUFFER (source_buffer),
293 	                                 &start,
294 	                                 &end,
295 	                                 FALSE);
296 
297 	g_object_unref (source_buffer);
298 	g_object_unref (out);
299 
300 	return text;
301 }
302 
303 static void
test_utf8_utf8(void)304 test_utf8_utf8 (void)
305 {
306 	gchar *aux;
307 
308 	aux = do_test (TEXT_TO_CONVERT, "UTF-8", NULL, strlen (TEXT_TO_CONVERT), strlen (TEXT_TO_CONVERT), NULL);
309 	g_assert_cmpstr (aux, ==, TEXT_TO_CONVERT);
310 	g_free (aux);
311 
312 	aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, 18, NULL);
313 	g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
314 	g_free (aux);
315 
316 	/* small chunk */
317 	aux = do_test ("foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz", "UTF-8", NULL, 18, 2, NULL);
318 	g_assert_cmpstr (aux, ==, "foobar\xc3\xa8\xc3\xa8\xc3\xa8zzzzzz");
319 	g_free (aux);
320 }
321 
322 static void
test_empty_conversion(void)323 test_empty_conversion (void)
324 {
325 	const GtkSourceEncoding *guessed;
326 	gchar *out;
327 	GSList *encodings = NULL;
328 
329 	/* testing the case of an empty file and list of encodings with no
330 	   utf-8. In this case, the smart converter cannot determine the right
331 	   encoding (because there is no input), but should still default to
332 	   utf-8 for the detection */
333 	encodings = g_slist_prepend (encodings, (gpointer)gtk_source_encoding_get_from_charset ("UTF-16"));
334 	encodings = g_slist_prepend (encodings, (gpointer)gtk_source_encoding_get_from_charset ("ISO-8859-15"));
335 
336 	out = do_test ("", NULL, encodings, 0, 0, &guessed);
337 
338 	g_assert_cmpstr (out, ==, "");
339 	g_free (out);
340 
341 	g_assert (guessed == gtk_source_encoding_get_utf8 ());
342 }
343 
344 static void
test_guessed(void)345 test_guessed (void)
346 {
347 	GSList *encs = NULL;
348 	gchar *aux, *aux2, *fail;
349 	gsize aux_len, fail_len;
350 	const GtkSourceEncoding *guessed;
351 
352 	aux = get_encoded_text (TEXT_TO_GUESS, -1,
353 	                        gtk_source_encoding_get_from_charset ("UTF-16"),
354 	                        gtk_source_encoding_get_from_charset ("UTF-8"),
355 	                        &aux_len,
356 	                        TRUE);
357 
358 	fail = get_encoded_text (aux, aux_len,
359 	                         gtk_source_encoding_get_from_charset ("UTF-8"),
360 	                         gtk_source_encoding_get_from_charset ("ISO-8859-15"),
361 	                         &fail_len,
362 	                         FALSE);
363 
364 	g_assert (fail == NULL);
365 
366 	/* ISO-8859-15 should fail */
367 	encs = g_slist_append (encs, (gpointer)gtk_source_encoding_get_from_charset ("ISO-8859-15"));
368 	encs = g_slist_append (encs, (gpointer)gtk_source_encoding_get_from_charset ("UTF-16"));
369 
370 	aux2 = do_test (aux, NULL, encs, aux_len, aux_len, &guessed);
371 	g_free (aux);
372 	g_free (aux2);
373 
374 	g_assert (guessed == gtk_source_encoding_get_from_charset ("UTF-16"));
375 }
376 
377 static void
test_utf16_utf8(void)378 test_utf16_utf8 (void)
379 {
380 	gchar *text, *aux;
381 	gsize aux_len;
382 
383 	text = get_encoded_text ("\xe2\xb4\xb2", -1,
384 	                         gtk_source_encoding_get_from_charset ("UTF-16"),
385 	                         gtk_source_encoding_get_from_charset ("UTF-8"),
386 	                         &aux_len,
387 	                         TRUE);
388 
389 	aux = do_test (text, "UTF-16", NULL, aux_len, aux_len, NULL);
390 	g_assert_cmpstr (aux, ==, "\xe2\xb4\xb2");
391 	g_free (aux);
392 
393 	aux = do_test (text, "UTF-16", NULL, aux_len, 1, NULL);
394 	g_assert_cmpstr (aux, ==, "\xe2\xb4\xb2");
395 	g_free (aux);
396 }
397 
398 gint
main(gint argc,gchar * argv[])399 main (gint   argc,
400       gchar *argv[])
401 {
402 	g_test_init (&argc, &argv, NULL);
403 
404 	g_test_add_func ("/buffer-output-stream/empty", test_empty);
405 
406 	g_test_add_func ("/buffer-output-stream/consecutive", test_consecutive);
407 	g_test_add_func ("/buffer-output-stream/consecutive_tnewline", test_consecutive_tnewline);
408 	g_test_add_func ("/buffer-output-stream/big-char", test_big_char);
409 	g_test_add_func ("/buffer-output-stream/test-boundary", test_boundary);
410 
411 
412 	/* This broke after https://bugzilla.gnome.org/show_bug.cgi?id=694669 We
413 	 * need to revisit the test to pick something that is actually invalid
414 	 * utf8.
415 	 */
416 #if 0
417 	g_test_add_func ("/buffer-output-stream/test-invalid-utf8", test_invalid_utf8);
418 #endif
419 	g_test_add_func ("/buffer-output-stream/smart conversion: utf8-utf8", test_utf8_utf8);
420 	g_test_add_func ("/buffer-output-stream/smart conversion: empty", test_empty_conversion);
421 	g_test_add_func ("/buffer-output-stream/smart conversion: guessed", test_guessed);
422 	g_test_add_func ("/buffer-output-stream/smart conversion: utf16-utf8", test_utf16_utf8);
423 
424 	return g_test_run ();
425 }
426