1 /* Bluefish HTML Editor
2  * bftextview2_spell.c
3  *
4  * Copyright (C) 2009-2020 Olivier Sessink
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 /*#define SPELL_PROFILING*/
20 
21 /* for the design docs see bftextview2.h */
22 
23 #include "bluefish.h"
24 
25 #ifdef HAVE_LIBENCHANT_2
26 /*nothing to do */
27 #else
28 	#ifdef HAVE_LIBENCHANT_1_4
29 	/*nothing to do */
30 	#else
31 	#define HAVE_LIBENCHANT_OLD
32 	#endif
33 #endif
34 
35 
36 #ifdef HAVE_LIBENCHANT
37 #ifdef HAVE_ENCHANT_ENCHANT_H
38 #include <enchant/enchant.h>
39 #else							/* HAVE_ENCHANT_ENCHANT_H */
40 #include <enchant.h>
41 #endif							/* HAVE_ENCHANT_ENCHANT_H */
42 #include <string.h>				/*strlen */
43 #include "bftextview2_private.h"
44 #include "bftextview2_scanner.h"
45 #include "bftextview2_langmgr.h"
46 #include "bftextview2_spell.h"
47 #ifdef MARKREGION
48 #include "bftextview2_markregion.h"
49 #endif
50 #include "document.h"
51 #include "xml_entity.h"
52 
53 /*#undef DBG_SPELL
54 #define DBG_SPELL g_print*/
55 
56 #define MAX_CONTINUOUS_SPELLCHECK_INTERVAL 0.1	/* float in seconds */
57 
58 static EnchantBroker *eb;
59 static guint loops_per_timer = 1000;
60 
61 #ifdef MARKREGION
62 static gboolean
markregion_find_region2spellcheck(BluefishTextView * btv,GtkTextIter * sit,GtkTextIter * eit)63 markregion_find_region2spellcheck(BluefishTextView * btv, GtkTextIter * sit, GtkTextIter * eit)
64 {
65 	guint start,end;
66 	markregion_get_region(&btv->spellcheck, NULL, &start, &end);
67 	if (start == BF_OFFSET_UNDEFINED) {
68 		return FALSE;
69 	}
70 	gtk_text_buffer_get_iter_at_offset(btv->buffer, sit, start);
71 	gtk_text_buffer_get_iter_at_offset(btv->buffer, eit, end);
72 	DBG_MARKREGION("markregion_find_region2spellcheck, tried iters at %u:%u, got iters at %u:%u\n",start,end,gtk_text_iter_get_offset(sit),gtk_text_iter_get_offset(eit));
73 	return TRUE;
74 }
75 #endif
76 
77 #ifdef NEEDSCANNING
78 static gboolean
needscanning_find_region2spellcheck(BluefishTextView * btv,GtkTextIter * start,GtkTextIter * end)79 needscanning_find_region2spellcheck(BluefishTextView * btv, GtkTextIter * start, GtkTextIter * end)
80 {
81 	/* first find a region that needs a spellcheck */
82 	gtk_text_buffer_get_start_iter(btv->buffer, start);
83 	if (!gtk_text_iter_begins_tag(start, btv->needspellcheck)) {
84 		DBG_SPELL("iter %d does not begins tag needspellcheck %p, needscanning(%p)=%d\n",
85 				  gtk_text_iter_get_offset(start), btv->needspellcheck, btv->needscanning,
86 				  gtk_text_iter_begins_tag(start, btv->needscanning));
87 		if (!gtk_text_iter_forward_to_tag_toggle(start, btv->needspellcheck)) {
88 			/* nothing to spellcheck */
89 			DBG_SPELL("tag needspellcheck is never started\n");
90 			return FALSE;
91 		}
92 	}
93 	/* find the end of the region */
94 	*end = *start;
95 	gtk_text_iter_forward_char(end);
96 	if (!gtk_text_iter_ends_tag(end, btv->needspellcheck)) {
97 		if (!gtk_text_iter_forward_to_tag_toggle(end, btv->needspellcheck)) {
98 			DBG_MSG("BUG: we should never get here\n");
99 			return FALSE;
100 		}
101 	}
102 	DBG_MARKREGION("needscanning_find_region2spellcheck, return iters at %d:%d\n",gtk_text_iter_get_offset(start),gtk_text_iter_get_offset(end));
103 	return TRUE;
104 }
105 #endif
106 
107 static gboolean
bftextview2_find_region2spellcheck(BluefishTextView * btv,GtkTextIter * start,GtkTextIter * end)108 bftextview2_find_region2spellcheck(BluefishTextView * btv, GtkTextIter * start,GtkTextIter * end)
109 {
110 	GtkTextTag *misspelled;
111 	gboolean ret;
112 #ifdef MARKREGION
113 	ret = markregion_find_region2spellcheck(btv, start, end);
114 #ifdef NEEDSCANNING
115 	gboolean mrret = ret;
116 	GtkTextIter mrits=*start, mrite=*end;
117 #endif
118 #endif
119 #ifdef NEEDSCANNING
120 	ret = needscanning_find_region2spellcheck(btv, start, end);
121 #endif
122 
123 #ifdef MARKREGION
124 #ifdef NEEDSCANNING
125 	if (mrret != ret) {
126 		g_print("ABORT: find_region2spellcheck, markregion returned %d, needscanning returned %d\n",mrret,ret);
127 		g_assert_not_reached();
128 	}
129 
130 	if (ret && (!gtk_text_iter_equal(&mrits, start) || !gtk_text_iter_equal(&mrite, end))) {
131 		g_print("ABORT: find_region2spellcheck, markregion (%d:%d) and needscanning code(%d:%d) have different regions!!\n",
132 				gtk_text_iter_get_offset(&mrits),gtk_text_iter_get_offset(&mrite),
133 				gtk_text_iter_get_offset(start),gtk_text_iter_get_offset(end));
134 		g_assert_not_reached();
135 	}
136 #endif
137 #endif
138 	if (!ret) {
139 		return FALSE;
140 	}
141 	/* if the region is within a misspelled word, enlarge it to the total misspelled word
142 	   (this fixes the situation where you add a space in the middle of "forgotthespace" and only
143 	   the space is scanned again) */
144 	misspelled = gtk_text_tag_table_lookup(langmgr_get_tagtable(), "_spellerror_");
145 	DBG_SPELL("find_region2spellcheck, start at %d has misspelled=%d, ends_mispelled=%d\n"
146 				,gtk_text_iter_get_offset(start)
147 				,gtk_text_iter_has_tag(start, misspelled)
148 				,gtk_text_iter_ends_tag(start, misspelled));
149 	if (gtk_text_iter_has_tag(start, misspelled) || gtk_text_iter_ends_tag(start, misspelled)) {
150 		gtk_text_iter_backward_to_tag_toggle(start, misspelled);
151 		DBG_SPELL("find_region2spellcheck, new start at %d\n"
152 				,gtk_text_iter_get_offset(start));
153 	}
154 	if (gtk_text_iter_has_tag(end, misspelled) || gtk_text_iter_begins_tag(end, misspelled)) {
155 		gtk_text_iter_forward_to_tag_toggle(end, misspelled);
156 		DBG_SPELL("find_region2spellcheck, new end at %d\n"
157 				,gtk_text_iter_get_offset(end));
158 	}
159 	return TRUE;
160 }
161 
162 static void
dicts_load_first_lcb(const char * const lang_tag,const char * const provider_name,const char * const provider_desc,const char * const provider_file,void * data)163 dicts_load_first_lcb(const char *const lang_tag, const char *const provider_name,
164 					 const char *const provider_desc, const char *const provider_file, void *data)
165 {
166 	Tbfwin *bfwin = data;
167 	if (!bfwin->ed && lang_tag) {
168 		bfwin->ed = (void *) enchant_broker_request_dict(eb, lang_tag);
169 		DBG_SPELL("loaded first available dictionary %s at %p\n", lang_tag, bfwin->ed);
170 		if (bfwin->ed) {
171 			g_free(bfwin->session->spell_lang);
172 			bfwin->session->spell_lang = g_strdup(lang_tag);
173 		}
174 	}
175 }
176 
177 void
unload_spell_dictionary(Tbfwin * bfwin)178 unload_spell_dictionary(Tbfwin * bfwin)
179 {
180 	if (bfwin->ed) {
181 #ifndef HAVE_LIBENCHANT_OLD
182 		DBG_SPELL("unload_spell_dictionary, bfwin=%p, ed=%p\n", bfwin, bfwin->ed);
183 		enchant_broker_free_dict(eb, (EnchantDict *) bfwin->ed);
184 		bfwin->ed = NULL;
185 #else
186 		/* enchant < 1.4.0 does not do refcounting, so we have to check ourselves if we are the last window that
187 		   is using this dictionary */
188 		GList *tmplist;
189 		gboolean in_use = FALSE;
190 		for (tmplist = g_list_first(main_v->bfwinlist); tmplist; tmplist = g_list_next(tmplist)) {
191 			if (tmplist->data != bfwin && BFWIN(tmplist->data)->ed == bfwin->ed) {
192 				in_use = TRUE;
193 				DBG_SPELL("keep dictionary %p, it is in use by bfwin %p\n", bfwin->ed, tmplist->data);
194 				break;
195 			}
196 		}
197 		if (!in_use) {
198 			DBG_SPELL("free dictionary %p, it is not in use\n", bfwin->ed);
199 			enchant_broker_free_dict(eb, (EnchantDict *) bfwin->ed);
200 			bfwin->ed = NULL;
201 		}
202 #endif
203 	}
204 }
205 
206 static gboolean
load_dictionary(Tbfwin * bfwin)207 load_dictionary(Tbfwin * bfwin)
208 {
209 	DBG_SPELL("load_dictionary for bfwin %p, bfwin->ed=%p\n", bfwin, bfwin->ed);
210 	unload_spell_dictionary(bfwin);
211 	if (bfwin->session->spell_lang && bfwin->session->spell_lang[0] != '\0'
212 		&& enchant_broker_dict_exists(eb, bfwin->session->spell_lang)) {
213 		DBG_SPELL("load_dictionary called for bfwin %p which has session->spell_lang=%s\n", bfwin,
214 				  bfwin->session->spell_lang);
215 		bfwin->ed = (void *) enchant_broker_request_dict(eb, bfwin->session->spell_lang);
216 		DBG_SPELL("loaded dictionary %s at %p\n", bfwin->session->spell_lang, bfwin->ed);
217 		return (bfwin->ed != NULL);
218 	} else {
219 		DBG_SPELL("load_dictionary, no setting, load first available\n");
220 		bfwin->ed = NULL;
221 		/* load the first existing enchant dict */
222 		enchant_broker_list_dicts(eb, dicts_load_first_lcb, bfwin);
223 		return (bfwin->ed != NULL);
224 	}
225 }
226 
227 static void
spellcheck_word(BluefishTextView * btv,GtkTextBuffer * buffer,GtkTextIter * start,GtkTextIter * end)228 spellcheck_word(BluefishTextView * btv, GtkTextBuffer * buffer, GtkTextIter * start, GtkTextIter * end)
229 {
230 	gchar *tocheck;
231 
232 	tocheck = gtk_text_buffer_get_text(buffer, start, end, FALSE);
233 	if (!tocheck)
234 		return;
235 
236 	if (strlen(tocheck)==0) {
237 		g_free(tocheck);
238 		return;
239 	}
240 
241 	DBG_SPELL("spellcheck_word, check word %s in dictionary %p\n", tocheck,
242 			  BFWIN(DOCUMENT(btv->doc)->bfwin)->ed);
243 	if (enchant_dict_check((EnchantDict *) BFWIN(DOCUMENT(btv->doc)->bfwin)->ed, tocheck, strlen(tocheck)) !=
244 		0) {
245 		DBG_SPELL("'%s' *not* spelled correctly!\n", tocheck);
246 		if (g_utf8_strchr(tocheck, -1, '&')) {
247 			gchar *tocheck_conv = xmlentities2utf8(tocheck);
248 			/* check for entities */
249 			if (tocheck_conv
250 				&& enchant_dict_check((EnchantDict *) BFWIN(DOCUMENT(btv->doc)->bfwin)->ed, tocheck_conv,
251 									  strlen(tocheck_conv)) != 0) {
252 				DBG_SPELL("'%s' after entity conversion (%s) *not* spelled correctly!\n", tocheck,
253 						  tocheck_conv);
254 				gtk_text_buffer_apply_tag_by_name(buffer, "_spellerror_", start, end);
255 			} else {
256 				DBG_SPELL("'%s' after entity conversion (%s) spelled correctly!\n", tocheck, tocheck_conv);
257 			}
258 			g_free(tocheck_conv);
259 		} else {
260 			gtk_text_buffer_apply_tag_by_name(buffer, "_spellerror_", start, end);
261 		}
262 	} else {
263 		DBG_SPELL("'%s' spelled correctly!\n", tocheck);
264 	}
265 	g_free(tocheck);
266 }
267 
268 
269 static gboolean
has_tags(GSList * tags,GtkTextTag ** tagarr)270 has_tags(GSList * tags, GtkTextTag ** tagarr)
271 {
272 	GSList *tmpslist = tags;
273 	while (tmpslist) {
274 		gint i = 0;
275 		while (tagarr[i]) {
276 			if (tagarr[i] == tmpslist->data) {
277 				/*g_print("has_tags, return TRUE for tag %p in tagarray[%d] %p\n",tagarr[i],i,tagarr); */
278 				return TRUE;
279 			}
280 			i++;
281 		}
282 		tmpslist = tmpslist->next;
283 	}
284 	return FALSE;
285 }
286 
287 static gboolean
get_spellcheck_from_context_at_position(BluefishTextView * btv,GtkTextIter * iter)288 get_spellcheck_from_context_at_position(BluefishTextView *btv, GtkTextIter *iter)
289 {
290 	Tfoundcontext *tmpfcontext=NULL;
291 	Tfound *found;
292 	gint changecounter;
293 
294 	if (!btv || !btv->bflang)
295 		return TRUE;
296 
297 	found = get_foundcache_at_offset(btv, gtk_text_iter_get_offset(iter), NULL);
298 
299 	if (!found) {
300 		return btv->bflang->default_spellcheck;
301 	}
302 
303 	tmpfcontext = found->fcontext;
304 	changecounter = found->numcontextchange;
305 
306 	while (changecounter < 0) {
307 		tmpfcontext = (Tfoundcontext *) tmpfcontext->parentfcontext;
308 		changecounter++;
309 	}
310 	DBG_SPELL("get_spellcheck_from_context_at_position, got context %d with default_spellcheck=%d\n"
311 					,tmpfcontext ? tmpfcontext->context: 0
312 					, tmpfcontext ?g_array_index(btv->bflang->st->contexts, Tcontext, tmpfcontext->context).default_spellcheck: -1);
313 	while (tmpfcontext && g_array_index(btv->bflang->st->contexts, Tcontext, tmpfcontext->context).default_spellcheck == SPELLCHECK_INHERIT) {
314 		tmpfcontext = (Tfoundcontext *) tmpfcontext->parentfcontext;
315 		DBG_SPELL("get_spellcheck_from_context_at_position, get parent, got context %d with default_spellcheck=%d\n"
316 					,tmpfcontext ? tmpfcontext->context: 0
317 					, tmpfcontext ?g_array_index(btv->bflang->st->contexts, Tcontext, tmpfcontext->context).default_spellcheck: -1);
318 	}
319 	if (tmpfcontext) {
320 		return g_array_index(btv->bflang->st->contexts, Tcontext, tmpfcontext->context).default_spellcheck;
321 	}
322 	DBG_SPELL("return bflang->default_spellcheck=%d\n",btv->bflang->default_spellcheck);
323 	return btv->bflang->default_spellcheck;
324 }
325 
326 static gboolean
get_next_region(BluefishTextView * btv,GtkTextIter * so,GtkTextIter * eo)327 get_next_region(BluefishTextView * btv, GtkTextIter * so, GtkTextIter * eo)
328 {
329 	gboolean fso = FALSE, feo = FALSE;
330 	GtkTextIter iter = *so;
331 	GSList *tmpslist;
332 	/* search start */
333 	DBG_SPELL("get_next_region, search from %d to %d\n", gtk_text_iter_get_offset(so),
334 			  gtk_text_iter_get_offset(eo));
335 	do {
336 		tmpslist = gtk_text_iter_get_tags(&iter);
337 		if (has_tags(tmpslist, langmgr_need_spellcheck_tags())) {
338 			/* yes we need to scan */
339 			DBG_SPELL("found tag to scan at %d\n", gtk_text_iter_get_offset(&iter));
340 			fso = TRUE;
341 		} else if (has_tags(tmpslist, langmgr_no_spellcheck_tags())) {
342 			/* do not scan */
343 			DBG_SPELL("found tag not to scan, skip at %d\n", gtk_text_iter_get_offset(&iter));
344 		} else if (btv->bflang && btv->bflang->st) {
345 			/* scan depending on the settings of the language if it needs spell checking in default area's */
346 			fso = get_spellcheck_from_context_at_position(btv, &iter);
347 		} else {
348 			if (btv->bflang->default_spellcheck) {
349 				DBG_SPELL("no tags: scan at %d\n", gtk_text_iter_get_offset(&iter));
350 				fso = TRUE;
351 			}
352 		}
353 		g_slist_free(tmpslist);
354 	} while (fso == FALSE && gtk_text_iter_forward_to_tag_toggle(&iter, NULL)
355 			 && !gtk_text_iter_is_end(&iter));
356 	if (fso) {
357 		*so = iter;
358 
359 		/* search end */
360 		while (feo == FALSE && gtk_text_iter_forward_to_tag_toggle(&iter, NULL)
361 			   && !gtk_text_iter_is_end(&iter)) {
362 			tmpslist = gtk_text_iter_get_tags(&iter);
363 			DBG_SPELL("got %d tags at %d\n", g_slist_length(tmpslist), gtk_text_iter_get_offset(&iter));
364 			if (has_tags(tmpslist, langmgr_need_spellcheck_tags())) {
365 				/* yes we need to scan */
366 				DBG_SPELL("found tag to scan, continue scanning at %d\n", gtk_text_iter_get_offset(&iter));
367 			} else if (has_tags(tmpslist, langmgr_no_spellcheck_tags())) {
368 				/* do not scan */
369 				DBG_SPELL("found tag not to scan at %d\n", gtk_text_iter_get_offset(&iter));
370 				feo = TRUE;
371 			} else {
372 				/* scan depending on the settings of the language if it needs scanning in default area's */
373 				if (!btv->bflang->default_spellcheck) {
374 					DBG_SPELL("no tags, no scan at %d\n", gtk_text_iter_get_offset(&iter));
375 					feo = TRUE;
376 				}
377 			}
378 			g_slist_free(tmpslist);
379 		}
380 		if (feo || btv->bflang->default_spellcheck) {
381 			*eo = iter;
382 			return TRUE;
383 		}
384 	}
385 	DBG_SPELL("get_next_region, return FALSE\n");
386 	return FALSE;
387 }
388 
389 static inline gboolean
forward_to_end_of_entity(GtkTextIter * iter)390 forward_to_end_of_entity(GtkTextIter * iter)
391 {
392 	gint i = 0;
393 	GtkTextIter tmpiter = *iter;
394 	while (gtk_text_iter_get_char(&tmpiter) != ';') {
395 		if (!gtk_text_iter_forward_char(&tmpiter))
396 			return FALSE;
397 		i++;
398 		if (i > 8)
399 			return FALSE;
400 	}
401 	/* forward one more char */
402 	gtk_text_iter_forward_char(&tmpiter);
403 	*iter = tmpiter;
404 	return TRUE;
405 }
406 
/* handle apostrophe and entities in word gracefully
 *
 * Advance eoword to the end of the next word and set soword to the start of
 * that word. The word is then extended while the character at eoword is an
 * apostrophe or, when enable_entities is TRUE, a '&' or ';':
 *  - apostrophe followed by a letter: continue to the next word end;
 *  - '&': skip to just past the terminating ';' (see forward_to_end_of_entity)
 *    and continue when a letter follows;
 *  - ';': if the character before soword is '&', move soword onto that '&'
 *    (done at most once per call), otherwise continue when a letter follows.
 *
 * Returns FALSE only when no further word end exists from eoword; in all
 * other cases TRUE with soword/eoword set.
 */
static inline gboolean
text_iter_next_word_bounds(GtkTextIter * soword, GtkTextIter * eoword, gboolean enable_entities)
{
	gunichar uc;
	/* set once soword has been moved back onto a leading '&' */
	gboolean handled_starting_entity = FALSE;

	if (!gtk_text_iter_forward_word_end(eoword))
		return FALSE;
	*soword = *eoword;
	gtk_text_iter_backward_word_start(soword);

	/* uc is the character directly after the current word end */
	uc = gtk_text_iter_get_char(eoword);
	DBG_SPELL("text_iter_next_word_bounds, uc=%c\n", uc);
	while (uc == '\'' || (enable_entities && (uc == ';' || uc == '&'))) {
		GtkTextIter iter = *eoword;
		DBG_SPELL("text_iter_next_word_bounds, in loop, uc=%c\n", uc);
		if (uc == '\'' && gtk_text_iter_forward_char(&iter)) {
			/* apostrophe: only part of the word when a letter follows it */
			if (g_unichar_isalpha(gtk_text_iter_get_char(&iter))) {
				gtk_text_iter_forward_word_end(eoword);
			} else {
				return TRUE;
			}
		} else if (enable_entities && uc == '&') {
			if (!forward_to_end_of_entity(&iter))
				return TRUE;	/* no entity, return previous word end */
			/* iter is now just past the ';' that closes the entity */
			*eoword = iter;
			if (g_unichar_isalpha(gtk_text_iter_get_char(&iter))) {
				/* continue in the loop */
				gtk_text_iter_forward_word_end(eoword);
			} else {
				/* after the entity the word stops, return TRUE */
				return TRUE;
			}

		} else if (enable_entities && uc == ';') {
			GtkTextIter tmp = *soword;
			if (!handled_starting_entity && gtk_text_iter_backward_char(&tmp)
				&& gtk_text_iter_get_char(&tmp) == '&') {
				/* the word probably starts with an entity */
				*soword = tmp;
				DBG_SPELL
					("text_iter_next_word_bounds, word starts with an enity, forward eoword one position\n");
				/* step over the ';' and take the following word along if there is one */
				gtk_text_iter_forward_char(eoword);
				if (g_unichar_isalpha(gtk_text_iter_get_char(eoword))) {
					gtk_text_iter_forward_word_end(eoword);
				}
				handled_starting_entity = TRUE;
			} else {
				/* a ';' that does not close a leading entity: look at the character after it */
				tmp = *eoword;
				gtk_text_iter_forward_char(&tmp);
				if (g_unichar_isalpha(gtk_text_iter_get_char(&tmp))) {
					gtk_text_iter_forward_word_end(eoword);
				} else {
					DBG_SPELL("text_iter_next_word_bounds, tmp uc=%c\n", gtk_text_iter_get_char(&tmp));
					return TRUE;
				}
			}
		} else {
			/* is it possible to get here? */
			return TRUE;
		}
		uc = gtk_text_iter_get_char(eoword);
		DBG_SPELL("text_iter_next_word_bounds, new uc=%c\n", uc);
	}
	return TRUE;
}
474 
475 #ifdef SPELL_PROFILING
476 	static guint profile_words;
477 #endif
478 
479 static gboolean
spellcheck_region(BluefishTextView * btv,GTimer * timer,GtkTextIter * itcursor,GtkTextIter * so,GtkTextIter * eo)480 spellcheck_region(BluefishTextView * btv, GTimer *timer, GtkTextIter *itcursor, GtkTextIter *so, GtkTextIter *eo)
481 {
482 	GtkTextIter iter;
483 	gboolean cont=TRUE;
484 	gint loop=0;
485 
486 #ifdef SPELL_PROFILING
487 	gdouble time_at_start = g_timer_elapsed(timer, NULL);
488 	guint words_at_start = profile_words;
489 #endif
490 
491 	iter = *so;
492 	DBG_SPELL("spellcheck_region, in bfwin=%p, bfwin->ed=%p loop1 from %d to %d\n",
493 			  DOCUMENT(btv->doc)->bfwin, BFWIN(DOCUMENT(btv->doc)->bfwin)->ed, gtk_text_iter_get_offset(so),
494 			  gtk_text_iter_get_offset(eo));
495 
496 	gtk_text_buffer_remove_tag_by_name(btv->buffer, "_spellerror_", so, eo);
497 	/* we have two loops inside each other:
498 		* loop1 loops over the entire region that was marked with 'needspellcheck'
499 		   within this loop1 html tags and such are found, these are not to be scanned
500 	   * loop2 loops over the elements, keywords, function names etc. that are found
501 	     within this loop only the remaining text is scanned
502 	*/
503 	do {
504 		GtkTextIter eo2 = *eo;
505 		gboolean cont2 = TRUE;
506 		if (btv->bflang->st) {
507 			cont2 = get_next_region(btv, &iter, &eo2);
508 			if (!cont2) {
509 				iter = *eo;
510 			}
511 		} else {				/* no scantable */
512 			eo2 = *eo;
513 		}
514 		DBG_SPELL("spellcheck_region, loop2 from %d to %d\n", gtk_text_iter_get_offset(&iter),
515 				  gtk_text_iter_get_offset(&eo2));
516 		while (cont2 && (loop % loops_per_timer != 0 || g_timer_elapsed(timer, NULL) < MAX_CONTINUOUS_SPELLCHECK_INTERVAL)) {	/* loop from iter to eo2 */
517 			GtkTextIter wordstart = iter;
518 			loop++;
519 			DBG_SPELL("spellcheck_region, iter at %d, now forward to word end\n", gtk_text_iter_get_offset(&iter));
520 			if (text_iter_next_word_bounds(&wordstart, &iter, btv->bflang->spell_decode_entities)
521 				&& gtk_text_iter_compare(&iter, &eo2) <= 0) {
522 				DBG_SPELL("spellcheck_region, iter at %d, backward wordstart at %d\n", gtk_text_iter_get_offset(&iter),
523 						  gtk_text_iter_get_offset(&wordstart));
524 
525 				/* check word */
526 #ifdef SPELL_PROFILING
527 				profile_words++;
528 #endif
529 				spellcheck_word(btv, btv->buffer, &wordstart, &iter);
530 			} else {
531 				DBG_SPELL("spellcheck_region, no word end within region\n");
532 				iter = eo2;
533 				cont2 = FALSE;
534 			}
535 			DBG_SPELL("spellcheck_region, iter=%d, eo=%d, eo2=%d\n", gtk_text_iter_get_offset(&iter),
536 					  gtk_text_iter_get_offset(eo), gtk_text_iter_get_offset(&eo2));
537 			if (cont2 && gtk_text_iter_compare(&iter, &eo2) >= 0)
538 				cont2 = FALSE;
539 		}
540 
541 		if (gtk_text_iter_compare(&iter, eo) >= 0) {
542 			DBG_SPELL
543 				("spellcheck_region, iter (%d) equals eo, finished this area for spell checking\n",
544 				 gtk_text_iter_get_offset(&iter));
545 			cont = FALSE;
546 		}
547 
548 	} while (cont
549 			 && (loop % loops_per_timer != 0
550 				 || g_timer_elapsed(timer, NULL) < MAX_CONTINUOUS_SPELLCHECK_INTERVAL));
551 	DBG_SPELL("spellcheck_region, loop finished, cont=%d, timer_elapsed=%d ms\n",cont,(gint) (1000.0 * g_timer_elapsed(timer, NULL)));
552 	if (cont) {
553 		loops_per_timer = MAX(loop / 10, 100);
554 	}
555 #ifdef SPELL_PROFILING
556 	g_print("%d ms spell run (loop=%d, loops_per_timer=%d) from %d to %d checked %d words\n",loop, loops_per_timer,
557 			(gint) (1000.0 * (g_timer_elapsed(timer, NULL)-time_at_start)), gtk_text_iter_get_offset(so),
558 			gtk_text_iter_get_offset(&iter), profile_words-words_at_start);
559 #endif
560 #ifdef NEEDSCANNING
561 	DBG_SPELL("spellcheck_region, remove needspellcheck from start %d to iter at %d\n",
562 			  gtk_text_iter_get_offset(so), gtk_text_iter_get_offset(&iter));
563 	gtk_text_buffer_remove_tag(btv->buffer, btv->needspellcheck, so, &iter);
564 #endif
565 #ifdef MARKREGION
566 	markregion_region_done(&btv->spellcheck, gtk_text_iter_get_offset(&iter));
567 #endif
568 	return (!gtk_text_iter_is_end(&iter));
569 }
570 
571 gboolean
bftextview2_run_spellcheck(BluefishTextView * btv)572 bftextview2_run_spellcheck(BluefishTextView * btv)
573 {
574 	GtkTextIter so, eo, itcursor;
575 	GTimer *timer;
576 	gboolean cont = TRUE;
577 
578 #ifdef SPELL_PROFILING
579 	profile_words = 0;
580 #endif
581 
582 	if (!btv->spell_check)
583 		return FALSE;
584 
585 	if (!BFWIN(DOCUMENT(btv->doc)->bfwin)->ed && !load_dictionary(BFWIN(DOCUMENT(btv->doc)->bfwin))) {
586 		DBG_SPELL("bftextview2_run_spellcheck, no dictionary.. return..\n");
587 		return FALSE;
588 	}
589 
590 	timer = g_timer_new();
591 
592 	gtk_text_buffer_get_iter_at_mark(btv->buffer, &itcursor, gtk_text_buffer_get_insert(btv->buffer));
593 	/* if we start at the cursor, that might be an indication that the previous word was
594 	skipped because it ended at the cursor, so lets skip back one word */
595 
596 	do {
597 		if (!bftextview2_find_region2spellcheck(btv, &so, &eo)) {
598 			DBG_SPELL("bftextview2_run_spellcheck, no region to spellcheck found... return FALSE\n");
599 			DBG_DELAYSCANNING("bftextview2_run_spellcheck, nothing to spellcheck..\n");
600 			g_timer_destroy(timer);
601 #ifdef SPELL_PROFILING
602 			g_print("no more region to spellcheck, %d ms spell run\n",
603 					(gint) (1000.0 * g_timer_elapsed(timer, NULL)));
604 #endif
605 			return FALSE;
606 		}
607 		DBG_SPELL("bftextview2_run_spellcheck, call spellcheck_region(%d:%d)\n",gtk_text_iter_get_offset(&so), gtk_text_iter_get_offset(&eo));
608 		cont = spellcheck_region(btv, timer, &itcursor, &so, &eo);
609 
610 
611 
612 
613 	} while(cont && g_timer_elapsed(timer, NULL) < MAX_CONTINUOUS_SPELLCHECK_INTERVAL);
614 #ifdef SPELL_PROFILING
615 	g_print("%d ms spell run, checked %d words, not yet finished\n",
616 			(gint) (1000.0 * g_timer_elapsed(timer, NULL)), profile_words);
617 #endif
618 
619 	g_timer_destroy(timer);
620 	return cont;
621 }
622 
623 void
bftextview2_spell_init(void)624 bftextview2_spell_init(void)
625 {
626 	eb = enchant_broker_init();
627 	if (!eb) {
628 		g_warning("could not initialize spell checking engine\n");
629 		return;
630 	}
631 }
632 
633 void
bftextview2_spell_cleanup(void)634 bftextview2_spell_cleanup(void)
635 {
636 	enchant_broker_free(eb);
637 	eb = NULL;
638 }
639 
640 static void
recheck_document(Tdocument * doc)641 recheck_document(Tdocument * doc)
642 {
643 #ifdef NEEDSCANNING
644 	GtkTextIter start, end;
645 	gtk_text_buffer_get_bounds(doc->buffer, &start, &end);
646 	gtk_text_buffer_apply_tag(doc->buffer, BLUEFISH_TEXT_VIEW(doc->view)->needspellcheck, &start, &end);
647 #endif
648 #ifdef MARKREGION
649 	GtkTextIter ite;
650 	gtk_text_buffer_get_end_iter(BLUEFISH_TEXT_VIEW(doc->view)->buffer, &ite);
651 	markregion_nochange(&BLUEFISH_TEXT_VIEW(doc->view)->spellcheck, 0, gtk_text_iter_get_offset(&ite));
652 #endif
653 }
654 
655 static void
recheck_bfwin(Tbfwin * bfwin)656 recheck_bfwin(Tbfwin * bfwin)
657 {
658 	GList *tmplist;
659 	for (tmplist = g_list_first(bfwin->documentlist); tmplist; tmplist = g_list_next(tmplist)) {
660 		recheck_document(DOCUMENT(tmplist->data));
661 	}
662 	if (bfwin->current_document)
663 		bluefish_text_view_rescan(BLUEFISH_TEXT_VIEW(bfwin->current_document->view));
664 }
665 
666 
667 static gboolean
get_misspelled_word_at_bevent(BluefishTextView * btv,GtkTextIter * wordstart,GtkTextIter * wordend)668 get_misspelled_word_at_bevent(BluefishTextView * btv, GtkTextIter * wordstart, GtkTextIter * wordend)
669 {
670 	GtkTextTag *misspelled;
671 
672 	misspelled = gtk_text_tag_table_lookup(langmgr_get_tagtable(), "_spellerror_");
673 	gtk_text_buffer_get_iter_at_offset(gtk_text_view_get_buffer(GTK_TEXT_VIEW(btv)),
674 									   wordstart, main_v->bevent_charoffset);
675 	if (gtk_text_iter_has_tag(wordstart, misspelled)) {
676 		*wordend = *wordstart;
677 		if (gtk_text_iter_backward_to_tag_toggle(wordstart, misspelled)
678 			&& gtk_text_iter_forward_to_tag_toggle(wordend, misspelled))
679 			return TRUE;
680 	}
681 	return FALSE;
682 }
683 
684 static void
bftextview2_add_word_backend(BluefishTextView * btv,Tbfwin * bfwin,gboolean to_dict)685 bftextview2_add_word_backend(BluefishTextView * btv, Tbfwin * bfwin, gboolean to_dict)
686 {
687 	GtkTextIter so, eo;
688 	gchar *word;
689 	if (!get_misspelled_word_at_bevent(btv, &so, &eo))
690 		return;
691 
692 	word = gtk_text_buffer_get_text(gtk_text_view_get_buffer(GTK_TEXT_VIEW(btv)), &so, &eo, FALSE);
693 	if (to_dict) {
694 #ifdef HAVE_LIBENCHANT_2
695 		enchant_dict_add((EnchantDict *) bfwin->ed, word, strlen(word));
696 #else
697 #ifdef HAVE_LIBENCHANT_1_4
698 		enchant_dict_add((EnchantDict *) bfwin->ed, word, strlen(word));
699 #else
700 		enchant_dict_add_to_pwl((EnchantDict *) bfwin->ed, word, strlen(word));
701 #endif
702 #endif
703 	} else {
704 		enchant_dict_add_to_session((EnchantDict *) bfwin->ed, word, strlen(word));
705 	}
706 	recheck_bfwin(bfwin);
707 }
708 
709 static void
bftexview2_add_word_to_dict(GtkWidget * widget,gpointer data)710 bftexview2_add_word_to_dict(GtkWidget * widget, gpointer data)
711 {
712 	Tdocument *doc = data;
713 	bftextview2_add_word_backend(BLUEFISH_TEXT_VIEW(doc->view), BFWIN(doc->bfwin), TRUE);
714 }
715 
716 static void
bftexview2_add_word_to_ses(GtkWidget * widget,gpointer data)717 bftexview2_add_word_to_ses(GtkWidget * widget, gpointer data)
718 {
719 	Tdocument *doc = data;
720 	bftextview2_add_word_backend(BLUEFISH_TEXT_VIEW(doc->view), BFWIN(doc->bfwin), FALSE);
721 }
722 
723 static void
bftextview2_suggestion_menu_lcb(GtkWidget * widget,gpointer data)724 bftextview2_suggestion_menu_lcb(GtkWidget * widget, gpointer data)
725 {
726 	Tdocument *doc = data;
727 	GtkTextIter wordstart, wordend;
728 	if (main_v->bevent_doc != doc)
729 		return;
730 	DBG_SPELL("chosen %s\n", gtk_label_get_text(GTK_LABEL(gtk_bin_get_child(GTK_BIN(widget)))));
731 	if (get_misspelled_word_at_bevent(BLUEFISH_TEXT_VIEW(doc->view), &wordstart, &wordend)) {
732 		gint start, end;
733 		/* no need to remove the tag because the text with this tag is deleted by the replace
734 		   gtk_text_buffer_remove_tag_by_name(doc->buffer, "_spellerror_", &wordstart, &wordend); */
735 		start = gtk_text_iter_get_offset(&wordstart);
736 		end = gtk_text_iter_get_offset(&wordend);
737 		if (BFWIN(DOCUMENT(data)->bfwin)->session->spell_insert_entities) {
738 			gchar *word;
739 			word =
740 				utf82xmlentities(gtk_label_get_text(GTK_LABEL(gtk_bin_get_child(GTK_BIN(widget)))), TRUE,
741 								 TRUE, TRUE, TRUE, TRUE, FALSE);
742 			doc_replace_text(doc, word, start, end);
743 			g_free(word);
744 		} else {
745 			doc_replace_text(doc, gtk_label_get_text(GTK_LABEL(gtk_bin_get_child(GTK_BIN(widget)))), start,
746 							 end);
747 		}
748 	}
749 }
750 
751 static void
mark_all_docs_needspelling(Tbfwin * bfwin)752 mark_all_docs_needspelling(Tbfwin * bfwin)
753 {
754 	GList *tmplist;
755 	/* now mark all documents in this window with 'need_spellcheck' */
756 	for (tmplist = g_list_first(bfwin->documentlist); tmplist; tmplist = g_list_next(tmplist)) {
757 		recheck_document(DOCUMENT(tmplist->data));
758 	}
759 	if (bfwin->current_document)
760 		bluefish_text_view_rescan(BLUEFISH_TEXT_VIEW(bfwin->current_document->view));
761 }
762 
763 static void
bftextview2_preferences_menu_lcb(GtkWidget * widget,gpointer data)764 bftextview2_preferences_menu_lcb(GtkWidget * widget, gpointer data)
765 {
766 	Tbfwin *bfwin = data;
767 	/*g_print("bftextview2_preferences_menu_lcb, called for widget %p and bfwin %p\n",widget,data);*/
768 	if (gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(widget))) {
769 		/*g_print("check menu widget %p is active, with label %s\n",widget, gtk_label_get_text(GTK_LABEL(gtk_bin_get_child(GTK_BIN(widget)))));*/
770 		if (bfwin->session->spell_lang)
771 			g_free(bfwin->session->spell_lang);
772 		bfwin->session->spell_lang =
773 			g_strdup(gtk_label_get_text(GTK_LABEL(gtk_bin_get_child(GTK_BIN(widget)))));
774 		DBG_SPELL("bftextview2_preferences_menu_lcb, set spell check language '%s'\n",bfwin->session->spell_lang);
775 		if (load_dictionary(bfwin)) {
776 			mark_all_docs_needspelling(bfwin);
777 		} else {
778 			DBG_SPELL("failed to load dictionary ?\n");
779 		}
780 	}
781 }
782 
783 static void
bftexview2_spell_insert_entities(GtkWidget * widget,gpointer data)784 bftexview2_spell_insert_entities(GtkWidget * widget, gpointer data)
785 {
786 	BFWIN(DOCUMENT(data)->bfwin)->session->spell_insert_entities =
787 		gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(widget));
788 }
789 
/* State passed to list_dicts_lcb() while the dictionary language menu is built. */
typedef struct {
	/* window whose session spell_lang marks the active item; also handed to the item callback */
	Tbfwin *bfwin;
	/* menu the language items are prepended to */
	GtkWidget *menu;
	/* radio group used for the language items; NULL until the first item exists */
	GSList *group;
} Tdictlist;
795 
796 static void
list_dicts_lcb(const char * const lang_tag,const char * const provider_name,const char * const provider_desc,const char * const provider_file,void * data)797 list_dicts_lcb(const char *const lang_tag, const char *const provider_name, const char *const provider_desc,
798 			   const char *const provider_file, void *data)
799 {
800 	Tdictlist *dl = data;
801 	GtkWidget *menuitem;
802 	DBG_SPELL("lang_tag=%s, provider_name=%s, provider_desc=%s, provider_file=%s\n",lang_tag,provider_name,provider_desc,provider_file);
803 	menuitem = gtk_radio_menu_item_new_with_label(dl->group, lang_tag);
804 	if (!dl->group)
805 		dl->group = gtk_radio_menu_item_get_group(GTK_RADIO_MENU_ITEM(menuitem));
806 	if (g_strcmp0(dl->bfwin->session->spell_lang, lang_tag) == 0) {
807 		gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(menuitem), TRUE);
808 	}
809 	/*g_print("list_dicts_lcb, connect menuitem %p with group %p and label %s and bfwin %p to menu callback\n",menuitem, dl->group, lang_tag, dl->bfwin);*/
810 	g_signal_connect(menuitem, "activate", G_CALLBACK(bftextview2_preferences_menu_lcb), dl->bfwin);
811 	gtk_menu_shell_prepend(GTK_MENU_SHELL(dl->menu), GTK_WIDGET(menuitem));
812 }
813 
814 static gboolean
pure_ascii(const gchar * string)815 pure_ascii(const gchar * string)
816 {
817 	gunichar uchar;
818 	const gchar *tmp = string;
819 	do {
820 		uchar = g_utf8_get_char(tmp);
821 		if (uchar > 127)
822 			return FALSE;
823 		tmp = g_utf8_next_char(tmp);
824 	} while (*tmp != '\0');
825 	return TRUE;
826 }
827 
828 void
bftextview2_populate_suggestions_popup(GtkMenu * menu,Tdocument * doc)829 bftextview2_populate_suggestions_popup(GtkMenu * menu, Tdocument * doc)
830 {
831 	GtkTextIter wordstart, wordend;
832 	Tdictlist dl;
833 	GtkWidget *menuitem, *submenu;
834 
835 	if (main_v->bevent_doc != doc)
836 		return;
837 
838 	if (!BLUEFISH_TEXT_VIEW(doc->view)->spell_check)
839 		return;
840 
841 	gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(gtk_menu_item_new()));
842 
843 	menuitem = gtk_image_menu_item_new_with_label(_("Spell check language"));
844 	gtk_image_menu_item_set_image(GTK_IMAGE_MENU_ITEM(menuitem),
845 								  gtk_image_new_from_stock(GTK_STOCK_SPELL_CHECK, GTK_ICON_SIZE_MENU));
846 	gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(menuitem));
847 
848 	submenu = gtk_menu_new();
849 	gtk_menu_item_set_submenu(GTK_MENU_ITEM(menuitem), submenu);
850 
851 	dl.bfwin = doc->bfwin;
852 	dl.menu = submenu;
853 	dl.group = NULL;
854 	enchant_broker_list_dicts(eb, list_dicts_lcb, &dl);
855 
856 	if (!BFWIN(doc->bfwin)->ed)
857 		return;
858 
859 	if (get_misspelled_word_at_bevent(BLUEFISH_TEXT_VIEW(doc->view), &wordstart, &wordend)) {
860 		gchar *word, **suggestions;
861 		size_t n_suggs;
862 		gboolean have_non_ascii = FALSE;
863 
864 		word = gtk_text_buffer_get_text(doc->buffer, &wordstart, &wordend, FALSE);
865 
866 		if (g_utf8_strchr(word, -1, '&') && g_utf8_strchr(word, -1, ';')) {
867 			gchar *tmp = xmlentities2utf8(word);
868 			g_free(word);
869 			word = tmp;
870 		}
871 
872 		DBG_SPELL("list alternatives for %s\n", word);
873 		suggestions =
874 			enchant_dict_suggest((EnchantDict *) BFWIN(doc->bfwin)->ed, word, strlen(word), &n_suggs);
875 
876 		menuitem = gtk_image_menu_item_new_with_label(_("Add to dictionary"));
877 		gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(menuitem));
878 		g_signal_connect(menuitem, "activate", G_CALLBACK(bftexview2_add_word_to_dict), doc);
879 		menuitem = gtk_image_menu_item_new_with_label(_("Ignore spelling"));
880 		gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(menuitem));
881 		g_signal_connect(menuitem, "activate", G_CALLBACK(bftexview2_add_word_to_ses), doc);
882 
883 		if (suggestions) {
884 			gint i;
885 			for (i = 0; i < n_suggs; i++) {
886 				if (!pure_ascii(suggestions[i])) {
887 					have_non_ascii = TRUE;
888 					break;
889 				}
890 			}
891 		}
892 
893 		if (have_non_ascii) {
894 			menuitem = gtk_check_menu_item_new_with_label(_("Insert special characters as entities"));
895 			gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(menuitem),
896 										   BFWIN(doc->bfwin)->session->spell_insert_entities);
897 			gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(menuitem));
898 			g_signal_connect(menuitem, "activate", G_CALLBACK(bftexview2_spell_insert_entities), doc);
899 		}
900 
901 		if (suggestions) {
902 			GtkWidget *menuitem;
903 			gint i;
904 			gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(gtk_menu_item_new()));
905 			for (i = 0; i < n_suggs; i++) {
906 				menuitem = gtk_image_menu_item_new_with_label(suggestions[i]);
907 				g_signal_connect(menuitem, "activate", G_CALLBACK(bftextview2_suggestion_menu_lcb), doc);
908 				/*gtk_image_menu_item_set_image(GTK_IMAGE_MENU_ITEM(menuitem),gtk_image_new_from_stock(GTK_STOCK_FIND, GTK_ICON_SIZE_MENU)); */
909 				gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(menuitem));
910 			}
911 
912 			enchant_dict_free_string_list((EnchantDict *) BFWIN(doc->bfwin)->ed, suggestions);
913 		}
914 		g_free(word);
915 	}
916 }
917 
918 void
reload_spell_dictionary(Tbfwin * bfwin)919 reload_spell_dictionary(Tbfwin * bfwin)
920 {
921 	if (load_dictionary(bfwin)) {
922 		mark_all_docs_needspelling(bfwin);
923 	}
924 }
925 
926 
927 /*
928 static void bftextview2_preferences_menu_enable_lcb(GtkWidget *widget, gpointer data) {
929 	Tbfwin *bfwin=data;
930 	bfwin->session->spell_enable = gtk_check_menu_item_get_active(GTK_CHECK_MENU_ITEM(widget));
931 	gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(bfwin->toolbar_spell),bfwin->session->spell_enable);
932 }
933 
934 void bftextview2_populate_preferences_popup(GtkMenu *menu, Tdocument *doc) {
935 	GtkWidget *menuitem, *submenu;
936 	Tdictlist dl;
937 
938 
939 	menuitem = gtk_check_menu_item_new_with_label(_("Enable spell check"));
940 	gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(menuitem), BFWIN(doc->bfwin)->session->spell_enable);
941 	g_signal_connect(menuitem, "activate", G_CALLBACK(bftextview2_preferences_menu_enable_lcb), doc->bfwin);
942 	gtk_menu_shell_prepend(GTK_MENU_SHELL(menu), GTK_WIDGET(menuitem));
943 }*/
944 
945 /*void bftextview2_gui_toggle_spell_check(GtkWidget *widget, gpointer data) {
946 	Tbfwin *bfwin=data;
947 	bfwin->session->spell_enable = gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(widget));
948 	if (bfwin->current_document && bfwin->current_document->view) {
		/ * the signal is also emitted when the toggle button gets its initial value during the building of the window * /
950 		bluefish_text_view_rescan(BLUEFISH_TEXT_VIEW(bfwin->current_document->view));
951 	}
952 }*/
953 
954 #endif							/*HAVE_LIBENCHANT */
955