1 
2 /*
3  * auto-correct.c:
4  *
5  * Authors:
6  *        Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
7  *        Morten Welinder (UTF-8).
8  *
9  * (C) Copyright 2000, 2001 by Jukka-Pekka Iivonen <iivonen@iki.fi>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, see <https://www.gnu.org/licenses/>.
23  */
24 
25 #include <gnumeric-config.h>
26 #include <glib/gi18n-lib.h>
27 #include <gnumeric.h>
28 #include <tools/auto-correct.h>
29 
30 #include <application.h>
31 #include <gutils.h>
32 #include <gnumeric-conf.h>
33 #include <parse-util.h>
34 #include <goffice/goffice.h>
35 #include <gsf/gsf-impl-utils.h>
36 #include <string.h>
37 
38 
39 /*
40  * Utility to replace a single character in an UTF-8 string.
41  */
42 static char *
replace1(const char * src,int keepbytes,const char * mid,const char * tail)43 replace1 (const char *src, int keepbytes, const char *mid, const char *tail)
44 {
45 	int midlen = strlen (mid);
46 	char *dst = g_new (char, strlen (src) + midlen + 2);
47 	char *p = dst;
48 
49 	memcpy (p, src, keepbytes);
50 	p += keepbytes;
51 
52 	strcpy (p, mid);
53 	p += midlen;
54 
55 	strcpy (p, tail);
56 	return dst;
57 }
58 
59 
60 static char *
autocorrect_initial_caps(const char * src)61 autocorrect_initial_caps (const char *src)
62 {
63 	enum State {
64 		S_waiting_for_word_begin,
65 		S_waiting_for_whitespace,
66 		S_seen_one_caps,
67 		S_seen_two_caps
68 	};
69 
70 	enum State state = S_waiting_for_word_begin;
71 	char *res = NULL;
72 	const char *p;
73 
74 	if (gnm_expr_char_start_p (src))
75 		return NULL;
76 
77 	for (p = src; *p; p = g_utf8_next_char (p)) {
78 		gunichar c = g_utf8_get_char (p);
79 
80 		switch (state) {
81 		case S_waiting_for_word_begin:
82 			if (g_unichar_isupper (c))
83 				state = S_seen_one_caps;
84 			else if (g_unichar_isalpha (c))
85 				state = S_waiting_for_whitespace;
86 			break;
87 
88 		case S_waiting_for_whitespace:
89 			if (g_unichar_isspace (c))
90 				state = S_waiting_for_word_begin;
91 			break;
92 
93 		case S_seen_one_caps:
94 			if (g_unichar_isupper (c))
95 				state = S_seen_two_caps;
96 			else
97 				state = S_waiting_for_whitespace;
98 			break;
99 
100 		case S_seen_two_caps:
101 			state = S_waiting_for_whitespace;
102 
103 			if (g_unichar_islower (c)) {
104 				const char *target = g_utf8_prev_char (p);
105 				const char *begin = g_utf8_prev_char (target);
106 				GSList *l;
107 				char *newres, *lotext;
108 				gboolean exception_found = FALSE;
109 
110 				for (l = gnm_conf_get_autocorrect_init_caps_list (); l; l = l->next) {
111 					const char *except = l->data;
112 					if (strncmp (begin, except, strlen (except)) == 0) {
113 						exception_found = TRUE;
114 						break;
115 					}
116 				}
117 
118 				if (!exception_found) {
119 					const char *q;
120 					for (q = g_utf8_next_char (p);
121 					     *q && !g_unichar_isspace (g_utf8_get_char (q));
122 					     q = g_utf8_next_char (q)) {
123 						if (g_unichar_isupper
124 						    (g_utf8_get_char (q))) {
125 							exception_found = TRUE;
126 							break;
127 						}
128 					}
129 				}
130 
131 				if (!exception_found) {
132 					lotext = g_utf8_strdown (target, 1);
133 					newres = replace1 (src, target - src, lotext, p);
134 					g_free (lotext);
135 					p = newres + (p - src);
136 					g_free (res);
137 					src = res = newres;
138 				}
139 			}
140 			break;
141 
142 #ifndef DEBUG_SWITCH_ENUM
143 		default:
144 			g_assert_not_reached ();
145 #endif
146 		}
147 	}
148 
149 	return res;
150 }
151 
152 static gboolean
autocorrect_first_letter_exception(const char * start,const char * end)153 autocorrect_first_letter_exception (const char *start, const char *end)
154 {
155 	GSList *l = gnm_conf_get_autocorrect_first_letter_list ();
156 	char *text;
157 
158 	if (l == NULL)
159 		return FALSE;
160 
161 	text = g_strndup (start, end - start + 1);
162 
163 	for (; l != NULL; l = l->next) {
164 		if (g_str_has_suffix(text, l->data)) {
165 			g_free (text);
166 			return TRUE;
167 		}
168 	}
169 
170 	g_free (text);
171 	return FALSE;
172 }
173 
174 
175 static gboolean
autocorrect_first_letter_trigger(gunichar this_char)176 autocorrect_first_letter_trigger (gunichar this_char)
177 {
178 	if (!g_unichar_ispunct (this_char))
179 		return FALSE;
180 
181 	return (
182 		this_char == 0x0021 ||
183 		this_char == 0x002e ||
184 		this_char == 0x003f ||
185 		this_char == 0x037e ||
186 		this_char == 0x0589 ||
187 		this_char == 0x061f ||
188 		this_char == 0x0700 ||
189 		this_char == 0x0701 ||
190 		this_char == 0x0702 ||
191 		this_char == 0x1362 ||
192 		this_char == 0x1367 ||
193 		this_char == 0x1368 ||
194 		this_char == 0x166e ||
195 		this_char == 0x1803 ||
196 		this_char == 0x1809 ||
197 		this_char == 0x1944 ||
198 		this_char == 0x1945 ||
199 		this_char == 0x203c ||
200 		this_char == 0x203d ||
201 		this_char == 0x2047 ||
202 		this_char == 0x2048 ||
203 		this_char == 0x2049 ||
204 		this_char == 0x3002 ||
205 		this_char == 0xfe52 ||
206 		this_char == 0xfe56 ||
207 		this_char == 0xfe57 ||
208 		this_char == 0xff01 ||
209 		this_char == 0xff0e ||
210 		this_char == 0xff1f ||
211 		this_char == 0xff61
212 		);
213 }
214 
215 static char *
autocorrect_first_letter(const char * src)216 autocorrect_first_letter (const char *src)
217 {
218 	const char * last_end = NULL;
219 	const char *last_copy = src;
220 	const char *this;
221 	GString *gstr = NULL;
222 	gboolean seen_text = FALSE;
223 	gboolean seen_white = FALSE;
224 
225 	for (this = src; '\0' != *this; this = g_utf8_next_char (this)) {
226 		gunichar this_char = g_utf8_get_char (this);
227 
228 		seen_text = seen_text || g_unichar_isalpha (this_char);
229 
230 		if (seen_text && autocorrect_first_letter_trigger (this_char))
231 			last_end = this;
232 		else if ((last_end != NULL) && g_unichar_isspace (this_char))
233 			seen_white = TRUE;
234 		else if ((last_end != NULL) && !g_unichar_isspace (this_char)) {
235 			if (seen_white) {
236 				gunichar new = g_unichar_totitle (this_char);
237 
238 				if ((this_char != new) &&
239 				    !autocorrect_first_letter_exception (src, last_end)) {
240 					if (gstr == NULL)
241 						gstr = g_string_new (NULL);
242 					g_string_append_len (gstr, last_copy,
243 							     this - last_copy);
244 					g_string_append_unichar (gstr, new);
245 					last_copy = g_utf8_next_char (this);
246 				}
247 				seen_white = FALSE;
248 			}
249 			last_end = NULL;
250 		}
251 	}
252 
253 	if (gstr != NULL) {
254 		g_string_append_len (gstr, last_copy,
255 				     strlen (last_copy));
256 		return g_string_free (gstr, FALSE);
257 	}
258 
259 	return NULL;
260 }
261 
262 
/*
 * Capitalize the English names of the days of the week.
 *
 * Every occurrence of a lower-case day name in SRC gets its first letter
 * turned into upper case.  Matching is a plain substring search, so a
 * name embedded in a longer word is capitalized too.
 *
 * Returns a newly allocated corrected string, or NULL when SRC contains
 * no lower-case day name.  The result always has the same byte length as
 * SRC.
 */
static char *
autocorrect_names_of_days (const char *src)
{
	/* English, except for lower case.  */
	static char const * const days[7] = {
		"monday", "tuesday", "wednesday", "thursday",
		"friday", "saturday", "sunday"
	};

	char *res = NULL;
	int i;

	for (i = 0; i < 7; i++) {
		const char *day = days[i];
		size_t daylen = strlen (day);
		/* Search the working copy once there is one.  */
		const char *pos = strstr (res ? res : src, day);

		while (pos) {
			size_t offset;

			if (res == NULL) {
				/* First hit overall: make the one and only copy.  */
				offset = pos - src;
				res = g_strdup (src);
			} else
				offset = pos - res;

			/* It's ASCII...  */
			res[offset] += ('A' - 'a');

			/* Carry on after this occurrence.  */
			pos = strstr (res + offset + daylen, day);
		}
	}

	return res;
}
290 
291 
292 /*
293  * NOTE: If in the future this is extended with methods that insert or
294  * delete characters (bytes to be precise), the there might need to be
295  * rich text corrections.
296  */
297 char *
autocorrect_tool(char const * src)298 autocorrect_tool (char const *src)
299 {
300 	char *res = NULL;
301 
302         if (gnm_conf_get_autocorrect_init_caps ()) {
303 		char *res2 = autocorrect_initial_caps (src);
304 		if (res2) {
305 			g_free (res);
306 			src = res = res2;
307 		}
308 	}
309 
310 	if (gnm_conf_get_autocorrect_first_letter ()) {
311 		char *res2 = autocorrect_first_letter (src);
312 		if (res2) {
313 			g_free (res);
314 			src = res = res2;
315 		}
316 	}
317 
318 	if (gnm_conf_get_autocorrect_names_of_days ()) {
319 		char *res2 = autocorrect_names_of_days (src);
320 		if (res2) {
321 			g_free (res);
322 			src = res = res2;
323 		}
324 	}
325 
326 	if (!res) res = g_strdup (src);
327 	return res;
328 }
329