1
2 /*
3 * auto-correct.c:
4 *
5 * Authors:
6 * Jukka-Pekka Iivonen <jiivonen@hutcs.cs.hut.fi>
7 * Morten Welinder (UTF-8).
8 *
9 * (C) Copyright 2000, 2001 by Jukka-Pekka Iivonen <iivonen@iki.fi>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses/>.
23 */
24
25 #include <gnumeric-config.h>
26 #include <glib/gi18n-lib.h>
27 #include <gnumeric.h>
28 #include <tools/auto-correct.h>
29
30 #include <application.h>
31 #include <gutils.h>
32 #include <gnumeric-conf.h>
33 #include <parse-util.h>
34 #include <goffice/goffice.h>
35 #include <gsf/gsf-impl-utils.h>
36 #include <string.h>
37
38
39 /*
40 * Utility to replace a single character in an UTF-8 string.
41 */
42 static char *
replace1(const char * src,int keepbytes,const char * mid,const char * tail)43 replace1 (const char *src, int keepbytes, const char *mid, const char *tail)
44 {
45 int midlen = strlen (mid);
46 char *dst = g_new (char, strlen (src) + midlen + 2);
47 char *p = dst;
48
49 memcpy (p, src, keepbytes);
50 p += keepbytes;
51
52 strcpy (p, mid);
53 p += midlen;
54
55 strcpy (p, tail);
56 return dst;
57 }
58
59
60 static char *
autocorrect_initial_caps(const char * src)61 autocorrect_initial_caps (const char *src)
62 {
63 enum State {
64 S_waiting_for_word_begin,
65 S_waiting_for_whitespace,
66 S_seen_one_caps,
67 S_seen_two_caps
68 };
69
70 enum State state = S_waiting_for_word_begin;
71 char *res = NULL;
72 const char *p;
73
74 if (gnm_expr_char_start_p (src))
75 return NULL;
76
77 for (p = src; *p; p = g_utf8_next_char (p)) {
78 gunichar c = g_utf8_get_char (p);
79
80 switch (state) {
81 case S_waiting_for_word_begin:
82 if (g_unichar_isupper (c))
83 state = S_seen_one_caps;
84 else if (g_unichar_isalpha (c))
85 state = S_waiting_for_whitespace;
86 break;
87
88 case S_waiting_for_whitespace:
89 if (g_unichar_isspace (c))
90 state = S_waiting_for_word_begin;
91 break;
92
93 case S_seen_one_caps:
94 if (g_unichar_isupper (c))
95 state = S_seen_two_caps;
96 else
97 state = S_waiting_for_whitespace;
98 break;
99
100 case S_seen_two_caps:
101 state = S_waiting_for_whitespace;
102
103 if (g_unichar_islower (c)) {
104 const char *target = g_utf8_prev_char (p);
105 const char *begin = g_utf8_prev_char (target);
106 GSList *l;
107 char *newres, *lotext;
108 gboolean exception_found = FALSE;
109
110 for (l = gnm_conf_get_autocorrect_init_caps_list (); l; l = l->next) {
111 const char *except = l->data;
112 if (strncmp (begin, except, strlen (except)) == 0) {
113 exception_found = TRUE;
114 break;
115 }
116 }
117
118 if (!exception_found) {
119 const char *q;
120 for (q = g_utf8_next_char (p);
121 *q && !g_unichar_isspace (g_utf8_get_char (q));
122 q = g_utf8_next_char (q)) {
123 if (g_unichar_isupper
124 (g_utf8_get_char (q))) {
125 exception_found = TRUE;
126 break;
127 }
128 }
129 }
130
131 if (!exception_found) {
132 lotext = g_utf8_strdown (target, 1);
133 newres = replace1 (src, target - src, lotext, p);
134 g_free (lotext);
135 p = newres + (p - src);
136 g_free (res);
137 src = res = newres;
138 }
139 }
140 break;
141
142 #ifndef DEBUG_SWITCH_ENUM
143 default:
144 g_assert_not_reached ();
145 #endif
146 }
147 }
148
149 return res;
150 }
151
152 static gboolean
autocorrect_first_letter_exception(const char * start,const char * end)153 autocorrect_first_letter_exception (const char *start, const char *end)
154 {
155 GSList *l = gnm_conf_get_autocorrect_first_letter_list ();
156 char *text;
157
158 if (l == NULL)
159 return FALSE;
160
161 text = g_strndup (start, end - start + 1);
162
163 for (; l != NULL; l = l->next) {
164 if (g_str_has_suffix(text, l->data)) {
165 g_free (text);
166 return TRUE;
167 }
168 }
169
170 g_free (text);
171 return FALSE;
172 }
173
174
175 static gboolean
autocorrect_first_letter_trigger(gunichar this_char)176 autocorrect_first_letter_trigger (gunichar this_char)
177 {
178 if (!g_unichar_ispunct (this_char))
179 return FALSE;
180
181 return (
182 this_char == 0x0021 ||
183 this_char == 0x002e ||
184 this_char == 0x003f ||
185 this_char == 0x037e ||
186 this_char == 0x0589 ||
187 this_char == 0x061f ||
188 this_char == 0x0700 ||
189 this_char == 0x0701 ||
190 this_char == 0x0702 ||
191 this_char == 0x1362 ||
192 this_char == 0x1367 ||
193 this_char == 0x1368 ||
194 this_char == 0x166e ||
195 this_char == 0x1803 ||
196 this_char == 0x1809 ||
197 this_char == 0x1944 ||
198 this_char == 0x1945 ||
199 this_char == 0x203c ||
200 this_char == 0x203d ||
201 this_char == 0x2047 ||
202 this_char == 0x2048 ||
203 this_char == 0x2049 ||
204 this_char == 0x3002 ||
205 this_char == 0xfe52 ||
206 this_char == 0xfe56 ||
207 this_char == 0xfe57 ||
208 this_char == 0xff01 ||
209 this_char == 0xff0e ||
210 this_char == 0xff1f ||
211 this_char == 0xff61
212 );
213 }
214
215 static char *
autocorrect_first_letter(const char * src)216 autocorrect_first_letter (const char *src)
217 {
218 const char * last_end = NULL;
219 const char *last_copy = src;
220 const char *this;
221 GString *gstr = NULL;
222 gboolean seen_text = FALSE;
223 gboolean seen_white = FALSE;
224
225 for (this = src; '\0' != *this; this = g_utf8_next_char (this)) {
226 gunichar this_char = g_utf8_get_char (this);
227
228 seen_text = seen_text || g_unichar_isalpha (this_char);
229
230 if (seen_text && autocorrect_first_letter_trigger (this_char))
231 last_end = this;
232 else if ((last_end != NULL) && g_unichar_isspace (this_char))
233 seen_white = TRUE;
234 else if ((last_end != NULL) && !g_unichar_isspace (this_char)) {
235 if (seen_white) {
236 gunichar new = g_unichar_totitle (this_char);
237
238 if ((this_char != new) &&
239 !autocorrect_first_letter_exception (src, last_end)) {
240 if (gstr == NULL)
241 gstr = g_string_new (NULL);
242 g_string_append_len (gstr, last_copy,
243 this - last_copy);
244 g_string_append_unichar (gstr, new);
245 last_copy = g_utf8_next_char (this);
246 }
247 seen_white = FALSE;
248 }
249 last_end = NULL;
250 }
251 }
252
253 if (gstr != NULL) {
254 g_string_append_len (gstr, last_copy,
255 strlen (last_copy));
256 return g_string_free (gstr, FALSE);
257 }
258
259 return NULL;
260 }
261
262
/*
 * Capitalize the English names of the days of the week.
 *
 * Every lower-case occurrence of a day name in @src gets its first
 * letter upper-cased.  Matching is a plain substring search, so a day
 * name embedded in a longer word is capitalized as well.
 *
 * Returns a newly allocated corrected string (free with g_free), or NULL
 * if @src contains no lower-case day name.
 */
static char *
autocorrect_names_of_days (const char *src)
{
	/* English, except for lower case. */
	static char const * const days[7] = {
		"monday", "tuesday", "wednesday", "thursday",
		"friday", "saturday", "sunday"
	};

	char *res = NULL;
	size_t i;

	for (i = 0; i < G_N_ELEMENTS (days); i++) {
		const char *day = days[i];
		size_t daylen = strlen (day);
		const char *pos = strstr (src, day);

		while (pos != NULL) {
			size_t ofs = pos - src;

			/* Copy the input once, the first time a change is needed. */
			if (res == NULL)
				src = res = g_strdup (src);

			/* It's ASCII... */
			res[ofs] += ('A' - 'a');

			/* Keep looking for further occurrences of the same day. */
			pos = strstr (res + ofs + daylen, day);
		}
	}

	return res;
}
290
291
292 /*
293 * NOTE: If in the future this is extended with methods that insert or
294 * delete characters (bytes to be precise), the there might need to be
295 * rich text corrections.
296 */
297 char *
autocorrect_tool(char const * src)298 autocorrect_tool (char const *src)
299 {
300 char *res = NULL;
301
302 if (gnm_conf_get_autocorrect_init_caps ()) {
303 char *res2 = autocorrect_initial_caps (src);
304 if (res2) {
305 g_free (res);
306 src = res = res2;
307 }
308 }
309
310 if (gnm_conf_get_autocorrect_first_letter ()) {
311 char *res2 = autocorrect_first_letter (src);
312 if (res2) {
313 g_free (res);
314 src = res = res2;
315 }
316 }
317
318 if (gnm_conf_get_autocorrect_names_of_days ()) {
319 char *res2 = autocorrect_names_of_days (src);
320 if (res2) {
321 g_free (res);
322 src = res = res2;
323 }
324 }
325
326 if (!res) res = g_strdup (src);
327 return res;
328 }
329