1 /*
2  * Copyright (C) 2005-2006 Evgeniy <dushistov@mail.ru>
3  * Copyright 2011 kubtek <kubtek@mail.com>
4  *
5  * This file is part of StarDict.
6  *
7  * StarDict is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * StarDict is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with StarDict.  If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #ifdef HAVE_CONFIG_H
22 #  include "config.h"
23 #endif
24 
25 #include <cstring>
26 #include <glib.h>
27 #include <glib/gi18n.h>
28 #include <glib/gstdio.h>
29 #include <cstdlib>
30 #include <gtk/gtk.h>
31 #include <fcntl.h>
32 #include <cerrno>
33 
34 #ifdef CONFIG_GNOME
35 #  include <libgnome/libgnome.h>
36 #elif defined(_WIN32)
37 #  include <gdk/gdkwin32.h>
38 #  include <Shlwapi.h>
39 #  include <io.h>
40 #  include <ERRNO.H>
41 #endif
42 
43 #ifndef O_BINARY
44 #define O_BINARY 0
45 #endif
46 
47 #include "libcommon.h"
48 #include "utils.h"
49 
50 
ProcessGtkEvent()51 void ProcessGtkEvent()
52 {
53   while (gtk_events_pending())
54     gtk_main_iteration();
55 }
56 
combnum2str(gint comb_code)57 std::string combnum2str(gint comb_code)
58 {
59   switch (comb_code) {
60 #ifdef _WIN32
61   case 0:
62     return "Shift";
63   case 1:
64     return "Alt";
65   case 2:
66     return "Ctrl";
67   case 3:
68     return "Ctrl+Alt";
69 #else
70   case 0:
71     return "Win";
72   case 1:
73     return "Shift";
74   case 2:
75     return "Alt";
76   case 3:
77     return "Ctrl";
78   case 4:
79     return "Ctrl+Alt";
80   case 5:
81     return "Ctrl+e";
82   case 6:
83     return "Win+d";
84   case 7:
85     return "F1";
86   case 8:
87     return "F2";
88   case 9:
89     return "F3";
90   case 10:
91     return "F4";
92 #endif
93   default:
94     return "";
95   }
96 }
97 
/* Split str into the fields delimited by sep.
   Empty fields are kept, so the result always holds one more string than
   there are separators; an empty input yields a single empty string. */
std::vector<std::string> split(const std::string& str, char sep)
{
	std::vector<std::string> fields;
	std::string::size_type start = 0;

	for (;;) {
		const std::string::size_type hit = str.find(sep, start);
		if (hit == std::string::npos)
			break;
		fields.push_back(str.substr(start, hit - start));
		start = hit + 1;
	}
	// whatever follows the last separator (possibly nothing) is the final field
	fields.push_back(str.substr(start));

	return fields;
}
110 
load_image_from_file(const std::string & filename)111 GdkPixbuf *load_image_from_file(const std::string& filename)
112 {
113 	GError *err=NULL;
114 	GdkPixbuf *res=gdk_pixbuf_new_from_file(filename.c_str(), &err);
115 	if (!res) {
116 		g_error(_("Can not load image. %s"), err->message);
117 		g_error_free(err);
118 	}
119 
120 	return res;
121 }
122 
byte_to_hex(unsigned char nr)123 static gchar * byte_to_hex(unsigned char nr) {
124 	gchar *result = NULL;
125 
126 	result = g_strdup_printf("%%%x%x", nr / 0x10, nr % 0x10);
127 	return result;
128 }
129 
common_encode_uri_string(const char * string)130 char *common_encode_uri_string(const char *string)
131 {
132 	gchar		*newURIString;
133 	gchar		*hex, *tmp = NULL;
134 	int		i, j, len, bytes;
135 
136 	/* the UTF-8 string is casted to ASCII to treat
137 	   the characters bytewise and convert non-ASCII
138 	   compatible chars to URI hexcodes */
139 	newURIString = g_strdup("");
140 	len = strlen(string);
141 	for(i = 0; i < len; i++) {
142 		if(g_ascii_isalnum(string[i]) || strchr("-_.!~*'()", (int)string[i]))
143 		   	tmp = g_strdup_printf("%s%c", newURIString, string[i]);
144 		else if(string[i] == ' ')
145 			tmp = g_strdup_printf("%s%%20", newURIString);
146 		else if((unsigned char)string[i] <= 127) {
147 			tmp = g_strdup_printf("%s%s", newURIString, hex = byte_to_hex(string[i]));g_free(hex);
148 		} else {
149 			bytes = 0;
150 			if(((unsigned char)string[i] >= 192) && ((unsigned char)string[i] <= 223))
151 				bytes = 2;
152 			else if(((unsigned char)string[i] > 223) && ((unsigned char)string[i] <= 239))
153 				bytes = 3;
154 			else if(((unsigned char)string[i] > 239) && ((unsigned char)string[i] <= 247))
155 				bytes = 4;
156 			else if(((unsigned char)string[i] > 247) && ((unsigned char)string[i] <= 251))
157 				bytes = 5;
158 			else if(((unsigned char)string[i] > 247) && ((unsigned char)string[i] <= 251))
159 				bytes = 6;
160 
161 			if(0 != bytes) {
162 				if((i + (bytes - 1)) > len) {
163 					g_warning(("Unexpected end of character sequence or corrupt UTF-8 encoding! Some characters were dropped!"));
164 					break;
165 				}
166 
167 				for(j=0; j < (bytes - 1); j++) {
168 					tmp = g_strdup_printf("%s%s", newURIString, hex = byte_to_hex((unsigned char)string[i++]));
169 					g_free(hex);
170 					g_free(newURIString);
171 					newURIString = tmp;
172 				}
173 				tmp = g_strdup_printf("%s%s", newURIString, hex = byte_to_hex((unsigned char)string[i]));
174 				g_free(hex);
175 			} else {
176 				/* sh..! */
177 				g_error("Internal error while converting UTF-8 chars to HTTP URI!");
178 			}
179 		}
180 		g_free(newURIString);
181 		newURIString = tmp;
182 	}
183 	return newURIString;
184 }
185 
186 
187 /* based on g_mkstemp_full
188 #if defined(_WIN32)
189 	flags is type of operation allowed parameter of _wsopen_s function
190 	mode is permission settings parameter of _wsopen_s function
191 #else
192 	for the meaning of flags and mode parameters see g_mkstemp_full function
193 #endif
194 */
195 gint
stardict_mkstemp_full(gchar * tmpl,int flags,int mode)196 stardict_mkstemp_full (gchar *tmpl,
197 	int    flags,
198 	int    mode)
199 {
200 	char *XXXXXX;
201 	int count, fd;
202 	static const char letters[] =
203 		"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
204 	static const int NLETTERS = sizeof (letters) - 1;
205 	glong value;
206 	GTimeVal tv;
207 	static int counter = 0;
208 
209 	g_return_val_if_fail (tmpl != NULL, -1);
210 
211 
212 	/* find the last occurrence of "XXXXXX" */
213 	XXXXXX = g_strrstr (tmpl, "XXXXXX");
214 
215 	if (!XXXXXX || strncmp (XXXXXX, "XXXXXX", 6))
216 	{
217 		errno = EINVAL;
218 		return -1;
219 	}
220 #if defined(_WIN32)
221 	std::string tmpl_utf8;
222 	if(!file_name_to_utf8(tmpl, tmpl_utf8))
223 		return -1;
224 	std_win_string tmpl_win;
225 	if(!utf8_to_windows(tmpl_utf8, tmpl_win))
226 		return -1;
227 	size_t XXXXXXind = tmpl_win.rfind(TEXT("XXXXXX"));
228 	if(XXXXXXind == std_win_string::npos)
229 		return -1;
230 #endif
231 
232 	/* Get some more or less random data.  */
233 	g_get_current_time (&tv);
234 	value = (tv.tv_usec ^ tv.tv_sec) + counter++;
235 
236 	for (count = 0; count < 100; value += 7777, ++count)
237 	{
238 		glong v = value;
239 
240 		/* Fill in the random bits.  */
241 		XXXXXX[0] = letters[v % NLETTERS];
242 		v /= NLETTERS;
243 		XXXXXX[1] = letters[v % NLETTERS];
244 		v /= NLETTERS;
245 		XXXXXX[2] = letters[v % NLETTERS];
246 		v /= NLETTERS;
247 		XXXXXX[3] = letters[v % NLETTERS];
248 		v /= NLETTERS;
249 		XXXXXX[4] = letters[v % NLETTERS];
250 		v /= NLETTERS;
251 		XXXXXX[5] = letters[v % NLETTERS];
252 
253 #if defined(_WIN32)
254 		tmpl_win[XXXXXXind + 0] = (TCHAR)XXXXXX[0];
255 		tmpl_win[XXXXXXind + 1] = (TCHAR)XXXXXX[1];
256 		tmpl_win[XXXXXXind + 2] = (TCHAR)XXXXXX[2];
257 		tmpl_win[XXXXXXind + 3] = (TCHAR)XXXXXX[3];
258 		tmpl_win[XXXXXXind + 4] = (TCHAR)XXXXXX[4];
259 		tmpl_win[XXXXXXind + 5] = (TCHAR)XXXXXX[5];
260 		errno_t err = _wsopen_s(&fd, tmpl_win.c_str(),
261 			flags | _O_CREAT | _O_EXCL,
262 			_SH_DENYWR, mode);
263 		if(err == EEXIST || err == EACCES)
264 			continue;
265 		if(err != 0)
266 			return -1;
267 		if(fd >= 0)
268 			return fd;
269 #else
270 		/* tmpl is in UTF-8 on Windows, thus use g_open() */
271 		fd = g_open (tmpl, flags | O_CREAT | O_EXCL, mode);
272 
273 		if (fd >= 0)
274 			return fd;
275 		else if (errno != EEXIST)
276 			/* Any other error will apply also to other names we might
277 			*  try, and there are 2^32 or so of them, so give up now.
278 			*/
279 			return -1;
280 #endif
281 	}
282 
283 	/* We got out of the loop because we ran out of combinations to try.  */
284 	errno = EEXIST;
285 	return -1;
286 }
287 
288 /* based on g_mkstemp */
289 gint
stardict_mkstemp(gchar * tmpl)290 stardict_mkstemp (gchar *tmpl)
291 {
292 #if defined(_WIN32)
293 	return stardict_mkstemp_full (tmpl, _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE);
294 #else
295 	return stardict_mkstemp_full (tmpl, O_RDWR | O_BINARY, 0600);
296 #endif
297 }
298 
299 /* based on g_file_open_tmp */
300 gint
stardict_file_open_tmp(const gchar * tmpl,gchar ** name_used,GError ** error)301 stardict_file_open_tmp (const gchar  *tmpl,
302 				 gchar       **name_used,
303 				 GError      **error)
304 {
305 	int retval;
306 	const char *tmpdir;
307 	const char *sep;
308 	char *fulltemplate;
309 	const char *slash;
310 
311 	if (tmpl == NULL)
312 		tmpl = ".XXXXXX";
313 
314 	if ((slash = strchr (tmpl, G_DIR_SEPARATOR)) != NULL
315 #ifdef G_OS_WIN32
316 		|| (strchr (tmpl, '/') != NULL && (slash = "/"))
317 #endif
318 		)
319 	{
320 		gchar *display_tmpl = g_filename_display_name (tmpl);
321 		char c[2];
322 		c[0] = *slash;
323 		c[1] = '\0';
324 
325 		g_set_error (error,
326 			G_FILE_ERROR,
327 			G_FILE_ERROR_FAILED,
328 			_("Template '%s' invalid, should not contain a '%s'"),
329 			display_tmpl, c);
330 		g_free (display_tmpl);
331 
332 		return -1;
333 	}
334 
335 	if (strstr (tmpl, "XXXXXX") == NULL)
336 	{
337 		gchar *display_tmpl = g_filename_display_name (tmpl);
338 		g_set_error (error,
339 			G_FILE_ERROR,
340 			G_FILE_ERROR_FAILED,
341 			_("Template '%s' doesn't contain XXXXXX"),
342 			display_tmpl);
343 		g_free (display_tmpl);
344 		return -1;
345 	}
346 
347 	tmpdir = g_get_tmp_dir ();
348 
349 	if (G_IS_DIR_SEPARATOR (tmpdir [strlen (tmpdir) - 1]))
350 		sep = "";
351 	else
352 		sep = G_DIR_SEPARATOR_S;
353 
354 	fulltemplate = g_strconcat (tmpdir, sep, tmpl, NULL);
355 
356 	retval = stardict_mkstemp (fulltemplate);
357 
358 	if (retval == -1)
359 	{
360 		int save_errno = errno;
361 		gchar *display_fulltemplate = g_filename_display_name (fulltemplate);
362 
363 		g_set_error (error,
364 			G_FILE_ERROR,
365 			g_file_error_from_errno (save_errno),
366 			_("Failed to create file '%s': %s"),
367 			display_fulltemplate, g_strerror (save_errno));
368 		g_free (display_fulltemplate);
369 		g_free (fulltemplate);
370 		return -1;
371 	}
372 
373 	if (name_used)
374 		*name_used = fulltemplate;
375 	else
376 		g_free (fulltemplate);
377 
378 	return retval;
379 }
380 
analyse_query(const char * s,std::string & res)381 query_t analyse_query(const char *s, std::string& res)
382 {
383 	if (!s || !*s) {
384 		res="";
385 		return qtSIMPLE;
386 	}
387 	if (*s=='/') {
388 		res=s+1;
389 		return qtFUZZY;
390 	}
391 	if (*s==':') {
392 		res=s+1;
393 		return qtREGEX;
394 	}
395 
396 	if (*s=='|') {
397 		res=s+1;
398 		return qtFULLTEXT;
399 	}
400 
401 	bool pattern=false;
402 	const char *p=s;
403 	res="";
404 	for (; *p; res+=*p, ++p) {
405 		if (*p=='\\') {
406 			++p;
407 			if (!*p)
408 				break;
409 			continue;
410 		}
411 		if (*p=='*' || *p=='?')
412 			pattern=true;
413 	}
414 	if (pattern)
415 		return qtPATTERN;
416 
417 	return qtSIMPLE;
418 }
419 
/* Escape text with backslashes and store the result in res.
   A leading '/', '|' or ':' is escaped, as is every '\', '*' and '?'
   anywhere in the text. text must not be NULL. */
void stardict_input_escape(const char *text, std::string &res)
{
	res.clear();
	const char *cur = text;

	// only the very first character can be a '/', '|' or ':' that needs escaping
	switch (*cur) {
	case '/':
	case '|':
	case ':':
		res.push_back('\\');
		res.push_back(*cur);
		++cur;
		break;
	default:
		break;
	}

	for (; *cur; ++cur) {
		const char ch = *cur;
		if (ch == '\\' || ch == '*' || ch == '?')
			res.push_back('\\');
		res.push_back(ch);
	}
}
439 
440 static const char* html_entrs[] =     { "lt;", "gt;", "amp;", "apos;", "quot;", 0 };
441 static const int html_entrs_len[] =   { 3,     3,     4,      5,       5,       0 };
442 static const char html_raw_entrs[] =  { '<',   '>',   '&',    '\'',    '\"',    0 };
443 
444 static const char* html_tags[] =     { "br>", 0 };
445 static const int html_tags_len[] =   { 3,     0 };
446 static const char html_raw_tags[] =  { '\n',  0 };
447 
html_decode(const char * str,std::string & decoded)448 void html_decode(const char *str, std::string& decoded)
449 {
450 	decoded.clear();
451 	decoded.reserve(strlen(str));
452 	int ind;
453 
454 	const char* p = str;
455 	while (*p)
456 		if (*p == '&') {
457 			if(*(p+1) == '#') {
458 				const char *q = strchr(p+2, ';');
459 				if(q) {
460 					long code = atol(p+2);
461 					char buf[10]; // must be at least 6 bytes long
462 					buf[g_unichar_to_utf8(gunichar(code), buf)] = '\0';
463 					decoded += buf;
464 					p = q + 1;
465 				} else {
466 					g_debug("unknown entry %s", p);
467 					break;
468 				}
469 			} else {
470 				for(ind = 0; html_entrs[ind] != 0; ++ind)
471 					if(strncmp(p + 1, html_entrs[ind], html_entrs_len[ind]) == 0) {
472 						decoded += html_raw_entrs[ind];
473 						p += html_entrs_len[ind]+1;
474 						break;
475 					}
476 				if (html_entrs[ind] == 0) { // unrecognized sequence
477 					const char *q = strchr(p+1, ';');
478 					if(q) {
479 						++q;
480 						g_debug("unknown entry %s", std::string(p, q-p).c_str());
481 						p = q;
482 					} else {
483 						g_debug("unknown entry %s", p);
484 						break;
485 					}
486 				}
487 			}
488 		} else if(*p == '<') {
489 			for(ind = 0; html_tags[ind] != 0; ++ind)
490 				if(strncmp(p + 1, html_tags[ind], html_tags_len[ind]) == 0) {
491 					decoded += html_raw_tags[ind];
492 					p += html_tags_len[ind]+1;
493 					break;
494 				}
495 			if (html_tags[ind] == 0) { // unrecognized sequence
496 				const char *q = strchr(p+1, '>');
497 				if(q) {
498 					++q;
499 					g_debug("unknown tag %s", std::string(p, q-p).c_str());
500 					p = q;
501 				} else {
502 					g_debug("unknown tag %s", p);
503 					break;
504 				}
505 			}
506 		} else {
507 			decoded += *p++;
508 		}
509 }
510 
511 /* extract first pure english word (or a sequence of words separated by spaces)
512 	src in utf-8,
513 	dst must point to a buffer of size enough to hold the src string */
GetPureEnglishAlpha(char * dst,const char * src)514 void GetPureEnglishAlpha(char *dst, const char *src)
515 {
516 	while(*src && (uchar(*src) >= 128 || !isalpha(*src)))
517 		src++;
518 	while(*src && uchar(*src) < 128 && (isalpha(*src) || *src == ' '))
519 		*dst++ = *src++;
520 	*dst = '\0';
521 }
522 
IsASCII(const char * str)523 bool IsASCII(const char *str)
524 {
525 	// works with UTF-8 strings (bytes of a multi-byte character has the highest
526 	// bit set)
527 	for(; *str; ++str)
528 		if(uchar(*str) >= 128)
529 			return false;
530 	return true;
531 }
532 
533 /* returns pointer to the first non-space unichar in the string */
skip_spaces(const char * str)534 const char* skip_spaces(const char *str)
535 {
536 	while(*str && g_unichar_isspace(g_utf8_get_char(str)))
537 		str = g_utf8_next_char(str);
538 	return str;
539 }
540 
541 /* copy src to dst converting adjacent unicode spaces into one ASCII space ' '
542 	return value - new '\0' character */
copy_normalize_spaces(char * dst,const char * src)543 char* copy_normalize_spaces(char *dst, const char *src)
544 {
545 	while(*src) {
546 		if (g_unichar_isspace(g_utf8_get_char(src))) {
547 			*dst++=' ';
548 			src = g_utf8_next_char(src);
549 			while(g_unichar_isspace(g_utf8_get_char(src)))
550 				src = g_utf8_next_char(src);
551 		} else {
552 			g_utf8_strncpy(dst,src,1);
553 			src = g_utf8_next_char(src);
554 			dst = g_utf8_next_char(dst);
555 		}
556 	}
557 	*dst='\0';
558 	return dst;
559 }
560 
561 /* copy src to dst converting adjacent unicode spaces into one ASCII space ' ',
562 	remove leading and trailing unicode spaces */
copy_normalize_trim_spaces(char * dst,const char * src)563 void copy_normalize_trim_spaces(char *dst, const char *src)
564 {
565 	src = skip_spaces(src);
566 	char* end = copy_normalize_spaces(dst, src);
567 	delete_trailing_spaces_ASCII(dst, end);
568 }
569 
/* Cut the ASCII spaces off the end of a string.
begin - first character of the string, end - its '\0' character
return value - position of the new '\0' character */
char* delete_trailing_spaces_ASCII(const char *begin, char *end)
{
	char *tail = end;
	for (; tail > begin; --tail) {
		if (tail[-1] != ' ')
			break;
	}
	*tail = '\0';
	return tail;
}
580 
/* Delete the last word, words being separated by ASCII spaces.
In other words the last ASCII space is replaced with '\0'; a string without
any space becomes empty.
begin - first character of the string, end - its '\0' character
return value - position of the new '\0' character */
char* delete_trailing_word_ASCII(const char *begin, char *end)
{
	char *cut = end;
	// walk back over the last word
	for (; cut > begin; --cut) {
		if (cut[-1] == ' ')
			break;
	}
	// step onto the space in front of it, if there is one
	if (cut > begin)
		--cut;
	*cut = '\0';
	return cut;
}
594 
595 /* return value - new '\0' character */
delete_trailing_char(char * begin,char * end)596 char* delete_trailing_char(char *begin, char *end)
597 {
598 	char* p = g_utf8_find_prev_char(begin, end);
599 	if(p) {
600 		*p = '\0';
601 		return p;
602 	} else {
603 		*begin = '\0';
604 		return begin;
605 	}
606 }
607 
608 /* like extract_word but does not copy the extracted word
609 instead assignes begin and end to the first and next to last chars in the source string */
extract_word_in_place(const char ** begin,const char ** end,const char * src,int BeginPos,gboolean (* is_splitter)(gunichar c))610 void extract_word_in_place(const char **begin, const char **end, const char* src,
611 	int BeginPos, gboolean (*is_splitter)(gunichar c))
612 {
613 	g_assert(begin);
614 	g_assert(end);
615 	g_assert(BeginPos >= 0);
616 	*begin = *end = NULL;
617 	const char* const pointer = src + BeginPos;
618 	const char *word_begin = NULL;
619 	while(true) {
620 		while(*src && is_splitter(g_utf8_get_char(src)))
621 			src = g_utf8_next_char(src);
622 		if(!*src)
623 			break;
624 		if(src <= pointer || !word_begin)
625 			word_begin = src;
626 		if(pointer <= src)
627 			break;
628 		while(*src && !is_splitter(g_utf8_get_char(src)))
629 			src = g_utf8_next_char(src);
630 		if(!*src)
631 			break;
632 	}
633 	if(!word_begin)
634 		return;
635 	src = word_begin;
636 	while(*src && !is_splitter(g_utf8_get_char(src))) {
637 		src = g_utf8_next_char(src);
638 	}
639 	*begin = word_begin;
640 	*end = src;
641 }
642 
643 /* extract the word at position BeginPos
644 word separator chars are identified by is_splitter function
645 If BeginPos points to a splitter, get the word to the left of the point.
646 If there is no word to the left, get the word to the right of the point.
647 copy result into dst */
extract_word(char * dst,const char * src,int BeginPos,gboolean (* is_splitter)(gunichar c))648 void extract_word(char *dst, const char* src, int BeginPos, gboolean (*is_splitter)(gunichar c))
649 {
650 	g_assert(BeginPos >= 0);
651 	const char *begin, *end;
652 	extract_word_in_place(&begin, &end, src, BeginPos, is_splitter);
653 	if(!begin || !end) {
654 		*dst = '\0';
655 		return;
656 	}
657 	const int num = end - begin;
658 	strncpy(dst, begin, num);
659 	dst[num] = '\0';
660 }
661 
662 /* Extract a capitalized word
663 	the word must look like Word
664 	where the first letter satisfies the is_first_letter function,
665 	the first letter must be followed by at least one letter satisfing the is_second_letter function */
extract_capitalized_word_in_place(const char ** begin,const char ** end,const char * src,int BeginPos,gboolean (* is_first_letter)(gunichar c),gboolean (* is_second_letter)(gunichar c))666 void extract_capitalized_word_in_place(const char **begin, const char **end,
667 	const char* src, int BeginPos,
668 	gboolean (*is_first_letter)(gunichar c), gboolean (*is_second_letter)(gunichar c))
669 {
670 	g_assert(begin);
671 	g_assert(end);
672 	g_assert(BeginPos >= 0);
673 	*begin = *end = NULL;
674 	const char* const pointer = src + BeginPos;
675 	const char *word_begin = NULL;
676 	while(true) {
677 		while(*src && !is_first_letter(g_utf8_get_char(src)))
678 			src = g_utf8_next_char(src);
679 		if(!*src)
680 			break;
681 		const char *cur_word_begin = src;
682 		src = g_utf8_next_char(src);
683 		if(!is_second_letter(g_utf8_get_char(src)))
684 			continue;
685 		if(cur_word_begin <= pointer || !word_begin)
686 			word_begin = cur_word_begin;
687 		if(pointer <= cur_word_begin)
688 			break;
689 		while(*src && is_second_letter(g_utf8_get_char(src)))
690 			src = g_utf8_next_char(src);
691 		if(!*src)
692 			break;
693 	}
694 	if(!word_begin)
695 		return;
696 	// skip the first letter
697 	src = g_utf8_next_char(word_begin);
698 	while(*src && is_second_letter(g_utf8_get_char(src))) {
699 		src = g_utf8_next_char(src);
700 	}
701 	*begin = word_begin;
702 	*end = src;
703 }
704 
extract_capitalized_word(char * dst,const char * src,int BeginPos,gboolean (* is_first_letter)(gunichar c),gboolean (* is_second_letter)(gunichar c))705 void extract_capitalized_word(char *dst, const char* src, int BeginPos,
706 	gboolean (*is_first_letter)(gunichar c), gboolean (*is_second_letter)(gunichar c))
707 {
708 	g_assert(BeginPos >= 0);
709 	const char *begin, *end;
710 	extract_capitalized_word_in_place(&begin, &end, src, BeginPos, is_first_letter, is_second_letter);
711 	if(!begin || !end) {
712 		*dst = '\0';
713 		return;
714 	}
715 	const int num = end - begin;
716 	strncpy(dst, begin, num);
717 	dst[num] = '\0';
718 }
719 
find_first(const char * src,gboolean (* isfunc)(gunichar c))720 const char* find_first(const char* src, gboolean (*isfunc)(gunichar c))
721 {
722 	while(*src && !isfunc(g_utf8_get_char(src)))
723 		src = g_utf8_next_char(src);
724 	if(isfunc(g_utf8_get_char(src)))
725 		return src;
726 	else
727 		return NULL;
728 }
729 
find_first_not(const char * src,gboolean (* isfunc)(gunichar c))730 const char* find_first_not(const char* src, gboolean (*isfunc)(gunichar c))
731 {
732 	while(*src && isfunc(g_utf8_get_char(src)))
733 		src = g_utf8_next_char(src);
734 	if(isfunc(g_utf8_get_char(src)))
735 		return NULL;
736 	else
737 		return src;
738 }
739 
is_space_or_punct(gunichar c)740 gboolean is_space_or_punct(gunichar c)
741 {
742 	return g_unichar_isspace(c) || g_unichar_ispunct(c);
743 }
744 
is_not_alpha(gunichar c)745 gboolean is_not_alpha(gunichar c)
746 {
747 	return !g_unichar_isalpha(c);
748 }
749 
is_not_upper(gunichar c)750 gboolean is_not_upper(gunichar c)
751 {
752 	return !g_unichar_isupper(c);
753 }
754 
is_not_lower(gunichar c)755 gboolean is_not_lower(gunichar c)
756 {
757 	return !g_unichar_islower(c);
758 }
759