1 #include <string.h>
2
3 #include "iregex.h"
4
/* Match state handed out by i_regex_match(): GLib's match info plus the
 * sanitized subject string that was actually matched against. */
struct _MatchInfo {
	/* Sanitized copy of the subject produced by make_valid_utf8().  Set by
	 * i_regex_match() only when such a copy had to be allocated; NULL when
	 * the caller's string was matched as-is. */
	const char *valid_string;
	/* Match info filled in by g_regex_match(). */
	GMatchInfo *g_match_info;
};
9
10 static const gchar *
make_valid_utf8(const gchar * text,gboolean * free_ret)11 make_valid_utf8(const gchar *text, gboolean *free_ret)
12 {
13 GString *str;
14 const gchar *ptr;
15 if (g_utf8_validate(text, -1, NULL)) {
16 if (free_ret)
17 *free_ret = FALSE;
18 return text;
19 }
20
21 str = g_string_sized_new(strlen(text) + 12);
22
23 ptr = text;
24 while (*ptr) {
25 gunichar c = g_utf8_get_char_validated(ptr, -1);
26 /* the unicode is invalid */
27 if (c == (gunichar)-1 || c == (gunichar)-2) {
28 /* encode the byte into PUA-A */
29 g_string_append_unichar(str, (gunichar) (0xfff00 | (*ptr & 0xff)));
30 ptr++;
31 } else {
32 g_string_append_unichar(str, c);
33 ptr = g_utf8_next_char(ptr);
34 }
35 }
36
37 if (free_ret)
38 *free_ret = TRUE;
39 return g_string_free(str, FALSE);
40 }
41
42 Regex *
i_regex_new(const gchar * pattern,GRegexCompileFlags compile_options,GRegexMatchFlags match_options,GError ** error)43 i_regex_new (const gchar *pattern,
44 GRegexCompileFlags compile_options,
45 GRegexMatchFlags match_options,
46 GError **error)
47 {
48 const gchar *valid_pattern;
49 gboolean free_valid_pattern;
50 Regex *ret = NULL;
51
52 valid_pattern = make_valid_utf8(pattern, &free_valid_pattern);
53 ret = g_regex_new(valid_pattern, compile_options, match_options, error);
54
55 if (free_valid_pattern)
56 g_free_not_null((gchar *)valid_pattern);
57
58 return ret;
59 }
60
/* Release `regex` by forwarding it to g_regex_unref(). */
void
i_regex_unref (Regex *regex)
{
	g_regex_unref(regex);
}
66
67 gboolean
i_regex_match(const Regex * regex,const gchar * string,GRegexMatchFlags match_options,MatchInfo ** match_info)68 i_regex_match (const Regex *regex,
69 const gchar *string,
70 GRegexMatchFlags match_options,
71 MatchInfo **match_info)
72 {
73 gboolean ret;
74 gboolean free_valid_string;
75 const gchar *valid_string = make_valid_utf8(string, &free_valid_string);
76
77 if (match_info != NULL)
78 *match_info = g_new0(MatchInfo, 1);
79
80 ret = g_regex_match(regex, valid_string, match_options,
81 match_info != NULL ? &(*match_info)->g_match_info : NULL);
82
83 if (free_valid_string) {
84 if (match_info != NULL)
85 (*match_info)->valid_string = valid_string;
86 else
87 g_free_not_null((gchar *)valid_string);
88 }
89
90 return ret;
91 }
92
93 static gsize
strlen_pua_oddly(const char * str)94 strlen_pua_oddly(const char *str)
95 {
96 const gchar *ptr;
97 gsize ret = 0;
98 ptr = str;
99
100 while (*ptr) {
101 const gchar *old;
102 gunichar c = g_utf8_get_char(ptr);
103 old = ptr;
104 ptr = g_utf8_next_char(ptr);
105
106 /* it is our PUA encoded byte */
107 if ((c & 0xfff00) == 0xfff00)
108 ret++;
109 else
110 ret += ptr - old;
111 }
112
113 return ret;
114 }
115
116 /* new_string should be passed in here from the i_regex_match call.
117 The start_pos and end_pos will then be calculated as if they were on
118 the original string */
119 gboolean
i_match_info_fetch_pos(const MatchInfo * match_info,gint match_num,gint * start_pos,gint * end_pos)120 i_match_info_fetch_pos (const MatchInfo *match_info,
121 gint match_num,
122 gint *start_pos,
123 gint *end_pos)
124 {
125 gint tmp_start, tmp_end, new_start_pos;
126 gboolean ret;
127
128 if (!match_info->valid_string || (!start_pos && !end_pos))
129 return g_match_info_fetch_pos(match_info->g_match_info,
130 match_num, start_pos, end_pos);
131
132 ret = g_match_info_fetch_pos(match_info->g_match_info,
133 match_num, &tmp_start, &tmp_end);
134 if (start_pos || end_pos) {
135 const gchar *str = match_info->valid_string;
136 gchar *to_start = g_strndup(str, tmp_start);
137 new_start_pos = strlen_pua_oddly(to_start);
138 g_free_not_null(to_start);
139
140 if (start_pos)
141 *start_pos = new_start_pos;
142
143 if (end_pos) {
144 gchar *to_end = g_strndup(str + tmp_start, tmp_end - tmp_start);
145 *end_pos = new_start_pos + strlen_pua_oddly(to_end);
146 g_free_not_null(to_end);
147 }
148 }
149 return ret;
150 }
151
152 gboolean
i_match_info_matches(const MatchInfo * match_info)153 i_match_info_matches (const MatchInfo *match_info)
154 {
155 g_return_val_if_fail(match_info != NULL, FALSE);
156
157 return g_match_info_matches(match_info->g_match_info);
158 }
159
160 void
i_match_info_free(MatchInfo * match_info)161 i_match_info_free (MatchInfo *match_info)
162 {
163 g_match_info_free(match_info->g_match_info);
164 g_free(match_info);
165 }
166