1 /* dzl-pattern-spec.c
2 *
3 * Copyright (C) 2015-2017 Christian Hergert <christian@hergert.me>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #define G_LOG_DOMAIN "dzl-pattern-spec"
20
21 #include "config.h"
22
23 #ifndef _GNU_SOURCE
24 # define _GNU_SOURCE
25 #endif
26 #include <string.h>
27
28 #include "search/dzl-pattern-spec.h"
29 #include "util/dzl-macros.h"
30
31 G_DEFINE_BOXED_TYPE (DzlPatternSpec, dzl_pattern_spec, dzl_pattern_spec_ref, dzl_pattern_spec_unref)
32
33 /**
34 * SECTION:dzl-pattern-spec
35 * @title: DzlPatternSpec
36 * @short_description: Simple glob-like searching
37 *
38 * This works similar to #GPatternSpec except the query syntax is different.
39 * It tries to match word boundaries, but with matching partial words up
40 * to those boundaries. For example, "gtk widg" would match "gtk_widget_show".
41 * Word boundaries include '_' and ' '. If any character is uppercase, then
42 * case sensitivity is used.
43 */
44
45 struct _DzlPatternSpec
46 {
47 volatile gint ref_count;
48 gchar *needle;
49 gchar **parts;
50 guint case_sensitive : 1;
51 };
52
53 #ifdef G_OS_WIN32
54 /* A fallback for missing strcasestr() on Windows. This is not in any way
55 * optimized, but at least it supports something resembling UTF-8.
56 */
57 static char *
strcasestr(const gchar * haystack,const gchar * needle)58 strcasestr (const gchar *haystack,
59 const gchar *needle)
60 {
61 g_autofree gchar *haystack_folded = g_utf8_casefold (haystack, -1);
62 g_autofree gchar *needle_folded = g_utf8_casefold (needle, -1);
63 const gchar *pos;
64 gsize n_chars = 0;
65
66 pos = strstr (haystack_folded, needle_folded);
67
68 if (pos == NULL)
69 return NULL;
70
71 for (const gchar *iter = haystack_folded;
72 *iter != '\0';
73 iter = g_utf8_next_char (iter))
74 {
75 if (iter >= pos)
76 break;
77 n_chars++;
78 }
79
80 return g_utf8_offset_to_pointer (haystack, n_chars);
81 }
82 #endif
83
84 DzlPatternSpec *
dzl_pattern_spec_new(const gchar * needle)85 dzl_pattern_spec_new (const gchar *needle)
86 {
87 DzlPatternSpec *self;
88 const gchar *tmp;
89
90 if (needle == NULL)
91 needle = "";
92
93 self = g_slice_new0 (DzlPatternSpec);
94 self->ref_count = 1;
95 self->needle = g_strdup (needle);
96 self->parts = g_strsplit (needle, " ", 0);
97 self->case_sensitive = FALSE;
98
99 for (tmp = needle; *tmp; tmp = g_utf8_next_char (tmp))
100 {
101 if (g_unichar_isupper (g_utf8_get_char (tmp)))
102 {
103 self->case_sensitive = TRUE;
104 break;
105 }
106 }
107
108 return self;
109 }
110
111 const gchar *
dzl_pattern_spec_get_text(DzlPatternSpec * self)112 dzl_pattern_spec_get_text (DzlPatternSpec *self)
113 {
114 g_return_val_if_fail (self != NULL, NULL);
115
116 return self->needle;
117 }
118
119 static void
dzl_pattern_spec_free(DzlPatternSpec * self)120 dzl_pattern_spec_free (DzlPatternSpec *self)
121 {
122 g_clear_pointer (&self->parts, g_strfreev);
123 g_clear_pointer (&self->needle, g_free);
124 g_slice_free (DzlPatternSpec, self);
125 }
126
127 static inline gboolean
is_word_break(gunichar ch)128 is_word_break (gunichar ch)
129 {
130 return (ch == ' ' || ch == '_' || ch == '-' || ch == '.');
131 }
132
133 static const gchar *
next_word_start(const gchar * haystack)134 next_word_start (const gchar *haystack)
135 {
136 for (; *haystack; haystack = g_utf8_next_char (haystack))
137 {
138 gunichar ch = g_utf8_get_char (haystack);
139
140 if (is_word_break (ch))
141 break;
142 }
143
144 for (; *haystack; haystack = g_utf8_next_char (haystack))
145 {
146 gunichar ch = g_utf8_get_char (haystack);
147
148 if (is_word_break (ch))
149 continue;
150
151 break;
152 }
153
154 g_return_val_if_fail (*haystack == '\0' || !is_word_break (*haystack), NULL);
155
156 return haystack;
157 }
158
159 gboolean
dzl_pattern_spec_match(DzlPatternSpec * self,const gchar * haystack)160 dzl_pattern_spec_match (DzlPatternSpec *self,
161 const gchar *haystack)
162 {
163 gsize i;
164
165 if (self == NULL || haystack == NULL)
166 return FALSE;
167
168 for (i = 0; (haystack != NULL) && self->parts [i]; i++)
169 {
170 if (self->parts [i][0] == '\0')
171 continue;
172
173 if (self->case_sensitive)
174 haystack = strstr (haystack, self->parts [i]);
175 else
176 haystack = strcasestr (haystack, self->parts [i]);
177
178 if (haystack == NULL)
179 return FALSE;
180
181 if (self->parts [i + 1] != NULL)
182 haystack = next_word_start (haystack + strlen (self->parts [i]));
183 }
184
185 return TRUE;
186 }
187
188 DzlPatternSpec *
dzl_pattern_spec_ref(DzlPatternSpec * self)189 dzl_pattern_spec_ref (DzlPatternSpec *self)
190 {
191 g_return_val_if_fail (self, NULL);
192 g_return_val_if_fail (self->ref_count > 0, NULL);
193
194 g_atomic_int_inc (&self->ref_count);
195
196 return self;
197 }
198
199 void
dzl_pattern_spec_unref(DzlPatternSpec * self)200 dzl_pattern_spec_unref (DzlPatternSpec *self)
201 {
202 g_return_if_fail (self);
203 g_return_if_fail (self->ref_count > 0);
204
205 if (g_atomic_int_dec_and_test (&self->ref_count))
206 dzl_pattern_spec_free (self);
207 }
208