1 /* dzl-pattern-spec.c
2  *
3  * Copyright (C) 2015-2017 Christian Hergert <christian@hergert.me>
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #define G_LOG_DOMAIN "dzl-pattern-spec"
20 
21 #include "config.h"
22 
23 #ifndef _GNU_SOURCE
24 # define _GNU_SOURCE
25 #endif
26 #include <string.h>
27 
28 #include "search/dzl-pattern-spec.h"
29 #include "util/dzl-macros.h"
30 
31 G_DEFINE_BOXED_TYPE (DzlPatternSpec, dzl_pattern_spec, dzl_pattern_spec_ref, dzl_pattern_spec_unref)
32 
33 /**
34  * SECTION:dzl-pattern-spec
35  * @title: DzlPatternSpec
36  * @short_description: Simple glob-like searching
37  *
38  * This works similar to #GPatternSpec except the query syntax is different.
39  * It tries to match word boundaries, but with matching partial words up
40  * to those boundaries. For example, "gtk widg" would match "gtk_widget_show".
41  * Word boundaries include '_' and ' '. If any character is uppercase, then
42  * case sensitivity is used.
43  */
44 
45 struct _DzlPatternSpec
46 {
47   volatile gint   ref_count;
48   gchar          *needle;
49   gchar         **parts;
50   guint           case_sensitive : 1;
51 };
52 
53 #ifdef G_OS_WIN32
54 /* A fallback for missing strcasestr() on Windows. This is not in any way
55  * optimized, but at least it supports something resembling UTF-8.
56  */
57 static char *
strcasestr(const gchar * haystack,const gchar * needle)58 strcasestr (const gchar *haystack,
59             const gchar *needle)
60 {
61   g_autofree gchar *haystack_folded = g_utf8_casefold (haystack, -1);
62   g_autofree gchar *needle_folded = g_utf8_casefold (needle, -1);
63   const gchar *pos;
64   gsize n_chars = 0;
65 
66   pos = strstr (haystack_folded, needle_folded);
67 
68   if (pos == NULL)
69     return NULL;
70 
71   for (const gchar *iter = haystack_folded;
72        *iter != '\0';
73        iter = g_utf8_next_char (iter))
74     {
75       if (iter >= pos)
76         break;
77       n_chars++;
78     }
79 
80   return g_utf8_offset_to_pointer (haystack, n_chars);
81 }
82 #endif
83 
84 DzlPatternSpec *
dzl_pattern_spec_new(const gchar * needle)85 dzl_pattern_spec_new (const gchar *needle)
86 {
87   DzlPatternSpec *self;
88   const gchar *tmp;
89 
90   if (needle == NULL)
91     needle = "";
92 
93   self = g_slice_new0 (DzlPatternSpec);
94   self->ref_count = 1;
95   self->needle = g_strdup (needle);
96   self->parts = g_strsplit (needle, " ", 0);
97   self->case_sensitive = FALSE;
98 
99   for (tmp = needle; *tmp; tmp = g_utf8_next_char (tmp))
100     {
101       if (g_unichar_isupper (g_utf8_get_char (tmp)))
102         {
103           self->case_sensitive = TRUE;
104           break;
105         }
106     }
107 
108   return self;
109 }
110 
111 const gchar *
dzl_pattern_spec_get_text(DzlPatternSpec * self)112 dzl_pattern_spec_get_text (DzlPatternSpec *self)
113 {
114   g_return_val_if_fail (self != NULL, NULL);
115 
116   return self->needle;
117 }
118 
119 static void
dzl_pattern_spec_free(DzlPatternSpec * self)120 dzl_pattern_spec_free (DzlPatternSpec *self)
121 {
122   g_clear_pointer (&self->parts, g_strfreev);
123   g_clear_pointer (&self->needle, g_free);
124   g_slice_free (DzlPatternSpec, self);
125 }
126 
127 static inline gboolean
is_word_break(gunichar ch)128 is_word_break (gunichar ch)
129 {
130   return (ch == ' ' || ch == '_' || ch == '-' || ch == '.');
131 }
132 
133 static const gchar *
next_word_start(const gchar * haystack)134 next_word_start (const gchar *haystack)
135 {
136   for (; *haystack; haystack = g_utf8_next_char (haystack))
137     {
138       gunichar ch = g_utf8_get_char (haystack);
139 
140       if (is_word_break (ch))
141         break;
142     }
143 
144   for (; *haystack; haystack = g_utf8_next_char (haystack))
145     {
146       gunichar ch = g_utf8_get_char (haystack);
147 
148       if (is_word_break (ch))
149         continue;
150 
151       break;
152     }
153 
154   g_return_val_if_fail (*haystack == '\0' || !is_word_break (*haystack), NULL);
155 
156   return haystack;
157 }
158 
159 gboolean
dzl_pattern_spec_match(DzlPatternSpec * self,const gchar * haystack)160 dzl_pattern_spec_match (DzlPatternSpec *self,
161                         const gchar    *haystack)
162 {
163   gsize i;
164 
165   if (self == NULL || haystack == NULL)
166     return FALSE;
167 
168   for (i = 0; (haystack != NULL) && self->parts [i]; i++)
169     {
170       if (self->parts [i][0] == '\0')
171         continue;
172 
173       if (self->case_sensitive)
174         haystack = strstr (haystack, self->parts [i]);
175       else
176         haystack = strcasestr (haystack, self->parts [i]);
177 
178       if (haystack == NULL)
179         return FALSE;
180 
181       if (self->parts [i + 1] != NULL)
182         haystack = next_word_start (haystack + strlen (self->parts [i]));
183     }
184 
185   return TRUE;
186 }
187 
188 DzlPatternSpec *
dzl_pattern_spec_ref(DzlPatternSpec * self)189 dzl_pattern_spec_ref (DzlPatternSpec *self)
190 {
191   g_return_val_if_fail (self, NULL);
192   g_return_val_if_fail (self->ref_count > 0, NULL);
193 
194   g_atomic_int_inc (&self->ref_count);
195 
196   return self;
197 }
198 
199 void
dzl_pattern_spec_unref(DzlPatternSpec * self)200 dzl_pattern_spec_unref (DzlPatternSpec *self)
201 {
202   g_return_if_fail (self);
203   g_return_if_fail (self->ref_count > 0);
204 
205   if (g_atomic_int_dec_and_test (&self->ref_count))
206     dzl_pattern_spec_free (self);
207 }
208