1 /*
2  * Copyright (c) 2015-2017 Balabit
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  *
18  * As an additional exemption you are allowed to compile & link against the
19  * OpenSSL libraries as published by the OpenSSL project. See the file
20  * COPYING for details.
21  *
22  */
23 #include "kv-scanner.h"
24 #include "str-repr/decode.h"
25 #include "str-repr/encode.h"
26 #include "scratch-buffers.h"
27 #include <string.h>
28 
29 static inline gboolean
_is_valid_key_character(gchar c)30 _is_valid_key_character(gchar c)
31 {
32   return (c >= 'a' && c <= 'z') ||
33          (c >= 'A' && c <= 'Z') ||
34          (c >= '0' && c <= '9') ||
35          (c == '_') ||
36          (c == '.') ||
37          (c == '-');
38 }
39 
40 static inline const gchar *
_locate_separator(KVScanner * self,const gchar * start)41 _locate_separator(KVScanner *self, const gchar *start)
42 {
43   return strchr(start, self->value_separator);
44 }
45 
46 static inline void
_locate_start_of_key(KVScanner * self,const gchar * end_of_key,const gchar ** start_of_key)47 _locate_start_of_key(KVScanner *self, const gchar *end_of_key, const gchar **start_of_key)
48 {
49   const gchar *input = &self->input[self->input_pos];
50   const gchar *cur;
51 
52   cur = end_of_key;
53   while (cur > input && self->is_valid_key_character(*(cur - 1)))
54     cur--;
55   *start_of_key = cur;
56 }
57 
58 static inline void
_locate_end_of_key(KVScanner * self,const gchar * separator,const gchar ** end_of_key)59 _locate_end_of_key(KVScanner *self, const gchar *separator, const gchar **end_of_key)
60 {
61   const gchar *input = &self->input[self->input_pos];
62   const gchar *cur;
63 
64   /* this function locates the character pointing right next to the end of
65    * the key, e.g. with this input
66    *   foo   = bar
67    *
68    * it would start with the '=' sign and skip spaces backwards, to locate
69    * the space right next to "foo" */
70 
71   cur = separator;
72   while (cur > input && (*(cur - 1)) == ' ')
73     cur--;
74   *end_of_key = cur;
75 }
76 
77 static inline gboolean
_extract_key_from_positions(KVScanner * self,const gchar * start_of_key,const gchar * end_of_key)78 _extract_key_from_positions(KVScanner *self, const gchar *start_of_key, const gchar *end_of_key)
79 {
80   gint len = end_of_key - start_of_key;
81 
82   if (len >= 1)
83     {
84       g_string_assign_len(self->key, start_of_key, len);
85       return TRUE;
86     }
87   return FALSE;
88 }
89 
90 static inline void
_extract_stray_word(KVScanner * self,const gchar * stray_word,gssize len)91 _extract_stray_word(KVScanner *self, const gchar *stray_word, gssize len)
92 {
93   if (len < 0)
94     len = strlen(stray_word);
95   if (self->stray_words && len > 0)
96     {
97       while (len > 0 && stray_word[len - 1] == ' ')
98         len--;
99       while (len > 0 && stray_word[0] == ' ')
100         {
101           stray_word++;
102           len--;
103         }
104       if (len > 0)
105         {
106           if (self->stray_words->len)
107             g_string_append_c(self->stray_words, ',');
108 
109           str_repr_encode_append(self->stray_words, stray_word, len, ",");
110         }
111     }
112 }
113 
114 static gboolean
_should_stop(KVScanner * self)115 _should_stop(KVScanner *self)
116 {
117   const gchar *input = &self->input[self->input_pos];
118   return *input == self->stop_char;
119 }
120 
121 static gboolean
_extract_key(KVScanner * self)122 _extract_key(KVScanner *self)
123 {
124   const gchar *input = &self->input[self->input_pos];
125   const gchar *start_of_key, *end_of_key;
126   const gchar *separator;
127 
128   separator = _locate_separator(self, input);
129   while (separator)
130     {
131       _locate_end_of_key(self, separator, &end_of_key);
132       _locate_start_of_key(self, end_of_key, &start_of_key);
133 
134       if (_extract_key_from_positions(self, start_of_key, end_of_key))
135         {
136           _extract_stray_word(self, input, start_of_key - input);
137           self->input_pos = separator - self->input + 1;
138           return TRUE;
139         }
140       separator = _locate_separator(self, separator + 1);
141     }
142   _extract_stray_word(self, input, -1);
143   return FALSE;
144 }
145 
146 static gboolean
_is_quoted(const gchar * input)147 _is_quoted(const gchar *input)
148 {
149   return *input == '\'' || *input == '\"';
150 }
151 
152 static gboolean
_key_follows(KVScanner * self,const gchar * cur)153 _key_follows(KVScanner *self, const gchar *cur)
154 {
155   const gchar *key = cur;
156 
157   while (self->is_valid_key_character(*key))
158     key++;
159 
160   while (*key == ' ')
161     key++;
162   return (key != cur) && (*key == self->value_separator);
163 }
164 
165 static inline void
_skip_spaces(const gchar ** input)166 _skip_spaces(const gchar **input)
167 {
168   const gchar *cur = *input;
169 
170   while (*cur == ' ')
171     cur++;
172   *input = cur;
173 }
174 
175 static inline gboolean
_end_of_string(const gchar * cur)176 _end_of_string(const gchar *cur)
177 {
178   return *cur == 0;
179 }
180 
181 static inline gboolean
_pair_separator(KVScanner * self,const gchar * cur,const gchar ** new_cur)182 _pair_separator(KVScanner *self, const gchar *cur, const gchar **new_cur)
183 {
184   if (self->pair_separator && (strncmp(cur, self->pair_separator, self->pair_separator_len) == 0))
185     {
186       *new_cur = cur + self->pair_separator_len;
187       return TRUE;
188     }
189   return FALSE;
190 }
191 
192 static inline gboolean
_pair_separator_starts_with_a_space(KVScanner * self)193 _pair_separator_starts_with_a_space(KVScanner *self)
194 {
195   return (self->pair_separator && self->pair_separator[0] == ' ');
196 }
197 
198 static gboolean
_match_delimiter(const gchar * cur,const gchar ** new_cur,gpointer user_data)199 _match_delimiter(const gchar *cur, const gchar **new_cur, gpointer user_data)
200 {
201   KVScanner *self = (gpointer) user_data;
202   gboolean result = FALSE;
203 
204   if (!self->value_was_quoted &&
205       *cur == ' ')
206     {
207       if (_pair_separator_starts_with_a_space(self) &&
208           _pair_separator(self, cur, new_cur))
209         {
210           result = TRUE;
211         }
212       else
213         {
214           _skip_spaces(&cur);
215 
216           if (_end_of_string(cur) ||
217               _key_follows(self, cur))
218             {
219               *new_cur = cur;
220               result = TRUE;
221             }
222           else if (_pair_separator(self, cur, new_cur))
223             {
224               result = TRUE;
225             }
226         }
227     }
228   else if (*cur == ' ')
229     {
230       result = TRUE;
231       *new_cur = cur + 1;
232     }
233   else if (*cur == self->stop_char)
234     {
235       result = TRUE;
236       *new_cur = cur;
237     }
238   else
239     {
240       result = _pair_separator(self, cur, new_cur);
241     }
242   return result;
243 }
244 
245 static inline void
_skip_initial_spaces(KVScanner * self)246 _skip_initial_spaces(KVScanner *self)
247 {
248   const gchar *input = &self->input[self->input_pos];
249   const gchar *end;
250 
251   while (*input == ' ' && !_match_delimiter(input, &end, self))
252     input++;
253   self->input_pos = input - self->input;
254 }
255 
256 static inline void
_decode_value(KVScanner * self)257 _decode_value(KVScanner *self)
258 {
259   const gchar *input = &self->input[self->input_pos];
260   const gchar *end;
261   StrReprDecodeOptions options =
262   {
263     .match_delimiter = _match_delimiter,
264     .match_delimiter_data = self,
265     .delimiter_chars = { ' ', self->pair_separator[0], self->stop_char },
266   };
267 
268   self->value_was_quoted = _is_quoted(input);
269   if (str_repr_decode_with_options(self->value, input, &end, &options))
270     {
271       self->input_pos = end - self->input;
272     }
273   else
274     {
275       /* quotation error, set was_quoted to FALSE */
276       self->value_was_quoted = FALSE;
277     }
278 }
279 
280 static void
_extract_optional_annotation(KVScanner * self)281 _extract_optional_annotation(KVScanner *self)
282 {
283   if (self->extract_annotation)
284     self->extract_annotation(self);
285 }
286 
287 static void
_extract_value(KVScanner * self)288 _extract_value(KVScanner *self)
289 {
290   self->value_was_quoted = FALSE;
291   _skip_initial_spaces(self);
292   _decode_value(self);
293 }
294 
295 static inline void
_transform_value(KVScanner * self)296 _transform_value(KVScanner *self)
297 {
298   if (self->transform_value)
299     {
300       g_string_truncate(self->decoded_value, 0);
301       if (self->transform_value(self))
302         g_string_assign_len(self->value, self->decoded_value->str, self->decoded_value->len);
303     }
304 }
305 
306 gboolean
kv_scanner_scan_next(KVScanner * s)307 kv_scanner_scan_next(KVScanner *s)
308 {
309   KVScanner *self = (KVScanner *)s;
310 
311   if (_should_stop(self))
312     return FALSE;
313 
314   if (!_extract_key(self))
315     return FALSE;
316 
317   _extract_optional_annotation(self);
318 
319   _extract_value(self);
320   _transform_value(s);
321 
322   return TRUE;
323 }
324 
325 void
kv_scanner_deinit(KVScanner * self)326 kv_scanner_deinit(KVScanner *self)
327 {
328 }
329 
330 void
kv_scanner_init(KVScanner * self,gchar value_separator,const gchar * pair_separator,gboolean extract_stray_words)331 kv_scanner_init(KVScanner *self, gchar value_separator, const gchar *pair_separator,
332                 gboolean extract_stray_words)
333 {
334   memset(self, 0, sizeof(*self));
335   self->key = scratch_buffers_alloc();
336   self->value = scratch_buffers_alloc();
337   self->decoded_value = scratch_buffers_alloc();
338   if (extract_stray_words)
339     self->stray_words = scratch_buffers_alloc();
340   self->value_separator = value_separator;
341   self->pair_separator = pair_separator ? : ", ";
342   self->pair_separator_len = strlen(self->pair_separator);
343   self->is_valid_key_character = _is_valid_key_character;
344   self->stop_char = 0;
345 }
346