1 /*
2 * Copyright (c) 2015-2017 Balabit
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * As an additional exemption you are allowed to compile & link against the
19 * OpenSSL libraries as published by the OpenSSL project. See the file
20 * COPYING for details.
21 *
22 */
23 #include "kv-scanner.h"
24 #include "str-repr/decode.h"
25 #include "str-repr/encode.h"
26 #include "scratch-buffers.h"
27 #include <string.h>
28
29 static inline gboolean
_is_valid_key_character(gchar c)30 _is_valid_key_character(gchar c)
31 {
32 return (c >= 'a' && c <= 'z') ||
33 (c >= 'A' && c <= 'Z') ||
34 (c >= '0' && c <= '9') ||
35 (c == '_') ||
36 (c == '.') ||
37 (c == '-');
38 }
39
40 static inline const gchar *
_locate_separator(KVScanner * self,const gchar * start)41 _locate_separator(KVScanner *self, const gchar *start)
42 {
43 return strchr(start, self->value_separator);
44 }
45
46 static inline void
_locate_start_of_key(KVScanner * self,const gchar * end_of_key,const gchar ** start_of_key)47 _locate_start_of_key(KVScanner *self, const gchar *end_of_key, const gchar **start_of_key)
48 {
49 const gchar *input = &self->input[self->input_pos];
50 const gchar *cur;
51
52 cur = end_of_key;
53 while (cur > input && self->is_valid_key_character(*(cur - 1)))
54 cur--;
55 *start_of_key = cur;
56 }
57
58 static inline void
_locate_end_of_key(KVScanner * self,const gchar * separator,const gchar ** end_of_key)59 _locate_end_of_key(KVScanner *self, const gchar *separator, const gchar **end_of_key)
60 {
61 const gchar *input = &self->input[self->input_pos];
62 const gchar *cur;
63
64 /* this function locates the character pointing right next to the end of
65 * the key, e.g. with this input
66 * foo = bar
67 *
68 * it would start with the '=' sign and skip spaces backwards, to locate
69 * the space right next to "foo" */
70
71 cur = separator;
72 while (cur > input && (*(cur - 1)) == ' ')
73 cur--;
74 *end_of_key = cur;
75 }
76
77 static inline gboolean
_extract_key_from_positions(KVScanner * self,const gchar * start_of_key,const gchar * end_of_key)78 _extract_key_from_positions(KVScanner *self, const gchar *start_of_key, const gchar *end_of_key)
79 {
80 gint len = end_of_key - start_of_key;
81
82 if (len >= 1)
83 {
84 g_string_assign_len(self->key, start_of_key, len);
85 return TRUE;
86 }
87 return FALSE;
88 }
89
90 static inline void
_extract_stray_word(KVScanner * self,const gchar * stray_word,gssize len)91 _extract_stray_word(KVScanner *self, const gchar *stray_word, gssize len)
92 {
93 if (len < 0)
94 len = strlen(stray_word);
95 if (self->stray_words && len > 0)
96 {
97 while (len > 0 && stray_word[len - 1] == ' ')
98 len--;
99 while (len > 0 && stray_word[0] == ' ')
100 {
101 stray_word++;
102 len--;
103 }
104 if (len > 0)
105 {
106 if (self->stray_words->len)
107 g_string_append_c(self->stray_words, ',');
108
109 str_repr_encode_append(self->stray_words, stray_word, len, ",");
110 }
111 }
112 }
113
114 static gboolean
_should_stop(KVScanner * self)115 _should_stop(KVScanner *self)
116 {
117 const gchar *input = &self->input[self->input_pos];
118 return *input == self->stop_char;
119 }
120
121 static gboolean
_extract_key(KVScanner * self)122 _extract_key(KVScanner *self)
123 {
124 const gchar *input = &self->input[self->input_pos];
125 const gchar *start_of_key, *end_of_key;
126 const gchar *separator;
127
128 separator = _locate_separator(self, input);
129 while (separator)
130 {
131 _locate_end_of_key(self, separator, &end_of_key);
132 _locate_start_of_key(self, end_of_key, &start_of_key);
133
134 if (_extract_key_from_positions(self, start_of_key, end_of_key))
135 {
136 _extract_stray_word(self, input, start_of_key - input);
137 self->input_pos = separator - self->input + 1;
138 return TRUE;
139 }
140 separator = _locate_separator(self, separator + 1);
141 }
142 _extract_stray_word(self, input, -1);
143 return FALSE;
144 }
145
146 static gboolean
_is_quoted(const gchar * input)147 _is_quoted(const gchar *input)
148 {
149 return *input == '\'' || *input == '\"';
150 }
151
152 static gboolean
_key_follows(KVScanner * self,const gchar * cur)153 _key_follows(KVScanner *self, const gchar *cur)
154 {
155 const gchar *key = cur;
156
157 while (self->is_valid_key_character(*key))
158 key++;
159
160 while (*key == ' ')
161 key++;
162 return (key != cur) && (*key == self->value_separator);
163 }
164
165 static inline void
_skip_spaces(const gchar ** input)166 _skip_spaces(const gchar **input)
167 {
168 const gchar *cur = *input;
169
170 while (*cur == ' ')
171 cur++;
172 *input = cur;
173 }
174
175 static inline gboolean
_end_of_string(const gchar * cur)176 _end_of_string(const gchar *cur)
177 {
178 return *cur == 0;
179 }
180
181 static inline gboolean
_pair_separator(KVScanner * self,const gchar * cur,const gchar ** new_cur)182 _pair_separator(KVScanner *self, const gchar *cur, const gchar **new_cur)
183 {
184 if (self->pair_separator && (strncmp(cur, self->pair_separator, self->pair_separator_len) == 0))
185 {
186 *new_cur = cur + self->pair_separator_len;
187 return TRUE;
188 }
189 return FALSE;
190 }
191
192 static inline gboolean
_pair_separator_starts_with_a_space(KVScanner * self)193 _pair_separator_starts_with_a_space(KVScanner *self)
194 {
195 return (self->pair_separator && self->pair_separator[0] == ' ');
196 }
197
198 static gboolean
_match_delimiter(const gchar * cur,const gchar ** new_cur,gpointer user_data)199 _match_delimiter(const gchar *cur, const gchar **new_cur, gpointer user_data)
200 {
201 KVScanner *self = (gpointer) user_data;
202 gboolean result = FALSE;
203
204 if (!self->value_was_quoted &&
205 *cur == ' ')
206 {
207 if (_pair_separator_starts_with_a_space(self) &&
208 _pair_separator(self, cur, new_cur))
209 {
210 result = TRUE;
211 }
212 else
213 {
214 _skip_spaces(&cur);
215
216 if (_end_of_string(cur) ||
217 _key_follows(self, cur))
218 {
219 *new_cur = cur;
220 result = TRUE;
221 }
222 else if (_pair_separator(self, cur, new_cur))
223 {
224 result = TRUE;
225 }
226 }
227 }
228 else if (*cur == ' ')
229 {
230 result = TRUE;
231 *new_cur = cur + 1;
232 }
233 else if (*cur == self->stop_char)
234 {
235 result = TRUE;
236 *new_cur = cur;
237 }
238 else
239 {
240 result = _pair_separator(self, cur, new_cur);
241 }
242 return result;
243 }
244
245 static inline void
_skip_initial_spaces(KVScanner * self)246 _skip_initial_spaces(KVScanner *self)
247 {
248 const gchar *input = &self->input[self->input_pos];
249 const gchar *end;
250
251 while (*input == ' ' && !_match_delimiter(input, &end, self))
252 input++;
253 self->input_pos = input - self->input;
254 }
255
256 static inline void
_decode_value(KVScanner * self)257 _decode_value(KVScanner *self)
258 {
259 const gchar *input = &self->input[self->input_pos];
260 const gchar *end;
261 StrReprDecodeOptions options =
262 {
263 .match_delimiter = _match_delimiter,
264 .match_delimiter_data = self,
265 .delimiter_chars = { ' ', self->pair_separator[0], self->stop_char },
266 };
267
268 self->value_was_quoted = _is_quoted(input);
269 if (str_repr_decode_with_options(self->value, input, &end, &options))
270 {
271 self->input_pos = end - self->input;
272 }
273 else
274 {
275 /* quotation error, set was_quoted to FALSE */
276 self->value_was_quoted = FALSE;
277 }
278 }
279
280 static void
_extract_optional_annotation(KVScanner * self)281 _extract_optional_annotation(KVScanner *self)
282 {
283 if (self->extract_annotation)
284 self->extract_annotation(self);
285 }
286
287 static void
_extract_value(KVScanner * self)288 _extract_value(KVScanner *self)
289 {
290 self->value_was_quoted = FALSE;
291 _skip_initial_spaces(self);
292 _decode_value(self);
293 }
294
295 static inline void
_transform_value(KVScanner * self)296 _transform_value(KVScanner *self)
297 {
298 if (self->transform_value)
299 {
300 g_string_truncate(self->decoded_value, 0);
301 if (self->transform_value(self))
302 g_string_assign_len(self->value, self->decoded_value->str, self->decoded_value->len);
303 }
304 }
305
306 gboolean
kv_scanner_scan_next(KVScanner * s)307 kv_scanner_scan_next(KVScanner *s)
308 {
309 KVScanner *self = (KVScanner *)s;
310
311 if (_should_stop(self))
312 return FALSE;
313
314 if (!_extract_key(self))
315 return FALSE;
316
317 _extract_optional_annotation(self);
318
319 _extract_value(self);
320 _transform_value(s);
321
322 return TRUE;
323 }
324
325 void
kv_scanner_deinit(KVScanner * self)326 kv_scanner_deinit(KVScanner *self)
327 {
328 }
329
330 void
kv_scanner_init(KVScanner * self,gchar value_separator,const gchar * pair_separator,gboolean extract_stray_words)331 kv_scanner_init(KVScanner *self, gchar value_separator, const gchar *pair_separator,
332 gboolean extract_stray_words)
333 {
334 memset(self, 0, sizeof(*self));
335 self->key = scratch_buffers_alloc();
336 self->value = scratch_buffers_alloc();
337 self->decoded_value = scratch_buffers_alloc();
338 if (extract_stray_words)
339 self->stray_words = scratch_buffers_alloc();
340 self->value_separator = value_separator;
341 self->pair_separator = pair_separator ? : ", ";
342 self->pair_separator_len = strlen(self->pair_separator);
343 self->is_valid_key_character = _is_valid_key_character;
344 self->stop_char = 0;
345 }
346