1<?php declare(strict_types = 1);
2/*
3** Zabbix
4** Copyright (C) 2001-2021 Zabbix SIA
5**
6** This program is free software; you can redistribute it and/or modify
7** it under the terms of the GNU General Public License as published by
8** the Free Software Foundation; either version 2 of the License, or
9** (at your option) any later version.
10**
11** This program is distributed in the hope that it will be useful,
12** but WITHOUT ANY WARRANTY; without even the implied warranty of
13** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14** GNU General Public License for more details.
15**
16** You should have received a copy of the GNU General Public License
17** along with this program; if not, write to the Free Software
18** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19**/
20
21
/**
 * Parser for filter expressions of the form ?[<expression>], where <expression> combines
 * "<keyword> <operator> <constant>" pairs with "and", "or", "not" and parentheses.
 */
class CFilterParser extends CParser {

	// Parser states. Each one names the token that was consumed last and thereby defines what may follow.
	private const STATE_AFTER_OPEN_BRACE = 1;
	private const STATE_AFTER_LOGICAL_OPERATOR = 2;
	private const STATE_AFTER_NOT_OPERATOR = 3;
	private const STATE_AFTER_CLOSE_BRACE = 4;
	private const STATE_AFTER_PAIR = 5;

	// Token types stored in the 'type' field of every token.
	public const TOKEN_TYPE_OPEN_BRACE = 0;
	public const TOKEN_TYPE_CLOSE_BRACE = 1;
	public const TOKEN_TYPE_OPERATOR = 2;
	public const TOKEN_TYPE_KEYWORD = 3;
	public const TOKEN_TYPE_USER_MACRO = 4;
	public const TOKEN_TYPE_LLD_MACRO = 5;
	public const TOKEN_TYPE_STRING = 6;

	/**
	 * Chars that should be treated as spaces.
	 */
	public const WHITESPACES = " \r\n\t";

	/**
	 * Tokens of the last successfully parsed expression. Each token is an array with the keys 'type', 'pos',
	 * 'match' and 'length'.
	 *
	 * @var array
	 */
	protected $tokens = [];

	/**
	 * An options array.
	 *
	 * Supported options:
	 *   'usermacros' => false  Enable user macros usage in filter expression.
	 *   'lldmacros' => false   Enable low-level discovery macros usage in filter expression.
	 *
	 * @var array
	 */
	private $options = [
		'usermacros' => false,
		'lldmacros' => false
	];

	/**
	 * @param array $options  Options overriding the defaults; keys that are not given keep their default value.
	 */
	public function __construct(array $options = []) {
		$this->options = $options + $this->options;
	}

	/**
	 * Parse a filter expression enclosed in "?[" and "]".
	 *
	 * Examples:
	 *   ?[tag = "Service:MySQL" and group = "Database servers"]
	 *
	 * On success the matched text, its length and the tokens are stored in the parser. On failure all three are
	 * reset, so the tokens of an earlier successful parse are never returned for a failed one.
	 *
	 * @param string $source  String to parse.
	 * @param int    $pos     Offset in $source at which the filter is expected to start.
	 *
	 * @return int  PARSE_FAIL if no filter starts at $pos, PARSE_SUCCESS_CONT if the filter is followed by more
	 *              characters, PARSE_SUCCESS if it ends the string.
	 */
	public function parse($source, $pos = 0) {
		$this->match = '';
		$this->length = 0;
		$this->tokens = [];

		$p = $pos;
		$tokens = [];

		if (substr($source, $p, 2) !== '?[') {
			return self::PARSE_FAIL;
		}
		$p += 2;

		// On success $p is moved behind the expression and its trailing whitespace.
		if (!self::parseExpression($source, $p, $tokens, $this->options)) {
			return self::PARSE_FAIL;
		}

		if (!isset($source[$p]) || $source[$p] !== ']') {
			return self::PARSE_FAIL;
		}
		$p++;

		$len = $p - $pos;

		$this->length = $len;
		$this->match = substr($source, $pos, $len);
		$this->tokens = $tokens;

		return isset($source[$p]) ? self::PARSE_SUCCESS_CONT : self::PARSE_SUCCESS;
	}

	/**
	 * Parses an expression.
	 *
	 * The longest prefix that forms a complete expression (all parentheses closed and ending in a pair or a closing
	 * parenthesis) is accepted. $pos and $tokens are only updated at such points, so unfinished input that follows
	 * the accepted prefix is left for the caller to reject.
	 *
	 * @param string  $source
	 * @param int     $pos      In: offset of the expression. Out: offset behind the accepted expression and the
	 *                          whitespace that follows it. Unchanged on failure.
	 * @param array   $tokens   Out: tokens of the accepted expression. Unchanged on failure.
	 * @param array   $options
	 *
	 * @return bool  Returns true if parsed successfully, false otherwise.
	 */
	private static function parseExpression(string $source, int &$pos, array &$tokens, array $options): bool {
		$logical_operator_parser = new CSetParser(['and', 'or']);
		$not_operator_parser = new CSetParser(['not']);

		$state = self::STATE_AFTER_OPEN_BRACE;
		$after_space = false;
		$level = 0;
		$p = $pos;
		$_tokens = [];

		while (isset($source[$p])) {
			$char = $source[$p];

			if (strpos(self::WHITESPACES, $char) !== false) {
				$after_space = true;
				$p++;
				continue;
			}

			switch ($state) {
				// An operand is expected: a nested group, a negation or a pair.
				case self::STATE_AFTER_OPEN_BRACE:
				case self::STATE_AFTER_LOGICAL_OPERATOR:
				case self::STATE_AFTER_NOT_OPERATOR:
					if ($char === '(') {
						$level++;
						$state = self::STATE_AFTER_OPEN_BRACE;
						$_tokens[] = self::createBraceToken(self::TOKEN_TYPE_OPEN_BRACE, $p, $char);
					}
					elseif ($state !== self::STATE_AFTER_OPEN_BRACE && !$after_space) {
						// After "and", "or" and "not" anything but "(" must be separated by whitespace.
						break 2;
					}
					elseif ($state !== self::STATE_AFTER_NOT_OPERATOR
							&& self::parseUsing($not_operator_parser, $source, $p, $_tokens,
								self::TOKEN_TYPE_OPERATOR)) {
						// "not" is not tried directly after another "not".
						$state = self::STATE_AFTER_NOT_OPERATOR;
					}
					elseif (self::parsePair($source, $p, $_tokens, $options)) {
						$state = self::STATE_AFTER_PAIR;

						if ($level === 0) {
							// All groups are closed: the expression parsed so far is complete.
							$pos = $p + 1;
							$tokens = $_tokens;
						}
					}
					else {
						break 2;
					}
					break;

				// An operand has just ended: a closing parenthesis or a logical operator is expected.
				case self::STATE_AFTER_CLOSE_BRACE:
				case self::STATE_AFTER_PAIR:
					if ($char === ')') {
						if ($level === 0) {
							// Unbalanced closing parenthesis.
							break 2;
						}

						$level--;
						$state = self::STATE_AFTER_CLOSE_BRACE;
						$_tokens[] = self::createBraceToken(self::TOKEN_TYPE_CLOSE_BRACE, $p, $char);

						if ($level === 0) {
							$pos = $p + 1;
							$tokens = $_tokens;
						}
					}
					elseif (($state === self::STATE_AFTER_CLOSE_BRACE || $after_space)
							&& self::parseUsing($logical_operator_parser, $source, $p, $_tokens,
								self::TOKEN_TYPE_OPERATOR)) {
						// Directly after ")" the operator needs no whitespace, after a pair it does.
						$state = self::STATE_AFTER_LOGICAL_OPERATOR;
					}
					else {
						break 2;
					}
					break;
			}

			$after_space = false;
			$p++;
		}

		if ($tokens) {
			// Including trailing whitespaces as part of the expression.
			$pos = self::skipWhitespaces($source, $pos);
		}

		return (bool) $tokens;
	}

	/**
	 * Parses a pair in the expression.
	 *
	 * The pair can be:
	 *  - <keyword> <operator> <constant>
	 *  - <constant> <operator> <keyword>
	 *
	 *  <operator> - =|<>
	 *  <keyword> - tag|group
	 *  <constant> - quoted string or, if enabled in options, a user macro or an LLD macro
	 *
	 * Exactly one side must be a keyword. Pairs made of two keywords or of two constants are rejected.
	 *
	 * @param string  $source
	 * @param int     $pos      In: offset of the pair. Out: offset of the last character of the pair. Unchanged on
	 *                          failure.
	 * @param array   $tokens   The tokens of the pair are appended. Unchanged on failure.
	 * @param array   $options
	 *
	 * @return bool  Returns true if parsed successfully, false otherwise.
	 */
	private static function parsePair(string $source, int &$pos, array &$tokens, array $options): bool {
		$keyword_parser = new CSetParser(['tag', 'group']);
		$binary_operator_parser = new CSetParser(['=', '<>']);

		$p = $pos;
		$_tokens = [];
		$keywords = 0;

		// Left operand.
		if (self::parseUsing($keyword_parser, $source, $p, $_tokens, self::TOKEN_TYPE_KEYWORD)) {
			$keywords++;
		}
		elseif (!self::parseConstant($source, $p, $_tokens, $options)) {
			return false;
		}

		// The parse helpers leave the cursor on the last character of their match, hence "+ 1".
		$p = self::skipWhitespaces($source, $p + 1);

		if (!self::parseUsing($binary_operator_parser, $source, $p, $_tokens, self::TOKEN_TYPE_OPERATOR)) {
			return false;
		}

		$p = self::skipWhitespaces($source, $p + 1);

		// Right operand.
		if (self::parseUsing($keyword_parser, $source, $p, $_tokens, self::TOKEN_TYPE_KEYWORD)) {
			$keywords++;
		}
		elseif (!self::parseConstant($source, $p, $_tokens, $options)) {
			return false;
		}

		if ($keywords != 1) {
			return false;
		}

		$pos = $p;
		$tokens = array_merge($tokens, $_tokens);

		return true;
	}

	/**
	 * Parses a constant in the expression.
	 *
	 * The constant can be:
	 *  - string
	 *  - user macro like {$MACRO}
	 *  - LLD macro like {#LLD}
	 *  - LLD macro with function like {{#LLD}.func()}
	 *
	 * Macros are only tried if the corresponding option ('usermacros', 'lldmacros') is enabled.
	 *
	 * @param string  $source
	 * @param int     $pos      In: offset of the constant. Out: offset of the last character of the constant.
	 *                          Unchanged on failure.
	 * @param array   $tokens   The token of the constant is appended. Unchanged on failure.
	 * @param array   $options
	 *
	 * @return bool  Returns true if parsed successfully, false otherwise.
	 */
	private static function parseConstant(string $source, int &$pos, array &$tokens, array $options): bool {
		if (self::parseString($source, $pos, $tokens)) {
			return true;
		}

		if ($options['usermacros'] && self::parseUsing(new CUserMacroParser(), $source, $pos, $tokens,
				self::TOKEN_TYPE_USER_MACRO)) {
			return true;
		}

		if ($options['lldmacros'] && self::parseUsing(new CLLDMacroParser(), $source, $pos, $tokens,
				self::TOKEN_TYPE_LLD_MACRO)) {
			return true;
		}

		if ($options['lldmacros'] && self::parseUsing(new CLLDMacroFunctionParser(), $source, $pos, $tokens,
				self::TOKEN_TYPE_LLD_MACRO)) {
			return true;
		}

		return false;
	}

	/**
	 * Parses a quoted string constant in the expression.
	 *
	 * Inside the quotes only \" and \\ are accepted as escape sequences; any other use of a backslash makes the
	 * string invalid.
	 *
	 * @param string  $source
	 * @param int     $pos      In: offset of the opening quote. Out: offset of the closing quote. Unchanged on
	 *                          failure.
	 * @param array   $tokens   The string token (quotes included in 'match') is appended. Unchanged on failure.
	 *
	 * @return bool returns true if parsed successfully, false otherwise
	 */
	private static function parseString(string $source, int &$pos, array &$tokens): bool {
		// \G anchors the match at the given offset, which avoids copying the rest of the source with substr().
		if (!preg_match('/\G"(?:[^"\\\\]|\\\\["\\\\])*"/', $source, $matches, 0, $pos)) {
			return false;
		}

		$len = strlen($matches[0]);
		$tokens[] = [
			'type' => self::TOKEN_TYPE_STRING,
			'pos' => $pos,
			'match' => $matches[0],
			'length' => $len
		];
		$pos += $len - 1;

		return true;
	}

	/**
	 * Parse the string using the given parser. If a match has been found, move the cursor to the last symbol of the
	 * matched string.
	 *
	 * @param CParser $parser
	 * @param string  $source
	 * @param int     $pos         In: offset to parse at. Out: offset of the last character of the match. Unchanged
	 *                             on failure.
	 * @param array   $tokens      A token of type $token_type is appended on success.
	 * @param int     $token_type  One of the TOKEN_TYPE_* constants.
	 *
	 * @return bool  Returns true if the parser found a match at $pos, false otherwise.
	 */
	private static function parseUsing(CParser $parser, string $source, int &$pos, array &$tokens,
			int $token_type): bool {
		if ($parser->parse($source, $pos) == CParser::PARSE_FAIL) {
			return false;
		}

		$tokens[] = [
			'type' => $token_type,
			'pos' => $pos,
			'match' => $parser->getMatch(),
			'length' => $parser->getLength()
		];
		$pos += $parser->getLength() - 1;

		return true;
	}

	/**
	 * Creates a token for a single-character opening or closing parenthesis.
	 *
	 * @param int     $type   TOKEN_TYPE_OPEN_BRACE or TOKEN_TYPE_CLOSE_BRACE.
	 * @param int     $pos    Offset of the parenthesis in the source.
	 * @param string  $match  The parenthesis character.
	 *
	 * @return array
	 */
	private static function createBraceToken(int $type, int $pos, string $match): array {
		return [
			'type' => $type,
			'pos' => $pos,
			'match' => $match,
			'length' => 1
		];
	}

	/**
	 * Returns the offset of the first non-whitespace character at or after $pos.
	 *
	 * @param string  $source
	 * @param int     $pos     Must not exceed the length of $source.
	 *
	 * @return int
	 */
	private static function skipWhitespaces(string $source, int $pos): int {
		return $pos + strspn($source, self::WHITESPACES, $pos);
	}

	/**
	 * Return the expression tokens.
	 *
	 * @return array  Tokens of the last successful parse, or an empty array if the last parse failed.
	 */
	public function getTokens(): array {
		return $this->tokens;
	}

	/**
	 * Returns tokens of the given types.
	 *
	 * @param array  $types  List of TOKEN_TYPE_* constants (integers).
	 *
	 * @return array  Matching tokens in the order in which they appear in the expression.
	 */
	public function getTokensOfTypes(array $types): array {
		$result = [];

		foreach ($this->tokens as $token) {
			// Strict comparison: type 0 (TOKEN_TYPE_OPEN_BRACE) must not match unrelated loosely-equal values.
			if (in_array($token['type'], $types, true)) {
				$result[] = $token;
			}
		}

		return $result;
	}

	/**
	 * Unquoting quoted string $value.
	 *
	 * The first and the last character (the quotes) are removed, then \" is replaced by " and \\ by \.
	 *
	 * @param string $value  Quoted string, e.g. the 'match' of a TOKEN_TYPE_STRING token.
	 *
	 * @return string
	 */
	public static function unquoteString(string $value): string {
		return strtr(substr($value, 1, -1), ['\\"' => '"', '\\\\' => '\\']);
	}

	/**
	 * Quoting string $value.
	 *
	 * Backslashes and double quotes are escaped with a backslash and the result is enclosed in double quotes.
	 *
	 * @param string $value
	 *
	 * @return string
	 */
	public static function quoteString(string $value): string {
		return '"'.strtr($value, ['\\' => '\\\\', '"' => '\\"']).'"';
	}
}
531