1 /**
2  * @file xpath.h
3  * @author Michal Vasko <mvasko@cesnet.cz>
4  * @brief YANG XPath evaluation functions header
5  *
6  * Copyright (c) 2015 CESNET, z.s.p.o.
7  *
8  * This source code is licensed under BSD 3-Clause License (the "License").
9  * You may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     https://opensource.org/licenses/BSD-3-Clause
13  */
14 
15 #ifndef _XPATH_H
16 #define _XPATH_H
17 
18 #include <stdint.h>
19 
20 #include "libyang.h"
21 #include "tree_schema.h"
22 #include "tree_data.h"
23 
24 /*
25  * XPath evaluator fully compliant with http://www.w3.org/TR/1999/REC-xpath-19991116/
26  * except the following restrictions in the grammar.
27  *
28  * PARSED GRAMMAR
29  *
30  * Full axes are not supported, abbreviated forms must be used,
31  * variables are not supported, "id()" function is not supported,
32  * and processing instruction and comment nodes are not supported,
33  * which is also reflected in the grammar. Undefined rules and
34  * constants are tokens.
35  *
36  * Modified full grammar:
37  *
38  * [1] Expr ::= OrExpr // just an alias
39  *
40  * [2] LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
41  * [3] AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
42  * [4] RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
43  * [5] Step ::= '@'? NodeTest Predicate* | '.' | '..'
44  * [6] NodeTest ::= NameTest | NodeType '(' ')'
45  * [7] Predicate ::= '[' Expr ']'
46  * [8] PrimaryExpr ::= '(' Expr ')' | Literal | Number | FunctionCall
47  * [9] FunctionCall ::= FunctionName '(' ( Expr ( ',' Expr )* )? ')'
48  * [10] PathExpr ::= LocationPath | PrimaryExpr Predicate*
49  *                 | PrimaryExpr Predicate* '/' RelativeLocationPath
50  *                 | PrimaryExpr Predicate* '//' RelativeLocationPath
51  * [11] OrExpr ::= AndExpr | OrExpr 'or' AndExpr
52  * [12] AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
53  * [13] EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr
54  *                     | EqualityExpr '!=' RelationalExpr
55  * [14] RelationalExpr ::= AdditiveExpr
56  *                       | RelationalExpr '<' AdditiveExpr
57  *                       | RelationalExpr '>' AdditiveExpr
58  *                       | RelationalExpr '<=' AdditiveExpr
59  *                       | RelationalExpr '>=' AdditiveExpr
60  * [15] AdditiveExpr ::= MultiplicativeExpr
61  *                     | AdditiveExpr '+' MultiplicativeExpr
62  *                     | AdditiveExpr '-' MultiplicativeExpr
63  * [16] MultiplicativeExpr ::= UnaryExpr
64  *                     | MultiplicativeExpr '*' UnaryExpr
65  *                     | MultiplicativeExpr 'div' UnaryExpr
66  *                     | MultiplicativeExpr 'mod' UnaryExpr
67  * [17] UnaryExpr ::= UnionExpr | '-' UnaryExpr
68  * [18] UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
69  */
70 
71 /* expression tokens allocation */
72 #define LYXP_EXPR_SIZE_START 10
73 #define LYXP_EXPR_SIZE_STEP 5
74 
75 /* XPath matches allocation */
76 #define LYXP_SET_SIZE_START 2
77 #define LYXP_SET_SIZE_STEP 2
78 
79 /* building string when casting */
80 #define LYXP_STRING_CAST_SIZE_START 64
81 #define LYXP_STRING_CAST_SIZE_STEP 16
82 
83 /* Number of node checks when it starts being worth using the string dict.
84  * Node checks involves some string comparisons. Consider two ways to compare
85  * strings: one is using the string dict, another is simply comparing the chars
86  * of the strings (like 'strcmp' does). When using the string dict, the hash is
87  * computed using the entire string and after the memory address to that is got,
88  * the pointers are compared (as both strings compared are in the dict). It is
89  * worth using the dict when a lot of string comparisons using the same string
90  * is performed, but to compute just a few, comparing the chars of the strings
91  * is faster.
92  */
93 #define LYXP_MIN_NODE_CHECKS_TO_USE_DICT 10
94 
95 /**
96  * @brief Tokens that can be in an XPath expression.
97  */
98 enum lyxp_token {
99     LYXP_TOKEN_NONE = 0,
100     LYXP_TOKEN_PAR1,          /* '(' */
101     LYXP_TOKEN_PAR2,          /* ')' */
102     LYXP_TOKEN_BRACK1,        /* '[' */
103     LYXP_TOKEN_BRACK2,        /* ']' */
104     LYXP_TOKEN_DOT,           /* '.' */
105     LYXP_TOKEN_DDOT,          /* '..' */
106     LYXP_TOKEN_AT,            /* '@' */
107     LYXP_TOKEN_COMMA,         /* ',' */
108     /* LYXP_TOKEN_DCOLON,      * '::' * axes not supported */
109     LYXP_TOKEN_NAMETEST,      /* NameTest */
110     LYXP_TOKEN_NODETYPE,      /* NodeType */
111     LYXP_TOKEN_FUNCNAME,      /* FunctionName */
112     LYXP_TOKEN_OPERATOR_LOG,  /* Operator 'and', 'or' */
113     LYXP_TOKEN_OPERATOR_COMP, /* Operator '=', '!=', '<', '<=', '>', '>=' */
114     LYXP_TOKEN_OPERATOR_MATH, /* Operator '+', '-', '*', 'div', 'mod', '-' (unary) */
115     LYXP_TOKEN_OPERATOR_UNI,  /* Operator '|' */
116     LYXP_TOKEN_OPERATOR_PATH, /* Operator '/', '//' */
117     /* LYXP_TOKEN_AXISNAME,    * AxisName * axes not supported */
118     LYXP_TOKEN_LITERAL,       /* Literal - with either single or double quote */
119     LYXP_TOKEN_NUMBER         /* Number */
120 };
121 
122 /**
123  * @brief XPath (sub)expressions that can be repeated.
124  */
125 enum lyxp_expr_type {
126     LYXP_EXPR_NONE = 0,
127     LYXP_EXPR_OR,
128     LYXP_EXPR_AND,
129     LYXP_EXPR_EQUALITY,
130     LYXP_EXPR_RELATIONAL,
131     LYXP_EXPR_ADDITIVE,
132     LYXP_EXPR_MULTIPLICATIVE,
133     LYXP_EXPR_UNARY,
134     LYXP_EXPR_UNION,
135 };
136 
137 /**
138  * @brief Structure holding a parsed XPath expression.
139  */
140 struct lyxp_expr {
141     enum lyxp_token *tokens; /* array of tokens */
142     uint16_t *expr_pos;      /* array of pointers to the expression in expr (idx of the beginning) */
143     uint16_t *tok_len;       /* array of token lengths in expr */
144     enum lyxp_expr_type **repeat; /* array of expression types that this token begins and is repeated ended with 0,
145                                      more in the comment after this declaration */
146     uint32_t used;           /* used array items */
147     uint32_t size;           /* allocated array items */
148 
149     char *expr;              /* the original XPath expression */
150 };
151 
152 /*
153  * lyxp_expr repeat
154  *
155  * This value is NULL for all the tokens that do not begin an
156  * expression which can be repeated. Otherwise it is an array
157  * of expression types that this token begins. These values
158  * are used during evaluation to know whether we need to
159  * duplicate the current context or not and to decide what
160  * the current expression is (for example, if we are only
161  * starting the parsing and the first token has no repeat,
162  * we do not parse it as an OrExpr but directly as PathExpr).
163  * Examples:
164  *
165  * Expression: "/ *[key1 and key2 or key1 < key2]"
166  * Tokens: '/',  '*',  '[',  NameTest,  'and', NameTest, 'or', NameTest,        '<',  NameTest, ']'
167  * Repeat: NULL, NULL, NULL, [AndExpr,  NULL,  NULL,     NULL, [RelationalExpr, NULL, NULL,     NULL
168  *                            OrExpr,                           0],
169  *                            0],
170  *
171  * Expression: "//node[key and node2]/key | /cont"
172  * Tokens: '//',       'NameTest', '[',  'NameTest', 'and', 'NameTest', ']',  '/',  'NameTest', '|',  '/',  'NameTest'
173  * Repeat: [UnionExpr, NULL,       NULL, [AndExpr,   NULL,  NULL,       NULL, NULL, NULL,       NULL, NULL, NULL
174  *          0],                           0],
175  *
176  * Operators between expressions which this concerns:
177  *     'or', 'and', '=', '!=', '<', '>', '<=', '>=', '+', '-', '*', 'div', 'mod', '|'
178  */
179 
180 /**
181  * @brief Supported types of (partial) XPath results.
182  */
183 enum lyxp_set_type {
184     LYXP_SET_EMPTY = 0,
185     LYXP_SET_NODE_SET,
186     LYXP_SET_SNODE_SET,
187     LYXP_SET_BOOLEAN,
188     LYXP_SET_NUMBER,
189     LYXP_SET_STRING
190 };
191 
192 #ifdef LY_ENABLED_CACHE
193 
194 /**
195  * @brief Item stored in an XPath set hash table.
196  */
197 struct lyxp_set_hash_node {
198     struct lyd_node *node;
199     enum lyxp_node_type type;
200 } _PACKED;
201 
202 #endif
203 
204 /**
205  * @brief XPath set - (partial) result.
206  */
207 struct lyxp_set {
208     enum lyxp_set_type type;
209     union {
210         struct lyxp_set_node {
211             struct lyd_node *node;
212             enum lyxp_node_type type;
213             uint32_t pos;
214         } *nodes;
215         struct lyxp_set_snode {
216             struct lys_node *snode;
217             enum lyxp_node_type type;
218             /* 0 - snode was traversed, but not currently in the context,
219              * 1 - snode currently in context,
220              * 2 - snode in context and just added, so skip it for the current operation,
221              * >=3 - snode is not in context because we are in a predicate and this snode was used/will be used later */
222             uint32_t in_ctx;
223         } *snodes;
224         struct lyxp_set_attr {
225             struct lyd_attr *attr;
226             enum lyxp_node_type type;
227             uint32_t pos; /* if node_type is LYXP_SET_NODE_ATTR, it is the parent node position */
228         } *attrs;
229         char *str;
230         long double num;
231         int bool;
232     } val;
233 
234     /* this is valid only for type LYXP_SET_NODE_SET and LYXP_SET_SNODE_SET */
235     uint32_t used;
236     uint32_t size;
237 #ifdef LY_ENABLED_CACHE
238     struct hash_table *ht;
239 #endif
240     /* this is valid only for type LYXP_SET_NODE_SET */
241     uint32_t ctx_pos;
242     uint32_t ctx_size;
243 };
244 
245 /**
246  * @brief Evaluate the XPath expression \p expr on data. Be careful when using this function, the result can often
247  * be confusing without thorough understanding of XPath evaluation rules defined in RFC 6020.
248  *
249  * @param[in] expr XPath expression to evaluate. Must be in JSON format (prefixes are model names).
250  * @param[in] cur_node Current (context) data node. If the node has #LYD_VAL_INUSE flag, it is considered dummy (intended
251  * for but not restricted to evaluation with the LYXP_WHEN flag).
252  * @param[in] cur_node_type Current (context) data node type. For every standard case use #LYXP_NODE_ELEM. But there are
253  * cases when the context node \p cur_node is actually supposed to be the XML root, there is no such data node. So, in
254  * this case just pass the first top-level node into \p cur_node and use an enum value for this kind of root
255  * (#LYXP_NODE_ROOT_CONFIG if \p cur_node has config true, otherwise #LYXP_NODE_ROOT). #LYXP_NODE_TEXT and #LYXP_NODE_ATTR can also be used,
256  * but there are no use-cases in YANG.
257  * @param[in] local_mod Local module relative to the \p expr. Used only to determine the internal canonical value for identities.
258  * @param[out] set Result set. Must be valid and in the same libyang context as \p cur_node.
259  * To be safe, always either zero or cast the \p set to empty. After done using, either cast
260  * the \p set to empty (if allocated statically) or free it (if allocated dynamically) to
261  * prevent memory leaks.
262  * @param[in] options Whether to apply some evaluation restrictions.
263  * LYXP_MUST - apply must data tree access restrictions.
264  * LYXP_WHEN - apply when data tree access restrictions and consider LYD_WHEN flags in data nodes.
265  *
266  * @return EXIT_SUCCESS on success, EXIT_FAILURE on unresolved when dependency, -1 on error.
267  */
268 int lyxp_eval(const char *expr, const struct lyd_node *cur_node, enum lyxp_node_type cur_node_type,
269               const struct lys_module *local_mod, struct lyxp_set *set, int options);
270 
271 /**
272  * @brief Get all the partial XPath nodes (atoms) that are required for \p expr to be evaluated.
273  *
274  * If any LYXP_SNODE* options is set, only fatal errors are printed, otherwise they are downgraded
275  * to warnings.
276  *
277  * @param[in] expr XPath expression to be evaluated. Must be in JSON format (prefixes are model names).
278  * @param[in] cur_snode Current (context) schema node.
279  * @param[in] cur_snode_type Current (context) schema node type.
280  * @param[out] set Result set. Must be valid and in the same libyang context as \p cur_snode.
281  * To be safe, always either zero or cast the \p set to empty. After done using, either cast
282  * the \p set to empty (if allocated statically) or free it (if allocated dynamically) to
283  * prevent memory leaks.
284  * @param[in] options Whether to apply some evaluation restrictions, one flag must always be used.
285  * LYXP_SNODE - no special data tree access modifiers.
286  * LYXP_SNODE_MUST - apply must data tree access restrictions.
287  * LYXP_SNODE_WHEN - apply when data tree access restrictions.
288  * LYXP_SNODE_OUTPUT - search RPC/action output instead input
289  * @param[out] ctx_snode Actual context node for the expression (it often changes for "when" expressions).
290  *
291  * @return EXIT_SUCCESS on success, -1 on error.
292  */
293 int lyxp_atomize(const char *expr, const struct lys_node *cur_snode, enum lyxp_node_type cur_snode_type,
294                  struct lyxp_set *set, int options, const struct lys_node **ctx_snode);
295 
296 /* these are used only internally */
297 #define LYXP_SNODE 0x04
298 #define LYXP_SNODE_MUST 0x08
299 #define LYXP_SNODE_WHEN 0x10
300 #define LYXP_SNODE_OUTPUT 0x20
301 
302 #define LYXP_SNODE_ALL 0x3C
303 
304 /**
305  * @brief Works like lyxp_atomize(), but it is executed on all the when and must expressions
306  * which the node has.
307  *
308  * @param[in] node Node to examine.
309  * @param[in,out] set Resulting set of atoms merged from all the expressions.
310  * Will be cleared before use.
311  * @param[in] set_ext_dep_flags Whether to set #LYS_XPCONF_DEP or #LYS_XPSTATE_DEP for conditions that
312  * require foreign configuration or state subtree and also for the node itself, if it has any such condition.
313  *
314  * @return EXIT_SUCCESS on success, -1 on error.
315  */
316 int lyxp_node_atomize(const struct lys_node *node, struct lyxp_set *set, int set_ext_dep_flags);
317 
318 /**
319  * @brief Check syntax of all the XPath expressions of the node.
320  *
321  * @param[in] node Node to examine.
322  *
323  * @return EXIT_SUCCESS on success, -1 on error.
324  */
325 int lyxp_node_check_syntax(const struct lys_node *node);
326 
327 /**
328  * @brief Cast XPath set to another type.
329  *        Indirectly context position aware.
330  *
331  * @param[in] set Set to cast.
332  * @param[in] target Target type to cast \p set into.
333  * @param[in] cur_node Current (context) data node. Cannot be NULL.
334  * @param[in] local_mod Local expression module.
335  * @param[in] options Whether to apply some evaluation restrictions.
336  *
337  * @return EXIT_SUCCESS on success, -1 on error.
338  */
339 int lyxp_set_cast(struct lyxp_set *set, enum lyxp_set_type target, const struct lyd_node *cur_node,
340                   const struct lys_module *local_mod, int options);
341 
342 /**
343  * @brief Free contents of an XPath \p set.
344  *
345  * @param[in] set Set to free.
346  */
347 void lyxp_set_free(struct lyxp_set *set);
348 
349 /**
350  * @brief Parse an XPath expression into a structure of tokens.
351  *        Logs directly.
352  *
353  * http://www.w3.org/TR/1999/REC-xpath-19991116/ section 3.7
354  *
355  * @param[in] ctx Context for errors.
356  * @param[in] expr XPath expression to parse. It is duplicated.
357  *
358  * @return Filled expression structure or NULL on error.
359  */
360 struct lyxp_expr *lyxp_parse_expr(struct ly_ctx *ctx, const char *expr);
361 
362 /**
363  * @brief Frees a parsed XPath expression. \p expr should not be used afterwards.
364  *
365  * @param[in] expr Expression to free.
366  */
367 void lyxp_expr_free(struct lyxp_expr *expr);
368 
369 #endif /* _XPATH_H */
370