1"""Description of YANG & YIN syntax."""
2
3import re
4import shlex
5import sys
6import datetime
7
8### Regular expressions - constraints on arguments
9
10# keywords and identifiers
# A YANG identifier: a letter or '_' followed by letters, digits, '_', '-'
# or '.'.  A prefix has the same shape as an identifier.
identifier = r"[_A-Za-z][._\-A-Za-z0-9]*"
prefix = identifier
# Groups: 1 = "prefix:" (optional), 2 = prefix, 3 = identifier
keyword = '((' + prefix + '):)?(' + identifier + ')'
# Three alternatives: a terminated /* ... */ comment (may span lines),
# a // comment, or a "/*" followed by the rest of its line.
# Raw string: the pattern contains regex escapes (\*, \s) that are not
# valid Python string escapes; \r and \n are left for `re` to interpret.
comment = r'(/\*([^*]|[\r\n\s]|(\*+([^*/]|[\r\n\s])))*\*+/)|(//.*)|(/\*.*)'

# no group version of keyword (only the prefix is captured)
keyword_ng = '(?:(' + prefix + '):)?(?:' + identifier + ')'

re_keyword = re.compile(keyword)
re_keyword_start = re.compile('^' + keyword)
re_comment = re.compile(comment)
22
pos_integer = r"[1-9][0-9]*"
nonneg_integer = r"(0|([1-9][0-9]*))"
integer_ = r"[+-]?" + nonneg_integer
decimal_ = integer_ + r"(\.[0-9]+)?"
# One length part: "N", "min" or "max", optionally followed by ".." and an
# upper bound.  Raw strings are required here: \s, \. and \| are regex
# escapes, not Python string escapes.
length_str = (r'((min|max|[0-9]+)\s*'
              r'(\.\.\s*'
              r'(min|max|[0-9]+)\s*)?)')
# A full length expression: parts separated by '|'
length_expr = length_str + r'(\|\s*' + length_str + ')*'
re_length_part = re.compile(length_str)
# One range part: like a length part, but bounds may be signed decimals,
# the lower bound may be "-INF" and the upper bound may be "INF".
range_str = (r'((\-INF|min|max|((\+|\-)?[0-9]+(\.[0-9]+)?))\s*'
             r'(\.\.\s*'
             r'(INF|min|max|(\+|\-)?[0-9]+(\.[0-9]+)?)\s*)?)')
# A full range expression: parts separated by '|'
range_expr = range_str + r'(\|\s*' + range_str + ')*'
re_range_part = re.compile(range_str)
37
# Matches a string that consists of exactly one identifier.
re_identifier = re.compile("^%s$" % identifier)
39
40
41# path and unique
# A node identifier is an optionally prefixed identifier.
node_id = keyword_ng
# Right-hand side of a path predicate: current()/../[node/]*node
rel_path_keyexpr = r"(\.\./)+(" + node_id + "/)*" + node_id
path_key_expr = r"(current\s*\(\s*\)/" + rel_path_keyexpr + ")"
path_equality_expr = node_id + r"\s*=\s*" + path_key_expr
# A predicate: [ node = current()/../... ]
path_predicate = r"\s*\[\s*" + path_equality_expr + r"\s*\]\s*"
absolute_path_arg = "(?:/" + node_id + "(" + path_predicate + ")*)+"
descendant_path_arg = node_id + "(" + path_predicate + ")*" + \
                      "(?:" + absolute_path_arg + ")?"
relative_path_arg = r"(\.\./)*" + descendant_path_arg
# Every piece holding regex escapes (\s, \), \.) is a raw string; these
# are not valid Python string escapes.
deref_path_arg = r"deref\s*\(\s*(?:" + relative_path_arg + \
                 r")\s*\)/\.\./" + relative_path_arg
path_arg = "(" + absolute_path_arg + "|" + relative_path_arg + "|" + \
           deref_path_arg + ")"
absolute_schema_nodeid = "(/" + node_id + ")+"
descendant_schema_nodeid = node_id + "(" + absolute_schema_nodeid + ")?"
schema_nodeid = "("+absolute_schema_nodeid+"|"+descendant_schema_nodeid+")"
# Whitespace-separated lists of descendant node ids / node ids
unique_arg = descendant_schema_nodeid + r"(\s+" + descendant_schema_nodeid + ")*"
key_arg = node_id + r"(\s+" + node_id + ")*"
# One "/keyword" step of a schema node id (same groups as `keyword`)
re_schema_node_id_part = re.compile('/' + keyword)
61
62# URI - RFC 3986, Appendix A
# HEXDIG: RFC 3986 (section 2.1) treats upper- and lower-case hexadecimal
# digits as equivalent, so both are accepted in percent-encodings and in
# IPv6 / IPvFuture literals.
_hexdig = "[0-9A-Fa-f]"
scheme = "[A-Za-z][-+.A-Za-z0-9]*"
unreserved = "[-._~A-Za-z0-9]"
pct_encoded = "%" + _hexdig + "{2}"
sub_delims = "[!$&'()*+,;=]"
pchar = ("(" + unreserved + "|" + pct_encoded + "|" +
         sub_delims + "|[:@])")
segment = pchar + "*"
segment_nz = pchar + "+"
userinfo = ("(" + unreserved + "|" + pct_encoded + "|" +
            sub_delims + "|:)*")
dec_octet = "([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])"
ipv4address = "(" + dec_octet + r"\.){3}" + dec_octet
h16 = _hexdig + "{1,4}"
ls32 = "(" + h16 + ":" + h16 + "|" + ipv4address + ")"
# One alternative per production of IPv6address in RFC 3986, Appendix A
ipv6address = (
    "((" + h16 + ":){6}" + ls32 +
    "|::(" + h16 + ":){5}" + ls32 +
    "|(" + h16 + ")?::(" + h16 + ":){4}" + ls32 +
    "|((" + h16 + ":)?" + h16 + ")?::(" + h16 + ":){3}" + ls32 +
    "|((" + h16 + ":){,2}" + h16 + ")?::(" + h16 + ":){2}" + ls32 +
    "|((" + h16 + ":){,3}" + h16 + ")?::" + h16 + ":" + ls32 +
    "|((" + h16 + ":){,4}" + h16 + ")?::" + ls32 +
    "|((" + h16 + ":){,5}" + h16 + ")?::" + h16 +
    "|((" + h16 + ":){,6}" + h16 + ")?::)")
ipvfuture = "[vV]" + _hexdig + r"+\.(" + unreserved + "|" + sub_delims + "|:)+"
ip_literal = r"\[(" + ipv6address + "|" + ipvfuture + r")\]"
reg_name = "(" + unreserved + "|" + pct_encoded + "|" + sub_delims + ")*"
host = "(" + ip_literal + "|" + ipv4address + "|" + reg_name + ")"
port = "[0-9]*"
authority = "(" + userinfo + "@)?" + host + "(:" + port + ")?"
path_abempty = "(/" + segment + ")*"
path_absolute = "/(" + segment_nz + "(/" + segment + ")*)?"
path_rootless = segment_nz + "(/" + segment + ")*"
path_empty = pchar + "{0}"
hier_part = ("(" + "//" + authority + path_abempty + "|" +
             path_absolute + "|" + path_rootless + "|" + path_empty + ")")
query = "(" + pchar + "|[/?])*"
fragment = query
uri = (scheme + ":" + hier_part + r"(\?" + query + ")?" +
       "(#" + fragment + ")?")
103
104# Date
# Year 1000-2999, month 01-12, day 01-31 (calendar validity is not checked
# by the pattern itself)
date = r"[1-2][0-9]{3}-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])"

# Compiled, anchored patterns; each must match the whole argument string.
re_nonneg_integer = re.compile("^" + nonneg_integer + "$")
re_integer = re.compile("^" + integer_ + "$")
re_decimal = re.compile("^" + decimal_ + "$")
re_uri = re.compile("^" + uri + "$")
re_boolean = re.compile("^(true|false)$")
# Raw string: "\." is a regex escape, not a valid Python string escape.
re_version = re.compile(r"^(1|(1\.1))$")
re_date = re.compile("^" + date +"$")
re_status = re.compile("^(current|obsolete|deprecated)$")
re_key = re.compile("^" + key_arg + "$")
re_length = re.compile("^" + length_expr + "$")
re_range = re.compile("^" + range_expr + "$")
re_pos_integer = re.compile(r"^(unbounded|" + pos_integer + r")$")
re_ordered_by = re.compile(r"^(user|system)$")
re_modifier = re.compile(r"^(invert-match)$")
re_node_id = re.compile("^" + node_id + "$")
re_path = re.compile("^" + path_arg + "$")
re_absolute_path = re.compile("^" + absolute_path_arg + "$")
re_unique = re.compile("^" + unique_arg + "$")
re_schema_nodeid = re.compile("^" + schema_nodeid + "$")
re_absolute_schema_nodeid = re.compile("^" + absolute_schema_nodeid + "$")
re_descendant_schema_nodeid = re.compile("^" + descendant_schema_nodeid + "$")
re_deviate = re.compile("^(add|delete|replace|not-supported)$")
129
130# Not part of YANG syntax per se but useful for pyang in several places
# Splits a file name into (module name, revision or None, extension).
re_filename = re.compile("".join((
    r"^([^@]*?)",          # putative module name
    r"(?:@([^.]*?))?",     # putative revision
    r"(?:\.yang|\.yin)*",  # foo@bar.yang.yin.yang.yin ?
    r"\.(yang|yin)$",      # actual final extension
)))
135
def _searcher(pattern):
    """Build a predicate that is True when `pattern` is found in its argument."""
    def check(s):
        return pattern.search(s) is not None
    return check

arg_type_map = {
    "identifier": _searcher(re_identifier),
    "non-negative-integer": _searcher(re_nonneg_integer),
    "integer": _searcher(re_integer),
    "uri": _searcher(re_uri),
    "boolean": _searcher(re_boolean),
    "version": _searcher(re_version),
    # The chk_* functions are defined further down in this module, so they
    # are looked up lazily at call time.
    "date": lambda s: chk_date_arg(s),
    "status-arg": _searcher(re_status),
    "key-arg": _searcher(re_key),
    "length-arg": _searcher(re_length),
    "range-arg": _searcher(re_range),
    "max-value": _searcher(re_pos_integer),
    "ordered-by-arg": _searcher(re_ordered_by),
    "modifier-arg": _searcher(re_modifier),
    "identifier-ref": _searcher(re_node_id),
    "path-arg": _searcher(re_path),
    "absolute-path-arg": _searcher(re_absolute_path),
    "unique-arg": _searcher(re_unique),
    "absolute-schema-nodeid": _searcher(re_absolute_schema_nodeid),
    "descendant-schema-nodeid": _searcher(re_descendant_schema_nodeid),
    "schema-nodeid": _searcher(re_schema_nodeid),
    "enum-arg": lambda s: chk_enum_arg(s),
    "fraction-digits-arg": lambda s: chk_fraction_digits_arg(s),
    "if-feature-expr": lambda s: chk_if_feature_expr(s),
    "deviate-arg": _searcher(re_deviate),
    "_comment": _searcher(re_comment),
    }
"""Argument type definitions.

Maps the name of each argument type to a one-argument predicate that
takes the argument string and returns True or False.  Plain strings have
no entry here; they are checked directly by the parser.
"""
172
def chk_date_arg(s):
    """Checks if the string `s` is a valid date string.

    The string must match the module-level `re_date` pattern
    (YYYY-MM-DD); its components are then passed to `datetime.date`
    so that impossible calendar dates (e.g. 2021-02-30) are rejected too.

    Return True or False."""
    if re_date.search(s) is None:
        return False
    year, month, day = s.split('-')
    try:
        # Constructed only for validation; the date object is discarded.
        datetime.date(int(year), int(month), int(day))
    except ValueError:
        return False
    return True
185
def chk_enum_arg(s):
    """Tell whether `s` is acceptable as an enum name.

    The name must be non-empty and must neither start nor end with a
    whitespace character.

    Return True or False."""
    if len(s) == 0:
        return False
    return not (s[0].isspace() or s[-1].isspace())
195
# The only spellings the YANG grammar allows for fraction-digits: the
# plain decimal numerals "1" .. "18".
_FRACTION_DIGITS_ARGS = frozenset(str(n) for n in range(1, 19))

def chk_fraction_digits_arg(s):
    """Checks if the string `s` is a valid fraction-digits argument.

    Only the exact numerals "1" through "18" are accepted.  Going through
    int() would also let through spellings that int() tolerates but the
    grammar does not, such as " 5", "+5", "05" or "1_0".

    Return True or False."""
    return s in _FRACTION_DIGITS_ARGS
208
def chk_if_feature_expr(s):
    """Checks if the string `s` parses as an if-feature expression.

    Return True or False."""
    parsed = parse_if_feature_expr(s)
    return parsed is not None
211
212# if-feature-expr     = "(" if-feature-expr ")" /
213#                      if-feature-expr sep boolean-operator sep
214#                        if-feature-expr /
215#                      not-keyword sep if-feature-expr /
216#                      identifier-ref-arg
217#
218# Rewrite to:
219#  x = y ("and"/"or" y)*
220#  y = "not" x /
#      "(" x ")" /
222#      identifier
223#
224# Expr :: ('not', Expr, None)
225#         | ('and'/'or', Expr, Expr)
226#         | Identifier
def parse_if_feature_expr(s):
    """Parse the if-feature expression `s` into an expression tree.

    The string is tokenized with shlex and parsed with an operator
    precedence scheme ('not' binds tighter than 'and', which binds
    tighter than 'or').

    Returns the parsed expression, or None if `s` is not a valid
    expression.  An expression is either an identifier string, a tuple
    ('not', expr, None), or a tuple ('and'/'or', expr, expr).  Binary
    nodes are built by popping the operand stack, so the first operand
    in the tuple is the right-hand one from the source text.
    """
    try:
        # Encoding to ascii works for valid if-feature-exprs, since all
        # parts are YANG identifiers (or the boolean keywords).
        # The reason for this fix is that in Python < 2.7.3, shlex would return
        # erroneous tokens if a unicode string was passed.
        # Also, shlex uses cStringIO internally which doesn't handle unicode
        # characters outside the ascii range anyway.
        if sys.version_info[0] < 3:
            sx = shlex.shlex(s.encode("ascii"))
        else:
            sx = shlex.shlex(s)
    except UnicodeError:
        # Covers both the encode error and, for a Python 2 byte string
        # holding non-ascii bytes, the implicit decode error.
        return None
    # Identifiers may contain '-' and '.', and may carry a "prefix:" part,
    # so none of these characters may split a token.
    sx.wordchars += ":-."
    # '#' has no special meaning here; with the shlex default everything
    # after it would be silently dropped as a comment.
    sx.commenters = ''
    # `operators` is a stack where None marks the bottom of the current
    # (sub)expression; `operands` is the stack of parsed expressions.
    operators = [None]
    operands = []
    precedence = {'not':3, 'and':2, 'or':1, None:0}

    def x():
        # x = y ("and"/"or" y)*
        y()
        tok = sx.get_token()
        while tok in ('and', 'or'):
            push_operator(tok)
            y()
            tok = sx.get_token()
        # Not ours: hand the token back for the caller to inspect.
        sx.push_token(tok)
        # Reduce everything pending down to the nearest None marker.
        while operators[-1] is not None:
            pop_operator()

    def y():
        # y = "not" x / "(" x ")" / identifier
        tok = sx.get_token()
        if tok == 'not':
            push_operator(tok)
            x()
        elif tok == '(':
            operators.append(None)
            x()
            tok = sx.get_token()
            if tok != ')':
                raise ValueError
            # Drop the None marker pushed for this parenthesis.
            operators.pop()
        elif is_identifier(tok):
            operands.append(tok)
        else:
            raise ValueError

    def push_operator(op):
        # Reduce operators of higher precedence before stacking `op`.
        while op_gt(operators[-1], op):
            pop_operator()
        operators.append(op)

    def pop_operator():
        op = operators.pop()
        if op == 'not':
            operands.append((op, operands.pop(), None))
        else:
            operands.append((op, operands.pop(), operands.pop()))

    def op_gt(op1, op2):
        return precedence[op1] > precedence[op2]

    def is_identifier(tok):
        return re_node_id.search(tok) is not None

    try:
        x()
        # Anything left over after a complete expression is an error.
        if sx.get_token() != '':
            raise ValueError
        return operands[-1]
    except ValueError:
        return None
299
def add_arg_type(arg_type, regexp):
    """Register `regexp` in `arg_type_map` under the name `arg_type`.

    Used by extension plugins to register their own argument types.
    An existing entry with the same name is replaced.

    NOTE(review): the entries built into `arg_type_map` are one-argument
    predicates rather than regexp objects -- confirm what callers are
    expected to pass here."""
    arg_type_map.update({arg_type: regexp})
304
305    # keyword             argument-name  yin-element
yin_map = \
    {'action':           ('name',        False),
     'anydata':          ('name',        False),
     'anyxml':           ('name',        False),
     'argument':         ('name',        False),
     'augment':          ('target-node', False),
     'base':             ('name',        False),
     'belongs-to':       ('module',      False),
     'bit':              ('name',        False),
     'case':             ('name',        False),
     'choice':           ('name',        False),
     'config':           ('value',       False),
     'contact':          ('text',        True),
     'container':        ('name',        False),
     'default':          ('value',       False),
     'description':      ('text',        True),
     'deviate':          ('value',       False),
     'deviation':        ('target-node', False),
     'enum':             ('name',        False),
     'error-app-tag':    ('value',       False),
     'error-message':    ('value',       True),
     'extension':        ('name',        False),
     'feature':          ('name',        False),
     'fraction-digits':  ('value',       False),
     'grouping':         ('name',        False),
     'identity':         ('name',        False),
     'if-feature':       ('name',        False),
     'import':           ('module',      False),
     'include':          ('module',      False),
     'input':            (None,          None),
     'key':              ('value',       False),
     'leaf':             ('name',        False),
     'leaf-list':        ('name',        False),
     'length':           ('value',       False),
     'list':             ('name',        False),
     'mandatory':        ('value',       False),
     'max-elements':     ('value',       False),
     'min-elements':     ('value',       False),
     'modifier':         ('value',       False),
     'module':           ('name',        False),
     'must':             ('condition',   False),
     'namespace':        ('uri',         False),
     'notification':     ('name',        False),
     'ordered-by':       ('value',       False),
     'organization':     ('text',        True),
     'output':           (None,          None),
     'path':             ('value',       False),
     'pattern':          ('value',       False),
     'position':         ('value',       False),
     'presence':         ('value',       False),
     'prefix':           ('value',       False),
     'range':            ('value',       False),
     'reference':        ('text',        True),
     'refine':           ('target-node', False),
     'require-instance': ('value',       False),
     'revision':         ('date',        False),
     'revision-date':    ('date',        False),
     'rpc':              ('name',        False),
     'status':           ('value',       False),
     'submodule':        ('name',        False),
     'type':             ('name',        False),
     'typedef':          ('name',        False),
     'unique':           ('tag',         False),
     'units':            ('name',        False),
     'uses':             ('name',        False),
     'value':            ('value',       False),
     'when':             ('condition',   False),
     'yang-version':     ('value',       False),
     'yin-element':      ('value',       False),
     }
"""Mapping of statements to the YIN representation of their arguments.

The values are pairs (argument-name, yin-element).  The first component
is the name of the attribute or subelement carrying the argument; the
second component specifies whether the argument is stored in a
subelement (True) or not (False).  The 'input' and 'output' statements
map to (None, None).  See YANG specification.
"""
383