1 /**
2  * @file yang.l
3  * @author Pavol Vican
4  * @brief YANG parser for libyang (flex grammar)
5  *
6  * Copyright (c) 2015 CESNET, z.s.p.o.
7  *
8  * This source code is licensed under BSD 3-Clause License (the "License").
9  * You may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     https://opensource.org/licenses/BSD-3-Clause
13  */
14 
15 %option noyywrap nounput noinput reentrant bison-bridge bison-locations
16 
17 %{
18 #include "parser_yang.h"
19 #include "parser_yang_bis.h"
20 
/* Executed by flex before the action of every matched rule: the token starts
 * right after the previous one and spans the yyleng matched bytes, so columns
 * are counted in bytes. Rules that consume a newline reset last_column to 0
 * themselves. */
21 #define YY_USER_ACTION yylloc->first_column = yylloc->last_column +1;\
22                        yylloc->last_column = yylloc->first_column + yyleng - 1;
23 %}
24 
/* Building blocks for well-formed UTF-8: U is a continuation byte; U2, U3 and
 * U4 match complete two-, three- and four-byte sequences. The lead and second
 * byte ranges leave out overlong encodings, the surrogate range U+D800-DFFF
 * and everything above U+10FFFF. */
25 U       [\x80-\xbf]
26 U2     [\xc2-\xdf]{U}
27 U3     [\xe0][\xa0-\xbf]{U}|[\xe1-\xec]{U}{U}|[\xed][\x80-\x9f]{U}|[\xee-\xef]{U}{U}
28 U4     [\xf0][\x90-\xbf]{U}{U}|[\xf1-\xf3]{U}{U}{U}|[\xf4][\x80-\x8f]{U}{U}
29 
/* Exclusive start conditions:
 *   COMMENT1     - inside a block comment
 *   COMMENT2     - inside a line comment
 *   PATH         - entered after the augment, deviation, path, refine and
 *                  unique keywords; left again on ';' or '{'
 *   DOUBLEQUOTES - inside a double-quoted string
 *   SINGLEQUOTES - inside a single-quoted string */
30 %x COMMENT1
31 %x COMMENT2
32 %x PATH
33 %x DOUBLEQUOTES
34 %x SINGLEQUOTES
35 
36 %%
 /* Variables local to the scanning routine. This indented code precedes the
  * first rule, so flex puts it at the top of the routine and the initializers
  * run on every call.
  *   tab_count - tabs and \t escapes seen in the current double-quoted string
  *   size_str  - number of bytes collected between the quotes so far
  *   column    - first_column of the opening quote
  *   str       - pointer to the opening quote inside yytext
  *   _state    - start condition to restore after a comment or string
  *   i         - byte index used when walking an unquoted string
  *   value     - code point decoded from a multi-byte UTF-8 sequence */
37  int tab_count = 0;
38  int size_str = 0;
39  int column = 0;
40  char *str = NULL;
41  int _state = YY_START;
42  int i;
43  uint32_t value;
44 
45 
46 "/*" {_state = YY_START; BEGIN COMMENT1; }
47 <COMMENT1,COMMENT2>[\x00-\x09\x0B-\x7f]|{U2}|{U3}|{U4}
48 <COMMENT1>\n {yylloc->last_column = 0;}
49 <COMMENT1>"*/" {BEGIN _state; }
50 "//" {_state = YY_START; BEGIN COMMENT2;}
51 <COMMENT2>\n {BEGIN _state; yylloc->last_column = 0; return EOL; }
 /* YANG statement keywords. The rules carry no start condition and all other
  * conditions are exclusive, so they apply in INITIAL only. When several rules
  * match, the longest match wins (leaf-list over leaf) and on a tie the rule
  * listed first wins, which is why the keywords precede the identifier rules.
  * augment, deviation, path, refine and unique switch to PATH, so what follows
  * them is tokenised by the PATH rules.
  * submodule returns SUBMODULE_EXT_KEYWORD when yylloc->last_line is non-zero
  * (presumably a mode flag set by the caller - TODO confirm against the
  * parser). */
52 "anyxml" { return ANYXML_KEYWORD; }
53 "argument" { return ARGUMENT_KEYWORD; }
54 "augment" { BEGIN PATH; return AUGMENT_KEYWORD; }
55 "base" { return BASE_KEYWORD; }
56 "belongs-to" { return BELONGS_TO_KEYWORD; }
57 "bit" { return BIT_KEYWORD; }
58 "case" { return CASE_KEYWORD; }
59 "choice" { return CHOICE_KEYWORD; }
60 "config" { return CONFIG_KEYWORD; }
61 "contact" { return CONTACT_KEYWORD; }
62 "container" { return CONTAINER_KEYWORD; }
63 "default" { return DEFAULT_KEYWORD; }
64 "description" { return DESCRIPTION_KEYWORD; }
65 "enum" { return ENUM_KEYWORD; }
66 "error-app-tag" { return ERROR_APP_TAG_KEYWORD; }
67 "error-message" { return ERROR_MESSAGE_KEYWORD; }
68 "extension" { return EXTENSION_KEYWORD; }
69 "deviation" { BEGIN PATH; return DEVIATION_KEYWORD; }
70 "deviate" { return DEVIATE_KEYWORD; }
71 "feature" { return FEATURE_KEYWORD; }
72 "fraction-digits" { return FRACTION_DIGITS_KEYWORD; }
73 "grouping" { return GROUPING_KEYWORD; }
74 "identity" { return IDENTITY_KEYWORD; }
75 "if-feature" { return IF_FEATURE_KEYWORD; }
76 "import" { return IMPORT_KEYWORD; }
77 "include" { return INCLUDE_KEYWORD; }
78 "input" { return INPUT_KEYWORD; }
79 "key" { return KEY_KEYWORD; }
80 "leaf" { return LEAF_KEYWORD; }
81 "leaf-list" { return LEAF_LIST_KEYWORD; }
82 "length" { return LENGTH_KEYWORD; }
83 "list" { return LIST_KEYWORD; }
84 "mandatory" { return MANDATORY_KEYWORD; }
85 "max-elements" { return MAX_ELEMENTS_KEYWORD; }
86 "min-elements" { return MIN_ELEMENTS_KEYWORD; }
87 "module" { return MODULE_KEYWORD; }
88 "must" { return MUST_KEYWORD; }
89 "namespace" { return NAMESPACE_KEYWORD; }
90 "notification" { return NOTIFICATION_KEYWORD; }
91 "ordered-by" { return ORDERED_BY_KEYWORD; }
92 "organization" { return ORGANIZATION_KEYWORD; }
93 "output" { return OUTPUT_KEYWORD; }
94 "path" { BEGIN PATH; return PATH_KEYWORD; }
95 "pattern" { return PATTERN_KEYWORD; }
96 "position" { return POSITION_KEYWORD; }
97 "prefix" { return PREFIX_KEYWORD; }
98 "presence" { return PRESENCE_KEYWORD; }
99 "range" { return RANGE_KEYWORD; }
100 "reference" { return REFERENCE_KEYWORD; }
101 "refine" { BEGIN PATH; return REFINE_KEYWORD; }
102 "require-instance" { return REQUIRE_INSTANCE_KEYWORD; }
103 "revision" { return REVISION_KEYWORD; }
104 "revision-date" { return REVISION_DATE_KEYWORD; }
105 "rpc" { return RPC_KEYWORD; }
106 "status" { return STATUS_KEYWORD; }
107 "submodule" { return (yylloc->last_line) ? SUBMODULE_EXT_KEYWORD : SUBMODULE_KEYWORD; }
108 "type" { return TYPE_KEYWORD; }
109 "typedef" { return TYPEDEF_KEYWORD; }
110 "unique" { BEGIN PATH; return UNIQUE_KEYWORD; }
111 "units" { return UNITS_KEYWORD; }
112 "uses" { return USES_KEYWORD; }
113 "value" { return VALUE_KEYWORD; }
114 "when" { return WHEN_KEYWORD; }
115 "yang-version" { return YANG_VERSION_KEYWORD; }
116 "yin-element" { return YIN_ELEMENT_KEYWORD; }
 /* Remaining fixed-word tokens, recognised in INITIAL. current is the only
  * one that is also recognised in the PATH start condition. */
117 "add" { return ADD_KEYWORD; }
118 "current" { return CURRENT_KEYWORD; }
119 <PATH>"current" { return CURRENT_KEYWORD; }
120 "delete" { return DELETE_KEYWORD; }
121 "deprecated" { return DEPRECATED_KEYWORD; }
122 "false" { return FALSE_KEYWORD; }
123 "not-supported" { return NOT_SUPPORTED_KEYWORD; }
124 "obsolete" { return OBSOLETE_KEYWORD; }
125 "replace" { return REPLACE_KEYWORD; }
126 "system" { return SYSTEM_KEYWORD; }
127 "true" { return TRUE_KEYWORD; }
128 "unbounded" { return UNBOUNDED_KEYWORD; }
129 "user" { return USER_KEYWORD; }
130 "action" {return ACTION_KEYWORD; }
131 "modifier" {return MODIFIER_KEYWORD; }
132 "anydata" {return ANYDATA_KEYWORD; }
 /* Single-character tokens in INITIAL; the character itself is the token
  * code. The four patterns share one action through the '|' continuation. */
133 "{" |
134 "}" |
135 ";" |
136 "+"  { return yytext[0];}  /* unresolved: string concatenation with '+' */
 /* Double-quoted strings. The opening quote saves the current start
  * condition, a pointer to the quote (str) and its column. The content rules
  * only add the matched bytes to size_str, so the whole string is handed over
  * at once by the closing-quote rule. tab_count counts literal tabs and \t
  * escapes. A backslash followed by another character is counted as two (or
  * more) bytes and is not decoded here.
  * NOTE(review): str points into yytext and is used after further matches;
  * this assumes the scanner buffer is not moved meanwhile - confirm how the
  * input is supplied.
  * NOTE(review): a backslash followed by a newline is consumed by the escape
  * rule, which does not reset last_column. */
137 "\"" {_state = YY_START; BEGIN DOUBLEQUOTES; str = yytext; column = yylloc->first_column; }
138 <DOUBLEQUOTES>\t|\\t { tab_count++; size_str += yyleng; }
139 <DOUBLEQUOTES>[\x0D\x20-\x21\x23-\x5b\x5d-\x7f]|{U2} { size_str += yyleng; }
140 <DOUBLEQUOTES>\\([\x09\x0A\x0D\x20-\x7f]|{U2}|{U3}|{U4}) { size_str += yyleng; }
141 <DOUBLEQUOTES,SINGLEQUOTES>\n {yylloc->last_column = 0; size_str++; }
142 <DOUBLEQUOTES,SINGLEQUOTES>{U3} {
143     value = ((uint32_t)(yytext[0] & 0xf) << 12) | ((uint32_t)(yytext[1] & 0x3f) << 6) | (yytext[2] & 0x3f);
144     if (((value & 0xf800) == 0xd800) ||
145         (value >= 0xfdd0 && value <= 0xfdef) ||
146         (value & 0xffe) == 0xffe) {
147         /* exclude surrogate blocks %xD800-DFFF */
148         /* exclude noncharacters %xFDD0-FDEF */
149         /* exclude noncharacters %xFFFE-FFFF */
150         LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
151         yylloc->first_line = -1;
152         return ERROR;
153     }
154     size_str += 3;
155 }
156 <DOUBLEQUOTES,SINGLEQUOTES>{U4} {
157     value = ((uint32_t)(yytext[0] & 0x7) << 18) | ((uint32_t)(yytext[1] & 0x3f) << 12) | ((uint32_t)(yytext[2] & 0x3f) << 6) | (yytext[3] & 0x3f);
158     if ((value & 0xffe) == 0xffe) {
159         /* exclude noncharacters %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
160          * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
161          * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
162         LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
163         yylloc->first_line = -1;
164         return ERROR;
165     }
166     size_str += 4;
167 }
 /* End of a double-quoted string. The closing quote restores the saved start
  * condition and returns one STRING token for the whole literal: yytext is
  * pointed back at the opening quote, yyleng covers the content plus both
  * quotes, first_column is the column of the opening quote and yylval->i
  * carries the tab count. End of input inside the string is logged and
  * returned as ERROR with first_line set to -1. */
168 <DOUBLEQUOTES>\" { yylval->i = tab_count;
169                    BEGIN _state;
170                    yytext = str;
171                    yyleng = size_str + 2;
172                    yylloc->first_column = column;
173                    return STRING;
174                  }
175 <DOUBLEQUOTES><<EOF>> { LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Unterminated double-quoted string.");
176                         yylloc->first_line = -1;
177                         return ERROR;
178                       }
 /* Single-quoted strings, which may start in INITIAL or PATH. The opening
  * quote saves the start condition, a pointer to the quote and its column.
  * Tabs, carriage returns, ASCII from space to 0x7f other than the quote
  * itself, and two-byte UTF-8 sequences are only counted in size_str; a
  * backslash has no special meaning here. Newlines and three- and four-byte
  * sequences are handled by the rules shared with DOUBLEQUOTES. The closing
  * quote restores the start condition and returns one STRING token spanning
  * both quotes; yylval is not set. End of input inside the string is logged
  * and returned as ERROR with first_line set to -1. */
179 <INITIAL,PATH>"'" { _state = YY_START;
180                     BEGIN SINGLEQUOTES;
181                     str = yytext;
182                     column = yylloc->first_column;
183                   }
184 <SINGLEQUOTES>[\x09\x0D\x20-\x26\x28-\x7f]|{U2} { size_str += yyleng; }
185 <SINGLEQUOTES>"'" { BEGIN _state;
186                     yytext = str;
187                     yyleng = size_str + 2;
188                     yylloc->first_column = column;
189                     return STRING;
190                   }
191 <SINGLEQUOTES><<EOF>> { LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Unterminated single-quoted string.");
192                         yylloc->first_line = -1;
193                         return ERROR;
194                       }
 /* Numbers in INITIAL: a lone 0 is ZERO, a minus sign followed by digits
  * without a leading zero is INTEGER, and digits without a leading zero are
  * NON_NEGATIVE_INTEGER. */
195 "0" {return ZERO;}
196 "-"[1-9][0-9]* {return INTEGER;}
197 [1-9][0-9]* {return NON_NEGATIVE_INTEGER;}
 /* Tokens of the PATH start condition. The listed punctuation characters are
  * returned as themselves and two dots as DOUBLEDOT. A line end (LF or CRLF)
  * resets the column and is returned as EOL; blanks and tabs are returned as
  * WHITESPACE. A ';' or '{' is returned as itself and switches back to
  * INITIAL. A double quote or a comment opener saves PATH in _state and
  * enters the matching start condition. Names are IDENTIFIER, or
  * IDENTIFIERPREFIX when two names are joined by a colon. */
198 <PATH>"/" |
199 <PATH>"[" |
200 <PATH>"]" |
201 <PATH>"=" |
202 <PATH>"(" |
203 <PATH>")" |
204 <PATH>"+" {return yytext[0];}
205 <PATH>".." {return DOUBLEDOT;}
206 <PATH>\n  |
207 <PATH>\r\n { yylloc->last_column = 0; return EOL;}
208 <PATH>[ \t]+ { return WHITESPACE;}
209 <PATH>";" |
210 <PATH>"{" {BEGIN INITIAL; return yytext[0];}
211 <PATH>"\"" {_state = YY_START; BEGIN DOUBLEQUOTES; str = yytext; column = yylloc->first_column; }
212 <PATH>"//" {_state = YY_START; BEGIN COMMENT2;}
213 <PATH>"/*" {_state = YY_START; BEGIN COMMENT1;}
214 <PATH>[A-Za-z_][-A-Za-z0-9_\.]* {return IDENTIFIER;}
215 <PATH>[A-Za-z_][-A-Za-z0-9_\.]*:[A-Za-z_][-A-Za-z0-9_\.]*   {return IDENTIFIERPREFIX;}
 /* In INITIAL: a date written as four, two and two digits separated by
  * hyphens is REVISION_DATE; a name starting with a letter or underscore is
  * IDENTIFIER, or IDENTIFIERPREFIX when two such names are joined by a colon.
  * These rules come after the keywords, so a keyword wins a tie in length. */
216 [0-9]{4}[-][0-9]{2}[-][0-9]{2} {return REVISION_DATE;}
217 [A-Za-z_][-A-Za-z0-9_\.]* {return IDENTIFIER;}
218 [A-Za-z_][-A-Za-z0-9_\.]*:[A-Za-z_][-A-Za-z0-9_\.]*   {return IDENTIFIERPREFIX;}
219 ([\x21\x23-\x26\x28-\x3a\x3c-\x7a\x7c\x7e-\x7f]|{U2}|{U3}|{U4})+ {
220     if (yytext[0] == '/' && yytext[1] == '/') {
221         _state = YY_START;
222         yyless(2);
223         BEGIN COMMENT2;
224     } else if (yytext[0] == '/' && yytext[1] == '*') {
225         _state = YY_START;
226         yyless(2);
227         BEGIN COMMENT1;
228     } else {
229         i = 0;
230         while (i < yyleng) {
231             if (!(yytext[i] & 0x80)) {
232                 /* one byte character */
233                 if (yytext[i] == '/') {
234                     if (yytext[i + 1] == '/') {
235                         yyless(i);
236                         return STRINGS;
237                     } else if (yytext[i + 1] == '*') {
238                         yyless(i);
239                         return STRINGS;
240                     }
241                 } else if (yytext[i] == '*' && yytext[i + 1] == '/') {
242                     if (!i) {
243                         yyless(1);
244                         return ERROR;
245                     } else {
246                         yyless(i);
247                         return STRINGS;
248                     }
249                 }
250                 ++i;
251             } else if (!(yytext[i] & 0x20)) {
252                 /* two bytes character */
253                 i += 2;
254             } else if (!(yytext[i] & 0x10)) {
255                 /* three bytes character */
256                 value = ((uint32_t)(yytext[i] & 0xf) << 12) | ((uint32_t)(yytext[i + 1] & 0x3f) << 6) | (yytext[i + 2] & 0x3f);
257                 if (((value & 0xf800) == 0xd800) ||
258                     (value >= 0xfdd0 && value <= 0xfdef) ||
259                     (value & 0xffe) == 0xffe) {
260                     /* exclude surrogate blocks %xD800-DFFF */
261                     /* exclude noncharacters %xFDD0-FDEF */
262                     /* exclude noncharacters %xFFFE-FFFF */
263                     LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
264                     yylloc->first_line = -1;
265                     return ERROR;
266                 }
267                 i += 3;
268             } else {
269                 /* four bytes character */
270                 value = ((uint32_t)(yytext[i] & 0x7) << 18) | ((uint32_t)(yytext[i + 1] & 0x3f) << 12) | ((uint32_t)(yytext[i + 2] & 0x3f) << 6) | (yytext[i + 3] & 0x3f);
271                 if ((value & 0xffe) == 0xffe) {
272                     /* exclude noncharacters %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
273                      * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
274                      * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
275                     LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
276                     yylloc->first_line = -1;
277                     return ERROR;
278                 }
279                 i += 4;
280             }
281         }
282         return STRINGS;
283     }
284 }
 /* In INITIAL a line end (LF or CRLF) resets the column and is returned as
  * EOL; a run of blanks and tabs is returned as WHITESPACE. */
285 \n |
286 \r\n { yylloc->last_column = 0; return EOL; }
287 [ \t]+ { return WHITESPACE;}
288 
289 <COMMENT1,COMMENT2,DOUBLEQUOTES,SINGLEQUOTES,INITIAL,PATH>[\x00-\xff] {
290     LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 leading byte 0x%02x", yytext[0]);
291     yylloc->first_line = -1;
292     return ERROR;
293 }
294 
295 %%
296