/**
 * @file yang.l
 * @author Pavol Vican
 * @brief YANG parser for libyang (flex grammar)
 *
 * Copyright (c) 2015 CESNET, z.s.p.o.
 *
 * This source code is licensed under BSD 3-Clause License (the "License").
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://opensource.org/licenses/BSD-3-Clause
 */

%option noyywrap nounput noinput reentrant bison-bridge bison-locations

%{
#include "parser_yang.h"
#include "parser_yang_bis.h"

/*
 * Runs before every rule action: the new token starts one column after the
 * previous one ended and spans yyleng columns. Rules that consume a newline
 * reset last_column to 0 themselves.
 */
#define YY_USER_ACTION yylloc->first_column = yylloc->last_column +1;\
                       yylloc->last_column = yylloc->first_column + yyleng - 1;
%}

/*
 * UTF-8 building blocks.
 *   U  - one continuation byte (0x80-0xbf)
 *   U2 - 2-byte sequence; lead bytes 0xc0/0xc1 (overlong forms) are excluded
 *   U3 - 3-byte sequence; excludes overlong forms (0xe0 needs 0xa0-0xbf) and
 *        the surrogate range (0xed is limited to 0x80-0x9f)
 *   U4 - 4-byte sequence; excludes overlong forms and anything above U+10FFFF
 * Noncharacters are still matched by these patterns and are rejected in the
 * rule actions below.
 */
U [\x80-\xbf]
U2 [\xc2-\xdf]{U}
U3 [\xe0][\xa0-\xbf]{U}|[\xe1-\xec]{U}{U}|[\xed][\x80-\x9f]{U}|[\xee-\xef]{U}{U}
U4 [\xf0][\x90-\xbf]{U}{U}|[\xf1-\xf3]{U}{U}{U}|[\xf4][\x80-\x8f]{U}{U}

%x COMMENT1
%x COMMENT2
%x PATH
%x DOUBLEQUOTES
%x SINGLEQUOTES

%%
    /* Locals of yylex(); re-initialised on every call. */
    int tab_count = 0;      /* tabs (literal or \t escape) seen in the current double-quoted string */
    int size_str = 0;       /* bytes accumulated between the quotes of the current string */
    int column = 0;         /* first_column of the opening quote */
    char *str = NULL;       /* points at the opening quote inside the scan buffer */
    int _state = YY_START;  /* start condition to restore when a comment or string ends */
    int i;
    uint32_t value;         /* decoded code point of a multi-byte UTF-8 sequence */


"/*" {_state = YY_START; BEGIN COMMENT1; }
<COMMENT1,COMMENT2>[\x00-\x09\x0B-\x7f]|{U2}|{U3}|{U4}
<COMMENT1>\n {yylloc->last_column = 0;}
<COMMENT1>"*/" {BEGIN _state; }
"//" {_state = YY_START; BEGIN COMMENT2;}
<COMMENT2>\n {BEGIN _state; yylloc->last_column = 0; return EOL; }
"anyxml" { return ANYXML_KEYWORD; }
"argument" { return ARGUMENT_KEYWORD; }
"augment" { BEGIN PATH; return AUGMENT_KEYWORD; }
"base" { return BASE_KEYWORD; }
"belongs-to" { return BELONGS_TO_KEYWORD; }
"bit" { return BIT_KEYWORD; }
"case" { return CASE_KEYWORD; }
"choice" { return CHOICE_KEYWORD; }
"config" { return CONFIG_KEYWORD; }
"contact" { return CONTACT_KEYWORD; }
"container" { return CONTAINER_KEYWORD; }
"default" { return DEFAULT_KEYWORD; }
"description" { return DESCRIPTION_KEYWORD; }
"enum" { return ENUM_KEYWORD; }
"error-app-tag" { return ERROR_APP_TAG_KEYWORD; }
"error-message" { return ERROR_MESSAGE_KEYWORD; }
"extension" { return EXTENSION_KEYWORD; }
"deviation" { BEGIN PATH; return DEVIATION_KEYWORD; }
"deviate" { return DEVIATE_KEYWORD; }
"feature" { return FEATURE_KEYWORD; }
"fraction-digits" { return FRACTION_DIGITS_KEYWORD; }
"grouping" { return GROUPING_KEYWORD; }
"identity" { return IDENTITY_KEYWORD; }
"if-feature" { return IF_FEATURE_KEYWORD; }
"import" { return IMPORT_KEYWORD; }
"include" { return INCLUDE_KEYWORD; }
"input" { return INPUT_KEYWORD; }
"key" { return KEY_KEYWORD; }
"leaf" { return LEAF_KEYWORD; }
"leaf-list" { return LEAF_LIST_KEYWORD; }
"length" { return LENGTH_KEYWORD; }
"list" { return LIST_KEYWORD; }
"mandatory" { return MANDATORY_KEYWORD; }
"max-elements" { return MAX_ELEMENTS_KEYWORD; }
"min-elements" { return MIN_ELEMENTS_KEYWORD; }
"module" { return MODULE_KEYWORD; }
"must" { return MUST_KEYWORD; }
"namespace" { return NAMESPACE_KEYWORD; }
"notification" { return NOTIFICATION_KEYWORD; }
"ordered-by" { return ORDERED_BY_KEYWORD; }
"organization" { return ORGANIZATION_KEYWORD; }
"output" { return OUTPUT_KEYWORD; }
"path" { BEGIN PATH; return PATH_KEYWORD; }
"pattern" { return PATTERN_KEYWORD; }
"position" { return POSITION_KEYWORD; }
"prefix" { return PREFIX_KEYWORD; }
"presence" { return PRESENCE_KEYWORD; }
"range" { return RANGE_KEYWORD; }
"reference" { return REFERENCE_KEYWORD; }
"refine" { BEGIN PATH; return REFINE_KEYWORD; }
"require-instance" { return REQUIRE_INSTANCE_KEYWORD; }
"revision" { return REVISION_KEYWORD; }
"revision-date" { return REVISION_DATE_KEYWORD; }
"rpc" { return RPC_KEYWORD; }
"status" { return STATUS_KEYWORD; }
    /* NOTE(review): a non-zero yylloc->last_line selects the EXT token; who sets it is not visible here - confirm. */
"submodule" { return (yylloc->last_line) ? SUBMODULE_EXT_KEYWORD : SUBMODULE_KEYWORD; }
"type" { return TYPE_KEYWORD; }
"typedef" { return TYPEDEF_KEYWORD; }
"unique" { BEGIN PATH; return UNIQUE_KEYWORD; }
"units" { return UNITS_KEYWORD; }
"uses" { return USES_KEYWORD; }
"value" { return VALUE_KEYWORD; }
"when" { return WHEN_KEYWORD; }
"yang-version" { return YANG_VERSION_KEYWORD; }
"yin-element" { return YIN_ELEMENT_KEYWORD; }
"add" { return ADD_KEYWORD; }
"current" { return CURRENT_KEYWORD; }
<PATH>"current" { return CURRENT_KEYWORD; }
"delete" { return DELETE_KEYWORD; }
"deprecated" { return DEPRECATED_KEYWORD; }
"false" { return FALSE_KEYWORD; }
"not-supported" { return NOT_SUPPORTED_KEYWORD; }
"obsolete" { return OBSOLETE_KEYWORD; }
"replace" { return REPLACE_KEYWORD; }
"system" { return SYSTEM_KEYWORD; }
"true" { return TRUE_KEYWORD; }
"unbounded" { return UNBOUNDED_KEYWORD; }
"user" { return USER_KEYWORD; }
"action" {return ACTION_KEYWORD; }
"modifier" {return MODIFIER_KEYWORD; }
"anydata" {return ANYDATA_KEYWORD; }
"{" |
"}" |
";" |
"+" { return yytext[0];} /* unsolved problem with concatenate string '+' */
    /* Quoted strings: remember where the opening quote is, count bytes until the closing quote. */
"\"" {_state = YY_START; BEGIN DOUBLEQUOTES; str = yytext; column = yylloc->first_column; }
<DOUBLEQUOTES>\t|\\t { tab_count++; size_str += yyleng; }
<DOUBLEQUOTES>[\x0D\x20-\x21\x23-\x5b\x5d-\x7f]|{U2} { size_str += yyleng; }
<DOUBLEQUOTES>\\([\x09\x0A\x0D\x20-\x7f]|{U2}|{U3}|{U4}) { size_str += yyleng; }
<DOUBLEQUOTES,SINGLEQUOTES>\n {yylloc->last_column = 0; size_str++; }
<DOUBLEQUOTES,SINGLEQUOTES>{U3} {
                 /* decode the 3-byte sequence into its code point */
                 value = ((uint32_t)(yytext[0] & 0xf) << 12) | ((uint32_t)(yytext[1] & 0x3f) << 6) | (yytext[2] & 0x3f);
                 /* the mask must keep all low 16 bits; a 12-bit mask would also reject valid code points such as U+1FFE or U+AFFE */
                 if (((value & 0xf800) == 0xd800) ||
                     (value >= 0xfdd0 && value <= 0xfdef) ||
                     (value & 0xfffe) == 0xfffe) {
                     /* exclude surrogate blocks %xD800-DFFF */
                     /* exclude noncharacters %xFDD0-FDEF */
                     /* exclude noncharacters %xFFFE-FFFF */
                     LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
                     yylloc->first_line = -1;
                     return ERROR;
                 }
                 size_str += 3;
             }
<DOUBLEQUOTES,SINGLEQUOTES>{U4} {
                 /* decode the 4-byte sequence into its code point */
                 value = ((uint32_t)(yytext[0] & 0x7) << 18) | ((uint32_t)(yytext[1] & 0x3f) << 12) | ((uint32_t)(yytext[2] & 0x3f) << 6) | (yytext[3] & 0x3f);
                 /* last two code points of every plane; the mask must keep all low 16 bits */
                 if ((value & 0xfffe) == 0xfffe) {
                     /* exclude noncharacters %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
                      * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
                      * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
                     LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
                     yylloc->first_line = -1;
                     return ERROR;
                 }
                 size_str += 4;
             }
<DOUBLEQUOTES>\" { yylval->i = tab_count;
                   BEGIN _state;
                   /* hand the parser the whole string, both quotes included */
                   yytext = str;
                   yyleng = size_str + 2;
                   yylloc->first_column = column;
                   return STRING;
                 }
<DOUBLEQUOTES><<EOF>> { LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Unterminated double-quoted string.");
                        yylloc->first_line = -1;
                        return ERROR;
                      }
<INITIAL,PATH>"'" { _state = YY_START;
                    BEGIN SINGLEQUOTES;
                    str = yytext;
                    column = yylloc->first_column;
                  }
<SINGLEQUOTES>[\x09\x0D\x20-\x26\x28-\x7f]|{U2} { size_str += yyleng; }
<SINGLEQUOTES>"'" { BEGIN _state;
                    /* hand the parser the whole string, both quotes included */
                    yytext = str;
                    yyleng = size_str + 2;
                    yylloc->first_column = column;
                    return STRING;
                  }
<SINGLEQUOTES><<EOF>> { LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Unterminated single-quoted string.");
                        yylloc->first_line = -1;
                        return ERROR;
                      }
"0" {return ZERO;}
"-"[1-9][0-9]* {return INTEGER;}
[1-9][0-9]* {return NON_NEGATIVE_INTEGER;}
    /* PATH start condition: entered after augment, deviation, path, refine and unique; left on ; or an opening brace. */
<PATH>"/" |
<PATH>"[" |
<PATH>"]" |
<PATH>"=" |
<PATH>"(" |
<PATH>")" |
<PATH>"+" {return yytext[0];}
<PATH>".." {return DOUBLEDOT;}
<PATH>\n |
<PATH>\r\n { yylloc->last_column = 0; return EOL;}
<PATH>[ \t]+ { return WHITESPACE;}
<PATH>";" |
<PATH>"{" {BEGIN INITIAL; return yytext[0];}
<PATH>"\"" {_state = YY_START; BEGIN DOUBLEQUOTES; str = yytext; column = yylloc->first_column; }
<PATH>"//" {_state = YY_START; BEGIN COMMENT2;}
<PATH>"/*" {_state = YY_START; BEGIN COMMENT1;}
<PATH>[A-Za-z_][-A-Za-z0-9_\.]* {return IDENTIFIER;}
<PATH>[A-Za-z_][-A-Za-z0-9_\.]*:[A-Za-z_][-A-Za-z0-9_\.]* {return IDENTIFIERPREFIX;}
[0-9]{4}[-][0-9]{2}[-][0-9]{2} {return REVISION_DATE;}
[A-Za-z_][-A-Za-z0-9_\.]* {return IDENTIFIER;}
[A-Za-z_][-A-Za-z0-9_\.]*:[A-Za-z_][-A-Za-z0-9_\.]* {return IDENTIFIERPREFIX;}
    /* Unquoted string: a greedy run of non-blank bytes, then trimmed where a comment starts. */
([\x21\x23-\x26\x28-\x3a\x3c-\x7a\x7c\x7e-\x7f]|{U2}|{U3}|{U4})+ {
                 if (yytext[0] == '/' && yytext[1] == '/') {
                     /* the run starts with a line comment: keep only the opener and switch state */
                     _state = YY_START;
                     yyless(2);
                     BEGIN COMMENT2;
                 } else if (yytext[0] == '/' && yytext[1] == '*') {
                     /* the run starts with a block comment: keep only the opener and switch state */
                     _state = YY_START;
                     yyless(2);
                     BEGIN COMMENT1;
                 } else {
                     /* walk the run character by character; yytext is NUL-terminated, so i + 1 is readable */
                     i = 0;
                     while (i < yyleng) {
                         if (!(yytext[i] & 0x80)) {
                             /* one byte character */
                             if (yytext[i] == '/') {
                                 /* a comment opener ends the string; the rest is pushed back for rescanning */
                                 if (yytext[i + 1] == '/') {
                                     yyless(i);
                                     return STRINGS;
                                 } else if (yytext[i + 1] == '*') {
                                     yyless(i);
                                     return STRINGS;
                                 }
                             } else if (yytext[i] == '*' && yytext[i + 1] == '/') {
                                 /* comment terminator without an opener: an error when it starts the run */
                                 if (!i) {
                                     yyless(1);
                                     return ERROR;
                                 } else {
                                     yyless(i);
                                     return STRINGS;
                                 }
                             }
                             ++i;
                         } else if (!(yytext[i] & 0x20)) {
                             /* two bytes character */
                             i += 2;
                         } else if (!(yytext[i] & 0x10)) {
                             /* three bytes character */
                             value = ((uint32_t)(yytext[i] & 0xf) << 12) | ((uint32_t)(yytext[i + 1] & 0x3f) << 6) | (yytext[i + 2] & 0x3f);
                             /* the mask must keep all low 16 bits; a 12-bit mask would also reject valid code points such as U+1FFE or U+AFFE */
                             if (((value & 0xf800) == 0xd800) ||
                                 (value >= 0xfdd0 && value <= 0xfdef) ||
                                 (value & 0xfffe) == 0xfffe) {
                                 /* exclude surrogate blocks %xD800-DFFF */
                                 /* exclude noncharacters %xFDD0-FDEF */
                                 /* exclude noncharacters %xFFFE-FFFF */
                                 LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
                                 yylloc->first_line = -1;
                                 return ERROR;
                             }
                             i += 3;
                         } else {
                             /* four bytes character */
                             value = ((uint32_t)(yytext[i] & 0x7) << 18) | ((uint32_t)(yytext[i + 1] & 0x3f) << 12) | ((uint32_t)(yytext[i + 2] & 0x3f) << 6) | (yytext[i + 3] & 0x3f);
                             /* last two code points of every plane; the mask must keep all low 16 bits */
                             if ((value & 0xfffe) == 0xfffe) {
                                 /* exclude noncharacters %x1FFFE-1FFFF, %x2FFFE-2FFFF, %x3FFFE-3FFFF, %x4FFFE-4FFFF,
                                  * %x5FFFE-5FFFF, %x6FFFE-6FFFF, %x7FFFE-7FFFF, %x8FFFE-8FFFF, %x9FFFE-9FFFF, %xAFFFE-AFFFF,
                                  * %xBFFFE-BFFFF, %xCFFFE-CFFFF, %xDFFFE-DFFFF, %xEFFFE-EFFFF, %xFFFFE-FFFFF, %x10FFFE-10FFFF */
                                 LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 value 0x%08x", value);
                                 yylloc->first_line = -1;
                                 return ERROR;
                             }
                             i += 4;
                         }
                     }
                     return STRINGS;
                 }
             }
\n |
\r\n { yylloc->last_column = 0; return EOL; }
[ \t]+ { return WHITESPACE;}

    /* Catch-all: any byte that no rule above accepted in the current start condition. */
<COMMENT1,COMMENT2,DOUBLEQUOTES,SINGLEQUOTES,INITIAL,PATH>[\x00-\xff] {
                 /* cast so bytes of 0x80 and above are not sign-extended when promoted for the format */
                 LOGVAL(yyget_extra(yyscanner), LYE_SPEC, LY_VLOG_NONE, NULL, "Invalid UTF-8 leading byte 0x%02x", (unsigned char)yytext[0]);
                 yylloc->first_line = -1;
                 return ERROR;
             }

%%